1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 *===-----------------------------------------------------------------------===
/* Diagnostic telling users to include <immintrin.h> rather than this header.
 * NOTE(review): the preprocessor conditional that should guard this #error is
 * not visible here -- confirm it is present. */
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
/* Include guard for this header. */
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
/* Attribute set shared by the intrinsics declared with __DEFAULT_FN_ATTRS:
 * __always_inline__, __nodebug__, and the "avx512vl" target feature. */
31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
33 /* Doesn't require avx512vl, used in avx512dqintrin.h */
/* Returns a __m128i built from the __v2di literal { 0LL, 0LL } (both elements
 * zero). Declared with the "avx512f" target attribute instead of
 * __DEFAULT_FN_ATTRS, consistent with the note above. */
34 static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
35 _mm_setzero_di(void) {
36 return (__m128i)(__v2di){ 0LL, 0LL};
/* cmpeq intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call __builtin_ia32_pcmpeqd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask with predicate
 *    immediate 0 (presumably the "equal" predicate, given the names -- confirm).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
41 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42 _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
43 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
47 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48 _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
49 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
53 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54 _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
55 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
59 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60 _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
61 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
65 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66 _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
67 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
71 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
72 _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
73 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
77 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
78 _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
79 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
83 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
84 _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
85 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
/* cmpeq intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call __builtin_ia32_pcmpeqq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask with predicate
 *    immediate 0 (presumably the "equal" predicate, given the names -- confirm).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
89 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
90 _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
91 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
95 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
96 _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
97 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
101 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
102 _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
103 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
107 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
108 _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
109 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
113 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
114 _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
115 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
119 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
120 _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
121 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
125 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
126 _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
127 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
131 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
132 _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
133 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
/* cmpge intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call __builtin_ia32_cmpd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask.
 *  - Every call passes predicate immediate 5 (presumably "not less than",
 *    i.e. >=, given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
138 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
139 _mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
140 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
144 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
145 _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
146 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
150 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
151 _mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
152 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
156 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
157 _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
158 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
162 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
163 _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
164 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
168 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
169 _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
170 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
174 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
175 _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
176 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
181 _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
182 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
/* cmpge intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call __builtin_ia32_cmpq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask.
 *  - Every call passes predicate immediate 5 (presumably "not less than",
 *    i.e. >=, given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
187 _mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
188 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
193 _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
194 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
198 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
199 _mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
200 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
204 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
205 _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
206 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
210 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
211 _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
212 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
216 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
217 _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
218 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
222 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
223 _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
224 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
228 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
229 _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
230 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
/* cmpgt intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call the dedicated __builtin_ia32_pcmpgtd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask with predicate
 *    immediate 6 (presumably "not less or equal", i.e. >, given the names
 *    -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
235 _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
236 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
241 _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
242 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
247 _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
248 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
252 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
253 _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
254 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
258 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
259 _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
260 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
264 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
265 _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
266 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
270 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
271 _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
272 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
276 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
277 _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
278 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
/* cmpgt intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call the dedicated __builtin_ia32_pcmpgtq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask with predicate
 *    immediate 6 (presumably "not less or equal", i.e. >, given the names
 *    -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
282 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
283 _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
284 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
288 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
289 _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
290 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
294 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
295 _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
296 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
300 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
301 _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
302 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
306 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
307 _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
308 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
312 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
313 _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
314 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
318 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
319 _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
320 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
324 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
325 _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
326 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
/* cmple intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call __builtin_ia32_cmpd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask.
 *  - Every call passes predicate immediate 2 (presumably "less or equal",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
330 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
331 _mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
332 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
336 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
337 _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
338 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
342 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
343 _mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
344 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
348 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
349 _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
350 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
354 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
355 _mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
356 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
360 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
361 _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
362 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
366 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
367 _mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
368 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
372 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
373 _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
374 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
/* cmple intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call __builtin_ia32_cmpq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask.
 *  - Every call passes predicate immediate 2 (presumably "less or equal",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
378 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
379 _mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
380 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
384 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
385 _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
386 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
390 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
391 _mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
392 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
396 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
397 _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
398 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
402 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
403 _mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
404 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
408 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
409 _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
410 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
414 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
415 _mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
416 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
420 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
421 _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
422 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
/* cmplt intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call __builtin_ia32_cmpd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask.
 *  - Every call passes predicate immediate 1 (presumably "less than",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
426 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
427 _mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
428 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
432 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
433 _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
434 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
438 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
439 _mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
440 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
444 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
445 _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
446 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
450 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
451 _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
452 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
456 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
457 _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
458 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
462 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
463 _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
464 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
468 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
469 _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
470 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
/* cmplt intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call __builtin_ia32_cmpq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask.
 *  - Every call passes predicate immediate 1 (presumably "less than",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
474 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
475 _mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
476 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
480 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
481 _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
482 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
486 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
487 _mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
488 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
492 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
493 _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
494 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
499 _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
500 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
504 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
505 _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
506 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
510 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
511 _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
512 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
516 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
517 _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
518 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
/* cmpneq intrinsics on __v4si (128-bit) and __v8si (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi32 forms call __builtin_ia32_cmpd{128,256}_mask.
 *  - epu32 forms call __builtin_ia32_ucmpd{128,256}_mask.
 *  - Every call passes predicate immediate 4 (presumably "not equal",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
522 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
523 _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
524 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
529 _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
530 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
534 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
535 _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
536 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
540 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
541 _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
542 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
546 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
547 _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
548 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
552 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
553 _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
554 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
559 _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
560 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
564 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
565 _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
566 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
/* cmpneq intrinsics on __v2di (128-bit) and __v4di (256-bit) views of the
 * operands; each returns the builtin's result cast to __mmask8.
 *  - epi64 forms call __builtin_ia32_cmpq{128,256}_mask.
 *  - epu64 forms call __builtin_ia32_ucmpq{128,256}_mask.
 *  - Every call passes predicate immediate 4 (presumably "not equal",
 *    given the names -- confirm against the predicate encoding).
 *  - _mask_ forms take an additional __mmask8 __u.
 * NOTE(review): the final builtin argument is not visible on these lines;
 * presumably an all-ones mask for the plain forms and __u for the _mask_
 * forms -- confirm. */
570 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
571 _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
572 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
576 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
577 _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
578 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
582 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
583 _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
584 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
589 _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
590 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
594 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
595 _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
596 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
600 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
601 _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
602 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
606 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
607 _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
608 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
612 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
613 _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
614 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
/* Masked 256-bit integer adds.
 * Each function computes the unmasked sum (_mm256_add_epi32 or
 * _mm256_add_epi64 of __A and __B) and passes it, with __U cast to __mmask8,
 * to __builtin_ia32_selectd_256 / __builtin_ia32_selectq_256.
 *  - maskz forms: the remaining select operand is _mm256_setzero_si256().
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
618 static __inline__ __m256i __DEFAULT_FN_ATTRS
619 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
621 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
622 (__v8si)_mm256_add_epi32(__A, __B),
626 static __inline__ __m256i __DEFAULT_FN_ATTRS
627 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
629 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
630 (__v8si)_mm256_add_epi32(__A, __B),
631 (__v8si)_mm256_setzero_si256());
634 static __inline__ __m256i __DEFAULT_FN_ATTRS
635 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
637 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
638 (__v4di)_mm256_add_epi64(__A, __B),
642 static __inline__ __m256i __DEFAULT_FN_ATTRS
643 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
645 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
646 (__v4di)_mm256_add_epi64(__A, __B),
647 (__v4di)_mm256_setzero_si256());
/* Masked 256-bit integer subtracts.
 * Each function computes the unmasked difference (_mm256_sub_epi32 or
 * _mm256_sub_epi64 of __A and __B) and passes it, with __U cast to __mmask8,
 * to __builtin_ia32_selectd_256 / __builtin_ia32_selectq_256.
 *  - maskz forms: the remaining select operand is _mm256_setzero_si256().
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
650 static __inline__ __m256i __DEFAULT_FN_ATTRS
651 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
653 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
654 (__v8si)_mm256_sub_epi32(__A, __B),
658 static __inline__ __m256i __DEFAULT_FN_ATTRS
659 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
661 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
662 (__v8si)_mm256_sub_epi32(__A, __B),
663 (__v8si)_mm256_setzero_si256());
666 static __inline__ __m256i __DEFAULT_FN_ATTRS
667 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
669 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
670 (__v4di)_mm256_sub_epi64(__A, __B),
674 static __inline__ __m256i __DEFAULT_FN_ATTRS
675 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
677 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
678 (__v4di)_mm256_sub_epi64(__A, __B),
679 (__v4di)_mm256_setzero_si256());
/* Masked 128-bit integer adds.
 * Each function computes the unmasked sum (_mm_add_epi32 or _mm_add_epi64 of
 * __A and __B) and passes it, with __U cast to __mmask8, to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectq_128.
 *  - maskz forms: the remaining select operand is _mm_setzero_si128().
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
682 static __inline__ __m128i __DEFAULT_FN_ATTRS
683 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
685 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
686 (__v4si)_mm_add_epi32(__A, __B),
690 static __inline__ __m128i __DEFAULT_FN_ATTRS
691 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
693 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
694 (__v4si)_mm_add_epi32(__A, __B),
695 (__v4si)_mm_setzero_si128());
698 static __inline__ __m128i __DEFAULT_FN_ATTRS
699 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
701 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
702 (__v2di)_mm_add_epi64(__A, __B),
706 static __inline__ __m128i __DEFAULT_FN_ATTRS
707 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
709 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
710 (__v2di)_mm_add_epi64(__A, __B),
711 (__v2di)_mm_setzero_si128());
/* Masked 128-bit integer subtracts.
 * Each function computes the unmasked difference (_mm_sub_epi32 or
 * _mm_sub_epi64 of __A and __B) and passes it, with __U cast to __mmask8, to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectq_128.
 *  - maskz forms: the remaining select operand is _mm_setzero_si128().
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
714 static __inline__ __m128i __DEFAULT_FN_ATTRS
715 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
717 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
718 (__v4si)_mm_sub_epi32(__A, __B),
722 static __inline__ __m128i __DEFAULT_FN_ATTRS
723 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
725 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
726 (__v4si)_mm_sub_epi32(__A, __B),
727 (__v4si)_mm_setzero_si128());
730 static __inline__ __m128i __DEFAULT_FN_ATTRS
731 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
733 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
734 (__v2di)_mm_sub_epi64(__A, __B),
738 static __inline__ __m128i __DEFAULT_FN_ATTRS
739 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
741 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
742 (__v2di)_mm_sub_epi64(__A, __B),
743 (__v2di)_mm_setzero_si128());
/* Masked forms of _mm256_mul_epi32 / _mm_mul_epi32.
 * The unmasked product of __X and __Y is cast to __v4di / __v2di and handed,
 * with __M cast to __mmask8, to the "q" select builtins
 * (__builtin_ia32_selectq_256 / __builtin_ia32_selectq_128) -- note these are
 * not the "d" selects, even though the intrinsic names say epi32.
 *  - maskz forms: the remaining select operand is the zero vector
 *    (_mm256_setzero_si256() / _mm_setzero_si128()).
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
746 static __inline__ __m256i __DEFAULT_FN_ATTRS
747 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
750 (__v4di)_mm256_mul_epi32(__X, __Y),
754 static __inline__ __m256i __DEFAULT_FN_ATTRS
755 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
757 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
758 (__v4di)_mm256_mul_epi32(__X, __Y),
759 (__v4di)_mm256_setzero_si256());
762 static __inline__ __m128i __DEFAULT_FN_ATTRS
763 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
765 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
766 (__v2di)_mm_mul_epi32(__X, __Y),
770 static __inline__ __m128i __DEFAULT_FN_ATTRS
771 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
773 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
774 (__v2di)_mm_mul_epi32(__X, __Y),
775 (__v2di)_mm_setzero_si128());
/* Masked forms of _mm256_mul_epu32 / _mm_mul_epu32.
 * The unmasked product of __X and __Y is cast to __v4di / __v2di and handed,
 * with __M cast to __mmask8, to the "q" select builtins
 * (__builtin_ia32_selectq_256 / __builtin_ia32_selectq_128) -- note these are
 * not the "d" selects, even though the intrinsic names say epu32.
 *  - maskz forms: the remaining select operand is the zero vector
 *    (_mm256_setzero_si256() / _mm_setzero_si128()).
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
778 static __inline__ __m256i __DEFAULT_FN_ATTRS
779 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
781 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
782 (__v4di)_mm256_mul_epu32(__X, __Y),
786 static __inline__ __m256i __DEFAULT_FN_ATTRS
787 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
789 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
790 (__v4di)_mm256_mul_epu32(__X, __Y),
791 (__v4di)_mm256_setzero_si256());
794 static __inline__ __m128i __DEFAULT_FN_ATTRS
795 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
797 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
798 (__v2di)_mm_mul_epu32(__X, __Y),
802 static __inline__ __m128i __DEFAULT_FN_ATTRS
803 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
805 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
806 (__v2di)_mm_mul_epu32(__X, __Y),
807 (__v2di)_mm_setzero_si128());
/* Masked forms of _mm256_mullo_epi32 / _mm_mullo_epi32 (maskz form listed
 * before the mask form for each width).
 * The unmasked result for __A and __B is cast to __v8si / __v4si and handed,
 * with __M cast to __mmask8, to __builtin_ia32_selectd_256 /
 * __builtin_ia32_selectd_128.
 *  - maskz forms: the remaining select operand is the zero vector
 *    (_mm256_setzero_si256() / _mm_setzero_si128()).
 *  - mask forms: NOTE(review): the remaining select operand is not visible
 *    on these lines; presumably __W -- confirm. */
810 static __inline__ __m256i __DEFAULT_FN_ATTRS
811 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
813 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
814 (__v8si)_mm256_mullo_epi32(__A, __B),
815 (__v8si)_mm256_setzero_si256());
818 static __inline__ __m256i __DEFAULT_FN_ATTRS
819 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
821 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
822 (__v8si)_mm256_mullo_epi32(__A, __B),
826 static __inline__ __m128i __DEFAULT_FN_ATTRS
827 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
829 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
830 (__v4si)_mm_mullo_epi32(__A, __B),
831 (__v4si)_mm_setzero_si128());
834 static __inline__ __m128i __DEFAULT_FN_ATTRS
835 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
837 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
838 (__v4si)_mm_mullo_epi32(__A, __B),
/* Masked bitwise AND, selected via the "d" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_and_si256 / _mm_and_si128 result, cast to __v8si / __v4si, to
 *    __builtin_ia32_selectd_256 / __builtin_ia32_selectd_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W. */
842 static __inline__ __m256i __DEFAULT_FN_ATTRS
843 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
845 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
846 (__v8si)_mm256_and_si256(__A, __B),
850 static __inline__ __m256i __DEFAULT_FN_ATTRS
851 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
853 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
856 static __inline__ __m128i __DEFAULT_FN_ATTRS
857 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
859 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
860 (__v4si)_mm_and_si128(__A, __B),
864 static __inline__ __m128i __DEFAULT_FN_ATTRS
865 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
867 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
/* Masked ANDNOT, selected via the "d" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_andnot_si256 / _mm_andnot_si128 result, cast to __v8si / __v4si,
 *    to __builtin_ia32_selectd_256 / __builtin_ia32_selectd_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W.
 *    NOTE(review): for the 256-bit maskz form only the first call argument
 *    is visible; the rest are presumably __U, __A, __B -- confirm. */
870 static __inline__ __m256i __DEFAULT_FN_ATTRS
871 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
873 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
874 (__v8si)_mm256_andnot_si256(__A, __B),
878 static __inline__ __m256i __DEFAULT_FN_ATTRS
879 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
881 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
885 static __inline__ __m128i __DEFAULT_FN_ATTRS
886 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
888 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
889 (__v4si)_mm_andnot_si128(__A, __B),
893 static __inline__ __m128i __DEFAULT_FN_ATTRS
894 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
896 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
/* Masked bitwise OR, selected via the "d" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_or_si256 / _mm_or_si128 result, cast to __v8si / __v4si, to
 *    __builtin_ia32_selectd_256 / __builtin_ia32_selectd_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W. */
899 static __inline__ __m256i __DEFAULT_FN_ATTRS
900 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
902 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
903 (__v8si)_mm256_or_si256(__A, __B),
907 static __inline__ __m256i __DEFAULT_FN_ATTRS
908 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
910 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
913 static __inline__ __m128i __DEFAULT_FN_ATTRS
914 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
916 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
917 (__v4si)_mm_or_si128(__A, __B),
921 static __inline__ __m128i __DEFAULT_FN_ATTRS
922 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
924 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
/* Masked bitwise XOR, selected via the "d" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_xor_si256 / _mm_xor_si128 result, cast to __v8si / __v4si, to
 *    __builtin_ia32_selectd_256 / __builtin_ia32_selectd_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W.
 * NOTE(review): the parameter list of _mm_mask_xor_epi32 is cut off after
 * __A on the visible line; the body uses __B, so a trailing __m128i __B
 * parameter is presumably declared -- confirm. */
927 static __inline__ __m256i __DEFAULT_FN_ATTRS
928 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
930 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
931 (__v8si)_mm256_xor_si256(__A, __B),
935 static __inline__ __m256i __DEFAULT_FN_ATTRS
936 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
938 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
941 static __inline__ __m128i __DEFAULT_FN_ATTRS
942 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
945 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
946 (__v4si)_mm_xor_si128(__A, __B),
950 static __inline__ __m128i __DEFAULT_FN_ATTRS
951 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
953 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
/* Masked bitwise AND, selected via the "q" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_and_si256 / _mm_and_si128 result, cast to __v4di / __v2di, to
 *    __builtin_ia32_selectq_256 / __builtin_ia32_selectq_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W. */
956 static __inline__ __m256i __DEFAULT_FN_ATTRS
957 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
960 (__v4di)_mm256_and_si256(__A, __B),
964 static __inline__ __m256i __DEFAULT_FN_ATTRS
965 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
967 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
970 static __inline__ __m128i __DEFAULT_FN_ATTRS
971 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
973 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
974 (__v2di)_mm_and_si128(__A, __B),
978 static __inline__ __m128i __DEFAULT_FN_ATTRS
979 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
981 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Masked ANDNOT, selected via the "q" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_andnot_si256 / _mm_andnot_si128 result, cast to __v4di / __v2di,
 *    to __builtin_ia32_selectq_256 / __builtin_ia32_selectq_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W.
 *    NOTE(review): for the 256-bit maskz form only the first call argument
 *    is visible; the rest are presumably __U, __A, __B -- confirm. */
984 static __inline__ __m256i __DEFAULT_FN_ATTRS
985 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
987 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
988 (__v4di)_mm256_andnot_si256(__A, __B),
992 static __inline__ __m256i __DEFAULT_FN_ATTRS
993 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
995 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
999 static __inline__ __m128i __DEFAULT_FN_ATTRS
1000 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1003 (__v2di)_mm_andnot_si128(__A, __B),
1007 static __inline__ __m128i __DEFAULT_FN_ATTRS
1008 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1010 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Masked bitwise OR, selected via the "q" select builtins.
 *  - mask forms: pass __U (cast to __mmask8) and the full-width
 *    _mm256_or_si256 / _mm_or_si128 result, cast to __v4di / __v2di, to
 *    __builtin_ia32_selectq_256 / __builtin_ia32_selectq_128.
 *    NOTE(review): the remaining select operand is not visible on these
 *    lines; presumably __W -- confirm.
 *  - maskz forms: delegate to the mask form with a zero vector as __W. */
1013 static __inline__ __m256i __DEFAULT_FN_ATTRS
1014 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1016 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1017 (__v4di)_mm256_or_si256(__A, __B),
1021 static __inline__ __m256i __DEFAULT_FN_ATTRS
1022 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1024 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
1027 static __inline__ __m128i __DEFAULT_FN_ATTRS
1028 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1030 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1031 (__v2di)_mm_or_si128(__A, __B),
1035 static __inline__ __m128i __DEFAULT_FN_ATTRS
1036 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1038 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Masked XOR, epi64 forms (256-bit and 128-bit).
 *  - The mask forms pass __U and the result of _mm256_xor_si256 /
 *    _mm_xor_si128 on (__A, __B) to the selectq builtin; the builtin's last
 *    operand is on a line absent from this listing (presumably __W -- confirm).
 *    The parameter list of _mm_mask_xor_epi64 also continues on a line that is
 *    not shown here.
 *  - The maskz forms call the matching mask form with an all-zero vector in
 *    the __W position. */
1041 static __inline__ __m256i __DEFAULT_FN_ATTRS
1042 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1045 (__v4di)_mm256_xor_si256(__A, __B),
1049 static __inline__ __m256i __DEFAULT_FN_ATTRS
1050 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1052 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
1055 static __inline__ __m128i __DEFAULT_FN_ATTRS
1056 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
1059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1060 (__v2di)_mm_xor_si128(__A, __B),
1064 static __inline__ __m128i __DEFAULT_FN_ATTRS
1065 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1067 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Generic compare-to-mask macros, epi32 (cmpd builtins) and epu32 (ucmpd
 * builtins), 128-bit and 256-bit.
 * Each macro casts a and b to __m128i/__m256i and then to __v4si/__v8si, casts
 * the predicate p to int, and casts the builtin result to __mmask8.
 * The builtin's final argument is on a line absent from this listing; for the
 * _mask_ forms it presumably carries m -- confirm against the full file. */
1070 #define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
1071 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
1072 (__v4si)(__m128i)(b), (int)(p), \
1075 #define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
1076 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
1077 (__v4si)(__m128i)(b), (int)(p), \
1080 #define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
1081 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
1082 (__v4si)(__m128i)(b), (int)(p), \
1085 #define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
1086 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
1087 (__v4si)(__m128i)(b), (int)(p), \
1090 #define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
1091 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
1092 (__v8si)(__m256i)(b), (int)(p), \
1095 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
1096 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
1097 (__v8si)(__m256i)(b), (int)(p), \
1100 #define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
1101 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
1102 (__v8si)(__m256i)(b), (int)(p), \
1105 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
1106 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
1107 (__v8si)(__m256i)(b), (int)(p), \
/* Generic compare-to-mask macros, epi64 (cmpq builtins) and epu64 (ucmpq
 * builtins), 128-bit and 256-bit.
 * Each macro casts a and b to __m128i/__m256i and then to __v2di/__v4di, casts
 * the predicate p to int, and casts the builtin result to __mmask8.
 * The builtin's final argument is on a line absent from this listing; for the
 * _mask_ forms it presumably carries m -- confirm against the full file. */
1110 #define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
1111 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
1112 (__v2di)(__m128i)(b), (int)(p), \
1115 #define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
1116 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
1117 (__v2di)(__m128i)(b), (int)(p), \
1120 #define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
1121 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
1122 (__v2di)(__m128i)(b), (int)(p), \
1125 #define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
1126 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
1127 (__v2di)(__m128i)(b), (int)(p), \
1130 #define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
1131 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
1132 (__v4di)(__m256i)(b), (int)(p), \
1135 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
1136 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
1137 (__v4di)(__m256i)(b), (int)(p), \
1140 #define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
1141 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
1142 (__v4di)(__m256i)(b), (int)(p), \
1145 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
1146 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
1147 (__v4di)(__m256i)(b), (int)(p), \
/* Generic floating-point compare-to-mask macros: ps (cmpps builtins) and pd
 * (cmppd builtins), 256-bit first and then 128-bit.
 * Each macro casts a and b to the matching __m256/__m256d/__m128/__m128d type
 * and then to __v8sf/__v4df/__v4sf/__v2df, casts the predicate p to int, and
 * casts the builtin result to __mmask8.
 * The builtin's final argument is on a line absent from this listing; for the
 * _mask_ forms it presumably carries m -- confirm against the full file. */
1150 #define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
1151 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
1152 (__v8sf)(__m256)(b), (int)(p), \
1155 #define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
1156 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
1157 (__v8sf)(__m256)(b), (int)(p), \
1160 #define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
1161 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
1162 (__v4df)(__m256d)(b), (int)(p), \
1165 #define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
1166 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
1167 (__v4df)(__m256d)(b), (int)(p), \
1170 #define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
1171 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
1172 (__v4sf)(__m128)(b), (int)(p), \
1175 #define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
1176 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
1177 (__v4sf)(__m128)(b), (int)(p), \
1180 #define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
1181 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
1182 (__v2df)(__m128d)(b), (int)(p), \
1185 #define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
1186 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
1187 (__v2df)(__m128d)(b), (int)(p), \
/* 128-bit pd fused multiply-add wrappers (fmadd, fmsub, fnmadd, fnmsub).
 * Every wrapper shown here calls one of the vfmaddpd128 builtins; the suffix
 * (_mask, _mask3, _maskz) follows the wrapper's own mask/mask3/maskz name.
 * The fnmadd/fnmsub forms negate __A in the first builtin argument.
 * The remaining builtin arguments are on lines absent from this listing; the
 * fmsub/fnmsub forms presumably negate __C there -- confirm against the full
 * file. */
1190 static __inline__ __m128d __DEFAULT_FN_ATTRS
1191 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1193 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1199 static __inline__ __m128d __DEFAULT_FN_ATTRS
1200 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1202 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
1208 static __inline__ __m128d __DEFAULT_FN_ATTRS
1209 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1211 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1217 static __inline__ __m128d __DEFAULT_FN_ATTRS
1218 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1220 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1226 static __inline__ __m128d __DEFAULT_FN_ATTRS
1227 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1229 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1235 static __inline__ __m128d __DEFAULT_FN_ATTRS
1236 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1238 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
1244 static __inline__ __m128d __DEFAULT_FN_ATTRS
1245 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1247 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
1253 static __inline__ __m128d __DEFAULT_FN_ATTRS
1254 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1256 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
/* 256-bit pd fused multiply-add wrappers (fmadd, fmsub, fnmadd, fnmsub).
 * Every wrapper shown here calls one of the vfmaddpd256 builtins; the suffix
 * (_mask, _mask3, _maskz) follows the wrapper's own mask/mask3/maskz name.
 * The fnmadd/fnmsub forms negate __A in the first builtin argument.
 * The remaining builtin arguments are on lines absent from this listing; the
 * fmsub/fnmsub forms presumably negate __C there -- confirm against the full
 * file. */
1262 static __inline__ __m256d __DEFAULT_FN_ATTRS
1263 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1265 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1271 static __inline__ __m256d __DEFAULT_FN_ATTRS
1272 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1274 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
1280 static __inline__ __m256d __DEFAULT_FN_ATTRS
1281 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1283 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1289 static __inline__ __m256d __DEFAULT_FN_ATTRS
1290 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1292 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1298 static __inline__ __m256d __DEFAULT_FN_ATTRS
1299 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1301 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1307 static __inline__ __m256d __DEFAULT_FN_ATTRS
1308 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1310 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
1316 static __inline__ __m256d __DEFAULT_FN_ATTRS
1317 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1319 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
1325 static __inline__ __m256d __DEFAULT_FN_ATTRS
1326 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1328 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
/* 128-bit ps fused multiply-add wrappers (fmadd, fmsub, fnmadd, fnmsub).
 * Every wrapper shown here calls one of the vfmaddps128 builtins; the suffix
 * (_mask, _mask3, _maskz) follows the wrapper's own mask/mask3/maskz name.
 * The fnmadd/fnmsub forms negate __A in the first builtin argument.
 * The remaining builtin arguments are on lines absent from this listing; the
 * fmsub/fnmsub forms presumably negate __C there -- confirm against the full
 * file. */
1334 static __inline__ __m128 __DEFAULT_FN_ATTRS
1335 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1337 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1343 static __inline__ __m128 __DEFAULT_FN_ATTRS
1344 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1346 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
1352 static __inline__ __m128 __DEFAULT_FN_ATTRS
1353 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1355 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1361 static __inline__ __m128 __DEFAULT_FN_ATTRS
1362 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1364 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1370 static __inline__ __m128 __DEFAULT_FN_ATTRS
1371 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1373 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1379 static __inline__ __m128 __DEFAULT_FN_ATTRS
1380 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1382 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
1388 static __inline__ __m128 __DEFAULT_FN_ATTRS
1389 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1391 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1397 static __inline__ __m128 __DEFAULT_FN_ATTRS
1398 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1400 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
/* 256-bit ps fused multiply-add wrappers (fmadd, fmsub, fnmadd, fnmsub).
 * Every wrapper shown here calls one of the vfmaddps256 builtins; the suffix
 * (_mask, _mask3, _maskz) follows the wrapper's own mask/mask3/maskz name.
 * The fnmadd/fnmsub forms negate __A in the first builtin argument.
 * The remaining builtin arguments are on lines absent from this listing; the
 * fmsub/fnmsub forms presumably negate __C there -- confirm against the full
 * file. */
1406 static __inline__ __m256 __DEFAULT_FN_ATTRS
1407 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1409 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1415 static __inline__ __m256 __DEFAULT_FN_ATTRS
1416 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1418 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
1424 static __inline__ __m256 __DEFAULT_FN_ATTRS
1425 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1427 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1433 static __inline__ __m256 __DEFAULT_FN_ATTRS
1434 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1436 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1442 static __inline__ __m256 __DEFAULT_FN_ATTRS
1443 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1445 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1451 static __inline__ __m256 __DEFAULT_FN_ATTRS
1452 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1454 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
1460 static __inline__ __m256 __DEFAULT_FN_ATTRS
1461 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1463 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1469 static __inline__ __m256 __DEFAULT_FN_ATTRS
1470 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1472 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
/* pd fmaddsub / fmsubadd wrappers, 128-bit then 256-bit.
 * Both the fmaddsub and the fmsubadd forms shown here call the vfmaddsubpd
 * builtins (_mask, _mask3 or _maskz, matching the wrapper name) with __A as
 * the first argument. The remaining arguments are on lines absent from this
 * listing; the fmsubadd forms presumably negate __C there -- confirm against
 * the full file. */
1478 static __inline__ __m128d __DEFAULT_FN_ATTRS
1479 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1481 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1487 static __inline__ __m128d __DEFAULT_FN_ATTRS
1488 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1490 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
1497 static __inline__ __m128d __DEFAULT_FN_ATTRS
1498 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1500 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1507 static __inline__ __m128d __DEFAULT_FN_ATTRS
1508 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1510 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1516 static __inline__ __m128d __DEFAULT_FN_ATTRS
1517 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1519 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1526 static __inline__ __m256d __DEFAULT_FN_ATTRS
1527 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1529 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1535 static __inline__ __m256d __DEFAULT_FN_ATTRS
1536 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1538 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
1545 static __inline__ __m256d __DEFAULT_FN_ATTRS
1546 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1548 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1555 static __inline__ __m256d __DEFAULT_FN_ATTRS
1556 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1558 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1564 static __inline__ __m256d __DEFAULT_FN_ATTRS
1565 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1567 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
/* ps fmaddsub / fmsubadd wrappers, 128-bit then 256-bit.
 * Both the fmaddsub and the fmsubadd forms shown here call the vfmaddsubps
 * builtins (_mask, _mask3 or _maskz, matching the wrapper name) with __A as
 * the first argument. The remaining arguments are on lines absent from this
 * listing; the fmsubadd forms presumably negate __C there -- confirm against
 * the full file. The parameter list of _mm256_mask_fmaddsub_ps also continues
 * on a line that is not shown here. */
1574 static __inline__ __m128 __DEFAULT_FN_ATTRS
1575 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1577 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1583 static __inline__ __m128 __DEFAULT_FN_ATTRS
1584 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1586 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
1592 static __inline__ __m128 __DEFAULT_FN_ATTRS
1593 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1595 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1601 static __inline__ __m128 __DEFAULT_FN_ATTRS
1602 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1604 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1610 static __inline__ __m128 __DEFAULT_FN_ATTRS
1611 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1613 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1619 static __inline__ __m256 __DEFAULT_FN_ATTRS
1620 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1623 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1629 static __inline__ __m256 __DEFAULT_FN_ATTRS
1630 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1632 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
1638 static __inline__ __m256 __DEFAULT_FN_ATTRS
1639 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1641 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1647 static __inline__ __m256 __DEFAULT_FN_ATTRS
1648 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1650 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1656 static __inline__ __m256 __DEFAULT_FN_ATTRS
1657 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1659 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
/* mask3 forms of fmsub for pd/ps, 128-bit and 256-bit. Unlike the mask/maskz
 * fmsub wrappers, these call dedicated vfmsub*_mask3 builtins with __A as the
 * first argument. The remaining builtin arguments are on lines absent from
 * this listing. */
1665 static __inline__ __m128d __DEFAULT_FN_ATTRS
1666 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1668 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
1674 static __inline__ __m256d __DEFAULT_FN_ATTRS
1675 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1677 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
1683 static __inline__ __m128 __DEFAULT_FN_ATTRS
1684 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1686 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
1692 static __inline__ __m256 __DEFAULT_FN_ATTRS
1693 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1695 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
/* mask3 forms of fmsubadd for pd/ps, 128-bit and 256-bit. Each calls the
 * matching vfmsubadd*_mask3 builtin with __A as the first argument. The
 * remaining builtin arguments are on lines absent from this listing. */
1701 static __inline__ __m128d __DEFAULT_FN_ATTRS
1702 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1704 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
1711 static __inline__ __m256d __DEFAULT_FN_ATTRS
1712 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1714 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
1721 static __inline__ __m128 __DEFAULT_FN_ATTRS
1722 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1724 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
1730 static __inline__ __m256 __DEFAULT_FN_ATTRS
1731 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1733 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
/* mask forms of fnmadd for pd/ps, 128-bit and 256-bit. Each calls the matching
 * vfnmadd*_mask builtin with __A (not negated here) as the first argument. The
 * remaining builtin arguments are on lines absent from this listing. */
1739 static __inline__ __m128d __DEFAULT_FN_ATTRS
1740 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1742 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
1748 static __inline__ __m256d __DEFAULT_FN_ATTRS
1749 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1751 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
1757 static __inline__ __m128 __DEFAULT_FN_ATTRS
1758 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1760 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
1766 static __inline__ __m256 __DEFAULT_FN_ATTRS
1767 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1769 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
/* mask and mask3 forms of fnmsub for pd/ps, 128-bit and 256-bit. Each calls
 * the matching vfnmsub*_mask or vfnmsub*_mask3 builtin with __A as the first
 * argument. The remaining builtin arguments are on lines absent from this
 * listing. */
1775 static __inline__ __m128d __DEFAULT_FN_ATTRS
1776 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1778 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
1784 static __inline__ __m128d __DEFAULT_FN_ATTRS
1785 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1787 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
1793 static __inline__ __m256d __DEFAULT_FN_ATTRS
1794 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1796 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
1802 static __inline__ __m256d __DEFAULT_FN_ATTRS
1803 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1805 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
1811 static __inline__ __m128 __DEFAULT_FN_ATTRS
1812 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1814 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
1820 static __inline__ __m128 __DEFAULT_FN_ATTRS
1821 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1823 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
1829 static __inline__ __m256 __DEFAULT_FN_ATTRS
1830 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1832 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
1838 static __inline__ __m256 __DEFAULT_FN_ATTRS
1839 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1841 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
/* Masked pd addition, 128-bit and 256-bit.
 *  - The mask forms pass __U and _mm_add_pd / _mm256_add_pd of (__A, __B) to
 *    the selectpd builtin; the builtin's last operand is on a line absent from
 *    this listing (presumably __W -- confirm).
 *  - The maskz forms make the same call with an all-zero vector
 *    (_mm_setzero_pd / _mm256_setzero_pd) as the last operand. */
1847 static __inline__ __m128d __DEFAULT_FN_ATTRS
1848 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1849 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1850 (__v2df)_mm_add_pd(__A, __B),
1854 static __inline__ __m128d __DEFAULT_FN_ATTRS
1855 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1856 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1857 (__v2df)_mm_add_pd(__A, __B),
1858 (__v2df)_mm_setzero_pd());
1861 static __inline__ __m256d __DEFAULT_FN_ATTRS
1862 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1863 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1864 (__v4df)_mm256_add_pd(__A, __B),
1868 static __inline__ __m256d __DEFAULT_FN_ATTRS
1869 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1870 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1871 (__v4df)_mm256_add_pd(__A, __B),
1872 (__v4df)_mm256_setzero_pd());
/* Masked ps addition, 128-bit and 256-bit.
 *  - The mask forms pass __U and _mm_add_ps / _mm256_add_ps of (__A, __B) to
 *    the selectps builtin; the builtin's last operand is on a line absent from
 *    this listing (presumably __W -- confirm).
 *  - The maskz forms make the same call with an all-zero vector
 *    (_mm_setzero_ps / _mm256_setzero_ps) as the last operand. */
1875 static __inline__ __m128 __DEFAULT_FN_ATTRS
1876 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1877 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1878 (__v4sf)_mm_add_ps(__A, __B),
1882 static __inline__ __m128 __DEFAULT_FN_ATTRS
1883 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1884 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1885 (__v4sf)_mm_add_ps(__A, __B),
1886 (__v4sf)_mm_setzero_ps());
1889 static __inline__ __m256 __DEFAULT_FN_ATTRS
1890 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1891 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1892 (__v8sf)_mm256_add_ps(__A, __B),
1896 static __inline__ __m256 __DEFAULT_FN_ATTRS
1897 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1898 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1899 (__v8sf)_mm256_add_ps(__A, __B),
1900 (__v8sf)_mm256_setzero_ps());
/* mask_blend wrappers for epi32, pd, ps and epi64, 128-bit and 256-bit.
 * Each passes __U (cast to __mmask8) as the first argument of the matching
 * select builtin (selectd / selectpd / selectps / selectq). The two data
 * operands are on lines absent from this listing, so the order in which __A
 * and __W are passed cannot be confirmed from here. */
1903 static __inline__ __m128i __DEFAULT_FN_ATTRS
1904 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1905 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1910 static __inline__ __m256i __DEFAULT_FN_ATTRS
1911 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1912 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1917 static __inline__ __m128d __DEFAULT_FN_ATTRS
1918 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1919 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1924 static __inline__ __m256d __DEFAULT_FN_ATTRS
1925 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1926 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1931 static __inline__ __m128 __DEFAULT_FN_ATTRS
1932 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1933 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1938 static __inline__ __m256 __DEFAULT_FN_ATTRS
1939 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1940 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1945 static __inline__ __m128i __DEFAULT_FN_ATTRS
1946 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1947 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1952 static __inline__ __m256i __DEFAULT_FN_ATTRS
1953 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1954 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* compress wrappers for pd, 128-bit and 256-bit. Both the mask and the maskz
 * forms call the same compressdf*_mask builtin with __A as the first argument.
 * The visible 256-bit maskz call passes _mm256_setzero_pd() next; the other
 * trailing arguments are on lines absent from this listing (presumably __W for
 * the mask forms, then __U -- confirm). */
1959 static __inline__ __m128d __DEFAULT_FN_ATTRS
1960 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1961 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1966 static __inline__ __m128d __DEFAULT_FN_ATTRS
1967 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1968 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1974 static __inline__ __m256d __DEFAULT_FN_ATTRS
1975 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1976 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1981 static __inline__ __m256d __DEFAULT_FN_ATTRS
1982 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1983 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1985 _mm256_setzero_pd (),
/* compress wrappers for epi64, 128-bit and 256-bit. Both the mask and the
 * maskz forms call the same compressdi*_mask builtin with __A as the first
 * argument. The maskz forms pass an all-zero vector next; the other trailing
 * arguments are on lines absent from this listing (presumably __W for the
 * mask forms, then __U -- confirm). */
1989 static __inline__ __m128i __DEFAULT_FN_ATTRS
1990 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1991 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1996 static __inline__ __m128i __DEFAULT_FN_ATTRS
1997 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1998 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
2000 _mm_setzero_si128 (),
2004 static __inline__ __m256i __DEFAULT_FN_ATTRS
2005 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2006 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2011 static __inline__ __m256i __DEFAULT_FN_ATTRS
2012 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
2013 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2015 _mm256_setzero_si256 (),
/* compress wrappers for ps, 128-bit and 256-bit. Both the mask and the maskz
 * forms call the same compresssf*_mask builtin with __A as the first argument.
 * The visible 256-bit maskz call passes _mm256_setzero_ps() next; the other
 * trailing arguments are on lines absent from this listing (presumably __W for
 * the mask forms, then __U -- confirm). */
2019 static __inline__ __m128 __DEFAULT_FN_ATTRS
2020 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2021 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2026 static __inline__ __m128 __DEFAULT_FN_ATTRS
2027 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
2028 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2034 static __inline__ __m256 __DEFAULT_FN_ATTRS
2035 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2036 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2041 static __inline__ __m256 __DEFAULT_FN_ATTRS
2042 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
2043 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2045 _mm256_setzero_ps (),
/* compress wrappers for epi32, 128-bit and 256-bit. Both the mask and the
 * maskz forms call the same compresssi*_mask builtin with __A as the first
 * argument. The maskz forms pass an all-zero vector next; the other trailing
 * arguments are on lines absent from this listing (presumably __W for the
 * mask forms, then __U -- confirm). */
2049 static __inline__ __m128i __DEFAULT_FN_ATTRS
2050 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2051 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2056 static __inline__ __m128i __DEFAULT_FN_ATTRS
2057 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
2058 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2060 _mm_setzero_si128 (),
2064 static __inline__ __m256i __DEFAULT_FN_ATTRS
2065 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2066 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2071 static __inline__ __m256i __DEFAULT_FN_ATTRS
2072 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
2073 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2075 _mm256_setzero_si256 (),
/* mask_compressstoreu wrappers for pd, epi64, ps and epi32, 128-bit and
 * 256-bit. Each returns void and calls the matching compressstore*_mask
 * builtin, passing __P cast to a pointer to the element-vector type
 * (__v2df*, __v4df*, __v2di*, __v4di*, __v4sf*, __v8sf*, __v4si*, __v8si*).
 * The remaining builtin arguments are on lines absent from this listing
 * (presumably __A and __U -- confirm). */
2079 static __inline__ void __DEFAULT_FN_ATTRS
2080 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
2081 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
2086 static __inline__ void __DEFAULT_FN_ATTRS
2087 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
2088 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
2093 static __inline__ void __DEFAULT_FN_ATTRS
2094 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
2095 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
2100 static __inline__ void __DEFAULT_FN_ATTRS
2101 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
2102 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
2107 static __inline__ void __DEFAULT_FN_ATTRS
2108 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
2109 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
2114 static __inline__ void __DEFAULT_FN_ATTRS
2115 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
2116 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
2121 static __inline__ void __DEFAULT_FN_ATTRS
2122 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
2123 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
2128 static __inline__ void __DEFAULT_FN_ATTRS
2129 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
2130 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
/* Masked epi32 -> pd conversion, 128-bit and 256-bit results. Both widths take
 * a __m128i source.
 *  - The mask forms pass __U and _mm_cvtepi32_pd / _mm256_cvtepi32_pd of __A
 *    to the selectpd builtin; the builtin's last operand is on a line absent
 *    from this listing (presumably __W -- confirm).
 *  - The maskz forms make the same call with an all-zero vector as the last
 *    operand. */
2135 static __inline__ __m128d __DEFAULT_FN_ATTRS
2136 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2137 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2138 (__v2df)_mm_cvtepi32_pd(__A),
2142 static __inline__ __m128d __DEFAULT_FN_ATTRS
2143 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2145 (__v2df)_mm_cvtepi32_pd(__A),
2146 (__v2df)_mm_setzero_pd());
2149 static __inline__ __m256d __DEFAULT_FN_ATTRS
2150 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2151 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2152 (__v4df)_mm256_cvtepi32_pd(__A),
2156 static __inline__ __m256d __DEFAULT_FN_ATTRS
2157 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2158 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2159 (__v4df)_mm256_cvtepi32_pd(__A),
2160 (__v4df)_mm256_setzero_pd());
/* mask form of the 128-bit epi32 -> ps conversion: calls the cvtdq2ps128_mask
 * builtin with __A (as __v4si) as the first argument. The remaining builtin
 * arguments are on lines absent from this listing (presumably __W and __U --
 * confirm). */
2163 static __inline__ __m128 __DEFAULT_FN_ATTRS
2164 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2165 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
/* maskz form of the 128-bit epi32 -> ps conversion: calls the cvtdq2ps128_mask
 * builtin with __A (as __v4si) as the first argument. The remaining builtin
 * arguments are on lines absent from this listing (presumably an all-zero
 * vector and __U -- confirm).
 *
 * __U is declared as __mmask8 so that it matches _mm_mask_cvtepi32_ps and the
 * other 128-bit intrinsics in this file; it was previously __mmask16. */
2170 static __inline__ __m128 __DEFAULT_FN_ATTRS
2171 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
2172 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
/* mask form of the 256-bit epi32 -> ps conversion: calls the cvtdq2ps256_mask
 * builtin with __A (as __v8si) as the first argument. The remaining builtin
 * arguments are on lines absent from this listing (presumably __W and __U --
 * confirm). */
2178 static __inline__ __m256 __DEFAULT_FN_ATTRS
2179 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2180 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
/* maskz form of the 256-bit epi32 -> ps conversion: calls the cvtdq2ps256_mask
 * builtin with __A (as __v8si) first and _mm256_setzero_ps() as a following
 * operand. The final builtin argument is on a line absent from this listing
 * (presumably __U -- confirm).
 *
 * __U is declared as __mmask8 so that it matches _mm256_mask_cvtepi32_ps and
 * the other 256-bit ps intrinsics in this file; it was previously
 * __mmask16. */
2185 static __inline__ __m256 __DEFAULT_FN_ATTRS
2186 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
2187 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
2189 _mm256_setzero_ps (),
/* Masked pd -> epi32 conversion from 128-bit and 256-bit sources. Both widths
 * return __m128i. Each wrapper calls cvtpd2dq128_mask / cvtpd2dq256_mask with
 * __A as the first argument. The maskz forms pass _mm_setzero_si128() next;
 * the other trailing arguments are on lines absent from this listing
 * (presumably __W for the mask forms, then __U -- confirm). */
2193 static __inline__ __m128i __DEFAULT_FN_ATTRS
2194 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2195 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2200 static __inline__ __m128i __DEFAULT_FN_ATTRS
2201 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
2202 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2204 _mm_setzero_si128 (),
2208 static __inline__ __m128i __DEFAULT_FN_ATTRS
2209 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2210 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2215 static __inline__ __m128i __DEFAULT_FN_ATTRS
2216 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
2217 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2219 _mm_setzero_si128 (),
/* Masked pd -> ps conversion from 128-bit and 256-bit sources. Both widths
 * return __m128. Each wrapper calls cvtpd2ps_mask / cvtpd2ps256_mask with __A
 * as the first argument. The remaining builtin arguments are on lines absent
 * from this listing (presumably __W or an all-zero vector, then __U --
 * confirm). */
2223 static __inline__ __m128 __DEFAULT_FN_ATTRS
2224 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
2225 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2230 static __inline__ __m128 __DEFAULT_FN_ATTRS
2231 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2232 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2238 static __inline__ __m128 __DEFAULT_FN_ATTRS
2239 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2240 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2245 static __inline__ __m128 __DEFAULT_FN_ATTRS
2246 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2247 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2253 static __inline__ __m128i __DEFAULT_FN_ATTRS
2254 _mm_cvtpd_epu32 (__m128d __A) {
2255 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2257 _mm_setzero_si128 (),
2261 static __inline__ __m128i __DEFAULT_FN_ATTRS
2262 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2263 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2268 static __inline__ __m128i __DEFAULT_FN_ATTRS
2269 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2270 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2272 _mm_setzero_si128 (),
2276 static __inline__ __m128i __DEFAULT_FN_ATTRS
2277 _mm256_cvtpd_epu32 (__m256d __A) {
2278 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2280 _mm_setzero_si128 (),
2284 static __inline__ __m128i __DEFAULT_FN_ATTRS
2285 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2286 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_cvtpd2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2291 static __inline__ __m128i __DEFAULT_FN_ATTRS
2292 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2293 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2295 _mm_setzero_si128 (),
/* Hands __A (as __v4sf) to __builtin_ia32_cvtps2dq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2299 static __inline__ __m128i __DEFAULT_FN_ATTRS
2300 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2301 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvtps2dq128_mask; the remaining operand(s) lie outside this excerpt. */
2306 static __inline__ __m128i __DEFAULT_FN_ATTRS
2307 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2308 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
2310 _mm_setzero_si128 (),
/* Hands __A (as __v8sf) to __builtin_ia32_cvtps2dq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2314 static __inline__ __m256i __DEFAULT_FN_ATTRS
2315 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2316 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvtps2dq256_mask; the remaining operand(s) lie outside this excerpt. */
2321 static __inline__ __m256i __DEFAULT_FN_ATTRS
2322 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2323 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
2325 _mm256_setzero_si256 (),
/* Hands __A (as __v4sf) to __builtin_ia32_cvtps2pd128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2329 static __inline__ __m128d __DEFAULT_FN_ATTRS
2330 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2331 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) to __builtin_ia32_cvtps2pd128_mask; the other operands lie outside this excerpt. */
2336 static __inline__ __m128d __DEFAULT_FN_ATTRS
2337 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2338 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) to __builtin_ia32_cvtps2pd256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2344 static __inline__ __m256d __DEFAULT_FN_ATTRS
2345 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2346 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvtps2pd256_mask; the remaining operand(s) lie outside this excerpt. */
2351 static __inline__ __m256d __DEFAULT_FN_ATTRS
2352 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2353 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
2355 _mm256_setzero_pd (),
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvtps2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2359 static __inline__ __m128i __DEFAULT_FN_ATTRS
2360 _mm_cvtps_epu32 (__m128 __A) {
2361 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2363 _mm_setzero_si128 (),
/* Hands __A (as __v4sf) to __builtin_ia32_cvtps2udq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2367 static __inline__ __m128i __DEFAULT_FN_ATTRS
2368 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2369 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvtps2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2374 static __inline__ __m128i __DEFAULT_FN_ATTRS
2375 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2376 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2378 _mm_setzero_si128 (),
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvtps2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2382 static __inline__ __m256i __DEFAULT_FN_ATTRS
2383 _mm256_cvtps_epu32 (__m256 __A) {
2384 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2386 _mm256_setzero_si256 (),
/* Hands __A (as __v8sf) to __builtin_ia32_cvtps2udq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2390 static __inline__ __m256i __DEFAULT_FN_ATTRS
2391 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2392 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvtps2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2397 static __inline__ __m256i __DEFAULT_FN_ATTRS
2398 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2399 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2401 _mm256_setzero_si256 (),
/* Hands __A (as __v2df) to __builtin_ia32_cvttpd2dq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2405 static __inline__ __m128i __DEFAULT_FN_ATTRS
2406 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2407 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
/* Hands __A (as __v2df) and a zeroed vector to __builtin_ia32_cvttpd2dq128_mask; the remaining operand(s) lie outside this excerpt. */
2412 static __inline__ __m128i __DEFAULT_FN_ATTRS
2413 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2414 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2416 _mm_setzero_si128 (),
/* Hands __A (as __v4df) to __builtin_ia32_cvttpd2dq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2420 static __inline__ __m128i __DEFAULT_FN_ATTRS
2421 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2422 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_cvttpd2dq256_mask; the remaining operand(s) lie outside this excerpt. */
2427 static __inline__ __m128i __DEFAULT_FN_ATTRS
2428 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2429 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2431 _mm_setzero_si128 (),
/* Hands __A (as __v2df) and a zeroed vector to __builtin_ia32_cvttpd2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2435 static __inline__ __m128i __DEFAULT_FN_ATTRS
2436 _mm_cvttpd_epu32 (__m128d __A) {
2437 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2439 _mm_setzero_si128 (),
/* Hands __A (as __v2df) to __builtin_ia32_cvttpd2udq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2443 static __inline__ __m128i __DEFAULT_FN_ATTRS
2444 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2445 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
/* Hands __A (as __v2df) and a zeroed vector to __builtin_ia32_cvttpd2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2450 static __inline__ __m128i __DEFAULT_FN_ATTRS
2451 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2452 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2454 _mm_setzero_si128 (),
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_cvttpd2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2458 static __inline__ __m128i __DEFAULT_FN_ATTRS
2459 _mm256_cvttpd_epu32 (__m256d __A) {
2460 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2462 _mm_setzero_si128 (),
/* Hands __A (as __v4df) to __builtin_ia32_cvttpd2udq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2466 static __inline__ __m128i __DEFAULT_FN_ATTRS
2467 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2468 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_cvttpd2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2473 static __inline__ __m128i __DEFAULT_FN_ATTRS
2474 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2475 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2477 _mm_setzero_si128 (),
/* Hands __A (as __v4sf) to __builtin_ia32_cvttps2dq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2481 static __inline__ __m128i __DEFAULT_FN_ATTRS
2482 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2483 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvttps2dq128_mask; the remaining operand(s) lie outside this excerpt. */
2488 static __inline__ __m128i __DEFAULT_FN_ATTRS
2489 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2490 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2492 _mm_setzero_si128 (),
/* Hands __A (as __v8sf) to __builtin_ia32_cvttps2dq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2496 static __inline__ __m256i __DEFAULT_FN_ATTRS
2497 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2498 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvttps2dq256_mask; the remaining operand(s) lie outside this excerpt. */
2503 static __inline__ __m256i __DEFAULT_FN_ATTRS
2504 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2505 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2507 _mm256_setzero_si256 (),
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvttps2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2511 static __inline__ __m128i __DEFAULT_FN_ATTRS
2512 _mm_cvttps_epu32 (__m128 __A) {
2513 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2515 _mm_setzero_si128 (),
/* Hands __A (as __v4sf) to __builtin_ia32_cvttps2udq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2519 static __inline__ __m128i __DEFAULT_FN_ATTRS
2520 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2521 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) and a zeroed vector to __builtin_ia32_cvttps2udq128_mask; the remaining operand(s) lie outside this excerpt. */
2526 static __inline__ __m128i __DEFAULT_FN_ATTRS
2527 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2528 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2530 _mm_setzero_si128 (),
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvttps2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2534 static __inline__ __m256i __DEFAULT_FN_ATTRS
2535 _mm256_cvttps_epu32 (__m256 __A) {
2536 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2538 _mm256_setzero_si256 (),
/* Hands __A (as __v8sf) to __builtin_ia32_cvttps2udq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2542 static __inline__ __m256i __DEFAULT_FN_ATTRS
2543 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2544 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_cvttps2udq256_mask; the remaining operand(s) lie outside this excerpt. */
2549 static __inline__ __m256i __DEFAULT_FN_ATTRS
2550 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2551 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2553 _mm256_setzero_si256 (),
/* Picks elements 0 and 1 of __A (viewed as __v4su) with __builtin_shufflevector and converts them to __v2df with __builtin_convertvector. */
2557 static __inline__ __m128d __DEFAULT_FN_ATTRS
2558 _mm_cvtepu32_pd (__m128i __A) {
2559 return (__m128d) __builtin_convertvector(
2560 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
/* Feeds mask __U and _mm_cvtepu32_pd(__A) to __builtin_ia32_selectpd_128; the last operand (presumably __W) lies outside this excerpt. */
2563 static __inline__ __m128d __DEFAULT_FN_ATTRS
2564 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2565 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2566 (__v2df)_mm_cvtepu32_pd(__A),
/* Feeds mask __U, _mm_cvtepu32_pd(__A) and a zeroed vector to __builtin_ia32_selectpd_128 and returns its result. */
2570 static __inline__ __m128d __DEFAULT_FN_ATTRS
2571 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2572 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2573 (__v2df)_mm_cvtepu32_pd(__A),
2574 (__v2df)_mm_setzero_pd());
/* Converts __A, viewed as __v4su, to __v4df with __builtin_convertvector. */
2577 static __inline__ __m256d __DEFAULT_FN_ATTRS
2578 _mm256_cvtepu32_pd (__m128i __A) {
2579 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
/* Feeds mask __U and _mm256_cvtepu32_pd(__A) to __builtin_ia32_selectpd_256; the last operand (presumably __W) lies outside this excerpt. */
2582 static __inline__ __m256d __DEFAULT_FN_ATTRS
2583 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2584 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2585 (__v4df)_mm256_cvtepu32_pd(__A),
/* Feeds mask __U, _mm256_cvtepu32_pd(__A) and a zeroed vector to __builtin_ia32_selectpd_256 and returns its result. */
2589 static __inline__ __m256d __DEFAULT_FN_ATTRS
2590 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2591 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2592 (__v4df)_mm256_cvtepu32_pd(__A),
2593 (__v4df)_mm256_setzero_pd());
/* Hands __A (as __v4si) to __builtin_ia32_cvtudq2ps128_mask; the other operands lie outside this excerpt. */
2596 static __inline__ __m128 __DEFAULT_FN_ATTRS
2597 _mm_cvtepu32_ps (__m128i __A) {
2598 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
/* Hands __A (as __v4si) to __builtin_ia32_cvtudq2ps128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2604 static __inline__ __m128 __DEFAULT_FN_ATTRS
2605 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2606 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
/* Hands __A (as __v4si) to __builtin_ia32_cvtudq2ps128_mask; the other operands lie outside this excerpt. */
2611 static __inline__ __m128 __DEFAULT_FN_ATTRS
2612 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2613 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
/* Hands __A (as __v8si) and a zeroed vector to __builtin_ia32_cvtudq2ps256_mask; the remaining operand(s) lie outside this excerpt. */
2619 static __inline__ __m256 __DEFAULT_FN_ATTRS
2620 _mm256_cvtepu32_ps (__m256i __A) {
2621 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2623 _mm256_setzero_ps (),
/* Hands __A (as __v8si) to __builtin_ia32_cvtudq2ps256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2627 static __inline__ __m256 __DEFAULT_FN_ATTRS
2628 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2629 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
/* Hands __A (as __v8si) and a zeroed vector to __builtin_ia32_cvtudq2ps256_mask; the remaining operand(s) lie outside this excerpt. */
2634 static __inline__ __m256 __DEFAULT_FN_ATTRS
2635 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2636 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2638 _mm256_setzero_ps (),
/* Feeds mask __U and _mm_div_pd(__A, __B) to __builtin_ia32_selectpd_128; the last operand (presumably __W) lies outside this excerpt. */
2642 static __inline__ __m128d __DEFAULT_FN_ATTRS
2643 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2644 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2645 (__v2df)_mm_div_pd(__A, __B),
/* Feeds mask __U, _mm_div_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_128 and returns its result. */
2649 static __inline__ __m128d __DEFAULT_FN_ATTRS
2650 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2651 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2652 (__v2df)_mm_div_pd(__A, __B),
2653 (__v2df)_mm_setzero_pd());
/* Feeds mask __U and _mm256_div_pd(__A, __B) to __builtin_ia32_selectpd_256; the last operand (presumably __W) lies outside this excerpt. */
2656 static __inline__ __m256d __DEFAULT_FN_ATTRS
2657 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2658 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2659 (__v4df)_mm256_div_pd(__A, __B),
/* Feeds mask __U, _mm256_div_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_256 and returns its result. */
2663 static __inline__ __m256d __DEFAULT_FN_ATTRS
2664 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2665 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2666 (__v4df)_mm256_div_pd(__A, __B),
2667 (__v4df)_mm256_setzero_pd());
/* Feeds mask __U and _mm_div_ps(__A, __B) to __builtin_ia32_selectps_128; the last operand (presumably __W) lies outside this excerpt. */
2670 static __inline__ __m128 __DEFAULT_FN_ATTRS
2671 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2672 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2673 (__v4sf)_mm_div_ps(__A, __B),
/* Feeds mask __U, _mm_div_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_128 and returns its result. */
2677 static __inline__ __m128 __DEFAULT_FN_ATTRS
2678 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2679 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2680 (__v4sf)_mm_div_ps(__A, __B),
2681 (__v4sf)_mm_setzero_ps());
/* Feeds mask __U and _mm256_div_ps(__A, __B) to __builtin_ia32_selectps_256; the last operand (presumably __W) lies outside this excerpt. */
2684 static __inline__ __m256 __DEFAULT_FN_ATTRS
2685 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2686 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2687 (__v8sf)_mm256_div_ps(__A, __B),
/* Feeds mask __U, _mm256_div_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_256 and returns its result. */
2691 static __inline__ __m256 __DEFAULT_FN_ATTRS
2692 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2693 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2694 (__v8sf)_mm256_div_ps(__A, __B),
2695 (__v8sf)_mm256_setzero_ps());
/* Hands __A (as __v2df) to __builtin_ia32_expanddf128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2698 static __inline__ __m128d __DEFAULT_FN_ATTRS
2699 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2700 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Hands __A (as __v2df) to __builtin_ia32_expanddf128_mask; the other operands lie outside this excerpt. */
2705 static __inline__ __m128d __DEFAULT_FN_ATTRS
2706 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2707 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Hands __A (as __v4df) to __builtin_ia32_expanddf256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2713 static __inline__ __m256d __DEFAULT_FN_ATTRS
2714 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2715 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_expanddf256_mask; the remaining operand(s) lie outside this excerpt. */
2720 static __inline__ __m256d __DEFAULT_FN_ATTRS
2721 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2722 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2724 _mm256_setzero_pd (),
/* Hands __A (as __v2di) to __builtin_ia32_expanddi128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2728 static __inline__ __m128i __DEFAULT_FN_ATTRS
2729 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2730 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_expanddi128_mask; the remaining operand(s) lie outside this excerpt. */
2735 static __inline__ __m128i __DEFAULT_FN_ATTRS
2736 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2737 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2739 _mm_setzero_si128 (),
/* Hands __A (as __v4di) to __builtin_ia32_expanddi256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2743 static __inline__ __m256i __DEFAULT_FN_ATTRS
2744 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2745 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_expanddi256_mask; the remaining operand(s) lie outside this excerpt. */
2750 static __inline__ __m256i __DEFAULT_FN_ATTRS
2751 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2752 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2754 _mm256_setzero_si256 (),
/* Hands __P, cast to __v2df * (which drops its const qualifier), to __builtin_ia32_expandloaddf128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2758 static __inline__ __m128d __DEFAULT_FN_ATTRS
2759 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2760 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Hands __P, cast to __v2df * (which drops its const qualifier), to __builtin_ia32_expandloaddf128_mask; the other operands lie outside this excerpt. */
2766 static __inline__ __m128d __DEFAULT_FN_ATTRS
2767 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2768 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Hands __P, cast to __v4df * (which drops its const qualifier), to __builtin_ia32_expandloaddf256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2775 static __inline__ __m256d __DEFAULT_FN_ATTRS
2776 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2777 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
/* Hands __P, cast to __v4df * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloaddf256_mask; the remaining operand(s) lie outside this excerpt. */
2783 static __inline__ __m256d __DEFAULT_FN_ATTRS
2784 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2785 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2787 _mm256_setzero_pd (),
/* Hands __P, cast to __v2di * (which drops its const qualifier), to __builtin_ia32_expandloaddi128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2792 static __inline__ __m128i __DEFAULT_FN_ATTRS
2793 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2794 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
/* Hands __P, cast to __v2di * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloaddi128_mask; the remaining operand(s) lie outside this excerpt. */
2800 static __inline__ __m128i __DEFAULT_FN_ATTRS
2801 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2802 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2804 _mm_setzero_si128 (),
/* Hands __P, cast to __v4di *, to __builtin_ia32_expandloaddi256_mask. The end of the parameter list (where __P is presumably declared) and the other operands lie outside this excerpt. */
2809 static __inline__ __m256i __DEFAULT_FN_ATTRS
2810 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2812 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
/* Hands __P, cast to __v4di * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloaddi256_mask; the remaining operand(s) lie outside this excerpt. */
2818 static __inline__ __m256i __DEFAULT_FN_ATTRS
2819 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2820 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2822 _mm256_setzero_si256 (),
/* Hands __P, cast to __v4sf * (which drops its const qualifier), to __builtin_ia32_expandloadsf128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2827 static __inline__ __m128 __DEFAULT_FN_ATTRS
2828 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2829 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Hands __P, cast to __v4sf * (which drops its const qualifier), to __builtin_ia32_expandloadsf128_mask; the other operands lie outside this excerpt. */
2834 static __inline__ __m128 __DEFAULT_FN_ATTRS
2835 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2836 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Hands __P, cast to __v8sf * (which drops its const qualifier), to __builtin_ia32_expandloadsf256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2843 static __inline__ __m256 __DEFAULT_FN_ATTRS
2844 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2845 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
/* Hands __P, cast to __v8sf * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloadsf256_mask; the remaining operand(s) lie outside this excerpt. */
2850 static __inline__ __m256 __DEFAULT_FN_ATTRS
2851 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2852 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2854 _mm256_setzero_ps (),
/* Hands __P, cast to __v4si * (which drops its const qualifier), to __builtin_ia32_expandloadsi128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2859 static __inline__ __m128i __DEFAULT_FN_ATTRS
2860 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2861 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
/* Hands __P, cast to __v4si * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloadsi128_mask; the remaining operand(s) lie outside this excerpt. */
2867 static __inline__ __m128i __DEFAULT_FN_ATTRS
2868 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2869 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2871 _mm_setzero_si128 (),
/* Hands __P, cast to __v8si *, to __builtin_ia32_expandloadsi256_mask. The end of the parameter list (where __P is presumably declared) and the other operands lie outside this excerpt. */
2875 static __inline__ __m256i __DEFAULT_FN_ATTRS
2876 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2878 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
/* Hands __P, cast to __v8si * (which drops its const qualifier), and a zeroed vector to __builtin_ia32_expandloadsi256_mask; the remaining operand(s) lie outside this excerpt. */
2884 static __inline__ __m256i __DEFAULT_FN_ATTRS
2885 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2886 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2888 _mm256_setzero_si256 (),
/* Hands __A (as __v4sf) to __builtin_ia32_expandsf128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2893 static __inline__ __m128 __DEFAULT_FN_ATTRS
2894 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2895 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) to __builtin_ia32_expandsf128_mask; the other operands lie outside this excerpt. */
2900 static __inline__ __m128 __DEFAULT_FN_ATTRS
2901 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2902 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Hands __A (as __v8sf) to __builtin_ia32_expandsf256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2908 static __inline__ __m256 __DEFAULT_FN_ATTRS
2909 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2910 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_expandsf256_mask; the remaining operand(s) lie outside this excerpt. */
2915 static __inline__ __m256 __DEFAULT_FN_ATTRS
2916 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2917 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2919 _mm256_setzero_ps (),
/* Hands __A (as __v4si) to __builtin_ia32_expandsi128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2923 static __inline__ __m128i __DEFAULT_FN_ATTRS
2924 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2925 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
/* Hands __A (as __v4si) and a zeroed vector to __builtin_ia32_expandsi128_mask; the remaining operand(s) lie outside this excerpt. */
2930 static __inline__ __m128i __DEFAULT_FN_ATTRS
2931 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2932 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2934 _mm_setzero_si128 (),
/* Hands __A (as __v8si) to __builtin_ia32_expandsi256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2938 static __inline__ __m256i __DEFAULT_FN_ATTRS
2939 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2940 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
/* Hands __A (as __v8si) and a zeroed vector to __builtin_ia32_expandsi256_mask; the remaining operand(s) lie outside this excerpt. */
2945 static __inline__ __m256i __DEFAULT_FN_ATTRS
2946 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2947 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2949 _mm256_setzero_si256 (),
/* Hands __A (as __v2df) to __builtin_ia32_getexppd128_mask; the other operands lie outside this excerpt. */
2953 static __inline__ __m128d __DEFAULT_FN_ATTRS
2954 _mm_getexp_pd (__m128d __A) {
2955 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Hands __A (as __v2df) to __builtin_ia32_getexppd128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2961 static __inline__ __m128d __DEFAULT_FN_ATTRS
2962 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2963 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Hands __A (as __v2df) to __builtin_ia32_getexppd128_mask; the other operands lie outside this excerpt. */
2968 static __inline__ __m128d __DEFAULT_FN_ATTRS
2969 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2970 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_getexppd256_mask; the remaining operand(s) lie outside this excerpt. */
2976 static __inline__ __m256d __DEFAULT_FN_ATTRS
2977 _mm256_getexp_pd (__m256d __A) {
2978 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2980 _mm256_setzero_pd (),
/* Hands __A (as __v4df) to __builtin_ia32_getexppd256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
2984 static __inline__ __m256d __DEFAULT_FN_ATTRS
2985 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2986 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
/* Hands __A (as __v4df) and a zeroed vector to __builtin_ia32_getexppd256_mask; the remaining operand(s) lie outside this excerpt. */
2991 static __inline__ __m256d __DEFAULT_FN_ATTRS
2992 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2993 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2995 _mm256_setzero_pd (),
/* Hands __A (as __v4sf) to __builtin_ia32_getexpps128_mask; the other operands lie outside this excerpt. */
2999 static __inline__ __m128 __DEFAULT_FN_ATTRS
3000 _mm_getexp_ps (__m128 __A) {
3001 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) to __builtin_ia32_getexpps128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
3007 static __inline__ __m128 __DEFAULT_FN_ATTRS
3008 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
3009 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Hands __A (as __v4sf) to __builtin_ia32_getexpps128_mask; the other operands lie outside this excerpt. */
3014 static __inline__ __m128 __DEFAULT_FN_ATTRS
3015 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
3016 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_getexpps256_mask; the remaining operand(s) lie outside this excerpt. */
3022 static __inline__ __m256 __DEFAULT_FN_ATTRS
3023 _mm256_getexp_ps (__m256 __A) {
3024 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3026 _mm256_setzero_ps (),
/* Hands __A (as __v8sf) to __builtin_ia32_getexpps256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
3030 static __inline__ __m256 __DEFAULT_FN_ATTRS
3031 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
3032 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
/* Hands __A (as __v8sf) and a zeroed vector to __builtin_ia32_getexpps256_mask; the remaining operand(s) lie outside this excerpt. */
3037 static __inline__ __m256 __DEFAULT_FN_ATTRS
3038 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
3039 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3041 _mm256_setzero_ps (),
/* Feeds mask __U and _mm_max_pd(__A, __B) to __builtin_ia32_selectpd_128; the last operand (presumably __W) lies outside this excerpt. */
3045 static __inline__ __m128d __DEFAULT_FN_ATTRS
3046 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3047 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3048 (__v2df)_mm_max_pd(__A, __B),
/* Feeds mask __U, _mm_max_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_128 and returns its result. */
3052 static __inline__ __m128d __DEFAULT_FN_ATTRS
3053 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3054 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3055 (__v2df)_mm_max_pd(__A, __B),
3056 (__v2df)_mm_setzero_pd());
/* Feeds mask __U and _mm256_max_pd(__A, __B) to __builtin_ia32_selectpd_256; the last operand (presumably __W) lies outside this excerpt. */
3059 static __inline__ __m256d __DEFAULT_FN_ATTRS
3060 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3061 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3062 (__v4df)_mm256_max_pd(__A, __B),
/* Feeds mask __U, _mm256_max_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_256 and returns its result. */
3066 static __inline__ __m256d __DEFAULT_FN_ATTRS
3067 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3068 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3069 (__v4df)_mm256_max_pd(__A, __B),
3070 (__v4df)_mm256_setzero_pd());
/* Feeds mask __U and _mm_max_ps(__A, __B) to __builtin_ia32_selectps_128; the last operand (presumably __W) lies outside this excerpt. */
3073 static __inline__ __m128 __DEFAULT_FN_ATTRS
3074 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3075 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3076 (__v4sf)_mm_max_ps(__A, __B),
/* Feeds mask __U, _mm_max_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_128 and returns its result. */
3080 static __inline__ __m128 __DEFAULT_FN_ATTRS
3081 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3082 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3083 (__v4sf)_mm_max_ps(__A, __B),
3084 (__v4sf)_mm_setzero_ps());
/* Feeds mask __U and _mm256_max_ps(__A, __B) to __builtin_ia32_selectps_256; the last operand (presumably __W) lies outside this excerpt. */
3087 static __inline__ __m256 __DEFAULT_FN_ATTRS
3088 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3089 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3090 (__v8sf)_mm256_max_ps(__A, __B),
/* Feeds mask __U, _mm256_max_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_256 and returns its result. */
3094 static __inline__ __m256 __DEFAULT_FN_ATTRS
3095 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3096 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3097 (__v8sf)_mm256_max_ps(__A, __B),
3098 (__v8sf)_mm256_setzero_ps());
/* Feeds mask __U and _mm_min_pd(__A, __B) to __builtin_ia32_selectpd_128; the last operand (presumably __W) lies outside this excerpt. */
3101 static __inline__ __m128d __DEFAULT_FN_ATTRS
3102 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3103 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3104 (__v2df)_mm_min_pd(__A, __B),
/* Feeds mask __U, _mm_min_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_128 and returns its result. */
3108 static __inline__ __m128d __DEFAULT_FN_ATTRS
3109 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3110 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3111 (__v2df)_mm_min_pd(__A, __B),
3112 (__v2df)_mm_setzero_pd());
/* Feeds mask __U and _mm256_min_pd(__A, __B) to __builtin_ia32_selectpd_256; the last operand (presumably __W) lies outside this excerpt. */
3115 static __inline__ __m256d __DEFAULT_FN_ATTRS
3116 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3117 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3118 (__v4df)_mm256_min_pd(__A, __B),
/* Feeds mask __U, _mm256_min_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_256 and returns its result. */
3122 static __inline__ __m256d __DEFAULT_FN_ATTRS
3123 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3124 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3125 (__v4df)_mm256_min_pd(__A, __B),
3126 (__v4df)_mm256_setzero_pd());
/* Feeds mask __U and _mm_min_ps(__A, __B) to __builtin_ia32_selectps_128; the last operand (presumably __W) lies outside this excerpt. */
3129 static __inline__ __m128 __DEFAULT_FN_ATTRS
3130 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3131 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3132 (__v4sf)_mm_min_ps(__A, __B),
/* Feeds mask __U, _mm_min_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_128 and returns its result. */
3136 static __inline__ __m128 __DEFAULT_FN_ATTRS
3137 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3138 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3139 (__v4sf)_mm_min_ps(__A, __B),
3140 (__v4sf)_mm_setzero_ps());
/* Feeds mask __U and _mm256_min_ps(__A, __B) to __builtin_ia32_selectps_256; the last operand (presumably __W) lies outside this excerpt. */
3143 static __inline__ __m256 __DEFAULT_FN_ATTRS
3144 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3146 (__v8sf)_mm256_min_ps(__A, __B),
/* Feeds mask __U, _mm256_min_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_256 and returns its result. */
3150 static __inline__ __m256 __DEFAULT_FN_ATTRS
3151 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3152 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3153 (__v8sf)_mm256_min_ps(__A, __B),
3154 (__v8sf)_mm256_setzero_ps());
/* Feeds mask __U and _mm_mul_pd(__A, __B) to __builtin_ia32_selectpd_128; the last operand (presumably __W) lies outside this excerpt. */
3157 static __inline__ __m128d __DEFAULT_FN_ATTRS
3158 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3159 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3160 (__v2df)_mm_mul_pd(__A, __B),
/* Feeds mask __U, _mm_mul_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_128 and returns its result. */
3164 static __inline__ __m128d __DEFAULT_FN_ATTRS
3165 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3166 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3167 (__v2df)_mm_mul_pd(__A, __B),
3168 (__v2df)_mm_setzero_pd());
/* Feeds mask __U and _mm256_mul_pd(__A, __B) to __builtin_ia32_selectpd_256; the last operand (presumably __W) lies outside this excerpt. */
3171 static __inline__ __m256d __DEFAULT_FN_ATTRS
3172 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3174 (__v4df)_mm256_mul_pd(__A, __B),
/* Feeds mask __U, _mm256_mul_pd(__A, __B) and a zeroed vector to __builtin_ia32_selectpd_256 and returns its result. */
3178 static __inline__ __m256d __DEFAULT_FN_ATTRS
3179 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3180 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3181 (__v4df)_mm256_mul_pd(__A, __B),
3182 (__v4df)_mm256_setzero_pd());
/* Feeds mask __U and _mm_mul_ps(__A, __B) to __builtin_ia32_selectps_128; the last operand (presumably __W) lies outside this excerpt. */
3185 static __inline__ __m128 __DEFAULT_FN_ATTRS
3186 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3187 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3188 (__v4sf)_mm_mul_ps(__A, __B),
/* Feeds mask __U, _mm_mul_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_128 and returns its result. */
3192 static __inline__ __m128 __DEFAULT_FN_ATTRS
3193 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3194 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3195 (__v4sf)_mm_mul_ps(__A, __B),
3196 (__v4sf)_mm_setzero_ps());
/* Feeds mask __U and _mm256_mul_ps(__A, __B) to __builtin_ia32_selectps_256; the last operand (presumably __W) lies outside this excerpt. */
3199 static __inline__ __m256 __DEFAULT_FN_ATTRS
3200 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3201 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3202 (__v8sf)_mm256_mul_ps(__A, __B),
/* Feeds mask __U, _mm256_mul_ps(__A, __B) and a zeroed vector to __builtin_ia32_selectps_256 and returns its result. */
3206 static __inline__ __m256 __DEFAULT_FN_ATTRS
3207 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3208 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3209 (__v8sf)_mm256_mul_ps(__A, __B),
3210 (__v8sf)_mm256_setzero_ps());
/* Feeds mask __U and _mm_abs_epi32(__A) to __builtin_ia32_selectd_128; the last operand (presumably __W) lies outside this excerpt. */
3213 static __inline__ __m128i __DEFAULT_FN_ATTRS
3214 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
3215 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3216 (__v4si)_mm_abs_epi32(__A),
/* Feeds mask __U, _mm_abs_epi32(__A) and a zeroed vector to __builtin_ia32_selectd_128 and returns its result. */
3220 static __inline__ __m128i __DEFAULT_FN_ATTRS
3221 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
3222 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3223 (__v4si)_mm_abs_epi32(__A),
3224 (__v4si)_mm_setzero_si128());
/* Feeds mask __U and _mm256_abs_epi32(__A) to __builtin_ia32_selectd_256; the
 * last operand (presumably __W) lies outside this excerpt.
 * The mask is cast to __mmask8, its declared type, as the other
 * __builtin_ia32_selectd_256 wrappers in this file do; widening it to
 * __mmask16 added nothing. */
3227 static __inline__ __m256i __DEFAULT_FN_ATTRS
3228 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
3229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3230 (__v8si)_mm256_abs_epi32(__A),
/* Feeds mask __U, _mm256_abs_epi32(__A) and a zeroed vector to
 * __builtin_ia32_selectd_256 and returns its result.
 * The mask is cast to __mmask8, its declared type, as the other
 * __builtin_ia32_selectd_256 wrappers in this file do; widening it to
 * __mmask16 added nothing. */
3234 static __inline__ __m256i __DEFAULT_FN_ATTRS
3235 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
3236 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3237 (__v8si)_mm256_abs_epi32(__A),
3238 (__v8si)_mm256_setzero_si256());
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pabsq128_mask; the remaining operand(s) lie outside this excerpt. */
3241 static __inline__ __m128i __DEFAULT_FN_ATTRS
3242 _mm_abs_epi64 (__m128i __A) {
3243 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3245 _mm_setzero_si128 (),
/* Hands __A (as __v2di) to __builtin_ia32_pabsq128_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
3249 static __inline__ __m128i __DEFAULT_FN_ATTRS
3250 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3251 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pabsq128_mask; the remaining operand(s) lie outside this excerpt. */
3256 static __inline__ __m128i __DEFAULT_FN_ATTRS
3257 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3258 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3260 _mm_setzero_si128 (),
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pabsq256_mask; the remaining operand(s) lie outside this excerpt. */
3264 static __inline__ __m256i __DEFAULT_FN_ATTRS
3265 _mm256_abs_epi64 (__m256i __A) {
3266 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3268 _mm256_setzero_si256 (),
/* Hands __A (as __v4di) to __builtin_ia32_pabsq256_mask; the other operands (presumably __W and __U) lie outside this excerpt. */
3272 static __inline__ __m256i __DEFAULT_FN_ATTRS
3273 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3274 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pabsq256_mask; the remaining operand(s) lie outside this excerpt. */
3279 static __inline__ __m256i __DEFAULT_FN_ATTRS
3280 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3281 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3283 _mm256_setzero_si256 (),
/* Feeds mask __M, _mm_max_epi32(__A, __B) and a zeroed vector to __builtin_ia32_selectd_128 and returns its result. */
3287 static __inline__ __m128i __DEFAULT_FN_ATTRS
3288 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3289 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3290 (__v4si)_mm_max_epi32(__A, __B),
3291 (__v4si)_mm_setzero_si128());
/* Feeds mask __M and _mm_max_epi32(__A, __B) to __builtin_ia32_selectd_128; the last operand (presumably __W) lies outside this excerpt. */
3294 static __inline__ __m128i __DEFAULT_FN_ATTRS
3295 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3297 (__v4si)_mm_max_epi32(__A, __B),
/* Feeds mask __M, _mm256_max_epi32(__A, __B) and a zeroed vector to __builtin_ia32_selectd_256 and returns its result. */
3301 static __inline__ __m256i __DEFAULT_FN_ATTRS
3302 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3303 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3304 (__v8si)_mm256_max_epi32(__A, __B),
3305 (__v8si)_mm256_setzero_si256());
/* Feeds mask __M and _mm256_max_epi32(__A, __B) to __builtin_ia32_selectd_256; the last operand (presumably __W) lies outside this excerpt. */
3308 static __inline__ __m256i __DEFAULT_FN_ATTRS
3309 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3310 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3311 (__v8si)_mm256_max_epi32(__A, __B),
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pmaxsq128_mask; the operands presumably carrying __B and __M lie outside this excerpt. */
3315 static __inline__ __m128i __DEFAULT_FN_ATTRS
3316 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3317 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3320 _mm_setzero_si128 (),
/* Hands __A (as __v2di) to __builtin_ia32_pmaxsq128_mask. The end of the parameter list and the other operands lie outside this excerpt. */
3324 static __inline__ __m128i __DEFAULT_FN_ATTRS
3325 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3327 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pmaxsq128_mask; the operand presumably carrying __B and any trailing operand lie outside this excerpt. */
3332 static __inline__ __m128i __DEFAULT_FN_ATTRS
3333 _mm_max_epi64 (__m128i __A, __m128i __B) {
3334 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3337 _mm_setzero_si128 (),
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pmaxsq256_mask; the operands presumably carrying __B and __M lie outside this excerpt. */
3341 static __inline__ __m256i __DEFAULT_FN_ATTRS
3342 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3343 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3346 _mm256_setzero_si256 (),
/* Hands __A (as __v4di) to __builtin_ia32_pmaxsq256_mask. The end of the parameter list and the other operands lie outside this excerpt. */
3350 static __inline__ __m256i __DEFAULT_FN_ATTRS
3351 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3353 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pmaxsq256_mask; the operand presumably carrying __B and any trailing operand lie outside this excerpt. */
3358 static __inline__ __m256i __DEFAULT_FN_ATTRS
3359 _mm256_max_epi64 (__m256i __A, __m256i __B) {
3360 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3363 _mm256_setzero_si256 (),
/* Feeds mask __M, _mm_max_epu32(__A, __B) and a zeroed vector to __builtin_ia32_selectd_128 and returns its result. */
3367 static __inline__ __m128i __DEFAULT_FN_ATTRS
3368 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3369 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3370 (__v4si)_mm_max_epu32(__A, __B),
3371 (__v4si)_mm_setzero_si128());
/* Feeds mask __M and _mm_max_epu32(__A, __B) to __builtin_ia32_selectd_128; the last operand (presumably __W) lies outside this excerpt. */
3374 static __inline__ __m128i __DEFAULT_FN_ATTRS
3375 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3376 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3377 (__v4si)_mm_max_epu32(__A, __B),
/* Feeds mask __M, _mm256_max_epu32(__A, __B) and a zeroed vector to __builtin_ia32_selectd_256 and returns its result. */
3381 static __inline__ __m256i __DEFAULT_FN_ATTRS
3382 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3383 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3384 (__v8si)_mm256_max_epu32(__A, __B),
3385 (__v8si)_mm256_setzero_si256());
/* Feeds mask __M and _mm256_max_epu32(__A, __B) to __builtin_ia32_selectd_256; the last operand (presumably __W) lies outside this excerpt. */
3388 static __inline__ __m256i __DEFAULT_FN_ATTRS
3389 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3390 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3391 (__v8si)_mm256_max_epu32(__A, __B),
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pmaxuq128_mask; the operands presumably carrying __B and __M lie outside this excerpt. */
3395 static __inline__ __m128i __DEFAULT_FN_ATTRS
3396 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3397 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3400 _mm_setzero_si128 (),
/* Hands __A (as __v2di) and a zeroed vector to __builtin_ia32_pmaxuq128_mask; the operand presumably carrying __B and any trailing operand lie outside this excerpt. */
3404 static __inline__ __m128i __DEFAULT_FN_ATTRS
3405 _mm_max_epu64 (__m128i __A, __m128i __B) {
3406 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3409 _mm_setzero_si128 (),
/* Hands __A (as __v2di) to __builtin_ia32_pmaxuq128_mask. The end of the parameter list and the other operands lie outside this excerpt. */
3413 static __inline__ __m128i __DEFAULT_FN_ATTRS
3414 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3416 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pmaxuq256_mask; the operands presumably carrying __B and __M lie outside this excerpt. */
3421 static __inline__ __m256i __DEFAULT_FN_ATTRS
3422 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3423 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3426 _mm256_setzero_si256 (),
/* Hands __A (as __v4di) and a zeroed vector to __builtin_ia32_pmaxuq256_mask; the operand presumably carrying __B and any trailing operand lie outside this excerpt. */
3430 static __inline__ __m256i __DEFAULT_FN_ATTRS
3431 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3432 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3435 _mm256_setzero_si256 (),
/* Hands __A (as __v4di) to __builtin_ia32_pmaxuq256_mask. The end of the parameter list and the other operands lie outside this excerpt. */
3439 static __inline__ __m256i __DEFAULT_FN_ATTRS
3440 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3442 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
/* Feeds mask __M, _mm_min_epi32(__A, __B) and a zeroed vector to __builtin_ia32_selectd_128 and returns its result. */
3447 static __inline__ __m128i __DEFAULT_FN_ATTRS
3448 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3449 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3450 (__v4si)_mm_min_epi32(__A, __B),
3451 (__v4si)_mm_setzero_si128());
/* Feeds mask __M and _mm_min_epi32(__A, __B) to __builtin_ia32_selectd_128; the last operand (presumably __W) lies outside this excerpt. */
3454 static __inline__ __m128i __DEFAULT_FN_ATTRS
3455 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3457 (__v4si)_mm_min_epi32(__A, __B),
3461 static __inline__ __m256i __DEFAULT_FN_ATTRS
3462 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3463 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3464 (__v8si)_mm256_min_epi32(__A, __B),
3465 (__v8si)_mm256_setzero_si256());
3468 static __inline__ __m256i __DEFAULT_FN_ATTRS
3469 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3470 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3471 (__v8si)_mm256_min_epi32(__A, __B),
3475 static __inline__ __m128i __DEFAULT_FN_ATTRS
3476 _mm_min_epi64 (__m128i __A, __m128i __B) {
3477 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3480 _mm_setzero_si128 (),
3484 static __inline__ __m128i __DEFAULT_FN_ATTRS
3485 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3487 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3492 static __inline__ __m128i __DEFAULT_FN_ATTRS
3493 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3494 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3497 _mm_setzero_si128 (),
3501 static __inline__ __m256i __DEFAULT_FN_ATTRS
3502 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3503 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3506 _mm256_setzero_si256 (),
3510 static __inline__ __m256i __DEFAULT_FN_ATTRS
3511 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3513 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3518 static __inline__ __m256i __DEFAULT_FN_ATTRS
3519 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3520 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3523 _mm256_setzero_si256 (),
3527 static __inline__ __m128i __DEFAULT_FN_ATTRS
3528 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3529 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3530 (__v4si)_mm_min_epu32(__A, __B),
3531 (__v4si)_mm_setzero_si128());
3534 static __inline__ __m128i __DEFAULT_FN_ATTRS
3535 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3536 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3537 (__v4si)_mm_min_epu32(__A, __B),
3541 static __inline__ __m256i __DEFAULT_FN_ATTRS
3542 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3544 (__v8si)_mm256_min_epu32(__A, __B),
3545 (__v8si)_mm256_setzero_si256());
3548 static __inline__ __m256i __DEFAULT_FN_ATTRS
3549 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3550 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3551 (__v8si)_mm256_min_epu32(__A, __B),
3555 static __inline__ __m128i __DEFAULT_FN_ATTRS
3556 _mm_min_epu64 (__m128i __A, __m128i __B) {
3557 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3560 _mm_setzero_si128 (),
3564 static __inline__ __m128i __DEFAULT_FN_ATTRS
3565 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3567 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3572 static __inline__ __m128i __DEFAULT_FN_ATTRS
3573 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3574 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3577 _mm_setzero_si128 (),
3581 static __inline__ __m256i __DEFAULT_FN_ATTRS
3582 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3583 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3586 _mm256_setzero_si256 (),
3590 static __inline__ __m256i __DEFAULT_FN_ATTRS
3591 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3593 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3598 static __inline__ __m256i __DEFAULT_FN_ATTRS
3599 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3600 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3603 _mm256_setzero_si256 (),
3607 #define _mm_roundscale_pd(A, imm) __extension__ ({ \
3608 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3610 (__v2df)_mm_setzero_pd(), \
3614 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3615 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3617 (__v2df)(__m128d)(W), \
3621 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3622 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3624 (__v2df)_mm_setzero_pd(), \
3628 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \
3629 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3631 (__v4df)_mm256_setzero_pd(), \
3635 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3636 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3638 (__v4df)(__m256d)(W), \
3642 #define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3643 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3645 (__v4df)_mm256_setzero_pd(), \
3648 #define _mm_roundscale_ps(A, imm) __extension__ ({ \
3649 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3650 (__v4sf)_mm_setzero_ps(), \
3654 #define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3655 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3656 (__v4sf)(__m128)(W), \
3660 #define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3661 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3662 (__v4sf)_mm_setzero_ps(), \
3665 #define _mm256_roundscale_ps(A, imm) __extension__ ({ \
3666 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3667 (__v8sf)_mm256_setzero_ps(), \
3670 #define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3671 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3672 (__v8sf)(__m256)(W), \
3676 #define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3677 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3678 (__v8sf)_mm256_setzero_ps(), \
3681 static __inline__ __m128d __DEFAULT_FN_ATTRS
3682 _mm_scalef_pd (__m128d __A, __m128d __B) {
3683 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3690 static __inline__ __m128d __DEFAULT_FN_ATTRS
3691 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3693 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3699 static __inline__ __m128d __DEFAULT_FN_ATTRS
3700 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3701 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3708 static __inline__ __m256d __DEFAULT_FN_ATTRS
3709 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3710 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3713 _mm256_setzero_pd (),
3717 static __inline__ __m256d __DEFAULT_FN_ATTRS
3718 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3720 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3726 static __inline__ __m256d __DEFAULT_FN_ATTRS
3727 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3728 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3731 _mm256_setzero_pd (),
3735 static __inline__ __m128 __DEFAULT_FN_ATTRS
3736 _mm_scalef_ps (__m128 __A, __m128 __B) {
3737 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3744 static __inline__ __m128 __DEFAULT_FN_ATTRS
3745 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3746 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3752 static __inline__ __m128 __DEFAULT_FN_ATTRS
3753 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3754 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3761 static __inline__ __m256 __DEFAULT_FN_ATTRS
3762 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3763 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3766 _mm256_setzero_ps (),
3770 static __inline__ __m256 __DEFAULT_FN_ATTRS
3771 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3773 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3779 static __inline__ __m256 __DEFAULT_FN_ATTRS
3780 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3781 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3784 _mm256_setzero_ps (),
/* Unmasked form: forwards to __builtin_ia32_scatterdiv2df with an all-ones
 * __mmask8. addr is cast to double *, index to __v2di, v1 to __v2df and
 * scale to int. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv2df with the caller's
 * mask cast to __mmask8. addr is cast to double *, index to __v2di, v1 to
 * __v2df and scale to int. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scatterdiv2di with an all-ones
 * __mmask8. addr is cast to long long *, index and v1 to __v2di, and scale
 * to int. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv2di with the caller's
 * mask cast to __mmask8. addr is cast to long long *, index and v1 to
 * __v2di, and scale to int. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4df with an all-ones
 * __mmask8. addr is cast to double *, index to __v4di, v1 to __v4df and
 * scale to int. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv4df with the caller's
 * mask cast to __mmask8. addr is cast to double *, index to __v4di, v1 to
 * __v4df and scale to int. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4di with an all-ones
 * __mmask8. addr is cast to long long *, index and v1 to __v4di, and scale
 * to int. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv4di with the caller's
 * mask cast to __mmask8. addr is cast to long long *, index and v1 to
 * __v4di, and scale to int. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3828 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3829 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
3830 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3833 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3834 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
3835 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4si with an all-ones
 * __mmask8. addr is cast to int *, index to __v2di, v1 to __v4si and scale
 * to int. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv4si with the caller's
 * mask cast to __mmask8. addr is cast to int *, index to __v2di, v1 to
 * __v4si and scale to int. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3848 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3849 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
3850 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
3853 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3854 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
3855 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scatterdiv8si with an all-ones
 * __mmask8. addr is cast to int *, index to __v4di (from __m256i), v1 to
 * __v4si (from __m128i) and scale to int. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv8si with the caller's
 * mask cast to __mmask8. addr is cast to int *, index to __v4di (from
 * __m256i), v1 to __v4si (from __m128i) and scale to int. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scattersiv2df with an all-ones
 * __mmask8. addr is cast to double *, index to __v4si, v1 to __v2df and
 * scale to int. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv2df with the caller's
 * mask cast to __mmask8. addr is cast to double *, index to __v4si, v1 to
 * __v2df and scale to int. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scattersiv2di with an all-ones
 * __mmask8. addr is cast to long long *, index to __v4si, v1 to __v2di and
 * scale to int. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv2di with the caller's
 * mask cast to __mmask8. addr is cast to long long *, index to __v4si, v1
 * to __v2di and scale to int. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scattersiv4df with an all-ones
 * __mmask8. addr is cast to double *, index to __v4si (from __m128i), v1 to
 * __v4df (from __m256d) and scale to int. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv4df with the caller's
 * mask cast to __mmask8. addr is cast to double *, index to __v4si (from
 * __m128i), v1 to __v4df (from __m256d) and scale to int. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
/* Unmasked form: forwards to __builtin_ia32_scattersiv4di with an all-ones
 * __mmask8. addr is cast to long long *, index to __v4si (from __m128i),
 * v1 to __v4di (from __m256i) and scale to int. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv4di with the caller's
 * mask cast to __mmask8. addr is cast to long long *, index to __v4si (from
 * __m128i), v1 to __v4di (from __m256i) and scale to int. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3908 #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
3909 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
3910 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
3913 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3914 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
3915 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scattersiv4si with an all-ones
 * __mmask8. addr is cast to int *, index and v1 to __v4si, and scale to
 * int. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv4si with the caller's
 * mask cast to __mmask8. addr is cast to int *, index and v1 to __v4si, and
 * scale to int. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3928 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
3929 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
3930 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
3933 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3934 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
3935 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scattersiv8si with an all-ones
 * __mmask8. addr is cast to int *, index and v1 to __v8si, and scale to
 * int. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scattersiv8si with the caller's
 * mask cast to __mmask8. addr is cast to int *, index and v1 to __v8si, and
 * scale to int. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
3948 static __inline__ __m128d __DEFAULT_FN_ATTRS
3949 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3950 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3951 (__v2df)_mm_sqrt_pd(__A),
3955 static __inline__ __m128d __DEFAULT_FN_ATTRS
3956 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3957 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3958 (__v2df)_mm_sqrt_pd(__A),
3959 (__v2df)_mm_setzero_pd());
3962 static __inline__ __m256d __DEFAULT_FN_ATTRS
3963 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3964 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3965 (__v4df)_mm256_sqrt_pd(__A),
3969 static __inline__ __m256d __DEFAULT_FN_ATTRS
3970 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3971 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3972 (__v4df)_mm256_sqrt_pd(__A),
3973 (__v4df)_mm256_setzero_pd());
3976 static __inline__ __m128 __DEFAULT_FN_ATTRS
3977 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3978 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3979 (__v4sf)_mm_sqrt_ps(__A),
3983 static __inline__ __m128 __DEFAULT_FN_ATTRS
3984 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3985 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3986 (__v4sf)_mm_sqrt_ps(__A),
3987 (__v4sf)_mm_setzero_pd());
3990 static __inline__ __m256 __DEFAULT_FN_ATTRS
3991 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3992 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3993 (__v8sf)_mm256_sqrt_ps(__A),
3997 static __inline__ __m256 __DEFAULT_FN_ATTRS
3998 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3999 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4000 (__v8sf)_mm256_sqrt_ps(__A),
4001 (__v8sf)_mm256_setzero_ps());
4004 static __inline__ __m128d __DEFAULT_FN_ATTRS
4005 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
4006 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
4007 (__v2df)_mm_sub_pd(__A, __B),
4011 static __inline__ __m128d __DEFAULT_FN_ATTRS
4012 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
4013 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
4014 (__v2df)_mm_sub_pd(__A, __B),
4015 (__v2df)_mm_setzero_pd());
4018 static __inline__ __m256d __DEFAULT_FN_ATTRS
4019 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
4020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
4021 (__v4df)_mm256_sub_pd(__A, __B),
4025 static __inline__ __m256d __DEFAULT_FN_ATTRS
4026 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
4027 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
4028 (__v4df)_mm256_sub_pd(__A, __B),
4029 (__v4df)_mm256_setzero_pd());
4032 static __inline__ __m128 __DEFAULT_FN_ATTRS
4033 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
4034 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
4035 (__v4sf)_mm_sub_ps(__A, __B),
4039 static __inline__ __m128 __DEFAULT_FN_ATTRS
4040 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
4041 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
4042 (__v4sf)_mm_sub_ps(__A, __B),
4043 (__v4sf)_mm_setzero_ps());
4046 static __inline__ __m256 __DEFAULT_FN_ATTRS
4047 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
4048 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4049 (__v8sf)_mm256_sub_ps(__A, __B),
4053 static __inline__ __m256 __DEFAULT_FN_ATTRS
4054 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
4055 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4056 (__v8sf)_mm256_sub_ps(__A, __B),
4057 (__v8sf)_mm256_setzero_ps());
4060 static __inline__ __m128i __DEFAULT_FN_ATTRS
4061 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
4063 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
4070 static __inline__ __m256i __DEFAULT_FN_ATTRS
4071 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
4072 __mmask8 __U, __m256i __B) {
4073 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
4080 static __inline__ __m128d __DEFAULT_FN_ATTRS
4081 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
4083 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
4091 static __inline__ __m256d __DEFAULT_FN_ATTRS
4092 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
4094 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
4102 static __inline__ __m128 __DEFAULT_FN_ATTRS
4103 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
4105 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
4112 static __inline__ __m256 __DEFAULT_FN_ATTRS
4113 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
4115 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
4122 static __inline__ __m128i __DEFAULT_FN_ATTRS
4123 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
4125 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
4132 static __inline__ __m256i __DEFAULT_FN_ATTRS
4133 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
4134 __mmask8 __U, __m256i __B) {
4135 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
4142 static __inline__ __m128i __DEFAULT_FN_ATTRS
4143 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
4144 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4151 static __inline__ __m128i __DEFAULT_FN_ATTRS
4152 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
4154 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4161 static __inline__ __m128i __DEFAULT_FN_ATTRS
4162 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
4164 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
4172 static __inline__ __m256i __DEFAULT_FN_ATTRS
4173 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
4174 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4181 static __inline__ __m256i __DEFAULT_FN_ATTRS
4182 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
4184 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4191 static __inline__ __m256i __DEFAULT_FN_ATTRS
4192 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
4193 __m256i __I, __m256i __B) {
4194 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
4202 static __inline__ __m128d __DEFAULT_FN_ATTRS
4203 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
4204 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4212 static __inline__ __m128d __DEFAULT_FN_ATTRS
4213 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
4215 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4223 static __inline__ __m128d __DEFAULT_FN_ATTRS
4224 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
4226 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
4234 static __inline__ __m256d __DEFAULT_FN_ATTRS
4235 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
4236 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4244 static __inline__ __m256d __DEFAULT_FN_ATTRS
4245 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
4247 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4255 static __inline__ __m256d __DEFAULT_FN_ATTRS
4256 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
4258 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
4266 static __inline__ __m128 __DEFAULT_FN_ATTRS
4267 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
4268 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4275 static __inline__ __m128 __DEFAULT_FN_ATTRS
4276 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
4278 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4285 static __inline__ __m128 __DEFAULT_FN_ATTRS
4286 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
4288 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
4296 static __inline__ __m256 __DEFAULT_FN_ATTRS
4297 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
4298 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4305 static __inline__ __m256 __DEFAULT_FN_ATTRS
4306 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
4308 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4315 static __inline__ __m256 __DEFAULT_FN_ATTRS
4316 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
4318 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
4326 static __inline__ __m128i __DEFAULT_FN_ATTRS
4327 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
4328 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4335 static __inline__ __m128i __DEFAULT_FN_ATTRS
4336 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
4338 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4345 static __inline__ __m128i __DEFAULT_FN_ATTRS
4346 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
4348 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
4357 static __inline__ __m256i __DEFAULT_FN_ATTRS
4358 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
4359 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4366 static __inline__ __m256i __DEFAULT_FN_ATTRS
4367 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
4369 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4376 static __inline__ __m256i __DEFAULT_FN_ATTRS
4377 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
4378 __m256i __I, __m256i __B) {
4379 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
4387 static __inline__ __m128i __DEFAULT_FN_ATTRS
4388 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4390 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4391 (__v4si)_mm_cvtepi8_epi32(__A),
4395 static __inline__ __m128i __DEFAULT_FN_ATTRS
4396 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4398 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4399 (__v4si)_mm_cvtepi8_epi32(__A),
4400 (__v4si)_mm_setzero_si128());
4403 static __inline__ __m256i __DEFAULT_FN_ATTRS
4404 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4406 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4407 (__v8si)_mm256_cvtepi8_epi32(__A),
4411 static __inline__ __m256i __DEFAULT_FN_ATTRS
4412 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4414 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4415 (__v8si)_mm256_cvtepi8_epi32(__A),
4416 (__v8si)_mm256_setzero_si256());
4419 static __inline__ __m128i __DEFAULT_FN_ATTRS
4420 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4422 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4423 (__v2di)_mm_cvtepi8_epi64(__A),
4427 static __inline__ __m128i __DEFAULT_FN_ATTRS
4428 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4430 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4431 (__v2di)_mm_cvtepi8_epi64(__A),
4432 (__v2di)_mm_setzero_si128());
4435 static __inline__ __m256i __DEFAULT_FN_ATTRS
4436 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4438 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4439 (__v4di)_mm256_cvtepi8_epi64(__A),
4443 static __inline__ __m256i __DEFAULT_FN_ATTRS
4444 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4446 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4447 (__v4di)_mm256_cvtepi8_epi64(__A),
4448 (__v4di)_mm256_setzero_si256());
4451 static __inline__ __m128i __DEFAULT_FN_ATTRS
4452 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4454 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4455 (__v2di)_mm_cvtepi32_epi64(__X),
4459 static __inline__ __m128i __DEFAULT_FN_ATTRS
4460 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4463 (__v2di)_mm_cvtepi32_epi64(__X),
4464 (__v2di)_mm_setzero_si128());
4467 static __inline__ __m256i __DEFAULT_FN_ATTRS
4468 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4471 (__v4di)_mm256_cvtepi32_epi64(__X),
4475 static __inline__ __m256i __DEFAULT_FN_ATTRS
4476 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4479 (__v4di)_mm256_cvtepi32_epi64(__X),
4480 (__v4di)_mm256_setzero_si256());
4483 static __inline__ __m128i __DEFAULT_FN_ATTRS
4484 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4486 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4487 (__v4si)_mm_cvtepi16_epi32(__A),
4491 static __inline__ __m128i __DEFAULT_FN_ATTRS
4492 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4494 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4495 (__v4si)_mm_cvtepi16_epi32(__A),
4496 (__v4si)_mm_setzero_si128());
4499 static __inline__ __m256i __DEFAULT_FN_ATTRS
4500 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4502 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4503 (__v8si)_mm256_cvtepi16_epi32(__A),
4507 static __inline__ __m256i __DEFAULT_FN_ATTRS
4508 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4510 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4511 (__v8si)_mm256_cvtepi16_epi32(__A),
4512 (__v8si)_mm256_setzero_si256());
4515 static __inline__ __m128i __DEFAULT_FN_ATTRS
4516 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4518 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4519 (__v2di)_mm_cvtepi16_epi64(__A),
4523 static __inline__ __m128i __DEFAULT_FN_ATTRS
4524 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4526 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4527 (__v2di)_mm_cvtepi16_epi64(__A),
4528 (__v2di)_mm_setzero_si128());
4531 static __inline__ __m256i __DEFAULT_FN_ATTRS
4532 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4534 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4535 (__v4di)_mm256_cvtepi16_epi64(__A),
4539 static __inline__ __m256i __DEFAULT_FN_ATTRS
4540 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4542 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4543 (__v4di)_mm256_cvtepi16_epi64(__A),
4544 (__v4di)_mm256_setzero_si256());
4548 static __inline__ __m128i __DEFAULT_FN_ATTRS
4549 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4551 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4552 (__v4si)_mm_cvtepu8_epi32(__A),
4556 static __inline__ __m128i __DEFAULT_FN_ATTRS
4557 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4559 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4560 (__v4si)_mm_cvtepu8_epi32(__A),
4561 (__v4si)_mm_setzero_si128());
4564 static __inline__ __m256i __DEFAULT_FN_ATTRS
4565 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4567 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4568 (__v8si)_mm256_cvtepu8_epi32(__A),
4572 static __inline__ __m256i __DEFAULT_FN_ATTRS
4573 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4575 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4576 (__v8si)_mm256_cvtepu8_epi32(__A),
4577 (__v8si)_mm256_setzero_si256());
4580 static __inline__ __m128i __DEFAULT_FN_ATTRS
4581 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4583 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4584 (__v2di)_mm_cvtepu8_epi64(__A),
4588 static __inline__ __m128i __DEFAULT_FN_ATTRS
4589 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592 (__v2di)_mm_cvtepu8_epi64(__A),
4593 (__v2di)_mm_setzero_si128());
4596 static __inline__ __m256i __DEFAULT_FN_ATTRS
4597 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4599 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4600 (__v4di)_mm256_cvtepu8_epi64(__A),
/* Zero-masking form of _mm256_cvtepu8_epi64: __builtin_ia32_selectq_256 takes
   __U as the mask, the _mm256_cvtepu8_epi64(__A) result, and
   _mm256_setzero_si256() as the last operand. */
4604 static __inline__ __m256i __DEFAULT_FN_ATTRS
4605 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608 (__v4di)_mm256_cvtepu8_epi64(__A),
4609 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu32_epi64: passes the _mm_cvtepu32_epi64(__X)
   result to __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
4612 static __inline__ __m128i __DEFAULT_FN_ATTRS
4613 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4615 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4616 (__v2di)_mm_cvtepu32_epi64(__X),
/* Zero-masking form of _mm_cvtepu32_epi64: __builtin_ia32_selectq_128 takes
   __U as the mask, the _mm_cvtepu32_epi64(__X) result, and
   _mm_setzero_si128() as the last operand. */
4620 static __inline__ __m128i __DEFAULT_FN_ATTRS
4621 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4623 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4624 (__v2di)_mm_cvtepu32_epi64(__X),
4625 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu32_epi64: passes the _mm256_cvtepu32_epi64(__X)
   result to __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
4628 static __inline__ __m256i __DEFAULT_FN_ATTRS
4629 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4631 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4632 (__v4di)_mm256_cvtepu32_epi64(__X),
/* Zero-masking form of _mm256_cvtepu32_epi64: __builtin_ia32_selectq_256
   takes __U as the mask, the _mm256_cvtepu32_epi64(__X) result, and
   _mm256_setzero_si256() as the last operand. */
4636 static __inline__ __m256i __DEFAULT_FN_ATTRS
4637 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4639 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4640 (__v4di)_mm256_cvtepu32_epi64(__X),
4641 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu16_epi32: passes the _mm_cvtepu16_epi32(__A)
   result to __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
4644 static __inline__ __m128i __DEFAULT_FN_ATTRS
4645 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4647 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4648 (__v4si)_mm_cvtepu16_epi32(__A),
/* Zero-masking form of _mm_cvtepu16_epi32: __builtin_ia32_selectd_128 takes
   __U as the mask, the _mm_cvtepu16_epi32(__A) result, and
   _mm_setzero_si128() as the last operand. */
4652 static __inline__ __m128i __DEFAULT_FN_ATTRS
4653 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4655 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4656 (__v4si)_mm_cvtepu16_epi32(__A),
4657 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu16_epi32: passes the _mm256_cvtepu16_epi32(__A)
   result to __builtin_ia32_selectd_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
4660 static __inline__ __m256i __DEFAULT_FN_ATTRS
4661 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4663 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4664 (__v8si)_mm256_cvtepu16_epi32(__A),
/* Zero-masking form of _mm256_cvtepu16_epi32: __builtin_ia32_selectd_256
   takes __U as the mask, the _mm256_cvtepu16_epi32(__A) result, and
   _mm256_setzero_si256() as the last operand. */
4668 static __inline__ __m256i __DEFAULT_FN_ATTRS
4669 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4671 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4672 (__v8si)_mm256_cvtepu16_epi32(__A),
4673 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu16_epi64: passes the _mm_cvtepu16_epi64(__A)
   result to __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
4676 static __inline__ __m128i __DEFAULT_FN_ATTRS
4677 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4679 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4680 (__v2di)_mm_cvtepu16_epi64(__A),
/* Zero-masking form of _mm_cvtepu16_epi64: __builtin_ia32_selectq_128 takes
   __U as the mask, the _mm_cvtepu16_epi64(__A) result, and
   _mm_setzero_si128() as the last operand. */
4684 static __inline__ __m128i __DEFAULT_FN_ATTRS
4685 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4688 (__v2di)_mm_cvtepu16_epi64(__A),
4689 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu16_epi64: passes the _mm256_cvtepu16_epi64(__A)
   result to __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
4692 static __inline__ __m256i __DEFAULT_FN_ATTRS
4693 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4695 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4696 (__v4di)_mm256_cvtepu16_epi64(__A),
/* Zero-masking form of _mm256_cvtepu16_epi64: __builtin_ia32_selectq_256
   takes __U as the mask, the _mm256_cvtepu16_epi64(__A) result, and
   _mm256_setzero_si256() as the last operand. */
4700 static __inline__ __m256i __DEFAULT_FN_ATTRS
4701 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4703 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4704 (__v4di)_mm256_cvtepu16_epi64(__A),
4705 (__v4di)_mm256_setzero_si256());
/* Expands to __builtin_ia32_prold128_mask with (a) viewed as __v4si, (b) cast
   to int and _mm_setzero_si128() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4709 #define _mm_rol_epi32(a, b) __extension__ ({\
4710 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4711 (__v4si)_mm_setzero_si128(), \
/* Masked form: expands to __builtin_ia32_prold128_mask with (a) viewed as
   __v4si, (b) cast to int, (w) as the third operand and (u) as the
   __mmask8 mask. */
4714 #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
4715 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4716 (__v4si)(__m128i)(w), (__mmask8)(u)); })
/* Zero-masking form: expands to __builtin_ia32_prold128_mask with (a) viewed
   as __v4si, (b) cast to int and _mm_setzero_si128() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(u) -- confirm. */
4718 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
4719 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4720 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_prold256_mask with (a) viewed as __v8si, (b) cast
   to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4723 #define _mm256_rol_epi32(a, b) __extension__ ({\
4724 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4725 (__v8si)_mm256_setzero_si256(), \
/* Masked form: expands to __builtin_ia32_prold256_mask with (a) viewed as
   __v8si, (b) cast to int, (w) as the third operand and (u) as the
   __mmask8 mask. */
4728 #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
4729 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4730 (__v8si)(__m256i)(w), (__mmask8)(u)); })
/* Zero-masking form: expands to __builtin_ia32_prold256_mask with (a) viewed
   as __v8si, (b) cast to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(u) -- confirm. */
4732 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
4733 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4734 (__v8si)_mm256_setzero_si256(), \
/* Expands to __builtin_ia32_prolq128_mask with (a) viewed as __v2di, (b) cast
   to int and _mm_setzero_di() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4737 #define _mm_rol_epi64(a, b) __extension__ ({\
4738 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4739 (__v2di)_mm_setzero_di(), \
/* Masked form: expands to __builtin_ia32_prolq128_mask with (a) viewed as
   __v2di, (b) cast to int, (w) as the third operand and (u) as the
   __mmask8 mask. */
4742 #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
4743 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4744 (__v2di)(__m128i)(w), (__mmask8)(u)); })
/* Zero-masking form: expands to __builtin_ia32_prolq128_mask with (a) viewed
   as __v2di, (b) cast to int and _mm_setzero_di() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(u) -- confirm. */
4746 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
4747 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4748 (__v2di)_mm_setzero_di(), \
/* Expands to __builtin_ia32_prolq256_mask with (a) viewed as __v4di, (b) cast
   to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4751 #define _mm256_rol_epi64(a, b) __extension__ ({\
4752 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4753 (__v4di)_mm256_setzero_si256(), \
/* Masked form: expands to __builtin_ia32_prolq256_mask with (a) viewed as
   __v4di, (b) cast to int, (w) as the third operand and (u) as the
   __mmask8 mask. */
4756 #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
4757 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4758 (__v4di)(__m256i)(w), (__mmask8)(u)); })
/* Zero-masking form: expands to __builtin_ia32_prolq256_mask with (a) viewed
   as __v4di, (b) cast to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(u) -- confirm. */
4760 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
4761 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4762 (__v4di)_mm256_setzero_si256(), \
/* Wraps __builtin_ia32_prolvd128_mask: (__v4si)__A is the first operand and
   _mm_setzero_si128() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4si)__B and an all-ones mask -- confirm. */
4765 static __inline__ __m128i __DEFAULT_FN_ATTRS
4766 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4768 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4771 _mm_setzero_si128 (),
/* Masked form wrapping __builtin_ia32_prolvd128_mask with (__v4si)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
4775 static __inline__ __m128i __DEFAULT_FN_ATTRS
4776 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4779 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
/* Zero-masking form wrapping __builtin_ia32_prolvd128_mask: (__v4si)__A is
   the first operand and _mm_setzero_si128() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4si)__B and (__mmask8)__U -- confirm. */
4785 static __inline__ __m128i __DEFAULT_FN_ATTRS
4786 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4788 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4791 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_prolvd256_mask: (__v8si)__A is the first operand and
   _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v8si)__B and an all-ones mask -- confirm. */
4795 static __inline__ __m256i __DEFAULT_FN_ATTRS
4796 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4798 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4801 _mm256_setzero_si256 (),
/* Masked form wrapping __builtin_ia32_prolvd256_mask with (__v8si)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
4805 static __inline__ __m256i __DEFAULT_FN_ATTRS
4806 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4809 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
/* Zero-masking form wrapping __builtin_ia32_prolvd256_mask: (__v8si)__A is
   the first operand and _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v8si)__B and (__mmask8)__U -- confirm. */
4815 static __inline__ __m256i __DEFAULT_FN_ATTRS
4816 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4818 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4821 _mm256_setzero_si256 (),
/* Wraps __builtin_ia32_prolvq128_mask with (__v2di)__A as the first operand.
   NOTE(review): every later operand is cut off in this listing; presumably
   (__v2di)__B, a zero vector and an all-ones mask -- confirm. */
4825 static __inline__ __m128i __DEFAULT_FN_ATTRS
4826 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4828 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
/* Masked form wrapping __builtin_ia32_prolvq128_mask with (__v2di)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
4835 static __inline__ __m128i __DEFAULT_FN_ATTRS
4836 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
4839 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
/* Zero-masking form wrapping __builtin_ia32_prolvq128_mask with (__v2di)__A
   as the first operand.
   NOTE(review): every later operand is cut off in this listing; presumably
   (__v2di)__B, a zero vector and (__mmask8)__U -- confirm. */
4845 static __inline__ __m128i __DEFAULT_FN_ATTRS
4846 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4848 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
/* Wraps __builtin_ia32_prolvq256_mask: (__v4di)__A is the first operand and
   _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4di)__B and an all-ones mask -- confirm. */
4855 static __inline__ __m256i __DEFAULT_FN_ATTRS
4856 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4858 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4861 _mm256_setzero_si256 (),
/* Masked form wrapping __builtin_ia32_prolvq256_mask with (__v4di)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
4865 static __inline__ __m256i __DEFAULT_FN_ATTRS
4866 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
4869 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
/* Zero-masking form wrapping __builtin_ia32_prolvq256_mask: (__v4di)__A is
   the first operand and _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4di)__B and (__mmask8)__U -- confirm. */
4875 static __inline__ __m256i __DEFAULT_FN_ATTRS
4876 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4878 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4881 _mm256_setzero_si256 (),
/* Expands to __builtin_ia32_prord128_mask with (A) viewed as __v4si, (B) cast
   to int and _mm_setzero_si128() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4885 #define _mm_ror_epi32(A, B) __extension__ ({ \
4886 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4887 (__v4si)_mm_setzero_si128(), \
/* Masked form: expands to __builtin_ia32_prord128_mask with (A) viewed as
   __v4si, (B) cast to int, (W) as the third operand and (U) as the
   __mmask8 mask. */
4890 #define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
4891 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4892 (__v4si)(__m128i)(W), (__mmask8)(U)); })
/* Zero-masking form: expands to __builtin_ia32_prord128_mask with (A) viewed
   as __v4si, (B) cast to int and _mm_setzero_si128() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(U) -- confirm. */
4894 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
4895 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4896 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_prord256_mask with (A) viewed as __v8si, (B) cast
   to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4899 #define _mm256_ror_epi32(A, B) __extension__ ({ \
4900 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4901 (__v8si)_mm256_setzero_si256(), \
/* Masked form: expands to __builtin_ia32_prord256_mask with (A) viewed as
   __v8si, (B) cast to int, (W) as the third operand and (U) as the
   __mmask8 mask. */
4904 #define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
4905 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4906 (__v8si)(__m256i)(W), (__mmask8)(U)); })
/* Zero-masking form: expands to __builtin_ia32_prord256_mask with (A) viewed
   as __v8si, (B) cast to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(U) -- confirm. */
4908 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
4909 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4910 (__v8si)_mm256_setzero_si256(), \
/* Expands to __builtin_ia32_prorq128_mask with (A) viewed as __v2di, (B) cast
   to int and _mm_setzero_di() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4913 #define _mm_ror_epi64(A, B) __extension__ ({ \
4914 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4915 (__v2di)_mm_setzero_di(), \
/* Masked form: expands to __builtin_ia32_prorq128_mask with (A) viewed as
   __v2di, (B) cast to int, (W) as the third operand and (U) as the
   __mmask8 mask. */
4918 #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
4919 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4920 (__v2di)(__m128i)(W), (__mmask8)(U)); })
/* Zero-masking form: expands to __builtin_ia32_prorq128_mask with (A) viewed
   as __v2di, (B) cast to int and _mm_setzero_di() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(U) -- confirm. */
4922 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
4923 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4924 (__v2di)_mm_setzero_di(), \
/* Expands to __builtin_ia32_prorq256_mask with (A) viewed as __v4di, (B) cast
   to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   all-ones (unmasked) -- confirm. */
4927 #define _mm256_ror_epi64(A, B) __extension__ ({ \
4928 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4929 (__v4di)_mm256_setzero_si256(), \
/* Masked form: expands to __builtin_ia32_prorq256_mask with (A) viewed as
   __v4di, (B) cast to int, (W) as the third operand and (U) as the
   __mmask8 mask. */
4932 #define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
4933 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4934 (__v4di)(__m256i)(W), (__mmask8)(U)); })
/* Zero-masking form: expands to __builtin_ia32_prorq256_mask with (A) viewed
   as __v4di, (B) cast to int and _mm256_setzero_si256() as the third operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(U) -- confirm. */
4936 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
4937 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4938 (__v4di)_mm256_setzero_si256(), \
/* Masked form of _mm_sll_epi32: passes the _mm_sll_epi32(__A, __B) result to
   __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
4941 static __inline__ __m128i __DEFAULT_FN_ATTRS
4942 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4944 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4945 (__v4si)_mm_sll_epi32(__A, __B),
/* Zero-masking form of _mm_sll_epi32: __builtin_ia32_selectd_128 takes __U as
   the mask, the _mm_sll_epi32(__A, __B) result, and _mm_setzero_si128() as
   the last operand. */
4949 static __inline__ __m128i __DEFAULT_FN_ATTRS
4950 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4952 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4953 (__v4si)_mm_sll_epi32(__A, __B),
4954 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_sll_epi32 (note __B is a 128-bit __m128i): passes the
   _mm256_sll_epi32(__A, __B) result to __builtin_ia32_selectd_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
4957 static __inline__ __m256i __DEFAULT_FN_ATTRS
4958 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4960 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4961 (__v8si)_mm256_sll_epi32(__A, __B),
/* Zero-masking form of _mm256_sll_epi32 (note __B is a 128-bit __m128i):
   __builtin_ia32_selectd_256 takes __U as the mask, the
   _mm256_sll_epi32(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
4965 static __inline__ __m256i __DEFAULT_FN_ATTRS
4966 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4968 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4969 (__v8si)_mm256_sll_epi32(__A, __B),
4970 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_slli_epi32 with an int count __B: passes the
   _mm_slli_epi32(__A, __B) result to __builtin_ia32_selectd_128 with __U as
   the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
4973 static __inline__ __m128i __DEFAULT_FN_ATTRS
4974 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4977 (__v4si)_mm_slli_epi32(__A, __B),
/* Zero-masking form of _mm_slli_epi32 with an int count __B:
   __builtin_ia32_selectd_128 takes __U as the mask, the
   _mm_slli_epi32(__A, __B) result, and _mm_setzero_si128() as the last
   operand. */
4981 static __inline__ __m128i __DEFAULT_FN_ATTRS
4982 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4984 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4985 (__v4si)_mm_slli_epi32(__A, __B),
4986 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_slli_epi32 with an int count __B: passes the
   _mm256_slli_epi32(__A, __B) result to __builtin_ia32_selectd_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
4989 static __inline__ __m256i __DEFAULT_FN_ATTRS
4990 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4992 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4993 (__v8si)_mm256_slli_epi32(__A, __B),
/* Zero-masking form of _mm256_slli_epi32 with an int count __B:
   __builtin_ia32_selectd_256 takes __U as the mask, the
   _mm256_slli_epi32(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
4997 static __inline__ __m256i __DEFAULT_FN_ATTRS
4998 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
5000 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5001 (__v8si)_mm256_slli_epi32(__A, __B),
5002 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_sll_epi64: passes the _mm_sll_epi64(__A, __B) result to
   __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5005 static __inline__ __m128i __DEFAULT_FN_ATTRS
5006 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5008 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5009 (__v2di)_mm_sll_epi64(__A, __B),
/* Zero-masking form of _mm_sll_epi64: __builtin_ia32_selectq_128 takes __U as
   the mask, the _mm_sll_epi64(__A, __B) result, and _mm_setzero_di() as the
   last operand. */
5013 static __inline__ __m128i __DEFAULT_FN_ATTRS
5014 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
5016 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5017 (__v2di)_mm_sll_epi64(__A, __B),
5018 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_sll_epi64 (note __B is a 128-bit __m128i): passes the
   _mm256_sll_epi64(__A, __B) result to __builtin_ia32_selectq_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5021 static __inline__ __m256i __DEFAULT_FN_ATTRS
5022 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5024 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5025 (__v4di)_mm256_sll_epi64(__A, __B),
/* Zero-masking form of _mm256_sll_epi64 (note __B is a 128-bit __m128i):
   __builtin_ia32_selectq_256 takes __U as the mask, the
   _mm256_sll_epi64(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5029 static __inline__ __m256i __DEFAULT_FN_ATTRS
5030 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
5032 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5033 (__v4di)_mm256_sll_epi64(__A, __B),
5034 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_slli_epi64 with an int count __B: passes the
   _mm_slli_epi64(__A, __B) result to __builtin_ia32_selectq_128 with __U as
   the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5037 static __inline__ __m128i __DEFAULT_FN_ATTRS
5038 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5040 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5041 (__v2di)_mm_slli_epi64(__A, __B),
/* Zero-masking form of _mm_slli_epi64 with an int count __B:
   __builtin_ia32_selectq_128 takes __U as the mask, the
   _mm_slli_epi64(__A, __B) result, and _mm_setzero_di() as the last
   operand. */
5045 static __inline__ __m128i __DEFAULT_FN_ATTRS
5046 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
5048 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5049 (__v2di)_mm_slli_epi64(__A, __B),
5050 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_slli_epi64 with an int count __B: passes the
   _mm256_slli_epi64(__A, __B) result to __builtin_ia32_selectq_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5053 static __inline__ __m256i __DEFAULT_FN_ATTRS
5054 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5056 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5057 (__v4di)_mm256_slli_epi64(__A, __B),
/* Zero-masking form of _mm256_slli_epi64 with an int count __B:
   __builtin_ia32_selectq_256 takes __U as the mask, the
   _mm256_slli_epi64(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5061 static __inline__ __m256i __DEFAULT_FN_ATTRS
5062 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
5064 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5065 (__v4di)_mm256_slli_epi64(__A, __B),
5066 (__v4di)_mm256_setzero_si256());
/* Wraps __builtin_ia32_prorvd128_mask: (__v4si)__A is the first operand and
   _mm_setzero_si128() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4si)__B and an all-ones mask -- confirm. */
5069 static __inline__ __m128i __DEFAULT_FN_ATTRS
5070 _mm_rorv_epi32 (__m128i __A, __m128i __B)
5072 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5075 _mm_setzero_si128 (),
/* Masked form wrapping __builtin_ia32_prorvd128_mask with (__v4si)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
5079 static __inline__ __m128i __DEFAULT_FN_ATTRS
5080 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5083 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
/* Zero-masking form wrapping __builtin_ia32_prorvd128_mask: (__v4si)__A is
   the first operand and _mm_setzero_si128() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4si)__B and (__mmask8)__U -- confirm. */
5089 static __inline__ __m128i __DEFAULT_FN_ATTRS
5090 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5092 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5095 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_prorvd256_mask: (__v8si)__A is the first operand and
   _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v8si)__B and an all-ones mask -- confirm. */
5099 static __inline__ __m256i __DEFAULT_FN_ATTRS
5100 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
5102 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5105 _mm256_setzero_si256 (),
/* Masked form wrapping __builtin_ia32_prorvd256_mask with (__v8si)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
5109 static __inline__ __m256i __DEFAULT_FN_ATTRS
5110 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5113 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
/* Zero-masking form wrapping __builtin_ia32_prorvd256_mask: (__v8si)__A is
   the first operand and _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v8si)__B and (__mmask8)__U -- confirm. */
5119 static __inline__ __m256i __DEFAULT_FN_ATTRS
5120 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5122 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5125 _mm256_setzero_si256 (),
/* Wraps __builtin_ia32_prorvq128_mask with (__v2di)__A as the first operand.
   NOTE(review): every later operand is cut off in this listing; presumably
   (__v2di)__B, a zero vector and an all-ones mask -- confirm. */
5129 static __inline__ __m128i __DEFAULT_FN_ATTRS
5130 _mm_rorv_epi64 (__m128i __A, __m128i __B)
5132 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
/* Masked form wrapping __builtin_ia32_prorvq128_mask with (__v2di)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
5139 static __inline__ __m128i __DEFAULT_FN_ATTRS
5140 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5143 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
/* Zero-masking form wrapping __builtin_ia32_prorvq128_mask with (__v2di)__A
   as the first operand.
   NOTE(review): every later operand is cut off in this listing; presumably
   (__v2di)__B, a zero vector and (__mmask8)__U -- confirm. */
5149 static __inline__ __m128i __DEFAULT_FN_ATTRS
5150 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5152 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
/* Wraps __builtin_ia32_prorvq256_mask: (__v4di)__A is the first operand and
   _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4di)__B and an all-ones mask -- confirm. */
5159 static __inline__ __m256i __DEFAULT_FN_ATTRS
5160 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
5162 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5165 _mm256_setzero_si256 (),
/* Masked form wrapping __builtin_ia32_prorvq256_mask with (__v4di)__A as the
   first operand.
   NOTE(review): the rest of the parameter list and of the call is cut off in
   this listing; presumably a __B parameter plus __W and __U are forwarded
   -- confirm. */
5169 static __inline__ __m256i __DEFAULT_FN_ATTRS
5170 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5173 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
/* Zero-masking form wrapping __builtin_ia32_prorvq256_mask: (__v4di)__A is
   the first operand and _mm256_setzero_si256() appears among the later ones.
   NOTE(review): the __B operand and the mask are cut off in this listing;
   presumably (__v4di)__B and (__mmask8)__U -- confirm. */
5179 static __inline__ __m256i __DEFAULT_FN_ATTRS
5180 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5182 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5185 _mm256_setzero_si256 (),
/* Masked form of _mm_sllv_epi64: passes the _mm_sllv_epi64(__X, __Y) result
   to __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5189 static __inline__ __m128i __DEFAULT_FN_ATTRS
5190 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5192 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5193 (__v2di)_mm_sllv_epi64(__X, __Y),
/* Zero-masking form of _mm_sllv_epi64: __builtin_ia32_selectq_128 takes __U
   as the mask, the _mm_sllv_epi64(__X, __Y) result, and _mm_setzero_di() as
   the last operand. */
5197 static __inline__ __m128i __DEFAULT_FN_ATTRS
5198 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5201 (__v2di)_mm_sllv_epi64(__X, __Y),
5202 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_sllv_epi64: passes the _mm256_sllv_epi64(__X, __Y)
   result to __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5205 static __inline__ __m256i __DEFAULT_FN_ATTRS
5206 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5208 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5209 (__v4di)_mm256_sllv_epi64(__X, __Y),
/* Zero-masking form of _mm256_sllv_epi64: __builtin_ia32_selectq_256 takes
   __U as the mask, the _mm256_sllv_epi64(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5213 static __inline__ __m256i __DEFAULT_FN_ATTRS
5214 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
5216 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5217 (__v4di)_mm256_sllv_epi64(__X, __Y),
5218 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_sllv_epi32: passes the _mm_sllv_epi32(__X, __Y) result
   to __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
5221 static __inline__ __m128i __DEFAULT_FN_ATTRS
5222 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5224 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5225 (__v4si)_mm_sllv_epi32(__X, __Y),
/* Zero-masking form of _mm_sllv_epi32: __builtin_ia32_selectd_128 takes __U
   as the mask, the _mm_sllv_epi32(__X, __Y) result, and _mm_setzero_si128()
   as the last operand. */
5229 static __inline__ __m128i __DEFAULT_FN_ATTRS
5230 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5232 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5233 (__v4si)_mm_sllv_epi32(__X, __Y),
5234 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_sllv_epi32: passes the _mm256_sllv_epi32(__X, __Y)
   result to __builtin_ia32_selectd_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
5237 static __inline__ __m256i __DEFAULT_FN_ATTRS
5238 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5240 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5241 (__v8si)_mm256_sllv_epi32(__X, __Y),
/* Zero-masking form of _mm256_sllv_epi32: __builtin_ia32_selectd_256 takes
   __U as the mask, the _mm256_sllv_epi32(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5245 static __inline__ __m256i __DEFAULT_FN_ATTRS
5246 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5249 (__v8si)_mm256_sllv_epi32(__X, __Y),
5250 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srlv_epi64: passes the _mm_srlv_epi64(__X, __Y) result
   to __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5253 static __inline__ __m128i __DEFAULT_FN_ATTRS
5254 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5256 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5257 (__v2di)_mm_srlv_epi64(__X, __Y),
/* Zero-masking form of _mm_srlv_epi64: __builtin_ia32_selectq_128 takes __U
   as the mask, the _mm_srlv_epi64(__X, __Y) result, and _mm_setzero_di() as
   the last operand. */
5261 static __inline__ __m128i __DEFAULT_FN_ATTRS
5262 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5264 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5265 (__v2di)_mm_srlv_epi64(__X, __Y),
5266 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_srlv_epi64: passes the _mm256_srlv_epi64(__X, __Y)
   result to __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5269 static __inline__ __m256i __DEFAULT_FN_ATTRS
5270 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5272 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5273 (__v4di)_mm256_srlv_epi64(__X, __Y),
/* Zero-masking form of _mm256_srlv_epi64: __builtin_ia32_selectq_256 takes
   __U as the mask, the _mm256_srlv_epi64(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5277 static __inline__ __m256i __DEFAULT_FN_ATTRS
5278 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
5280 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5281 (__v4di)_mm256_srlv_epi64(__X, __Y),
5282 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srlv_epi32: passes the _mm_srlv_epi32(__X, __Y) result
   to __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
5285 static __inline__ __m128i __DEFAULT_FN_ATTRS
5286 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5288 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5289 (__v4si)_mm_srlv_epi32(__X, __Y),
/* Zero-masking form of _mm_srlv_epi32: __builtin_ia32_selectd_128 takes __U
   as the mask, the _mm_srlv_epi32(__X, __Y) result, and _mm_setzero_si128()
   as the last operand. */
5293 static __inline__ __m128i __DEFAULT_FN_ATTRS
5294 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5296 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5297 (__v4si)_mm_srlv_epi32(__X, __Y),
5298 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srlv_epi32: passes the _mm256_srlv_epi32(__X, __Y)
   result to __builtin_ia32_selectd_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
5301 static __inline__ __m256i __DEFAULT_FN_ATTRS
5302 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5304 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5305 (__v8si)_mm256_srlv_epi32(__X, __Y),
/* Zero-masking form of _mm256_srlv_epi32: __builtin_ia32_selectd_256 takes
   __U as the mask, the _mm256_srlv_epi32(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5309 static __inline__ __m256i __DEFAULT_FN_ATTRS
5310 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5312 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5313 (__v8si)_mm256_srlv_epi32(__X, __Y),
5314 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srl_epi32: passes the _mm_srl_epi32(__A, __B) result to
   __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
5317 static __inline__ __m128i __DEFAULT_FN_ATTRS
5318 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5320 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5321 (__v4si)_mm_srl_epi32(__A, __B),
/* Zero-masking form of _mm_srl_epi32: __builtin_ia32_selectd_128 takes __U as
   the mask, the _mm_srl_epi32(__A, __B) result, and _mm_setzero_si128() as
   the last operand. */
5325 static __inline__ __m128i __DEFAULT_FN_ATTRS
5326 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
5328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5329 (__v4si)_mm_srl_epi32(__A, __B),
5330 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srl_epi32 (note __B is a 128-bit __m128i): passes the
   _mm256_srl_epi32(__A, __B) result to __builtin_ia32_selectd_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
5333 static __inline__ __m256i __DEFAULT_FN_ATTRS
5334 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5336 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5337 (__v8si)_mm256_srl_epi32(__A, __B),
/* Zero-masking form of _mm256_srl_epi32 (note __B is a 128-bit __m128i):
   __builtin_ia32_selectd_256 takes __U as the mask, the
   _mm256_srl_epi32(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5341 static __inline__ __m256i __DEFAULT_FN_ATTRS
5342 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
5344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5345 (__v8si)_mm256_srl_epi32(__A, __B),
5346 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srli_epi32 with an int count __B: passes the
   _mm_srli_epi32(__A, __B) result to __builtin_ia32_selectd_128 with __U as
   the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
5349 static __inline__ __m128i __DEFAULT_FN_ATTRS
5350 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5352 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5353 (__v4si)_mm_srli_epi32(__A, __B),
/* Zero-masking form of _mm_srli_epi32 with an int count __B:
   __builtin_ia32_selectd_128 takes __U as the mask, the
   _mm_srli_epi32(__A, __B) result, and _mm_setzero_si128() as the last
   operand. */
5357 static __inline__ __m128i __DEFAULT_FN_ATTRS
5358 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
5360 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5361 (__v4si)_mm_srli_epi32(__A, __B),
5362 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srli_epi32 with an int count __B: passes the
   _mm256_srli_epi32(__A, __B) result to __builtin_ia32_selectd_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
5365 static __inline__ __m256i __DEFAULT_FN_ATTRS
5366 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5368 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5369 (__v8si)_mm256_srli_epi32(__A, __B),
/* Zero-masking form of _mm256_srli_epi32 with an int count __B:
   __builtin_ia32_selectd_256 takes __U as the mask, the
   _mm256_srli_epi32(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5373 static __inline__ __m256i __DEFAULT_FN_ATTRS
5374 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
5376 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5377 (__v8si)_mm256_srli_epi32(__A, __B),
5378 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srl_epi64: passes the _mm_srl_epi64(__A, __B) result to
   __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5381 static __inline__ __m128i __DEFAULT_FN_ATTRS
5382 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5384 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5385 (__v2di)_mm_srl_epi64(__A, __B),
/* Zero-masking form of _mm_srl_epi64: __builtin_ia32_selectq_128 takes __U as
   the mask, the _mm_srl_epi64(__A, __B) result, and _mm_setzero_di() as the
   last operand. */
5389 static __inline__ __m128i __DEFAULT_FN_ATTRS
5390 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
5392 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5393 (__v2di)_mm_srl_epi64(__A, __B),
5394 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_srl_epi64 (note __B is a 128-bit __m128i): passes the
   _mm256_srl_epi64(__A, __B) result to __builtin_ia32_selectq_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5397 static __inline__ __m256i __DEFAULT_FN_ATTRS
5398 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5400 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5401 (__v4di)_mm256_srl_epi64(__A, __B),
/* Zero-masking form of _mm256_srl_epi64 (note __B is a 128-bit __m128i):
   __builtin_ia32_selectq_256 takes __U as the mask, the
   _mm256_srl_epi64(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5405 static __inline__ __m256i __DEFAULT_FN_ATTRS
5406 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
5408 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5409 (__v4di)_mm256_srl_epi64(__A, __B),
5410 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srli_epi64 with an int count __B: passes the
   _mm_srli_epi64(__A, __B) result to __builtin_ia32_selectq_128 with __U as
   the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5413 static __inline__ __m128i __DEFAULT_FN_ATTRS
5414 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5416 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5417 (__v2di)_mm_srli_epi64(__A, __B),
/* Zero-masking form of _mm_srli_epi64 with an int count __B:
   __builtin_ia32_selectq_128 takes __U as the mask, the
   _mm_srli_epi64(__A, __B) result, and _mm_setzero_di() as the last
   operand. */
5421 static __inline__ __m128i __DEFAULT_FN_ATTRS
5422 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
5424 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5425 (__v2di)_mm_srli_epi64(__A, __B),
5426 (__v2di)_mm_setzero_di());
/* Masked form of _mm256_srli_epi64 with an int count __B: passes the
   _mm256_srli_epi64(__A, __B) result to __builtin_ia32_selectq_256 with __U
   as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5429 static __inline__ __m256i __DEFAULT_FN_ATTRS
5430 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5432 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5433 (__v4di)_mm256_srli_epi64(__A, __B),
/* Zero-masking form of _mm256_srli_epi64 with an int count __B:
   __builtin_ia32_selectq_256 takes __U as the mask, the
   _mm256_srli_epi64(__A, __B) result, and _mm256_setzero_si256() as the last
   operand. */
5437 static __inline__ __m256i __DEFAULT_FN_ATTRS
5438 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
5440 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5441 (__v4di)_mm256_srli_epi64(__A, __B),
5442 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srav_epi32: passes the _mm_srav_epi32(__X, __Y) result
   to __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4si)__W -- confirm. */
5445 static __inline__ __m128i __DEFAULT_FN_ATTRS
5446 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5448 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5449 (__v4si)_mm_srav_epi32(__X, __Y),
/* Zero-masking form of _mm_srav_epi32: __builtin_ia32_selectd_128 takes __U
   as the mask, the _mm_srav_epi32(__X, __Y) result, and _mm_setzero_si128()
   as the last operand. */
5453 static __inline__ __m128i __DEFAULT_FN_ATTRS
5454 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5456 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5457 (__v4si)_mm_srav_epi32(__X, __Y),
5458 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srav_epi32: passes the _mm256_srav_epi32(__X, __Y)
   result to __builtin_ia32_selectd_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v8si)__W -- confirm. */
5461 static __inline__ __m256i __DEFAULT_FN_ATTRS
5462 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5464 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5465 (__v8si)_mm256_srav_epi32(__X, __Y),
/* Zero-masking form of _mm256_srav_epi32: __builtin_ia32_selectd_256 takes
   __U as the mask, the _mm256_srav_epi32(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5469 static __inline__ __m256i __DEFAULT_FN_ATTRS
5470 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5472 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5473 (__v8si)_mm256_srav_epi32(__X, __Y),
5474 (__v8si)_mm256_setzero_si256());
/* Thin wrapper: forwards __X and __Y, both viewed as __v2di, to
   __builtin_ia32_psravq128 and returns the result as __m128i. */
5477 static __inline__ __m128i __DEFAULT_FN_ATTRS
5478 _mm_srav_epi64(__m128i __X, __m128i __Y)
5480 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
/* Masked form of _mm_srav_epi64: passes the _mm_srav_epi64(__X, __Y) result
   to __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2di)__W -- confirm. */
5483 static __inline__ __m128i __DEFAULT_FN_ATTRS
5484 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5486 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5487 (__v2di)_mm_srav_epi64(__X, __Y),
/* Zero-masking form of _mm_srav_epi64: __builtin_ia32_selectq_128 takes __U
   as the mask, the _mm_srav_epi64(__X, __Y) result, and _mm_setzero_di() as
   the last operand. */
5491 static __inline__ __m128i __DEFAULT_FN_ATTRS
5492 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5494 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5495 (__v2di)_mm_srav_epi64(__X, __Y),
5496 (__v2di)_mm_setzero_di());
/* Thin wrapper: forwards __X and __Y, both viewed as __v4di, to
   __builtin_ia32_psravq256 and returns the result as __m256i. */
5499 static __inline__ __m256i __DEFAULT_FN_ATTRS
5500 _mm256_srav_epi64(__m256i __X, __m256i __Y)
5502 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
/* Masked form of _mm256_srav_epi64: passes the _mm256_srav_epi64(__X, __Y)
   result to __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4di)__W -- confirm. */
5505 static __inline__ __m256i __DEFAULT_FN_ATTRS
5506 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5508 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5509 (__v4di)_mm256_srav_epi64(__X, __Y),
/* Zero-masking form of _mm256_srav_epi64: __builtin_ia32_selectq_256 takes
   __U as the mask, the _mm256_srav_epi64(__X, __Y) result, and
   _mm256_setzero_si256() as the last operand. */
5513 static __inline__ __m256i __DEFAULT_FN_ATTRS
5514 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5516 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5517 (__v4di)_mm256_srav_epi64(__X, __Y),
5518 (__v4di)_mm256_setzero_si256());
/* Masked move built on __builtin_ia32_selectd_128 with __U as the mask.
   NOTE(review): both data operands are cut off in this listing; presumably
   (__v4si)__A followed by (__v4si)__W -- confirm. */
5521 static __inline__ __m128i __DEFAULT_FN_ATTRS
5522 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5524 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
/* Zero-masking move: __builtin_ia32_selectd_128 with __U as the mask and
   _mm_setzero_si128() as the last operand.
   NOTE(review): the middle operand is cut off in this listing; presumably
   (__v4si)__A -- confirm. */
5529 static __inline__ __m128i __DEFAULT_FN_ATTRS
5530 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5532 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5534 (__v4si) _mm_setzero_si128 ());
/* Masked move built on __builtin_ia32_selectd_256 with __U as the mask.
   NOTE(review): both data operands are cut off in this listing; presumably
   (__v8si)__A followed by (__v8si)__W -- confirm. */
5538 static __inline__ __m256i __DEFAULT_FN_ATTRS
5539 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5541 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
/* Zero-masking move: __builtin_ia32_selectd_256 with __U as the mask and
   _mm256_setzero_si256() as the last operand.
   NOTE(review): the middle operand is cut off in this listing; presumably
   (__v8si)__A -- confirm. */
5546 static __inline__ __m256i __DEFAULT_FN_ATTRS
5547 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5549 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5551 (__v8si) _mm256_setzero_si256 ());
/* Masked load through __builtin_ia32_movdqa32load128_mask. __P arrives as
   void const * and is cast to (__v4si *), which drops the const qualifier.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __W and __U. The "movdqa" in the builtin name suggests an
   aligned load -- confirm the alignment requirement on __P. */
5554 static __inline__ __m128i __DEFAULT_FN_ATTRS
5555 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5557 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
/* Zero-masking load through __builtin_ia32_movdqa32load128_mask, with
   _mm_setzero_si128() among the later operands. __P arrives as void const *
   and is cast to (__v4si *), which drops the const qualifier.
   NOTE(review): the mask operand is cut off in this listing; presumably
   __U. The "movdqa" in the builtin name suggests an aligned load -- confirm. */
5563 static __inline__ __m128i __DEFAULT_FN_ATTRS
5564 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5566 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5568 _mm_setzero_si128 (),
/* Masked load through __builtin_ia32_movdqa32load256_mask. __P arrives as
   void const * and is cast to (__v8si *), which drops the const qualifier.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __W and __U. The "movdqa" in the builtin name suggests an
   aligned load -- confirm the alignment requirement on __P. */
5573 static __inline__ __m256i __DEFAULT_FN_ATTRS
5574 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5576 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
/* Zero-masking load through __builtin_ia32_movdqa32load256_mask, with
   _mm256_setzero_si256() among the later operands. __P arrives as
   void const * and is cast to (__v8si *), which drops the const qualifier.
   NOTE(review): the mask operand is cut off in this listing; presumably
   __U. The "movdqa" in the builtin name suggests an aligned load -- confirm. */
5582 static __inline__ __m256i __DEFAULT_FN_ATTRS
5583 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5585 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5587 _mm256_setzero_si256 (),
/* Masked store through __builtin_ia32_movdqa32store128_mask; the destination
   __P is cast to (__v4si *). Returns nothing.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __A and __U. The "movdqa" in the builtin name suggests an
   aligned store -- confirm the alignment requirement on __P. */
5592 static __inline__ void __DEFAULT_FN_ATTRS
5593 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5595 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
/* Masked store through __builtin_ia32_movdqa32store256_mask; the destination
   __P is cast to (__v8si *). Returns nothing.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __A and __U. The "movdqa" in the builtin name suggests an
   aligned store -- confirm the alignment requirement on __P. */
5600 static __inline__ void __DEFAULT_FN_ATTRS
5601 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5603 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
/* Masked move built on __builtin_ia32_selectq_128 with __U as the mask.
   NOTE(review): both data operands are cut off in this listing; presumably
   (__v2di)__A followed by (__v2di)__W -- confirm. */
5608 static __inline__ __m128i __DEFAULT_FN_ATTRS
5609 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5611 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
/* Zero-masking move: __builtin_ia32_selectq_128 with __U as the mask and
   _mm_setzero_di() as the last operand.
   NOTE(review): the middle operand is cut off in this listing; presumably
   (__v2di)__A -- confirm. */
5616 static __inline__ __m128i __DEFAULT_FN_ATTRS
5617 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5619 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5621 (__v2di) _mm_setzero_di ());
/* Masked move built on __builtin_ia32_selectq_256 with __U as the mask.
   NOTE(review): both data operands are cut off in this listing; presumably
   (__v4di)__A followed by (__v4di)__W -- confirm. */
5624 static __inline__ __m256i __DEFAULT_FN_ATTRS
5625 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5627 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* Zero-masking move: __builtin_ia32_selectq_256 with __U as the mask and
   _mm256_setzero_si256() as the last operand.
   NOTE(review): the middle operand is cut off in this listing; presumably
   (__v4di)__A -- confirm. */
5632 static __inline__ __m256i __DEFAULT_FN_ATTRS
5633 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5635 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5637 (__v4di) _mm256_setzero_si256 ());
/* Masked load through __builtin_ia32_movdqa64load128_mask. __P arrives as
   void const * and is cast to (__v2di *), which drops the const qualifier.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __W and __U. The "movdqa" in the builtin name suggests an
   aligned load -- confirm the alignment requirement on __P. */
5640 static __inline__ __m128i __DEFAULT_FN_ATTRS
5641 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5643 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
/* Zero-masking load through __builtin_ia32_movdqa64load128_mask. __P arrives
   as void const * and is cast to (__v2di *), which drops the const qualifier.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably a zero vector and __U. The "movdqa" in the builtin name
   suggests an aligned load -- confirm. */
5649 static __inline__ __m128i __DEFAULT_FN_ATTRS
5650 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5652 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
/* Masked load through __builtin_ia32_movdqa64load256_mask. __P arrives as
   void const * and is cast to (__v4di *), which drops the const qualifier.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __W and __U. The "movdqa" in the builtin name suggests an
   aligned load -- confirm the alignment requirement on __P. */
5659 static __inline__ __m256i __DEFAULT_FN_ATTRS
5660 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5662 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
/* Zero-masking load through __builtin_ia32_movdqa64load256_mask, with
   _mm256_setzero_si256() among the later operands. __P arrives as
   void const * and is cast to (__v4di *), which drops the const qualifier.
   NOTE(review): the mask operand is cut off in this listing; presumably
   __U. The "movdqa" in the builtin name suggests an aligned load -- confirm. */
5668 static __inline__ __m256i __DEFAULT_FN_ATTRS
5669 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5671 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5673 _mm256_setzero_si256 (),
/* Masked store through __builtin_ia32_movdqa64store128_mask; the destination
   __P is cast to (__v2di *). Returns nothing.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __A and __U. The "movdqa" in the builtin name suggests an
   aligned store -- confirm the alignment requirement on __P. */
5678 static __inline__ void __DEFAULT_FN_ATTRS
5679 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5681 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
/* Masked store through __builtin_ia32_movdqa64store256_mask; the destination
   __P is cast to (__v4di *). Returns nothing.
   NOTE(review): the remaining operands are cut off in this listing;
   presumably __A and __U. The "movdqa" in the builtin name suggests an
   aligned store -- confirm the alignment requirement on __P. */
5686 static __inline__ void __DEFAULT_FN_ATTRS
5687 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5689 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
/* Masked form of _mm_movedup_pd: passes the _mm_movedup_pd(__A) result to
   __builtin_ia32_selectpd_128 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v2df)__W -- confirm. */
5694 static __inline__ __m128d __DEFAULT_FN_ATTRS
5695 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5698 (__v2df)_mm_movedup_pd(__A),
/* Zero-masking form of _mm_movedup_pd: __builtin_ia32_selectpd_128 takes __U
   as the mask, the _mm_movedup_pd(__A) result, and _mm_setzero_pd() as the
   last operand. */
5702 static __inline__ __m128d __DEFAULT_FN_ATTRS
5703 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5705 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5706 (__v2df)_mm_movedup_pd(__A),
5707 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_movedup_pd: passes the _mm256_movedup_pd(__A) result
   to __builtin_ia32_selectpd_256 with __U as the mask.
   NOTE(review): the last select operand is cut off in this listing;
   presumably (__v4df)__W -- confirm. */
5710 static __inline__ __m256d __DEFAULT_FN_ATTRS
5711 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5713 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5714 (__v4df)_mm256_movedup_pd(__A),
/* Zero-masking form of _mm256_movedup_pd: __builtin_ia32_selectpd_256 takes
   __U as the mask, the _mm256_movedup_pd(__A) result, and
   _mm256_setzero_pd() as the last operand. */
5718 static __inline__ __m256d __DEFAULT_FN_ATTRS
5719 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5721 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5722 (__v4df)_mm256_movedup_pd(__A),
5723 (__v4df)_mm256_setzero_pd());
/* Expands to __builtin_ia32_pbroadcastd128_gpr_mask with (A) cast to int and
   (O), viewed as __v4si, as the second operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(M) -- confirm. */
5727 #define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
5728 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
5729 (__v4si)(__m128i)(O), \
/* Zero-masking form: expands to __builtin_ia32_pbroadcastd128_gpr_mask with
   (A) cast to int and _mm_setzero_si128() as the second operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(M) -- confirm. */
5732 #define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
5733 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
5734 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_pbroadcastd256_gpr_mask with (A) cast to int and
   (O), viewed as __v8si, as the second operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(M) -- confirm. */
5737 #define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
5738 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
5739 (__v8si)(__m256i)(O), \
/* Zero-masking form: expands to __builtin_ia32_pbroadcastd256_gpr_mask with
   (A) cast to int and _mm256_setzero_si256() as the second operand.
   NOTE(review): the mask operand is cut off in this listing; presumably
   (__mmask8)(M) -- confirm. */
5742 #define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
5743 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
5744 (__v8si)_mm256_setzero_si256(), \
/* Masked wrapper around __builtin_ia32_pbroadcastq128_gpr_mask: forwards the
   long long __A first and (__v2di)__O second.
   NOTE(review): the mask operand is cut off in this listing; presumably
   __M -- confirm. */
5748 static __inline__ __m128i __DEFAULT_FN_ATTRS
5749 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5751 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
/* Zero-masking wrapper around __builtin_ia32_pbroadcastq128_gpr_mask:
   forwards the long long __A first, with _mm_setzero_si128() among the
   later operands.
   NOTE(review): the cast on the zero vector and the mask operand are cut
   off in this listing; presumably (__v2di) and __M -- confirm. */
5755 static __inline__ __m128i __DEFAULT_FN_ATTRS
5756 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5758 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
5760 _mm_setzero_si128 (),
/* Masked wrapper around __builtin_ia32_pbroadcastq256_gpr_mask: forwards the
   long long __A first and (__v4di)__O second.
   NOTE(review): the mask operand is cut off in this listing; presumably
   __M -- confirm. */
5764 static __inline__ __m256i __DEFAULT_FN_ATTRS
5765 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5767 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
/* Zero-masking wrapper around __builtin_ia32_pbroadcastq256_gpr_mask:
   forwards the long long __A first, with _mm256_setzero_si256() among the
   later operands.
   NOTE(review): the cast on the zero vector and the mask operand are cut
   off in this listing; presumably (__v4di) and __M -- confirm. */
5771 static __inline__ __m256i __DEFAULT_FN_ATTRS
5772 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5774 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
5776 _mm256_setzero_si256 (),
/* Expands to __builtin_ia32_fixupimmpd128_mask(A, B, C, imm, ...) with A/B as
 * __v2df and C as __v2di. NOTE(review): the trailing mask operand line is
 * elided from this listing (presumably an all-ones mask). */
5781 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5782 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5783 (__v2df)(__m128d)(B), \
5784 (__v2di)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmpd128_mask(A, B, C, imm, ...) with A/B as
 * __v2df and C as __v2di. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5787 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5788 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5789 (__v2df)(__m128d)(B), \
5790 (__v2di)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmpd128_maskz: A and B are passed as __v2df,
 * C as __v2di, imm as int and U as __mmask8; the result is cast to __m128d. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_fixupimmpd128_maskz( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2di)(__m128i)(C), (int)(imm), (__mmask8)(U)); \
  })
/* Expands to __builtin_ia32_fixupimmpd256_mask(A, B, C, imm, ...) with A/B as
 * __v4df and C as __v4di. NOTE(review): the trailing mask operand line is
 * elided from this listing (presumably an all-ones mask). */
5799 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5800 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5801 (__v4df)(__m256d)(B), \
5802 (__v4di)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmpd256_mask(A, B, C, imm, ...) with A/B as
 * __v4df and C as __v4di. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5805 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5806 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5807 (__v4df)(__m256d)(B), \
5808 (__v4di)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmpd256_maskz: A and B are passed as __v4df,
 * C as __v4di, imm as int and U as __mmask8; the result is cast to __m256d. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_fixupimmpd256_maskz( \
        (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), \
        (__v4di)(__m256i)(C), (int)(imm), (__mmask8)(U)); \
  })
/* Expands to __builtin_ia32_fixupimmps128_mask(A, B, C, imm, ...) with A/B as
 * __v4sf and C as __v4si. NOTE(review): the trailing mask operand line is
 * elided from this listing (presumably an all-ones mask). */
5817 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5818 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5819 (__v4sf)(__m128)(B), \
5820 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmps128_mask(A, B, C, imm, ...) with A/B as
 * __v4sf and C as __v4si. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5823 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5824 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5825 (__v4sf)(__m128)(B), \
5826 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmps128_maskz(A, B, C, imm, ...) with A/B as
 * __v4sf and C as __v4si. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5829 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5830 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5831 (__v4sf)(__m128)(B), \
5832 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmps256_mask(A, B, C, imm, ...) with A/B as
 * __v8sf and C as __v8si. NOTE(review): the trailing mask operand line is
 * elided from this listing (presumably an all-ones mask). */
5835 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5836 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5837 (__v8sf)(__m256)(B), \
5838 (__v8si)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmps256_mask(A, B, C, imm, ...) with A/B as
 * __v8sf and C as __v8si. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5841 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5842 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5843 (__v8sf)(__m256)(B), \
5844 (__v8si)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_fixupimmps256_maskz(A, B, C, imm, ...) with A/B as
 * __v8sf and C as __v8si. NOTE(review): the trailing mask operand line
 * (presumably U) is elided from this listing. */
5847 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5848 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5849 (__v8sf)(__m256)(B), \
5850 (__v8si)(__m256i)(C), (int)(imm), \
/* Masked load through __builtin_ia32_loadapd128_mask; __P is cast to
 * __v2df *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5853 static __inline__ __m128d __DEFAULT_FN_ATTRS
5854 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5856 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
/* Masked load through __builtin_ia32_loadapd128_mask; __P is cast to
 * __v2df *, which drops its const qualifier. NOTE(review): the fallback and
 * mask operands (presumably a zero vector and __U) are elided from this listing. */
5861 static __inline__ __m128d __DEFAULT_FN_ATTRS
5862 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5864 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
/* Masked load through __builtin_ia32_loadapd256_mask; __P is cast to
 * __v4df *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5870 static __inline__ __m256d __DEFAULT_FN_ATTRS
5871 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5873 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
/* Masked load through __builtin_ia32_loadapd256_mask with _mm256_setzero_pd()
 * as the vector operand; __P is cast to __v4df *, dropping const.
 * NOTE(review): the mask operand (presumably __U) is elided from this listing. */
5878 static __inline__ __m256d __DEFAULT_FN_ATTRS
5879 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5881 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5883 _mm256_setzero_pd (),
/* Masked load through __builtin_ia32_loadaps128_mask; __P is cast to
 * __v4sf *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5887 static __inline__ __m128 __DEFAULT_FN_ATTRS
5888 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5890 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
/* Masked load through __builtin_ia32_loadaps128_mask; __P is cast to
 * __v4sf *, which drops its const qualifier. NOTE(review): the fallback and
 * mask operands (presumably a zero vector and __U) are elided from this listing. */
5895 static __inline__ __m128 __DEFAULT_FN_ATTRS
5896 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5898 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
/* Masked load through __builtin_ia32_loadaps256_mask; __P is cast to
 * __v8sf *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5904 static __inline__ __m256 __DEFAULT_FN_ATTRS
5905 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5907 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
/* Masked load through __builtin_ia32_loadaps256_mask with _mm256_setzero_ps()
 * as the vector operand; __P is cast to __v8sf *, dropping const.
 * NOTE(review): the mask operand (presumably __U) is elided from this listing. */
5912 static __inline__ __m256 __DEFAULT_FN_ATTRS
5913 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5915 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5917 _mm256_setzero_ps (),
/* Masked load through __builtin_ia32_loaddqudi128_mask; __P is cast to
 * __v2di *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5921 static __inline__ __m128i __DEFAULT_FN_ATTRS
5922 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5924 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
/* Masked load through __builtin_ia32_loaddqudi128_mask with
 * _mm_setzero_si128() as the vector operand; __P is cast to __v2di *, dropping
 * const. NOTE(review): the mask operand (presumably __U) is elided here. */
5929 static __inline__ __m128i __DEFAULT_FN_ATTRS
5930 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5932 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5934 _mm_setzero_si128 (),
/* Masked load through __builtin_ia32_loaddqudi256_mask; __P is cast to
 * __v4di *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5938 static __inline__ __m256i __DEFAULT_FN_ATTRS
5939 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5941 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
/* Masked load through __builtin_ia32_loaddqudi256_mask with
 * _mm256_setzero_si256() as the vector operand; __P is cast to __v4di *,
 * dropping const. NOTE(review): the mask operand (presumably __U) is elided here. */
5946 static __inline__ __m256i __DEFAULT_FN_ATTRS
5947 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5949 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5951 _mm256_setzero_si256 (),
/* Masked load through __builtin_ia32_loaddqusi128_mask; __P is cast to
 * __v4si *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5955 static __inline__ __m128i __DEFAULT_FN_ATTRS
5956 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5958 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
/* Masked load through __builtin_ia32_loaddqusi128_mask with
 * _mm_setzero_si128() as the vector operand; __P is cast to __v4si *, dropping
 * const. NOTE(review): the mask operand (presumably __U) is elided here. */
5963 static __inline__ __m128i __DEFAULT_FN_ATTRS
5964 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5966 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5968 _mm_setzero_si128 (),
/* Masked load through __builtin_ia32_loaddqusi256_mask; __P is cast to
 * __v8si *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5972 static __inline__ __m256i __DEFAULT_FN_ATTRS
5973 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5975 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
/* Masked load through __builtin_ia32_loaddqusi256_mask with
 * _mm256_setzero_si256() as the vector operand; __P is cast to __v8si *,
 * dropping const. NOTE(review): the mask operand (presumably __U) is elided here. */
5980 static __inline__ __m256i __DEFAULT_FN_ATTRS
5981 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5983 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5985 _mm256_setzero_si256 (),
/* Masked load through __builtin_ia32_loadupd128_mask; __P is cast to
 * __v2df *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
5989 static __inline__ __m128d __DEFAULT_FN_ATTRS
5990 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5992 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
/* Masked load through __builtin_ia32_loadupd128_mask; __P is cast to
 * __v2df *, which drops its const qualifier. NOTE(review): the fallback and
 * mask operands (presumably a zero vector and __U) are elided from this listing. */
5997 static __inline__ __m128d __DEFAULT_FN_ATTRS
5998 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
6000 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
/* Masked load through __builtin_ia32_loadupd256_mask; __P is cast to
 * __v4df *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
6006 static __inline__ __m256d __DEFAULT_FN_ATTRS
6007 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6009 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
/* Masked load through __builtin_ia32_loadupd256_mask with _mm256_setzero_pd()
 * as the vector operand; __P is cast to __v4df *, dropping const.
 * NOTE(review): the mask operand (presumably __U) is elided from this listing. */
6014 static __inline__ __m256d __DEFAULT_FN_ATTRS
6015 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
6017 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
6019 _mm256_setzero_pd (),
/* Masked load through __builtin_ia32_loadups128_mask; __P is cast to
 * __v4sf *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
6023 static __inline__ __m128 __DEFAULT_FN_ATTRS
6024 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6026 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
/* Masked load through __builtin_ia32_loadups128_mask; __P is cast to
 * __v4sf *, which drops its const qualifier. NOTE(review): the fallback and
 * mask operands (presumably a zero vector and __U) are elided from this listing. */
6031 static __inline__ __m128 __DEFAULT_FN_ATTRS
6032 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
6034 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
/* Masked load through __builtin_ia32_loadups256_mask; __P is cast to
 * __v8sf *, which drops its const qualifier. NOTE(review): the remaining
 * operands (presumably __W and __U) are elided from this listing. */
6040 static __inline__ __m256 __DEFAULT_FN_ATTRS
6041 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6043 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
/* Masked load through __builtin_ia32_loadups256_mask with _mm256_setzero_ps()
 * as the vector operand; __P is cast to __v8sf *, dropping const.
 * NOTE(review): the mask operand (presumably __U) is elided from this listing. */
6048 static __inline__ __m256 __DEFAULT_FN_ATTRS
6049 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
6051 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
6053 _mm256_setzero_ps (),
/* Masked store through __builtin_ia32_storeapd128_mask with __P cast to
 * __v2df *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6057 static __inline__ void __DEFAULT_FN_ATTRS
6058 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
6060 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
/* Masked store through __builtin_ia32_storeapd256_mask with __P cast to
 * __v4df *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6065 static __inline__ void __DEFAULT_FN_ATTRS
6066 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
6068 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
/* Masked store through __builtin_ia32_storeaps128_mask with __P cast to
 * __v4sf *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6073 static __inline__ void __DEFAULT_FN_ATTRS
6074 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
6076 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
/* Masked store through __builtin_ia32_storeaps256_mask with __P cast to
 * __v8sf *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6081 static __inline__ void __DEFAULT_FN_ATTRS
6082 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
6084 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
/* Masked store through __builtin_ia32_storedqudi128_mask with __P cast to
 * __v2di *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6089 static __inline__ void __DEFAULT_FN_ATTRS
6090 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
6092 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
/* Masked store through __builtin_ia32_storedqudi256_mask with __P cast to
 * __v4di *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6097 static __inline__ void __DEFAULT_FN_ATTRS
6098 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
6100 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
/* Masked store through __builtin_ia32_storedqusi128_mask with __P cast to
 * __v4si *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6105 static __inline__ void __DEFAULT_FN_ATTRS
6106 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6108 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
/* Masked store through __builtin_ia32_storedqusi256_mask with __P cast to
 * __v8si *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6113 static __inline__ void __DEFAULT_FN_ATTRS
6114 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
6116 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
/* Masked store through __builtin_ia32_storeupd128_mask with __P cast to
 * __v2df *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6121 static __inline__ void __DEFAULT_FN_ATTRS
6122 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
6124 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
/* Masked store through __builtin_ia32_storeupd256_mask with __P cast to
 * __v4df *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6129 static __inline__ void __DEFAULT_FN_ATTRS
6130 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
6132 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
/* Masked store through __builtin_ia32_storeups128_mask with __P cast to
 * __v4sf *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6137 static __inline__ void __DEFAULT_FN_ATTRS
6138 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
6140 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
/* Masked store through __builtin_ia32_storeups256_mask with __P cast to
 * __v8sf *. NOTE(review): the value and mask operands (presumably __A and
 * __U) are elided from this listing. */
6145 static __inline__ void __DEFAULT_FN_ATTRS
6146 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
6148 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
/* Masked _mm_unpackhi_pd: feeds __U and _mm_unpackhi_pd(__A, __B) to
 * __builtin_ia32_selectpd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6154 static __inline__ __m128d __DEFAULT_FN_ATTRS
6155 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6157 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6158 (__v2df)_mm_unpackhi_pd(__A, __B),
6162 static __inline__ __m128d __DEFAULT_FN_ATTRS
6163 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
6165 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6166 (__v2df)_mm_unpackhi_pd(__A, __B),
6167 (__v2df)_mm_setzero_pd());
/* Masked _mm256_unpackhi_pd: feeds __U and _mm256_unpackhi_pd(__A, __B) to
 * __builtin_ia32_selectpd_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6170 static __inline__ __m256d __DEFAULT_FN_ATTRS
6171 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6174 (__v4df)_mm256_unpackhi_pd(__A, __B),
6178 static __inline__ __m256d __DEFAULT_FN_ATTRS
6179 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
6181 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6182 (__v4df)_mm256_unpackhi_pd(__A, __B),
6183 (__v4df)_mm256_setzero_pd());
/* Masked _mm_unpackhi_ps: feeds __U and _mm_unpackhi_ps(__A, __B) to
 * __builtin_ia32_selectps_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6186 static __inline__ __m128 __DEFAULT_FN_ATTRS
6187 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6189 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6190 (__v4sf)_mm_unpackhi_ps(__A, __B),
6194 static __inline__ __m128 __DEFAULT_FN_ATTRS
6195 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
6197 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6198 (__v4sf)_mm_unpackhi_ps(__A, __B),
6199 (__v4sf)_mm_setzero_ps());
/* Masked _mm256_unpackhi_ps: feeds __U and _mm256_unpackhi_ps(__A, __B) to
 * __builtin_ia32_selectps_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6202 static __inline__ __m256 __DEFAULT_FN_ATTRS
6203 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6205 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6206 (__v8sf)_mm256_unpackhi_ps(__A, __B),
6210 static __inline__ __m256 __DEFAULT_FN_ATTRS
6211 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
6213 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6214 (__v8sf)_mm256_unpackhi_ps(__A, __B),
6215 (__v8sf)_mm256_setzero_ps());
/* Masked _mm_unpacklo_pd: feeds __U and _mm_unpacklo_pd(__A, __B) to
 * __builtin_ia32_selectpd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6218 static __inline__ __m128d __DEFAULT_FN_ATTRS
6219 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6221 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6222 (__v2df)_mm_unpacklo_pd(__A, __B),
6226 static __inline__ __m128d __DEFAULT_FN_ATTRS
6227 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
6229 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6230 (__v2df)_mm_unpacklo_pd(__A, __B),
6231 (__v2df)_mm_setzero_pd());
/* Masked _mm256_unpacklo_pd: feeds __U and _mm256_unpacklo_pd(__A, __B) to
 * __builtin_ia32_selectpd_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6234 static __inline__ __m256d __DEFAULT_FN_ATTRS
6235 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6237 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6238 (__v4df)_mm256_unpacklo_pd(__A, __B),
6242 static __inline__ __m256d __DEFAULT_FN_ATTRS
6243 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
6245 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6246 (__v4df)_mm256_unpacklo_pd(__A, __B),
6247 (__v4df)_mm256_setzero_pd());
/* Masked _mm_unpacklo_ps: feeds __U and _mm_unpacklo_ps(__A, __B) to
 * __builtin_ia32_selectps_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6250 static __inline__ __m128 __DEFAULT_FN_ATTRS
6251 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6253 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6254 (__v4sf)_mm_unpacklo_ps(__A, __B),
6258 static __inline__ __m128 __DEFAULT_FN_ATTRS
6259 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
6261 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6262 (__v4sf)_mm_unpacklo_ps(__A, __B),
6263 (__v4sf)_mm_setzero_ps());
/* Masked _mm256_unpacklo_ps: feeds __U and _mm256_unpacklo_ps(__A, __B) to
 * __builtin_ia32_selectps_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6266 static __inline__ __m256 __DEFAULT_FN_ATTRS
6267 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6269 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6270 (__v8sf)_mm256_unpacklo_ps(__A, __B),
6274 static __inline__ __m256 __DEFAULT_FN_ATTRS
6275 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
6277 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6278 (__v8sf)_mm256_unpacklo_ps(__A, __B),
6279 (__v8sf)_mm256_setzero_ps());
/* Wraps __builtin_ia32_rcp14pd128_mask on (__v2df)__A. NOTE(review): the
 * pass-through and mask operands are elided from this listing. */
6282 static __inline__ __m128d __DEFAULT_FN_ATTRS
6283 _mm_rcp14_pd (__m128d __A)
6285 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Wraps __builtin_ia32_rcp14pd128_mask on (__v2df)__A. NOTE(review): the
 * remaining operands (presumably __W and __U) are elided from this listing. */
6291 static __inline__ __m128d __DEFAULT_FN_ATTRS
6292 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6294 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Wraps __builtin_ia32_rcp14pd128_mask on (__v2df)__A. NOTE(review): the
 * remaining operands (presumably a zero vector and __U) are elided from this
 * listing. */
6299 static __inline__ __m128d __DEFAULT_FN_ATTRS
6300 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
6302 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Wraps __builtin_ia32_rcp14pd256_mask on (__v4df)__A with _mm256_setzero_pd()
 * as the second vector operand. NOTE(review): the mask operand is elided from
 * this listing. */
6308 static __inline__ __m256d __DEFAULT_FN_ATTRS
6309 _mm256_rcp14_pd (__m256d __A)
6311 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6313 _mm256_setzero_pd (),
/* Wraps __builtin_ia32_rcp14pd256_mask on (__v4df)__A. NOTE(review): the
 * remaining operands (presumably __W and __U) are elided from this listing. */
6317 static __inline__ __m256d __DEFAULT_FN_ATTRS
6318 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6320 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
/* Wraps __builtin_ia32_rcp14pd256_mask on (__v4df)__A with _mm256_setzero_pd()
 * as the second vector operand. NOTE(review): the mask operand (presumably
 * __U) is elided from this listing. */
6325 static __inline__ __m256d __DEFAULT_FN_ATTRS
6326 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
6328 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6330 _mm256_setzero_pd (),
/* Wraps __builtin_ia32_rcp14ps128_mask on (__v4sf)__A. NOTE(review): the
 * pass-through and mask operands are elided from this listing. */
6334 static __inline__ __m128 __DEFAULT_FN_ATTRS
6335 _mm_rcp14_ps (__m128 __A)
6337 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_rcp14ps128_mask on (__v4sf)__A. NOTE(review): the
 * remaining operands (presumably __W and __U) are elided from this listing. */
6343 static __inline__ __m128 __DEFAULT_FN_ATTRS
6344 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6346 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_rcp14ps128_mask on (__v4sf)__A. NOTE(review): the
 * remaining operands (presumably a zero vector and __U) are elided from this
 * listing. */
6351 static __inline__ __m128 __DEFAULT_FN_ATTRS
6352 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6354 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_rcp14ps256_mask on (__v8sf)__A with _mm256_setzero_ps()
 * as the second vector operand. NOTE(review): the mask operand is elided from
 * this listing. */
6360 static __inline__ __m256 __DEFAULT_FN_ATTRS
6361 _mm256_rcp14_ps (__m256 __A)
6363 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6365 _mm256_setzero_ps (),
/* Wraps __builtin_ia32_rcp14ps256_mask on (__v8sf)__A. NOTE(review): the
 * remaining operands (presumably __W and __U) are elided from this listing. */
6369 static __inline__ __m256 __DEFAULT_FN_ATTRS
6370 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6372 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
/* Wraps __builtin_ia32_rcp14ps256_mask on (__v8sf)__A with _mm256_setzero_ps()
 * as the second vector operand. NOTE(review): the mask operand (presumably
 * __U) is elided from this listing. */
6377 static __inline__ __m256 __DEFAULT_FN_ATTRS
6378 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6380 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6382 _mm256_setzero_ps (),
/* Masked _mm_permute_pd: passes mask U, the _mm_permute_pd(X, C) result and W
 * (as __v2df) to __builtin_ia32_selectpd_128; the result is cast to __m128d. */
#define _mm_mask_permute_pd(W, U, X, C) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_selectpd_128( \
        (__mmask8)(U), \
        (__v2df)_mm_permute_pd((X), (C)), \
        (__v2df)(__m128d)(W)); \
  })
/* Zero-masked _mm_permute_pd: passes mask U, the _mm_permute_pd(X, C) result
 * and the _mm_setzero_pd() vector to __builtin_ia32_selectpd_128. */
#define _mm_maskz_permute_pd(U, X, C) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_selectpd_128( \
        (__mmask8)(U), \
        (__v2df)_mm_permute_pd((X), (C)), \
        (__v2df)_mm_setzero_pd()); \
  })
/* Masked _mm256_permute_pd: passes mask U, the _mm256_permute_pd(X, C) result
 * and W (as __v4df) to __builtin_ia32_selectpd_256; result cast to __m256d. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_permute_pd((X), (C)), \
        (__v4df)(__m256d)(W)); \
  })
/* Zero-masked _mm256_permute_pd: passes mask U, the _mm256_permute_pd(X, C)
 * result and the _mm256_setzero_pd() vector to __builtin_ia32_selectpd_256. */
#define _mm256_maskz_permute_pd(U, X, C) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), \
        (__v4df)_mm256_permute_pd((X), (C)), \
        (__v4df)_mm256_setzero_pd()); \
  })
/* Masked _mm_permute_ps: passes mask U, the _mm_permute_ps(X, C) result and W
 * (as __v4sf) to __builtin_ia32_selectps_128; the result is cast to __m128. */
#define _mm_mask_permute_ps(W, U, X, C) \
  __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm_permute_ps((X), (C)), \
        (__v4sf)(__m128)(W)); \
  })
/* Zero-masked _mm_permute_ps: passes mask U, the _mm_permute_ps(X, C) result
 * and the _mm_setzero_ps() vector to __builtin_ia32_selectps_128. */
#define _mm_maskz_permute_ps(U, X, C) \
  __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), \
        (__v4sf)_mm_permute_ps((X), (C)), \
        (__v4sf)_mm_setzero_ps()); \
  })
/* Masked _mm256_permute_ps: passes mask U, the _mm256_permute_ps(X, C) result
 * and W (as __v8sf) to __builtin_ia32_selectps_256; result cast to __m256. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_permute_ps((X), (C)), \
        (__v8sf)(__m256)(W)); \
  })
/* Zero-masked _mm256_permute_ps: passes mask U, the _mm256_permute_ps(X, C)
 * result and the _mm256_setzero_ps() vector to __builtin_ia32_selectps_256. */
#define _mm256_maskz_permute_ps(U, X, C) \
  __extension__ ({ \
    (__m256)__builtin_ia32_selectps_256( \
        (__mmask8)(U), \
        (__v8sf)_mm256_permute_ps((X), (C)), \
        (__v8sf)_mm256_setzero_ps()); \
  })
/* Masked _mm_permutevar_pd: feeds __U and _mm_permutevar_pd(__A, __C) to
 * __builtin_ia32_selectpd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6426 static __inline__ __m128d __DEFAULT_FN_ATTRS
6427 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6429 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6430 (__v2df)_mm_permutevar_pd(__A, __C),
6434 static __inline__ __m128d __DEFAULT_FN_ATTRS
6435 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6437 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6438 (__v2df)_mm_permutevar_pd(__A, __C),
6439 (__v2df)_mm_setzero_pd());
/* Masked _mm256_permutevar_pd: feeds __U and _mm256_permutevar_pd(__A, __C) to
 * __builtin_ia32_selectpd_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6442 static __inline__ __m256d __DEFAULT_FN_ATTRS
6443 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6445 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6446 (__v4df)_mm256_permutevar_pd(__A, __C),
6450 static __inline__ __m256d __DEFAULT_FN_ATTRS
6451 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6453 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6454 (__v4df)_mm256_permutevar_pd(__A, __C),
6455 (__v4df)_mm256_setzero_pd());
/* Masked _mm_permutevar_ps: feeds __U and _mm_permutevar_ps(__A, __C) to
 * __builtin_ia32_selectps_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6458 static __inline__ __m128 __DEFAULT_FN_ATTRS
6459 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6461 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6462 (__v4sf)_mm_permutevar_ps(__A, __C),
6466 static __inline__ __m128 __DEFAULT_FN_ATTRS
6467 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6469 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6470 (__v4sf)_mm_permutevar_ps(__A, __C),
6471 (__v4sf)_mm_setzero_ps());
/* Masked _mm256_permutevar_ps: feeds __U and _mm256_permutevar_ps(__A, __C) to
 * __builtin_ia32_selectps_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6474 static __inline__ __m256 __DEFAULT_FN_ATTRS
6475 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6477 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6478 (__v8sf)_mm256_permutevar_ps(__A, __C),
6482 static __inline__ __m256 __DEFAULT_FN_ATTRS
6483 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6485 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6486 (__v8sf)_mm256_permutevar_ps(__A, __C),
6487 (__v8sf)_mm256_setzero_ps());
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmd128 whose first
 * operand is (__v4si)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6490 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6491 _mm_test_epi32_mask (__m128i __A, __m128i __B)
6493 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmd128 whose first
 * operand is (__v4si)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6498 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6499 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6501 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmd256 whose first
 * operand is (__v8si)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6505 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6506 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
6508 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmd256 whose first
 * operand is (__v8si)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6513 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6514 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6516 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmq128 whose first
 * operand is (__v2di)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6520 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6521 _mm_test_epi64_mask (__m128i __A, __m128i __B)
6523 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmq128 whose first
 * operand is (__v2di)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6528 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6529 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6531 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmq256 whose first
 * operand is (__v4di)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6535 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6536 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
6538 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestmq256 whose first
 * operand is (__v4di)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6543 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6544 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6546 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmd128 whose first
 * operand is (__v4si)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6550 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6551 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
6553 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmd128 whose first
 * operand is (__v4si)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6558 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6559 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6561 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmd256 whose first
 * operand is (__v8si)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6565 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6566 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6568 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmd256 whose first
 * operand is (__v8si)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6573 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6574 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6576 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmq128 whose first
 * operand is (__v2di)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6580 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6581 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6583 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmq128 whose first
 * operand is (__v2di)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6588 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6589 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6591 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmq256 whose first
 * operand is (__v4di)__A. NOTE(review): the remaining operands (presumably
 * __B and a mask) are elided from this listing. */
6595 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6596 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6598 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
/* Returns, as __mmask8, the result of __builtin_ia32_ptestnmq256 whose first
 * operand is (__v4di)__A. NOTE(review): the remaining operands (presumably
 * __B and __U) are elided from this listing. */
6603 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6604 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6606 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
/* Masked _mm_unpackhi_epi32: feeds __U and _mm_unpackhi_epi32(__A, __B) to
 * __builtin_ia32_selectd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6612 static __inline__ __m128i __DEFAULT_FN_ATTRS
6613 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6615 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6616 (__v4si)_mm_unpackhi_epi32(__A, __B),
6620 static __inline__ __m128i __DEFAULT_FN_ATTRS
6621 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6623 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6624 (__v4si)_mm_unpackhi_epi32(__A, __B),
6625 (__v4si)_mm_setzero_si128());
/* Masked _mm256_unpackhi_epi32: feeds __U and _mm256_unpackhi_epi32(__A, __B)
 * to __builtin_ia32_selectd_256. NOTE(review): the last operand (presumably
 * __W) is elided from this listing. */
6628 static __inline__ __m256i __DEFAULT_FN_ATTRS
6629 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6631 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6632 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6636 static __inline__ __m256i __DEFAULT_FN_ATTRS
6637 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6639 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6640 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6641 (__v8si)_mm256_setzero_si256());
/* Masked _mm_unpackhi_epi64: feeds __U and _mm_unpackhi_epi64(__A, __B) to
 * __builtin_ia32_selectq_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6644 static __inline__ __m128i __DEFAULT_FN_ATTRS
6645 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6647 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6648 (__v2di)_mm_unpackhi_epi64(__A, __B),
6652 static __inline__ __m128i __DEFAULT_FN_ATTRS
6653 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6655 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6656 (__v2di)_mm_unpackhi_epi64(__A, __B),
6657 (__v2di)_mm_setzero_di());
/* Masked _mm256_unpackhi_epi64: feeds __U and _mm256_unpackhi_epi64(__A, __B)
 * to __builtin_ia32_selectq_256. NOTE(review): the last operand (presumably
 * __W) is elided from this listing. */
6660 static __inline__ __m256i __DEFAULT_FN_ATTRS
6661 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6663 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6664 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6668 static __inline__ __m256i __DEFAULT_FN_ATTRS
6669 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6671 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6672 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6673 (__v4di)_mm256_setzero_si256());
/* Masked _mm_unpacklo_epi32: feeds __U and _mm_unpacklo_epi32(__A, __B) to
 * __builtin_ia32_selectd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6676 static __inline__ __m128i __DEFAULT_FN_ATTRS
6677 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6679 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6680 (__v4si)_mm_unpacklo_epi32(__A, __B),
6684 static __inline__ __m128i __DEFAULT_FN_ATTRS
6685 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6687 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6688 (__v4si)_mm_unpacklo_epi32(__A, __B),
6689 (__v4si)_mm_setzero_si128());
/* Masked _mm256_unpacklo_epi32: feeds __U and _mm256_unpacklo_epi32(__A, __B)
 * to __builtin_ia32_selectd_256. NOTE(review): the last operand (presumably
 * __W) is elided from this listing. */
6692 static __inline__ __m256i __DEFAULT_FN_ATTRS
6693 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6695 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6696 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6700 static __inline__ __m256i __DEFAULT_FN_ATTRS
6701 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6703 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6704 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6705 (__v8si)_mm256_setzero_si256());
/* Masked _mm_unpacklo_epi64: feeds __U and _mm_unpacklo_epi64(__A, __B) to
 * __builtin_ia32_selectq_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6708 static __inline__ __m128i __DEFAULT_FN_ATTRS
6709 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6712 (__v2di)_mm_unpacklo_epi64(__A, __B),
6716 static __inline__ __m128i __DEFAULT_FN_ATTRS
6717 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6720 (__v2di)_mm_unpacklo_epi64(__A, __B),
6721 (__v2di)_mm_setzero_di());
/* Masked _mm256_unpacklo_epi64: feeds __U and _mm256_unpacklo_epi64(__A, __B)
 * to __builtin_ia32_selectq_256. NOTE(review): the last operand (presumably
 * __W) is elided from this listing. */
6724 static __inline__ __m256i __DEFAULT_FN_ATTRS
6725 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6728 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6732 static __inline__ __m256i __DEFAULT_FN_ATTRS
6733 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6736 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6737 (__v4di)_mm256_setzero_si256());
/* Masked _mm_sra_epi32: feeds __U and _mm_sra_epi32(__A, __B) to
 * __builtin_ia32_selectd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6740 static __inline__ __m128i __DEFAULT_FN_ATTRS
6741 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6744 (__v4si)_mm_sra_epi32(__A, __B),
6748 static __inline__ __m128i __DEFAULT_FN_ATTRS
6749 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6752 (__v4si)_mm_sra_epi32(__A, __B),
6753 (__v4si)_mm_setzero_si128());
/* Masked _mm256_sra_epi32 (shift operand __B is a __m128i): feeds __U and
 * _mm256_sra_epi32(__A, __B) to __builtin_ia32_selectd_256. NOTE(review): the
 * last operand (presumably __W) is elided from this listing. */
6756 static __inline__ __m256i __DEFAULT_FN_ATTRS
6757 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6760 (__v8si)_mm256_sra_epi32(__A, __B),
6764 static __inline__ __m256i __DEFAULT_FN_ATTRS
6765 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6768 (__v8si)_mm256_sra_epi32(__A, __B),
6769 (__v8si)_mm256_setzero_si256());
/* Masked _mm_srai_epi32: feeds __U and _mm_srai_epi32(__A, __B) to
 * __builtin_ia32_selectd_128. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6772 static __inline__ __m128i __DEFAULT_FN_ATTRS
6773 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6775 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6776 (__v4si)_mm_srai_epi32(__A, __B),
6780 static __inline__ __m128i __DEFAULT_FN_ATTRS
6781 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6783 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6784 (__v4si)_mm_srai_epi32(__A, __B),
6785 (__v4si)_mm_setzero_si128());
/* Masked _mm256_srai_epi32: feeds __U and _mm256_srai_epi32(__A, __B) to
 * __builtin_ia32_selectd_256. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6788 static __inline__ __m256i __DEFAULT_FN_ATTRS
6789 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6791 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6792 (__v8si)_mm256_srai_epi32(__A, __B),
6796 static __inline__ __m256i __DEFAULT_FN_ATTRS
6797 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6799 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6800 (__v8si)_mm256_srai_epi32(__A, __B),
6801 (__v8si)_mm256_setzero_si256());
6804 static __inline__ __m128i __DEFAULT_FN_ATTRS
6805 _mm_sra_epi64(__m128i __A, __m128i __B)
6807 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
/* Masked _mm_sra_epi64: feeds __U and _mm_sra_epi64(__A, __B) to
 * __builtin_ia32_selectq_128. The trailing backslashes are line splices inside
 * an ordinary function body. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6810 static __inline__ __m128i __DEFAULT_FN_ATTRS
6811 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6813 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6814 (__v2di)_mm_sra_epi64(__A, __B), \
6818 static __inline__ __m128i __DEFAULT_FN_ATTRS
6819 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6821 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6822 (__v2di)_mm_sra_epi64(__A, __B), \
6823 (__v2di)_mm_setzero_di());
6826 static __inline__ __m256i __DEFAULT_FN_ATTRS
6827 _mm256_sra_epi64(__m256i __A, __m128i __B)
6829 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
/* Masked _mm256_sra_epi64: feeds __U and _mm256_sra_epi64(__A, __B) to
 * __builtin_ia32_selectq_256. The trailing backslashes are line splices inside
 * an ordinary function body. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6832 static __inline__ __m256i __DEFAULT_FN_ATTRS
6833 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6835 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6836 (__v4di)_mm256_sra_epi64(__A, __B), \
6840 static __inline__ __m256i __DEFAULT_FN_ATTRS
6841 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6843 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6844 (__v4di)_mm256_sra_epi64(__A, __B), \
6845 (__v4di)_mm256_setzero_si256());
6848 static __inline__ __m128i __DEFAULT_FN_ATTRS
6849 _mm_srai_epi64(__m128i __A, int __imm)
6851 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
/* Masked _mm_srai_epi64: feeds __U and _mm_srai_epi64(__A, __imm) to
 * __builtin_ia32_selectq_128. The trailing backslashes are line splices inside
 * an ordinary function body. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6854 static __inline__ __m128i __DEFAULT_FN_ATTRS
6855 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6857 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6858 (__v2di)_mm_srai_epi64(__A, __imm), \
6862 static __inline__ __m128i __DEFAULT_FN_ATTRS
6863 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6865 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6866 (__v2di)_mm_srai_epi64(__A, __imm), \
6867 (__v2di)_mm_setzero_di());
6870 static __inline__ __m256i __DEFAULT_FN_ATTRS
6871 _mm256_srai_epi64(__m256i __A, int __imm)
6873 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
/* Masked _mm256_srai_epi64: feeds __U and _mm256_srai_epi64(__A, __imm) to
 * __builtin_ia32_selectq_256. The trailing backslashes are line splices inside
 * an ordinary function body. NOTE(review): the last operand (presumably __W)
 * is elided from this listing. */
6876 static __inline__ __m256i __DEFAULT_FN_ATTRS
6877 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6879 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6880 (__v4di)_mm256_srai_epi64(__A, __imm), \
6884 static __inline__ __m256i __DEFAULT_FN_ATTRS
6885 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6887 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6888 (__v4di)_mm256_srai_epi64(__A, __imm), \
6889 (__v4di)_mm256_setzero_si256());
/* Expands to __builtin_ia32_pternlogd128_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v4si. NOTE(review): the trailing mask operand line is elided
 * from this listing (presumably an all-ones mask). */
6892 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6893 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6894 (__v4si)(__m128i)(B), \
6895 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogd128_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v4si. NOTE(review): the trailing mask operand line (presumably
 * U) is elided from this listing. */
6898 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6899 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6900 (__v4si)(__m128i)(B), \
6901 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogd128_maskz(A, B, C, imm, ...) with A/B/C
 * viewed as __v4si. NOTE(review): the trailing mask operand line (presumably
 * U) is elided from this listing. */
6904 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6905 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
6906 (__v4si)(__m128i)(B), \
6907 (__v4si)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogd256_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v8si. NOTE(review): the trailing mask operand line is elided
 * from this listing (presumably an all-ones mask). */
6910 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6911 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6912 (__v8si)(__m256i)(B), \
6913 (__v8si)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogd256_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v8si. NOTE(review): the trailing mask operand line (presumably
 * U) is elided from this listing. */
6916 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6917 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6918 (__v8si)(__m256i)(B), \
6919 (__v8si)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogd256_maskz(A, B, C, imm, ...) with A/B/C
 * viewed as __v8si. NOTE(review): the trailing mask operand line (presumably
 * U) is elided from this listing. */
6922 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6923 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
6924 (__v8si)(__m256i)(B), \
6925 (__v8si)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogq128_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v2di. NOTE(review): the trailing mask operand line is elided
 * from this listing (presumably an all-ones mask). */
6928 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6929 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6930 (__v2di)(__m128i)(B), \
6931 (__v2di)(__m128i)(C), (int)(imm), \
/* Expands to __builtin_ia32_pternlogq128_mask(A, B, C, imm, ...) with A/B/C
 * viewed as __v2di. NOTE(review): the trailing mask operand line (presumably
 * U) is elided from this listing. */
6934 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6935 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6936 (__v2di)(__m128i)(B), \
6937 (__v2di)(__m128i)(C), (int)(imm), \
6940 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6941 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
6942 (__v2di)(__m128i)(B), \
6943 (__v2di)(__m128i)(C), (int)(imm), \
6946 #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6947 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6948 (__v4di)(__m256i)(B), \
6949 (__v4di)(__m256i)(C), (int)(imm), \
6952 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6953 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6954 (__v4di)(__m256i)(B), \
6955 (__v4di)(__m256i)(C), (int)(imm), \
6958 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6959 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
6960 (__v4di)(__m256i)(B), \
6961 (__v4di)(__m256i)(C), (int)(imm), \
6966 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
6967 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
6968 (__v8sf)(__m256)(B), (int)(imm), \
6969 (__v8sf)_mm256_setzero_ps(), \
6972 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
6973 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
6974 (__v8sf)(__m256)(B), (int)(imm), \
6975 (__v8sf)(__m256)(W), \
6978 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
6979 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
6980 (__v8sf)(__m256)(B), (int)(imm), \
6981 (__v8sf)_mm256_setzero_ps(), \
6984 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
6985 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
6986 (__v4df)(__m256d)(B), \
6988 (__v4df)_mm256_setzero_pd(), \
6991 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
6992 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
6993 (__v4df)(__m256d)(B), \
6995 (__v4df)(__m256d)(W), \
6998 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
6999 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
7000 (__v4df)(__m256d)(B), \
7002 (__v4df)_mm256_setzero_pd(), \
7005 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
7006 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7007 (__v8si)(__m256i)(B), \
7009 (__v8si)_mm256_setzero_si256(), \
7012 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
7013 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7014 (__v8si)(__m256i)(B), \
7016 (__v8si)(__m256i)(W), \
7019 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
7020 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
7021 (__v8si)(__m256i)(B), \
7023 (__v8si)_mm256_setzero_si256(), \
7026 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
7027 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7028 (__v4di)(__m256i)(B), \
7030 (__v4di)_mm256_setzero_si256(), \
7033 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
7034 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7035 (__v4di)(__m256i)(B), \
7037 (__v4di)(__m256i)(W), \
7040 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
7041 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
7042 (__v4di)(__m256i)(B), \
7044 (__v4di)_mm256_setzero_si256(), \
/* Merge-masking wrapper around _mm_shuffle_pd: expands to
 * __builtin_ia32_selectpd_128 applied to mask (__mmask8)(U), the shuffle
 * result _mm_shuffle_pd(A, B, M), and W viewed as __v2df; yields __m128d.
 * NOTE(review): W is presumably the source for lanes whose mask bit is
 * clear -- confirm against the select builtin. */
7047 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7048 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7049 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7050 (__v2df)(__m128d)(W)); })
/* Zero-masking wrapper around _mm_shuffle_pd: expands to
 * __builtin_ia32_selectpd_128 applied to mask (__mmask8)(U), the shuffle
 * result _mm_shuffle_pd(A, B, M), and the _mm_setzero_pd() vector; yields
 * __m128d.  NOTE(review): presumably lanes with a clear mask bit take the
 * zero vector -- confirm against the select builtin. */
7052 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7053 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
7054 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
7055 (__v2df)_mm_setzero_pd()); })
/* Merge-masking wrapper around _mm256_shuffle_pd: expands to
 * __builtin_ia32_selectpd_256 applied to mask (__mmask8)(U), the shuffle
 * result _mm256_shuffle_pd(A, B, M), and W viewed as __v4df; yields __m256d.
 * NOTE(review): W is presumably the source for lanes whose mask bit is
 * clear -- confirm against the select builtin. */
7057 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7058 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7059 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7060 (__v4df)(__m256d)(W)); })
/* Zero-masking wrapper around _mm256_shuffle_pd: expands to
 * __builtin_ia32_selectpd_256 applied to mask (__mmask8)(U), the shuffle
 * result _mm256_shuffle_pd(A, B, M), and the _mm256_setzero_pd() vector;
 * yields __m256d.  NOTE(review): presumably lanes with a clear mask bit take
 * the zero vector -- confirm against the select builtin. */
7062 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7063 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7064 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
7065 (__v4df)_mm256_setzero_pd()); })
/* Merge-masking wrapper around _mm_shuffle_ps: expands to
 * __builtin_ia32_selectps_128 applied to mask (__mmask8)(U), the shuffle
 * result _mm_shuffle_ps(A, B, M), and W viewed as __v4sf; yields __m128.
 * NOTE(review): W is presumably the source for lanes whose mask bit is
 * clear -- confirm against the select builtin. */
7067 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7068 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7069 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7070 (__v4sf)(__m128)(W)); })
/* Zero-masking wrapper around _mm_shuffle_ps: expands to
 * __builtin_ia32_selectps_128 applied to mask (__mmask8)(U), the shuffle
 * result _mm_shuffle_ps(A, B, M), and the _mm_setzero_ps() vector; yields
 * __m128.  NOTE(review): presumably lanes with a clear mask bit take the
 * zero vector -- confirm against the select builtin. */
7072 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7073 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7074 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
7075 (__v4sf)_mm_setzero_ps()); })
/* Merge-masking wrapper around _mm256_shuffle_ps: expands to
 * __builtin_ia32_selectps_256 applied to mask (__mmask8)(U), the shuffle
 * result _mm256_shuffle_ps(A, B, M), and W viewed as __v8sf; yields __m256.
 * NOTE(review): W is presumably the source for lanes whose mask bit is
 * clear -- confirm against the select builtin. */
7077 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7078 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7079 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7080 (__v8sf)(__m256)(W)); })
/* Zero-masking wrapper around _mm256_shuffle_ps: expands to
 * __builtin_ia32_selectps_256 applied to mask (__mmask8)(U), the shuffle
 * result _mm256_shuffle_ps(A, B, M), and the _mm256_setzero_ps() vector;
 * yields __m256.  NOTE(review): presumably lanes with a clear mask bit take
 * the zero vector -- confirm against the select builtin. */
7082 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7083 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7084 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
7085 (__v8sf)_mm256_setzero_ps()); })
7087 static __inline__ __m128d __DEFAULT_FN_ATTRS
7088 _mm_rsqrt14_pd (__m128d __A)
7090 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7096 static __inline__ __m128d __DEFAULT_FN_ATTRS
7097 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
7099 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7104 static __inline__ __m128d __DEFAULT_FN_ATTRS
7105 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
7107 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7113 static __inline__ __m256d __DEFAULT_FN_ATTRS
7114 _mm256_rsqrt14_pd (__m256d __A)
7116 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7118 _mm256_setzero_pd (),
7122 static __inline__ __m256d __DEFAULT_FN_ATTRS
7123 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
7125 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7130 static __inline__ __m256d __DEFAULT_FN_ATTRS
7131 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
7133 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7135 _mm256_setzero_pd (),
7139 static __inline__ __m128 __DEFAULT_FN_ATTRS
7140 _mm_rsqrt14_ps (__m128 __A)
7142 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7148 static __inline__ __m128 __DEFAULT_FN_ATTRS
7149 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
7151 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7156 static __inline__ __m128 __DEFAULT_FN_ATTRS
7157 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
7159 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7165 static __inline__ __m256 __DEFAULT_FN_ATTRS
7166 _mm256_rsqrt14_ps (__m256 __A)
7168 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7170 _mm256_setzero_ps (),
7174 static __inline__ __m256 __DEFAULT_FN_ATTRS
7175 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
7177 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7182 static __inline__ __m256 __DEFAULT_FN_ATTRS
7183 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
7185 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7187 _mm256_setzero_ps (),
/* Passes the 128-bit float vector __A (viewed as __v4sf) to
 * __builtin_ia32_broadcastf32x4_256_mask and returns the result as __m256.
 * The second operand is the single-precision _mm256_undefined_ps(), so its
 * type already matches the __v8sf view it is cast to (no double->float
 * vector reinterpretation), in the same way _mm256_broadcast_i32x4 pairs
 * __v8si with _mm256_undefined_si256().
 * NOTE(review): presumably replicates __A into both 128-bit halves of the
 * result -- semantics live in the builtin; confirm. */
7191 static __inline__ __m256 __DEFAULT_FN_ATTRS
7192 _mm256_broadcast_f32x4 (__m128 __A)
7194 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7195 (__v8sf)_mm256_undefined_ps (),
7199 static __inline__ __m256 __DEFAULT_FN_ATTRS
7200 _mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
7202 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7207 static __inline__ __m256 __DEFAULT_FN_ATTRS
7208 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
7210 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7211 (__v8sf) _mm256_setzero_ps (),
7215 static __inline__ __m256i __DEFAULT_FN_ATTRS
7216 _mm256_broadcast_i32x4 (__m128i __A)
7218 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7219 (__v8si)_mm256_undefined_si256 (),
7223 static __inline__ __m256i __DEFAULT_FN_ATTRS
7224 _mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
7226 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7231 static __inline__ __m256i __DEFAULT_FN_ATTRS
7232 _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
7234 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
7236 (__v8si) _mm256_setzero_si256 (),
7240 static __inline__ __m256d __DEFAULT_FN_ATTRS
7241 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
7243 return (__m256d)__builtin_ia32_selectpd_256(__M,
7244 (__v4df) _mm256_broadcastsd_pd(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm256_broadcastsd_pd(__A), and the _mm256_setzero_pd() vector to
 * __builtin_ia32_selectpd_256 and returns the result as __m256d.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7248 static __inline__ __m256d __DEFAULT_FN_ATTRS
7249 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7251 return (__m256d)__builtin_ia32_selectpd_256(__M,
7252 (__v4df) _mm256_broadcastsd_pd(__A),
7253 (__v4df) _mm256_setzero_pd());
7256 static __inline__ __m128 __DEFAULT_FN_ATTRS
7257 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
7259 return (__m128)__builtin_ia32_selectps_128(__M,
7260 (__v4sf) _mm_broadcastss_ps(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm_broadcastss_ps(__A), and the _mm_setzero_ps() vector to
 * __builtin_ia32_selectps_128 and returns the result as __m128.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7264 static __inline__ __m128 __DEFAULT_FN_ATTRS
7265 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7267 return (__m128)__builtin_ia32_selectps_128(__M,
7268 (__v4sf) _mm_broadcastss_ps(__A),
7269 (__v4sf) _mm_setzero_ps());
7272 static __inline__ __m256 __DEFAULT_FN_ATTRS
7273 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
7275 return (__m256)__builtin_ia32_selectps_256(__M,
7276 (__v8sf) _mm256_broadcastss_ps(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm256_broadcastss_ps(__A), and the _mm256_setzero_ps() vector to
 * __builtin_ia32_selectps_256 and returns the result as __m256.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7280 static __inline__ __m256 __DEFAULT_FN_ATTRS
7281 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7283 return (__m256)__builtin_ia32_selectps_256(__M,
7284 (__v8sf) _mm256_broadcastss_ps(__A),
7285 (__v8sf) _mm256_setzero_ps());
7288 static __inline__ __m128i __DEFAULT_FN_ATTRS
7289 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7291 return (__m128i)__builtin_ia32_selectd_128(__M,
7292 (__v4si) _mm_broadcastd_epi32(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm_broadcastd_epi32(__A), and the _mm_setzero_si128() vector to
 * __builtin_ia32_selectd_128 and returns the result as __m128i.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7296 static __inline__ __m128i __DEFAULT_FN_ATTRS
7297 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7299 return (__m128i)__builtin_ia32_selectd_128(__M,
7300 (__v4si) _mm_broadcastd_epi32(__A),
7301 (__v4si) _mm_setzero_si128());
7304 static __inline__ __m256i __DEFAULT_FN_ATTRS
7305 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
7307 return (__m256i)__builtin_ia32_selectd_256(__M,
7308 (__v8si) _mm256_broadcastd_epi32(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm256_broadcastd_epi32(__A), and the _mm256_setzero_si256() vector to
 * __builtin_ia32_selectd_256 and returns the result as __m256i.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7312 static __inline__ __m256i __DEFAULT_FN_ATTRS
7313 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7315 return (__m256i)__builtin_ia32_selectd_256(__M,
7316 (__v8si) _mm256_broadcastd_epi32(__A),
7317 (__v8si) _mm256_setzero_si256());
7320 static __inline__ __m128i __DEFAULT_FN_ATTRS
7321 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
7323 return (__m128i)__builtin_ia32_selectq_128(__M,
7324 (__v2di) _mm_broadcastq_epi64(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm_broadcastq_epi64(__A), and the _mm_setzero_si128() vector to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7328 static __inline__ __m128i __DEFAULT_FN_ATTRS
7329 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7331 return (__m128i)__builtin_ia32_selectq_128(__M,
7332 (__v2di) _mm_broadcastq_epi64(__A),
7333 (__v2di) _mm_setzero_si128());
7336 static __inline__ __m256i __DEFAULT_FN_ATTRS
7337 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
7339 return (__m256i)__builtin_ia32_selectq_256(__M,
7340 (__v4di) _mm256_broadcastq_epi64(__A),
/* Zero-masking broadcast: passes mask __M, the vector
 * _mm256_broadcastq_epi64(__A), and the _mm256_setzero_si256() vector to
 * __builtin_ia32_selectq_256 and returns the result as __m256i.
 * NOTE(review): presumably lanes with a clear mask bit take the zero
 * vector -- confirm against the select builtin. */
7344 static __inline__ __m256i __DEFAULT_FN_ATTRS
7345 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7347 return (__m256i)__builtin_ia32_selectq_256(__M,
7348 (__v4di) _mm256_broadcastq_epi64(__A),
7349 (__v4di) _mm256_setzero_si256());
7352 static __inline__ __m128i __DEFAULT_FN_ATTRS
7353 _mm_cvtsepi32_epi8 (__m128i __A)
7355 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7356 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovsdb128_mask called
 * with __A viewed as __v4si, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a signed-saturating 32->8 bit narrowing with __O
 * as the pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7360 static __inline__ __m128i __DEFAULT_FN_ATTRS
7361 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7363 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7364 (__v16qi) __O, __M);
7367 static __inline__ __m128i __DEFAULT_FN_ATTRS
7368 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
7370 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7371 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovsdb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 32->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7375 static __inline__ void __DEFAULT_FN_ATTRS
7376 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7378 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7381 static __inline__ __m128i __DEFAULT_FN_ATTRS
7382 _mm256_cvtsepi32_epi8 (__m256i __A)
7384 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7385 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovsdb256_mask called
 * with __A viewed as __v8si, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a signed-saturating 32->8 bit narrowing with __O
 * as the pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7389 static __inline__ __m128i __DEFAULT_FN_ATTRS
7390 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7392 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7393 (__v16qi) __O, __M);
7396 static __inline__ __m128i __DEFAULT_FN_ATTRS
7397 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7399 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7400 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovsdb256mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 32->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7404 static __inline__ void __DEFAULT_FN_ATTRS
7405 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7407 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7410 static __inline__ __m128i __DEFAULT_FN_ATTRS
7411 _mm_cvtsepi32_epi16 (__m128i __A)
7413 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7414 (__v8hi)_mm_setzero_si128 (),
7418 static __inline__ __m128i __DEFAULT_FN_ATTRS
7419 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7421 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7426 static __inline__ __m128i __DEFAULT_FN_ATTRS
7427 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7429 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7430 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovsdw128mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 32->16 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7434 static __inline__ void __DEFAULT_FN_ATTRS
7435 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7437 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7440 static __inline__ __m128i __DEFAULT_FN_ATTRS
7441 _mm256_cvtsepi32_epi16 (__m256i __A)
7443 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7444 (__v8hi)_mm_undefined_si128(),
7448 static __inline__ __m128i __DEFAULT_FN_ATTRS
7449 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7451 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7455 static __inline__ __m128i __DEFAULT_FN_ATTRS
7456 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7458 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7459 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovsdw256mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 32->16 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7463 static __inline__ void __DEFAULT_FN_ATTRS
7464 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7466 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7469 static __inline__ __m128i __DEFAULT_FN_ATTRS
7470 _mm_cvtsepi64_epi8 (__m128i __A)
7472 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7473 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovsqb128_mask called
 * with __A viewed as __v2di, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a signed-saturating 64->8 bit narrowing with __O
 * as the pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7477 static __inline__ __m128i __DEFAULT_FN_ATTRS
7478 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7480 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7481 (__v16qi) __O, __M);
7484 static __inline__ __m128i __DEFAULT_FN_ATTRS
7485 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7487 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7488 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovsqb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7492 static __inline__ void __DEFAULT_FN_ATTRS
7493 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7495 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7498 static __inline__ __m128i __DEFAULT_FN_ATTRS
7499 _mm256_cvtsepi64_epi8 (__m256i __A)
7501 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7502 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovsqb256_mask called
 * with __A viewed as __v4di, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a signed-saturating 64->8 bit narrowing with __O
 * as the pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7506 static __inline__ __m128i __DEFAULT_FN_ATTRS
7507 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7509 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7510 (__v16qi) __O, __M);
7513 static __inline__ __m128i __DEFAULT_FN_ATTRS
7514 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7516 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7517 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovsqb256mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7521 static __inline__ void __DEFAULT_FN_ATTRS
7522 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7524 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7527 static __inline__ __m128i __DEFAULT_FN_ATTRS
7528 _mm_cvtsepi64_epi32 (__m128i __A)
7530 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7531 (__v4si)_mm_undefined_si128(),
7535 static __inline__ __m128i __DEFAULT_FN_ATTRS
7536 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7538 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7542 static __inline__ __m128i __DEFAULT_FN_ATTRS
7543 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7545 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7546 (__v4si) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v4si *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovsqd128mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->32 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7550 static __inline__ void __DEFAULT_FN_ATTRS
7551 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7553 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7556 static __inline__ __m128i __DEFAULT_FN_ATTRS
7557 _mm256_cvtsepi64_epi32 (__m256i __A)
7559 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7560 (__v4si)_mm_undefined_si128(),
7564 static __inline__ __m128i __DEFAULT_FN_ATTRS
7565 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7567 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7572 static __inline__ __m128i __DEFAULT_FN_ATTRS
7573 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7575 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7576 (__v4si) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v4si *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovsqd256mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->32 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7580 static __inline__ void __DEFAULT_FN_ATTRS
7581 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7583 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7586 static __inline__ __m128i __DEFAULT_FN_ATTRS
7587 _mm_cvtsepi64_epi16 (__m128i __A)
7589 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7590 (__v8hi)_mm_undefined_si128(),
7594 static __inline__ __m128i __DEFAULT_FN_ATTRS
7595 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7597 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7601 static __inline__ __m128i __DEFAULT_FN_ATTRS
7602 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7604 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7605 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovsqw128mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->16 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7609 static __inline__ void __DEFAULT_FN_ATTRS
7610 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7612 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7615 static __inline__ __m128i __DEFAULT_FN_ATTRS
7616 _mm256_cvtsepi64_epi16 (__m256i __A)
7618 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7619 (__v8hi)_mm_undefined_si128(),
7623 static __inline__ __m128i __DEFAULT_FN_ATTRS
7624 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7626 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7630 static __inline__ __m128i __DEFAULT_FN_ATTRS
7631 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7633 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7634 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovsqw256mem_mask; returns nothing.
 * NOTE(review): by its name a masked signed-saturating 64->16 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7638 static __inline__ void __DEFAULT_FN_ATTRS
7639 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7641 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7644 static __inline__ __m128i __DEFAULT_FN_ATTRS
7645 _mm_cvtusepi32_epi8 (__m128i __A)
7647 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7648 (__v16qi)_mm_undefined_si128(),
7652 static __inline__ __m128i __DEFAULT_FN_ATTRS
7653 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7655 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7660 static __inline__ __m128i __DEFAULT_FN_ATTRS
7661 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7663 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7664 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovusdb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 32->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7668 static __inline__ void __DEFAULT_FN_ATTRS
7669 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7671 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7674 static __inline__ __m128i __DEFAULT_FN_ATTRS
7675 _mm256_cvtusepi32_epi8 (__m256i __A)
7677 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7678 (__v16qi)_mm_undefined_si128(),
7682 static __inline__ __m128i __DEFAULT_FN_ATTRS
7683 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7685 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7690 static __inline__ __m128i __DEFAULT_FN_ATTRS
7691 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7693 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7694 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovusdb256mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 32->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7698 static __inline__ void __DEFAULT_FN_ATTRS
7699 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7701 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7704 static __inline__ __m128i __DEFAULT_FN_ATTRS
7705 _mm_cvtusepi32_epi16 (__m128i __A)
7707 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7708 (__v8hi)_mm_undefined_si128(),
7712 static __inline__ __m128i __DEFAULT_FN_ATTRS
7713 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7715 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7719 static __inline__ __m128i __DEFAULT_FN_ATTRS
7720 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7722 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7723 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovusdw128mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 32->16 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7727 static __inline__ void __DEFAULT_FN_ATTRS
7728 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7730 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7733 static __inline__ __m128i __DEFAULT_FN_ATTRS
7734 _mm256_cvtusepi32_epi16 (__m256i __A)
7736 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7737 (__v8hi) _mm_undefined_si128(),
7741 static __inline__ __m128i __DEFAULT_FN_ATTRS
7742 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7744 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7748 static __inline__ __m128i __DEFAULT_FN_ATTRS
7749 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7751 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7752 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovusdw256mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 32->16 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7756 static __inline__ void __DEFAULT_FN_ATTRS
7757 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7759 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7762 static __inline__ __m128i __DEFAULT_FN_ATTRS
7763 _mm_cvtusepi64_epi8 (__m128i __A)
7765 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7766 (__v16qi)_mm_undefined_si128(),
7770 static __inline__ __m128i __DEFAULT_FN_ATTRS
7771 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7773 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7778 static __inline__ __m128i __DEFAULT_FN_ATTRS
7779 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7781 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7782 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovusqb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 64->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7786 static __inline__ void __DEFAULT_FN_ATTRS
7787 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7789 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7792 static __inline__ __m128i __DEFAULT_FN_ATTRS
7793 _mm256_cvtusepi64_epi8 (__m256i __A)
7795 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7796 (__v16qi)_mm_undefined_si128(),
7800 static __inline__ __m128i __DEFAULT_FN_ATTRS
7801 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7803 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7808 static __inline__ __m128i __DEFAULT_FN_ATTRS
7809 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7811 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7812 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovusqb256mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 64->8 bit narrowing
 * store to __P -- semantics live in the builtin; confirm. */
7816 static __inline__ void __DEFAULT_FN_ATTRS
7817 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7819 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7822 static __inline__ __m128i __DEFAULT_FN_ATTRS
7823 _mm_cvtusepi64_epi32 (__m128i __A)
7825 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7826 (__v4si)_mm_undefined_si128(),
7830 static __inline__ __m128i __DEFAULT_FN_ATTRS
7831 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7833 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7837 static __inline__ __m128i __DEFAULT_FN_ATTRS
7838 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7840 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7841 (__v4si) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v4si *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovusqd128mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 64->32 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7845 static __inline__ void __DEFAULT_FN_ATTRS
7846 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7848 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7851 static __inline__ __m128i __DEFAULT_FN_ATTRS
7852 _mm256_cvtusepi64_epi32 (__m256i __A)
7854 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7855 (__v4si)_mm_undefined_si128(),
7859 static __inline__ __m128i __DEFAULT_FN_ATTRS
7860 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7862 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7866 static __inline__ __m128i __DEFAULT_FN_ATTRS
7867 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7869 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7870 (__v4si) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v4si *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovusqd256mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 64->32 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7874 static __inline__ void __DEFAULT_FN_ATTRS
7875 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7877 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7880 static __inline__ __m128i __DEFAULT_FN_ATTRS
7881 _mm_cvtusepi64_epi16 (__m128i __A)
7883 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7884 (__v8hi)_mm_undefined_si128(),
7888 static __inline__ __m128i __DEFAULT_FN_ATTRS
7889 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7891 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7895 static __inline__ __m128i __DEFAULT_FN_ATTRS
7896 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7898 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7899 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovusqw128mem_mask; returns nothing.
 * NOTE(review): by its name a masked unsigned-saturating 64->16 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7903 static __inline__ void __DEFAULT_FN_ATTRS
7904 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7906 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7909 static __inline__ __m128i __DEFAULT_FN_ATTRS
7910 _mm256_cvtusepi64_epi16 (__m256i __A)
7912 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7913 (__v8hi)_mm_undefined_si128(),
7917 static __inline__ __m128i __DEFAULT_FN_ATTRS
7918 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7920 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7924 static __inline__ __m128i __DEFAULT_FN_ATTRS
7925 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7927 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7928 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v4di) and mask __M to
 * __builtin_ia32_pmovusqw256mem_mask; returns nothing.
 * The builtin is invoked as a plain statement: this function is declared
 * void, and C forbids a return statement with an expression in a void
 * function (C11 6.8.6.4), which is also how every other *_storeu_* wrapper
 * in this header is written.
 * NOTE(review): by its name a masked unsigned-saturating 64->16 bit
 * narrowing store to __P -- semantics live in the builtin; confirm. */
7932 static __inline__ void __DEFAULT_FN_ATTRS
7933 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7935 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7938 static __inline__ __m128i __DEFAULT_FN_ATTRS
7939 _mm_cvtepi32_epi8 (__m128i __A)
7941 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7942 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovdb128_mask called
 * with __A viewed as __v4si, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a truncating 32->8 bit narrowing with __O as the
 * pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7946 static __inline__ __m128i __DEFAULT_FN_ATTRS
7947 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7949 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7950 (__v16qi) __O, __M);
7953 static __inline__ __m128i __DEFAULT_FN_ATTRS
7954 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7956 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7958 _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovdb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked truncating 32->8 bit narrowing store to
 * __P -- semantics live in the builtin; confirm. */
7962 static __inline__ void __DEFAULT_FN_ATTRS
7963 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7965 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7968 static __inline__ __m128i __DEFAULT_FN_ATTRS
7969 _mm256_cvtepi32_epi8 (__m256i __A)
7971 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7972 (__v16qi)_mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovdb256_mask called
 * with __A viewed as __v8si, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a truncating 32->8 bit narrowing with __O as the
 * pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
7976 static __inline__ __m128i __DEFAULT_FN_ATTRS
7977 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7979 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7980 (__v16qi) __O, __M);
7983 static __inline__ __m128i __DEFAULT_FN_ATTRS
7984 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7986 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7987 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovdb256mem_mask; returns nothing.
 * NOTE(review): by its name a masked truncating 32->8 bit narrowing store to
 * __P -- semantics live in the builtin; confirm. */
7991 static __inline__ void __DEFAULT_FN_ATTRS
7992 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7994 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7997 static __inline__ __m128i __DEFAULT_FN_ATTRS
7998 _mm_cvtepi32_epi16 (__m128i __A)
8000 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8001 (__v8hi) _mm_setzero_si128 (),
8005 static __inline__ __m128i __DEFAULT_FN_ATTRS
8006 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8008 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8012 static __inline__ __m128i __DEFAULT_FN_ATTRS
8013 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
8015 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8016 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v4si) and mask __M to
 * __builtin_ia32_pmovdw128mem_mask; returns nothing.
 * NOTE(review): by its name a masked truncating 32->16 bit narrowing store
 * to __P -- semantics live in the builtin; confirm. */
8020 static __inline__ void __DEFAULT_FN_ATTRS
8021 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8023 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
8026 static __inline__ __m128i __DEFAULT_FN_ATTRS
8027 _mm256_cvtepi32_epi16 (__m256i __A)
8029 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8030 (__v8hi)_mm_setzero_si128 (),
8034 static __inline__ __m128i __DEFAULT_FN_ATTRS
8035 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8037 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8041 static __inline__ __m128i __DEFAULT_FN_ATTRS
8042 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
8044 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8045 (__v8hi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v8hi *), __A (viewed as __v8si) and mask __M to
 * __builtin_ia32_pmovdw256mem_mask; returns nothing.
 * NOTE(review): by its name a masked truncating 32->16 bit narrowing store
 * to __P -- semantics live in the builtin; confirm. */
8049 static __inline__ void __DEFAULT_FN_ATTRS
8050 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8052 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
8055 static __inline__ __m128i __DEFAULT_FN_ATTRS
8056 _mm_cvtepi64_epi8 (__m128i __A)
8058 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8059 (__v16qi) _mm_undefined_si128(),
/* Returns, as __m128i, the result of __builtin_ia32_pmovqb128_mask called
 * with __A viewed as __v2di, __O viewed as __v16qi, and mask __M.
 * NOTE(review): by its name a truncating 64->8 bit narrowing with __O as the
 * pass-through for unselected lanes -- semantics live in the builtin;
 * confirm. */
8063 static __inline__ __m128i __DEFAULT_FN_ATTRS
8064 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
8066 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8067 (__v16qi) __O, __M);
8070 static __inline__ __m128i __DEFAULT_FN_ATTRS
8071 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
8073 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8074 (__v16qi) _mm_setzero_si128 (),
/* Forwards __P (viewed as __v16qi *), __A (viewed as __v2di) and mask __M to
 * __builtin_ia32_pmovqb128mem_mask; returns nothing.
 * NOTE(review): by its name a masked truncating 64->8 bit narrowing store to
 * __P -- semantics live in the builtin; confirm. */
8078 static __inline__ void __DEFAULT_FN_ATTRS
8079 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8081 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
8084 static __inline__ __m128i __DEFAULT_FN_ATTRS
8085 _mm256_cvtepi64_epi8 (__m256i __A)
8087 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8088 (__v16qi) _mm_undefined_si128(),
8092 static __inline__ __m128i __DEFAULT_FN_ATTRS
8093 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8095 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8096 (__v16qi) __O, __M);
8099 static __inline__ __m128i __DEFAULT_FN_ATTRS
8100 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
8102 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8103 (__v16qi) _mm_setzero_si128 (),
8107 static __inline__ void __DEFAULT_FN_ATTRS
8108 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8110 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
8113 static __inline__ __m128i __DEFAULT_FN_ATTRS
8114 _mm_cvtepi64_epi32 (__m128i __A)
8116 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8117 (__v4si)_mm_undefined_si128(),
8121 static __inline__ __m128i __DEFAULT_FN_ATTRS
8122 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
8124 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8128 static __inline__ __m128i __DEFAULT_FN_ATTRS
8129 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
8131 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8132 (__v4si) _mm_setzero_si128 (),
8136 static __inline__ void __DEFAULT_FN_ATTRS
8137 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
8139 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
8142 static __inline__ __m128i __DEFAULT_FN_ATTRS
8143 _mm256_cvtepi64_epi32 (__m256i __A)
8145 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8146 (__v4si) _mm_undefined_si128(),
8150 static __inline__ __m128i __DEFAULT_FN_ATTRS
8151 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
8153 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8157 static __inline__ __m128i __DEFAULT_FN_ATTRS
8158 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
8160 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8161 (__v4si) _mm_setzero_si128 (),
8165 static __inline__ void __DEFAULT_FN_ATTRS
8166 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
8168 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
8171 static __inline__ __m128i __DEFAULT_FN_ATTRS
8172 _mm_cvtepi64_epi16 (__m128i __A)
8174 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8175 (__v8hi) _mm_undefined_si128(),
8179 static __inline__ __m128i __DEFAULT_FN_ATTRS
8180 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8182 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8187 static __inline__ __m128i __DEFAULT_FN_ATTRS
8188 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
8190 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8191 (__v8hi) _mm_setzero_si128 (),
8195 static __inline__ void __DEFAULT_FN_ATTRS
8196 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8198 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
8201 static __inline__ __m128i __DEFAULT_FN_ATTRS
8202 _mm256_cvtepi64_epi16 (__m256i __A)
8204 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8205 (__v8hi)_mm_undefined_si128(),
8209 static __inline__ __m128i __DEFAULT_FN_ATTRS
8210 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8212 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8216 static __inline__ __m128i __DEFAULT_FN_ATTRS
8217 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
8219 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8220 (__v8hi) _mm_setzero_si128 (),
8224 static __inline__ void __DEFAULT_FN_ATTRS
8225 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8227 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* Extract one 128-bit half of A as four floats.  Bit 0 of imm picks the
 * source lanes: 4..7 when set, 0..3 when clear; the remaining bits of imm
 * are ignored.  The second shuffle operand is never indexed. */
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
      (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), \
      (((imm) & 1) << 2) + 0, (((imm) & 1) << 2) + 1, \
      (((imm) & 1) << 2) + 2, (((imm) & 1) << 2) + 3); })
8238 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
8239 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
8240 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
/* Zero-masked form of _mm256_extractf32x4_ps: passes mask U, the extracted
 * four floats and an all-zero vector to __builtin_ia32_selectps_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
      (__v4sf)_mm_setzero_ps()); })
/* Extract one 128-bit half of the integer vector A as four 32-bit lanes.
 * Bit 0 of imm picks the source lanes: 4..7 when set, 0..3 when clear; the
 * remaining bits of imm are ignored.  A is cast through the integer vector
 * type __m256i (not the float type __m256) before being viewed as __v8si.
 * The second shuffle operand is never indexed. */
#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                   (__v8si)_mm256_undefined_si256(), \
                                   ((imm) & 1) ? 4 : 0, \
                                   ((imm) & 1) ? 5 : 1, \
                                   ((imm) & 1) ? 6 : 2, \
                                   ((imm) & 1) ? 7 : 3); })
8256 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
8257 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8258 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
/* Zero-masked form of _mm256_extracti32x4_epi32: passes mask U, the
 * extracted four 32-bit lanes and an all-zero vector to
 * __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
      (__v4si)_mm_setzero_si128()); })
/* Replace one 128-bit half of A with the four floats of B.  When bit 0 of
 * imm is set the result is { A[0..3], B[0..3] }; when clear it is
 * { B[0..3], A[4..7] }.  B is widened with _mm256_castps128_ps256 so both
 * shuffle operands have eight lanes; only its low four lanes (shuffle
 * indices 8..11) are ever selected.  A is cast through the interface type
 * __m256 before the __v8sf view, as the other macros in this file do. */
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                  (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
                                  ((imm) & 0x1) ?  0 :  8, \
                                  ((imm) & 0x1) ?  1 :  9, \
                                  ((imm) & 0x1) ?  2 : 10, \
                                  ((imm) & 0x1) ?  3 : 11, \
                                  ((imm) & 0x1) ?  8 :  4, \
                                  ((imm) & 0x1) ?  9 :  5, \
                                  ((imm) & 0x1) ? 10 :  6, \
                                  ((imm) & 0x1) ? 11 :  7); })
8278 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
8279 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
8280 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
/* Zero-masked form of _mm256_insertf32x4: passes mask U, the inserted
 * eight-float result and an all-zero vector to __builtin_ia32_selectps_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()); })
/* Replace one 128-bit half of A with the four 32-bit lanes of B.  When bit 0
 * of imm is set the result is { A[0..3], B[0..3] }; when clear it is
 * { B[0..3], A[4..7] }.  B is widened with _mm256_castsi128_si256 so both
 * shuffle operands have eight lanes; only its low four lanes (shuffle
 * indices 8..11) are ever selected.  A is cast through the interface type
 * __m256i before the __v8si view, as the other macros in this file do. */
#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                   (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
                                   ((imm) & 0x1) ?  0 :  8, \
                                   ((imm) & 0x1) ?  1 :  9, \
                                   ((imm) & 0x1) ?  2 : 10, \
                                   ((imm) & 0x1) ?  3 : 11, \
                                   ((imm) & 0x1) ?  8 :  4, \
                                   ((imm) & 0x1) ?  9 :  5, \
                                   ((imm) & 0x1) ? 10 :  6, \
                                   ((imm) & 0x1) ? 11 :  7); })
8300 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
8301 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8302 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
/* Zero-masked form of _mm256_inserti32x4: passes mask U, the inserted
 * eight-lane result and an all-zero vector to __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()); })
8310 #define _mm_getmant_pd(A, B, C) __extension__({\
8311 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8312 (int)(((C)<<2) | (B)), \
8313 (__v2df)_mm_setzero_pd(), \
8316 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
8317 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8318 (int)(((C)<<2) | (B)), \
8319 (__v2df)(__m128d)(W), \
8322 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
8323 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
8324 (int)(((C)<<2) | (B)), \
8325 (__v2df)_mm_setzero_pd(), \
8328 #define _mm256_getmant_pd(A, B, C) __extension__ ({ \
8329 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8330 (int)(((C)<<2) | (B)), \
8331 (__v4df)_mm256_setzero_pd(), \
8334 #define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
8335 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8336 (int)(((C)<<2) | (B)), \
8337 (__v4df)(__m256d)(W), \
8340 #define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
8341 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
8342 (int)(((C)<<2) | (B)), \
8343 (__v4df)_mm256_setzero_pd(), \
8346 #define _mm_getmant_ps(A, B, C) __extension__ ({ \
8347 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8348 (int)(((C)<<2) | (B)), \
8349 (__v4sf)_mm_setzero_ps(), \
8352 #define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8353 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8354 (int)(((C)<<2) | (B)), \
8355 (__v4sf)(__m128)(W), \
8358 #define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8359 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
8360 (int)(((C)<<2) | (B)), \
8361 (__v4sf)_mm_setzero_ps(), \
8364 #define _mm256_getmant_ps(A, B, C) __extension__ ({ \
8365 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8366 (int)(((C)<<2) | (B)), \
8367 (__v8sf)_mm256_setzero_ps(), \
8370 #define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8371 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8372 (int)(((C)<<2) | (B)), \
8373 (__v8sf)(__m256)(W), \
8376 #define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8377 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
8378 (int)(((C)<<2) | (B)), \
8379 (__v8sf)_mm256_setzero_ps(), \
/* Wrapper around __builtin_ia32_gather3div2df.  Operands in order: v1_old
 * viewed as __v2df, addr as double const *, index viewed as __v2di, mask as
 * __mmask8, scale as int.  The result is returned as __m128d.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div2di.  Operands in order: v1_old
 * viewed as __v2di, addr as long long const *, index viewed as __v2di, mask
 * as __mmask8, scale as int.  The result is returned as __m128i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div4df.  Operands in order: v1_old
 * viewed as __v4df, addr as double const *, index viewed as __v4di, mask as
 * __mmask8, scale as int.  The result is returned as __m256d.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div4di.  Operands in order: v1_old
 * viewed as __v4di, addr as long long const *, index viewed as __v4di, mask
 * as __mmask8, scale as int.  The result is returned as __m256i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div4sf.  Operands in order: v1_old
 * viewed as __v4sf, addr as float const *, index viewed as __v2di, mask as
 * __mmask8, scale as int.  The result is returned as __m128.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div4si.  Operands in order: v1_old
 * viewed as __v4si, addr as int const *, index viewed as __v2di, mask as
 * __mmask8, scale as int.  The result is returned as __m128i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div8sf.  Operands in order: v1_old
 * viewed as __v4sf, addr as float const *, index viewed as __v4di, mask as
 * __mmask8, scale as int.  The result is a 128-bit __m128 even though the
 * index vector is 256 bits wide (four 64-bit indices, four floats).
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3div8si.  Operands in order: v1_old
 * viewed as __v4si, addr as int const *, index viewed as __v4di, mask as
 * __mmask8, scale as int.  The result is a 128-bit __m128i even though the
 * index vector is 256 bits wide (four 64-bit indices, four 32-bit lanes).
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv2df.  Operands in order: v1_old
 * viewed as __v2df, addr as double const *, index viewed as __v4si, mask as
 * __mmask8, scale as int.  The result is returned as __m128d.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv2di.  Operands in order: v1_old
 * viewed as __v2di, addr as long long const *, index viewed as __v4si, mask
 * as __mmask8, scale as int.  The result is returned as __m128i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv4df.  Operands in order: v1_old
 * viewed as __v4df, addr as double const *, index viewed as __v4si (a
 * 128-bit vector), mask as __mmask8, scale as int.  The result is returned
 * as __m256d.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv4di.  Operands in order: v1_old
 * viewed as __v4di, addr as long long const *, index viewed as __v4si (a
 * 128-bit vector), mask as __mmask8, scale as int.  The result is returned
 * as __m256i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv4sf.  Operands in order: v1_old
 * viewed as __v4sf, addr as float const *, index viewed as __v4si, mask as
 * __mmask8, scale as int.  The result is returned as __m128.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv4si.  Operands in order: v1_old
 * viewed as __v4si, addr as int const *, index viewed as __v4si, mask as
 * __mmask8, scale as int.  The result is returned as __m128i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv8sf.  Operands in order: v1_old
 * viewed as __v8sf, addr as float const *, index viewed as __v8si, mask as
 * __mmask8, scale as int.  The result is returned as __m256.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (float const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Wrapper around __builtin_ia32_gather3siv8si.  Operands in order: v1_old
 * viewed as __v8si, addr as int const *, index viewed as __v8si, mask as
 * __mmask8, scale as int.  The result is returned as __m256i.
 * NOTE(review): v1_old is presumably kept for lanes whose mask bit is
 * clear -- confirm against the builtin. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (int const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Permute the four doubles of X under control C: result lane k is X lane
 * ((C >> 2*k) & 3), so only bits 0..7 of C are used.  The second shuffle
 * operand is never indexed. */
#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v4df)(__m256d)(X), (__v4df)_mm256_undefined_pd(), \
      (C) & 0x3, ((C) >> 2) & 0x3, ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
/* Merge-masked form of _mm256_permutex_pd: passes mask U, the permuted
 * result and W to __builtin_ia32_selectpd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
/* Zero-masked form of _mm256_permutex_pd: passes mask U, the permuted
 * result and an all-zero vector to __builtin_ia32_selectpd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
/* Permute the four 64-bit lanes of X under control C: result lane k is X
 * lane ((C >> 2*k) & 3), so only bits 0..7 of C are used.  The second
 * shuffle operand is never indexed. */
#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(X), (__v4di)_mm256_undefined_si256(), \
      (C) & 0x3, ((C) >> 2) & 0x3, ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })
/* Merge-masked form of _mm256_permutex_epi64: passes mask U, the permuted
 * result and W to __builtin_ia32_selectq_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)); })
/* Zero-masked form of _mm256_permutex_epi64: passes mask U, the permuted
 * result and an all-zero vector to __builtin_ia32_selectq_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()); })
8510 static __inline__ __m256d __DEFAULT_FN_ATTRS
8511 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8513 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8515 (__v4df) _mm256_undefined_si256 (),
8519 static __inline__ __m256d __DEFAULT_FN_ATTRS
8520 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8523 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8529 static __inline__ __m256d __DEFAULT_FN_ATTRS
8530 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8532 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8534 (__v4df) _mm256_setzero_pd (),
8538 static __inline__ __m256i __DEFAULT_FN_ATTRS
8539 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8541 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8543 (__v4di) _mm256_setzero_si256 (),
8547 static __inline__ __m256i __DEFAULT_FN_ATTRS
8548 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8550 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8552 (__v4di) _mm256_undefined_si256 (),
8556 static __inline__ __m256i __DEFAULT_FN_ATTRS
8557 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8560 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8566 static __inline__ __m256 __DEFAULT_FN_ATTRS
8567 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8570 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8576 static __inline__ __m256 __DEFAULT_FN_ATTRS
8577 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8579 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8581 (__v8sf) _mm256_setzero_ps (),
8585 static __inline__ __m256 __DEFAULT_FN_ATTRS
8586 _mm256_permutexvar_ps (__m256i __X, __m256 __Y)
8588 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8590 (__v8sf) _mm256_undefined_si256 (),
8594 static __inline__ __m256i __DEFAULT_FN_ATTRS
8595 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
8597 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8599 (__v8si) _mm256_setzero_si256 (),
8603 static __inline__ __m256i __DEFAULT_FN_ATTRS
8604 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
8607 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8613 static __inline__ __m256i __DEFAULT_FN_ATTRS
8614 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
8616 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8618 (__v8si) _mm256_undefined_si256(),
/* Select four consecutive 32-bit lanes, starting at lane (imm & 3), from
 * the eight-lane sequence formed by B (positions 0..3) followed by A
 * (positions 4..7).  Bits of imm above the low two are ignored. */
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v4si)(__m128i)(B), (__v4si)(__m128i)(A), \
      ((int)(imm) & 0x3),     ((int)(imm) & 0x3) + 1, \
      ((int)(imm) & 0x3) + 2, ((int)(imm) & 0x3) + 3); })
/* Merge-masked form of _mm_alignr_epi32: passes mask U, the aligned result
 * and W to __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)); })
/* Zero-masked form of _mm_alignr_epi32: passes mask U, the aligned result
 * and an all-zero vector to __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()); })
/* Select eight consecutive 32-bit lanes, starting at lane (imm & 7), from
 * the sixteen-lane sequence formed by B (positions 0..7) followed by A
 * (positions 8..15).  Bits of imm above the low three are ignored. */
#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v8si)(__m256i)(B), (__v8si)(__m256i)(A), \
      ((int)(imm) & 0x7),     ((int)(imm) & 0x7) + 1, \
      ((int)(imm) & 0x7) + 2, ((int)(imm) & 0x7) + 3, \
      ((int)(imm) & 0x7) + 4, ((int)(imm) & 0x7) + 5, \
      ((int)(imm) & 0x7) + 6, ((int)(imm) & 0x7) + 7); })
/* Merge-masked form of _mm256_alignr_epi32: passes mask U, the aligned
 * result and W to __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)); })
/* Zero-masked form of _mm256_alignr_epi32: passes mask U, the aligned
 * result and an all-zero vector to __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()); })
/* Select two consecutive 64-bit lanes, starting at lane (imm & 1), from the
 * four-lane sequence formed by B (positions 0..1) followed by A (positions
 * 2..3).  Bits of imm above bit 0 are ignored. */
#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v2di)(__m128i)(B), (__v2di)(__m128i)(A), \
      ((int)(imm) & 0x1), ((int)(imm) & 0x1) + 1); })
/* Merge-masked form of _mm_alignr_epi64: passes mask U, the aligned result
 * and W to __builtin_ia32_selectq_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)); })
/* Zero-masked form of _mm_alignr_epi64: passes mask U, the aligned result
 * and the zero vector from _mm_setzero_di() to __builtin_ia32_selectq_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_di()); })
/* Select four consecutive 64-bit lanes, starting at lane (imm & 3), from
 * the eight-lane sequence formed by B (positions 0..3) followed by A
 * (positions 4..7).  Bits of imm above the low two are ignored. */
#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(B), (__v4di)(__m256i)(A), \
      ((int)(imm) & 0x3),     ((int)(imm) & 0x3) + 1, \
      ((int)(imm) & 0x3) + 2, ((int)(imm) & 0x3) + 3); })
/* Merge-masked form of _mm256_alignr_epi64: passes mask U, the aligned
 * result and W to __builtin_ia32_selectq_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)); })
/* Zero-masked form of _mm256_alignr_epi64: passes mask U, the aligned
 * result and an all-zero vector to __builtin_ia32_selectq_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()); })
8696 static __inline__ __m128 __DEFAULT_FN_ATTRS
8697 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8699 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8700 (__v4sf)_mm_movehdup_ps(__A),
8704 static __inline__ __m128 __DEFAULT_FN_ATTRS
8705 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8707 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8708 (__v4sf)_mm_movehdup_ps(__A),
8709 (__v4sf)_mm_setzero_ps());
8712 static __inline__ __m256 __DEFAULT_FN_ATTRS
8713 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8715 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8716 (__v8sf)_mm256_movehdup_ps(__A),
8720 static __inline__ __m256 __DEFAULT_FN_ATTRS
8721 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8723 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8724 (__v8sf)_mm256_movehdup_ps(__A),
8725 (__v8sf)_mm256_setzero_ps());
8728 static __inline__ __m128 __DEFAULT_FN_ATTRS
8729 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8731 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8732 (__v4sf)_mm_moveldup_ps(__A),
8736 static __inline__ __m128 __DEFAULT_FN_ATTRS
8737 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8739 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8740 (__v4sf)_mm_moveldup_ps(__A),
8741 (__v4sf)_mm_setzero_ps());
8744 static __inline__ __m256 __DEFAULT_FN_ATTRS
8745 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8747 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8748 (__v8sf)_mm256_moveldup_ps(__A),
8752 static __inline__ __m256 __DEFAULT_FN_ATTRS
8753 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8755 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8756 (__v8sf)_mm256_moveldup_ps(__A),
8757 (__v8sf)_mm256_setzero_ps());
/* Merge-masked form of _mm256_shuffle_epi32: passes mask U, the result of
 * _mm256_shuffle_epi32(A, I) and W to __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)); })
/* Zero-masked form of _mm256_shuffle_epi32: passes mask U, the result of
 * _mm256_shuffle_epi32(A, I) and an all-zero vector to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()); })
/* Merge-masked form of _mm_shuffle_epi32: passes mask U, the result of
 * _mm_shuffle_epi32(A, I) and W to __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from W --
 * confirm against the select builtin. */
#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)); })
/* Zero-masked form of _mm_shuffle_epi32: passes mask U, the result of
 * _mm_shuffle_epi32(A, I) and an all-zero vector to
 * __builtin_ia32_selectd_128.
 * NOTE(review): presumably lanes whose bit in U is clear come from the zero
 * vector -- confirm against the select builtin. */
#define _mm_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()); })
8780 static __inline__ __m128d __DEFAULT_FN_ATTRS
8781 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8783 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8788 static __inline__ __m128d __DEFAULT_FN_ATTRS
8789 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8791 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8793 (__v2df) _mm_setzero_pd ());
8796 static __inline__ __m256d __DEFAULT_FN_ATTRS
8797 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8799 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8804 static __inline__ __m256d __DEFAULT_FN_ATTRS
8805 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8807 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8809 (__v4df) _mm256_setzero_pd ());
8812 static __inline__ __m128 __DEFAULT_FN_ATTRS
8813 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8815 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8820 static __inline__ __m128 __DEFAULT_FN_ATTRS
8821 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8823 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8825 (__v4sf) _mm_setzero_ps ());
8828 static __inline__ __m256 __DEFAULT_FN_ATTRS
8829 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8831 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8836 static __inline__ __m256 __DEFAULT_FN_ATTRS
8837 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8839 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8841 (__v8sf) _mm256_setzero_ps ());
8844 static __inline__ __m128 __DEFAULT_FN_ATTRS
8845 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8847 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8852 static __inline__ __m128 __DEFAULT_FN_ATTRS
8853 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8855 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8861 static __inline__ __m256 __DEFAULT_FN_ATTRS
8862 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8864 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8869 static __inline__ __m256 __DEFAULT_FN_ATTRS
8870 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8872 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8874 _mm256_setzero_ps (),
8878 static __inline __m128i __DEFAULT_FN_ATTRS
8879 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8881 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8886 static __inline __m128i __DEFAULT_FN_ATTRS
8887 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8889 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8890 (__v8hi) _mm_setzero_si128 (),
8894 #define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
8895 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8896 (__v8hi)(__m128i)(W), \
8899 #define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
8900 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8901 (__v8hi)_mm_setzero_si128(), \
8904 static __inline __m128i __DEFAULT_FN_ATTRS
8905 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8907 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8912 static __inline __m128i __DEFAULT_FN_ATTRS
8913 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8915 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8916 (__v8hi) _mm_setzero_si128(),
8919 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
8920 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8921 (__v8hi)(__m128i)(W), \
8924 #define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
8925 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8926 (__v8hi)_mm_setzero_si128(), \
8930 #undef __DEFAULT_FN_ATTRS
8932 #endif /* __AVX512VLINTRIN_H */