/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLDQINTRIN_H
29 #define __AVX512VLDQINTRIN_H
31 /* Define the default attributes for the functions in this file. */
32 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
33 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
35 static __inline__ __m256i __DEFAULT_FN_ATTRS256
36 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
37 return (__m256i) ((__v4du) __A * (__v4du) __B);
40 static __inline__ __m256i __DEFAULT_FN_ATTRS256
41 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
42 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
43 (__v4di)_mm256_mullo_epi64(__A, __B),
47 static __inline__ __m256i __DEFAULT_FN_ATTRS256
48 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
49 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
50 (__v4di)_mm256_mullo_epi64(__A, __B),
51 (__v4di)_mm256_setzero_si256());
54 static __inline__ __m128i __DEFAULT_FN_ATTRS128
55 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
56 return (__m128i) ((__v2du) __A * (__v2du) __B);
59 static __inline__ __m128i __DEFAULT_FN_ATTRS128
60 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
61 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
62 (__v2di)_mm_mullo_epi64(__A, __B),
66 static __inline__ __m128i __DEFAULT_FN_ATTRS128
67 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
68 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
69 (__v2di)_mm_mullo_epi64(__A, __B),
70 (__v2di)_mm_setzero_si128());
73 static __inline__ __m256d __DEFAULT_FN_ATTRS256
74 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
75 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
76 (__v4df)_mm256_andnot_pd(__A, __B),
80 static __inline__ __m256d __DEFAULT_FN_ATTRS256
81 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
82 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
83 (__v4df)_mm256_andnot_pd(__A, __B),
84 (__v4df)_mm256_setzero_pd());
87 static __inline__ __m128d __DEFAULT_FN_ATTRS128
88 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
89 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
90 (__v2df)_mm_andnot_pd(__A, __B),
94 static __inline__ __m128d __DEFAULT_FN_ATTRS128
95 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
96 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
97 (__v2df)_mm_andnot_pd(__A, __B),
98 (__v2df)_mm_setzero_pd());
101 static __inline__ __m256 __DEFAULT_FN_ATTRS256
102 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
103 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
104 (__v8sf)_mm256_andnot_ps(__A, __B),
108 static __inline__ __m256 __DEFAULT_FN_ATTRS256
109 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
110 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
111 (__v8sf)_mm256_andnot_ps(__A, __B),
112 (__v8sf)_mm256_setzero_ps());
115 static __inline__ __m128 __DEFAULT_FN_ATTRS128
116 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
117 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
118 (__v4sf)_mm_andnot_ps(__A, __B),
122 static __inline__ __m128 __DEFAULT_FN_ATTRS128
123 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
124 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
125 (__v4sf)_mm_andnot_ps(__A, __B),
126 (__v4sf)_mm_setzero_ps());
129 static __inline__ __m256d __DEFAULT_FN_ATTRS256
130 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
131 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
132 (__v4df)_mm256_and_pd(__A, __B),
136 static __inline__ __m256d __DEFAULT_FN_ATTRS256
137 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
138 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
139 (__v4df)_mm256_and_pd(__A, __B),
140 (__v4df)_mm256_setzero_pd());
143 static __inline__ __m128d __DEFAULT_FN_ATTRS128
144 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
145 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
146 (__v2df)_mm_and_pd(__A, __B),
150 static __inline__ __m128d __DEFAULT_FN_ATTRS128
151 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
152 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
153 (__v2df)_mm_and_pd(__A, __B),
154 (__v2df)_mm_setzero_pd());
157 static __inline__ __m256 __DEFAULT_FN_ATTRS256
158 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
159 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
160 (__v8sf)_mm256_and_ps(__A, __B),
164 static __inline__ __m256 __DEFAULT_FN_ATTRS256
165 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
166 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
167 (__v8sf)_mm256_and_ps(__A, __B),
168 (__v8sf)_mm256_setzero_ps());
171 static __inline__ __m128 __DEFAULT_FN_ATTRS128
172 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
173 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
174 (__v4sf)_mm_and_ps(__A, __B),
178 static __inline__ __m128 __DEFAULT_FN_ATTRS128
179 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
180 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
181 (__v4sf)_mm_and_ps(__A, __B),
182 (__v4sf)_mm_setzero_ps());
185 static __inline__ __m256d __DEFAULT_FN_ATTRS256
186 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
187 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
188 (__v4df)_mm256_xor_pd(__A, __B),
192 static __inline__ __m256d __DEFAULT_FN_ATTRS256
193 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
194 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
195 (__v4df)_mm256_xor_pd(__A, __B),
196 (__v4df)_mm256_setzero_pd());
199 static __inline__ __m128d __DEFAULT_FN_ATTRS128
200 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
201 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
202 (__v2df)_mm_xor_pd(__A, __B),
206 static __inline__ __m128d __DEFAULT_FN_ATTRS128
207 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
208 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
209 (__v2df)_mm_xor_pd(__A, __B),
210 (__v2df)_mm_setzero_pd());
213 static __inline__ __m256 __DEFAULT_FN_ATTRS256
214 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
215 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
216 (__v8sf)_mm256_xor_ps(__A, __B),
220 static __inline__ __m256 __DEFAULT_FN_ATTRS256
221 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
222 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
223 (__v8sf)_mm256_xor_ps(__A, __B),
224 (__v8sf)_mm256_setzero_ps());
227 static __inline__ __m128 __DEFAULT_FN_ATTRS128
228 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
229 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
230 (__v4sf)_mm_xor_ps(__A, __B),
234 static __inline__ __m128 __DEFAULT_FN_ATTRS128
235 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
237 (__v4sf)_mm_xor_ps(__A, __B),
238 (__v4sf)_mm_setzero_ps());
241 static __inline__ __m256d __DEFAULT_FN_ATTRS256
242 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
243 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
244 (__v4df)_mm256_or_pd(__A, __B),
248 static __inline__ __m256d __DEFAULT_FN_ATTRS256
249 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
250 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
251 (__v4df)_mm256_or_pd(__A, __B),
252 (__v4df)_mm256_setzero_pd());
255 static __inline__ __m128d __DEFAULT_FN_ATTRS128
256 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
257 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
258 (__v2df)_mm_or_pd(__A, __B),
262 static __inline__ __m128d __DEFAULT_FN_ATTRS128
263 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
265 (__v2df)_mm_or_pd(__A, __B),
266 (__v2df)_mm_setzero_pd());
269 static __inline__ __m256 __DEFAULT_FN_ATTRS256
270 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
271 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
272 (__v8sf)_mm256_or_ps(__A, __B),
276 static __inline__ __m256 __DEFAULT_FN_ATTRS256
277 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
278 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
279 (__v8sf)_mm256_or_ps(__A, __B),
280 (__v8sf)_mm256_setzero_ps());
283 static __inline__ __m128 __DEFAULT_FN_ATTRS128
284 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
285 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
286 (__v4sf)_mm_or_ps(__A, __B),
290 static __inline__ __m128 __DEFAULT_FN_ATTRS128
291 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
292 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
293 (__v4sf)_mm_or_ps(__A, __B),
294 (__v4sf)_mm_setzero_ps());
297 static __inline__ __m128i __DEFAULT_FN_ATTRS128
298 _mm_cvtpd_epi64 (__m128d __A) {
299 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
300 (__v2di) _mm_setzero_si128(),
304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
305 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
306 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
311 static __inline__ __m128i __DEFAULT_FN_ATTRS128
312 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
313 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
314 (__v2di) _mm_setzero_si128(),
318 static __inline__ __m256i __DEFAULT_FN_ATTRS256
319 _mm256_cvtpd_epi64 (__m256d __A) {
320 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
321 (__v4di) _mm256_setzero_si256(),
325 static __inline__ __m256i __DEFAULT_FN_ATTRS256
326 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
327 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
332 static __inline__ __m256i __DEFAULT_FN_ATTRS256
333 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
334 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
335 (__v4di) _mm256_setzero_si256(),
339 static __inline__ __m128i __DEFAULT_FN_ATTRS128
340 _mm_cvtpd_epu64 (__m128d __A) {
341 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
342 (__v2di) _mm_setzero_si128(),
346 static __inline__ __m128i __DEFAULT_FN_ATTRS128
347 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
348 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
353 static __inline__ __m128i __DEFAULT_FN_ATTRS128
354 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
355 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
356 (__v2di) _mm_setzero_si128(),
360 static __inline__ __m256i __DEFAULT_FN_ATTRS256
361 _mm256_cvtpd_epu64 (__m256d __A) {
362 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
363 (__v4di) _mm256_setzero_si256(),
367 static __inline__ __m256i __DEFAULT_FN_ATTRS256
368 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
369 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
374 static __inline__ __m256i __DEFAULT_FN_ATTRS256
375 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
376 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
377 (__v4di) _mm256_setzero_si256(),
381 static __inline__ __m128i __DEFAULT_FN_ATTRS128
382 _mm_cvtps_epi64 (__m128 __A) {
383 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
384 (__v2di) _mm_setzero_si128(),
388 static __inline__ __m128i __DEFAULT_FN_ATTRS128
389 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
390 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
395 static __inline__ __m128i __DEFAULT_FN_ATTRS128
396 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
397 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
398 (__v2di) _mm_setzero_si128(),
402 static __inline__ __m256i __DEFAULT_FN_ATTRS256
403 _mm256_cvtps_epi64 (__m128 __A) {
404 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
405 (__v4di) _mm256_setzero_si256(),
409 static __inline__ __m256i __DEFAULT_FN_ATTRS256
410 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
411 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
416 static __inline__ __m256i __DEFAULT_FN_ATTRS256
417 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
418 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
419 (__v4di) _mm256_setzero_si256(),
423 static __inline__ __m128i __DEFAULT_FN_ATTRS128
424 _mm_cvtps_epu64 (__m128 __A) {
425 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
426 (__v2di) _mm_setzero_si128(),
430 static __inline__ __m128i __DEFAULT_FN_ATTRS128
431 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
432 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
437 static __inline__ __m128i __DEFAULT_FN_ATTRS128
438 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
439 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
440 (__v2di) _mm_setzero_si128(),
444 static __inline__ __m256i __DEFAULT_FN_ATTRS256
445 _mm256_cvtps_epu64 (__m128 __A) {
446 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
447 (__v4di) _mm256_setzero_si256(),
451 static __inline__ __m256i __DEFAULT_FN_ATTRS256
452 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
453 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
458 static __inline__ __m256i __DEFAULT_FN_ATTRS256
459 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
460 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
461 (__v4di) _mm256_setzero_si256(),
465 static __inline__ __m128d __DEFAULT_FN_ATTRS128
466 _mm_cvtepi64_pd (__m128i __A) {
467 return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
470 static __inline__ __m128d __DEFAULT_FN_ATTRS128
471 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
472 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
473 (__v2df)_mm_cvtepi64_pd(__A),
477 static __inline__ __m128d __DEFAULT_FN_ATTRS128
478 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
479 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
480 (__v2df)_mm_cvtepi64_pd(__A),
481 (__v2df)_mm_setzero_pd());
484 static __inline__ __m256d __DEFAULT_FN_ATTRS256
485 _mm256_cvtepi64_pd (__m256i __A) {
486 return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
489 static __inline__ __m256d __DEFAULT_FN_ATTRS256
490 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
491 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
492 (__v4df)_mm256_cvtepi64_pd(__A),
496 static __inline__ __m256d __DEFAULT_FN_ATTRS256
497 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
498 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
499 (__v4df)_mm256_cvtepi64_pd(__A),
500 (__v4df)_mm256_setzero_pd());
503 static __inline__ __m128 __DEFAULT_FN_ATTRS128
504 _mm_cvtepi64_ps (__m128i __A) {
505 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
506 (__v4sf) _mm_setzero_ps(),
510 static __inline__ __m128 __DEFAULT_FN_ATTRS128
511 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
512 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
517 static __inline__ __m128 __DEFAULT_FN_ATTRS128
518 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
519 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
520 (__v4sf) _mm_setzero_ps(),
524 static __inline__ __m128 __DEFAULT_FN_ATTRS256
525 _mm256_cvtepi64_ps (__m256i __A) {
526 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
527 (__v4sf) _mm_setzero_ps(),
531 static __inline__ __m128 __DEFAULT_FN_ATTRS256
532 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
533 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
538 static __inline__ __m128 __DEFAULT_FN_ATTRS256
539 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
540 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
541 (__v4sf) _mm_setzero_ps(),
545 static __inline__ __m128i __DEFAULT_FN_ATTRS128
546 _mm_cvttpd_epi64 (__m128d __A) {
547 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
548 (__v2di) _mm_setzero_si128(),
552 static __inline__ __m128i __DEFAULT_FN_ATTRS128
553 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
554 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
559 static __inline__ __m128i __DEFAULT_FN_ATTRS128
560 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
561 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
562 (__v2di) _mm_setzero_si128(),
566 static __inline__ __m256i __DEFAULT_FN_ATTRS256
567 _mm256_cvttpd_epi64 (__m256d __A) {
568 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
569 (__v4di) _mm256_setzero_si256(),
573 static __inline__ __m256i __DEFAULT_FN_ATTRS256
574 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
575 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
580 static __inline__ __m256i __DEFAULT_FN_ATTRS256
581 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
582 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
583 (__v4di) _mm256_setzero_si256(),
587 static __inline__ __m128i __DEFAULT_FN_ATTRS128
588 _mm_cvttpd_epu64 (__m128d __A) {
589 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
590 (__v2di) _mm_setzero_si128(),
594 static __inline__ __m128i __DEFAULT_FN_ATTRS128
595 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
596 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
601 static __inline__ __m128i __DEFAULT_FN_ATTRS128
602 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
603 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
604 (__v2di) _mm_setzero_si128(),
608 static __inline__ __m256i __DEFAULT_FN_ATTRS256
609 _mm256_cvttpd_epu64 (__m256d __A) {
610 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
611 (__v4di) _mm256_setzero_si256(),
615 static __inline__ __m256i __DEFAULT_FN_ATTRS256
616 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
617 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
622 static __inline__ __m256i __DEFAULT_FN_ATTRS256
623 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
624 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
625 (__v4di) _mm256_setzero_si256(),
629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
630 _mm_cvttps_epi64 (__m128 __A) {
631 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
632 (__v2di) _mm_setzero_si128(),
636 static __inline__ __m128i __DEFAULT_FN_ATTRS128
637 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
638 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
643 static __inline__ __m128i __DEFAULT_FN_ATTRS128
644 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
645 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
646 (__v2di) _mm_setzero_si128(),
650 static __inline__ __m256i __DEFAULT_FN_ATTRS256
651 _mm256_cvttps_epi64 (__m128 __A) {
652 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
653 (__v4di) _mm256_setzero_si256(),
657 static __inline__ __m256i __DEFAULT_FN_ATTRS256
658 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
659 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
664 static __inline__ __m256i __DEFAULT_FN_ATTRS256
665 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
666 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
667 (__v4di) _mm256_setzero_si256(),
671 static __inline__ __m128i __DEFAULT_FN_ATTRS128
672 _mm_cvttps_epu64 (__m128 __A) {
673 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
674 (__v2di) _mm_setzero_si128(),
678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
679 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
680 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
685 static __inline__ __m128i __DEFAULT_FN_ATTRS128
686 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
687 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
688 (__v2di) _mm_setzero_si128(),
692 static __inline__ __m256i __DEFAULT_FN_ATTRS256
693 _mm256_cvttps_epu64 (__m128 __A) {
694 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
695 (__v4di) _mm256_setzero_si256(),
699 static __inline__ __m256i __DEFAULT_FN_ATTRS256
700 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
701 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
707 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
708 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
709 (__v4di) _mm256_setzero_si256(),
713 static __inline__ __m128d __DEFAULT_FN_ATTRS128
714 _mm_cvtepu64_pd (__m128i __A) {
715 return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
718 static __inline__ __m128d __DEFAULT_FN_ATTRS128
719 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
720 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
721 (__v2df)_mm_cvtepu64_pd(__A),
725 static __inline__ __m128d __DEFAULT_FN_ATTRS128
726 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
727 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
728 (__v2df)_mm_cvtepu64_pd(__A),
729 (__v2df)_mm_setzero_pd());
732 static __inline__ __m256d __DEFAULT_FN_ATTRS256
733 _mm256_cvtepu64_pd (__m256i __A) {
734 return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
737 static __inline__ __m256d __DEFAULT_FN_ATTRS256
738 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
739 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
740 (__v4df)_mm256_cvtepu64_pd(__A),
744 static __inline__ __m256d __DEFAULT_FN_ATTRS256
745 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
746 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
747 (__v4df)_mm256_cvtepu64_pd(__A),
748 (__v4df)_mm256_setzero_pd());
751 static __inline__ __m128 __DEFAULT_FN_ATTRS128
752 _mm_cvtepu64_ps (__m128i __A) {
753 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
754 (__v4sf) _mm_setzero_ps(),
758 static __inline__ __m128 __DEFAULT_FN_ATTRS128
759 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
760 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
765 static __inline__ __m128 __DEFAULT_FN_ATTRS128
766 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
767 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
768 (__v4sf) _mm_setzero_ps(),
772 static __inline__ __m128 __DEFAULT_FN_ATTRS256
773 _mm256_cvtepu64_ps (__m256i __A) {
774 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
775 (__v4sf) _mm_setzero_ps(),
779 static __inline__ __m128 __DEFAULT_FN_ATTRS256
780 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
781 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
786 static __inline__ __m128 __DEFAULT_FN_ATTRS256
787 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
788 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
789 (__v4sf) _mm_setzero_ps(),
/* VRANGEPD: range restriction on packed doubles; imm8 C selects the
   min/max/sign operation. Mask argument restored on each macro. */
#define _mm_range_pd(A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm_mask_range_pd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)(__m128d)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_range_pd(U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)(U))

#define _mm256_range_pd(A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm256_mask_range_pd(W, U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)(__m256d)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_range_pd(U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)(U))
/* VRANGEPS: range restriction on packed floats; imm8 C selects the
   min/max/sign operation. */
#define _mm_range_ps(A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm_mask_range_ps(W, U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)(__m128)(W), (__mmask8)(U))

#define _mm_maskz_range_ps(U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)(U))

#define _mm256_range_ps(A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm256_mask_range_ps(W, U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)(__m256)(W), (__mmask8)(U))

#define _mm256_maskz_range_ps(U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)(U))
/* VREDUCEPD: extract the reduced argument of packed doubles; imm8 B encodes
   the number of fraction bits and rounding control. */
#define _mm_reduce_pd(A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm_mask_reduce_pd(W, U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U))

#define _mm_maskz_reduce_pd(U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U))

#define _mm256_reduce_pd(A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm256_mask_reduce_pd(W, U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U))

#define _mm256_maskz_reduce_pd(U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U))
/* VREDUCEPS: extract the reduced argument of packed floats. */
#define _mm_reduce_ps(A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm_mask_reduce_ps(W, U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_reduce_ps(U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U))

#define _mm256_reduce_ps(A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm256_mask_reduce_ps(W, U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)(__m256)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_reduce_ps(U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U))
923 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
924 _mm_movepi32_mask (__m128i __A)
926 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
929 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
930 _mm256_movepi32_mask (__m256i __A)
932 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
935 static __inline__ __m128i __DEFAULT_FN_ATTRS128
936 _mm_movm_epi32 (__mmask8 __A)
938 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
941 static __inline__ __m256i __DEFAULT_FN_ATTRS256
942 _mm256_movm_epi32 (__mmask8 __A)
944 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
947 static __inline__ __m128i __DEFAULT_FN_ATTRS128
948 _mm_movm_epi64 (__mmask8 __A)
950 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
953 static __inline__ __m256i __DEFAULT_FN_ATTRS256
954 _mm256_movm_epi64 (__mmask8 __A)
956 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
959 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
960 _mm_movepi64_mask (__m128i __A)
962 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
965 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
966 _mm256_movepi64_mask (__m256i __A)
968 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
971 static __inline__ __m256 __DEFAULT_FN_ATTRS256
972 _mm256_broadcast_f32x2 (__m128 __A)
974 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
975 0, 1, 0, 1, 0, 1, 0, 1);
978 static __inline__ __m256 __DEFAULT_FN_ATTRS256
979 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
981 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
982 (__v8sf)_mm256_broadcast_f32x2(__A),
986 static __inline__ __m256 __DEFAULT_FN_ATTRS256
987 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
989 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
990 (__v8sf)_mm256_broadcast_f32x2(__A),
991 (__v8sf)_mm256_setzero_ps());
994 static __inline__ __m256d __DEFAULT_FN_ATTRS256
995 _mm256_broadcast_f64x2(__m128d __A)
997 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1001 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1002 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
1004 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1005 (__v4df)_mm256_broadcast_f64x2(__A),
1009 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1010 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1012 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1013 (__v4df)_mm256_broadcast_f64x2(__A),
1014 (__v4df)_mm256_setzero_pd());
1017 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1018 _mm_broadcast_i32x2 (__m128i __A)
1020 return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1025 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1027 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1028 (__v4si)_mm_broadcast_i32x2(__A),
1032 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1035 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1036 (__v4si)_mm_broadcast_i32x2(__A),
1037 (__v4si)_mm_setzero_si128());
1040 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1041 _mm256_broadcast_i32x2 (__m128i __A)
1043 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1044 0, 1, 0, 1, 0, 1, 0, 1);
1047 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1048 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1051 (__v8si)_mm256_broadcast_i32x2(__A),
1055 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1056 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1058 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1059 (__v8si)_mm256_broadcast_i32x2(__A),
1060 (__v8si)_mm256_setzero_si256());
1063 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1064 _mm256_broadcast_i64x2(__m128i __A)
1066 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1070 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1071 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1073 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1074 (__v4di)_mm256_broadcast_i64x2(__A),
1078 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1079 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1082 (__v4di)_mm256_broadcast_i64x2(__A),
1083 (__v4di)_mm256_setzero_si256());
/* Extract the 128-bit lane selected by the immediate (bit 0 of imm) from a
   256-bit vector.  The plain form leaves masked-off elements undefined, the
   _mask form merges from W, and the _maskz form zeroes them.  These must be
   macros because the lane index is an immediate operand of the instruction. */
#define _mm256_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

/* Integer counterparts of the extractions above (VEXTRACTI64X2). */
#define _mm256_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))
/* Insert the 128-bit vector B into the lane of A selected by the immediate
   (bit 0 of imm).  The masked forms apply a per-element select afterwards:
   _mask merges from W, _maskz zeroes masked-off elements.  Macros because
   the lane index must be a compile-time immediate. */
#define _mm256_insertf64x2(A, B, imm) \
  (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)(__m256d)(W))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)_mm256_setzero_pd())

/* Integer counterparts of the insertions above (VINSERTI64X2). */
#define _mm256_inserti64x2(A, B, imm) \
  (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256())
/* VFPCLASSPD / VFPCLASSPS: test each element against the floating-point
   categories selected by the immediate and return the results as a bitmask.
   The _mask forms AND the result with U; the plain forms use an all-ones
   mask.  Macros because imm must be a compile-time immediate. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)-1)
1182 #undef __DEFAULT_FN_ATTRS128
1183 #undef __DEFAULT_FN_ATTRS256