1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
21 *===-----------------------------------------------------------------------===
25 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLDQINTRIN_H
29 #define __AVX512VLDQINTRIN_H
31 /* Define the default attributes for the functions in this file: functions
 * declared with this macro are always_inline and nodebug, and are compiled
 * with the "avx512vl,avx512dq" target features enabled. */
32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
/* 256-bit mullo_epi64 family.
 * _mm256_mullo_epi64 multiplies __A and __B element-wise after viewing both as
 * __v4du and returns the product reinterpreted as __m256i.
 * The mask/maskz forms pass __U and that product to
 * __builtin_ia32_selectq_256; maskz supplies _mm256_setzero_si256() as the
 * last operand.  Presumably lanes with a clear bit in __U take that operand
 * (__W in the mask form) -- TODO confirm. */
34 static __inline__ __m256i __DEFAULT_FN_ATTRS
35 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36 return (__m256i) ((__v4du) __A * (__v4du) __B);
39 static __inline__ __m256i __DEFAULT_FN_ATTRS
40 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
42 (__v4di)_mm256_mullo_epi64(__A, __B),
46 static __inline__ __m256i __DEFAULT_FN_ATTRS
47 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
48 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
49 (__v4di)_mm256_mullo_epi64(__A, __B),
50 (__v4di)_mm256_setzero_si256());
/* 128-bit mullo_epi64 family.
 * _mm_mullo_epi64 multiplies __A and __B element-wise after viewing both as
 * __v2du and returns the product reinterpreted as __m128i.
 * The mask/maskz forms pass __U and that product to
 * __builtin_ia32_selectq_128; maskz supplies _mm_setzero_si128() as the last
 * operand.  Presumably lanes with a clear bit in __U take that operand
 * (__W in the mask form) -- TODO confirm. */
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
54 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
55 return (__m128i) ((__v2du) __A * (__v2du) __B);
58 static __inline__ __m128i __DEFAULT_FN_ATTRS
59 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
60 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
61 (__v2di)_mm_mullo_epi64(__A, __B),
65 static __inline__ __m128i __DEFAULT_FN_ATTRS
66 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
67 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
68 (__v2di)_mm_mullo_epi64(__A, __B),
69 (__v2di)_mm_setzero_si128());
/* Masked variants of _mm256_andnot_pd(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectpd_256.  maskz supplies
 * _mm256_setzero_pd() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
72 static __inline__ __m256d __DEFAULT_FN_ATTRS
73 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
74 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
75 (__v4df)_mm256_andnot_pd(__A, __B),
79 static __inline__ __m256d __DEFAULT_FN_ATTRS
80 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
81 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
82 (__v4df)_mm256_andnot_pd(__A, __B),
83 (__v4df)_mm256_setzero_pd());
/* Masked variants of _mm_andnot_pd(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectpd_128.  maskz supplies
 * _mm_setzero_pd() as the last operand; presumably lanes with a clear bit in
 * __U take that operand (__W in the mask form) -- TODO confirm. */
86 static __inline__ __m128d __DEFAULT_FN_ATTRS
87 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
88 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
89 (__v2df)_mm_andnot_pd(__A, __B),
93 static __inline__ __m128d __DEFAULT_FN_ATTRS
94 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
95 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
96 (__v2df)_mm_andnot_pd(__A, __B),
97 (__v2df)_mm_setzero_pd());
/* Masked variants of _mm256_andnot_ps(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectps_256.  maskz supplies
 * _mm256_setzero_ps() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
100 static __inline__ __m256 __DEFAULT_FN_ATTRS
101 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
102 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
103 (__v8sf)_mm256_andnot_ps(__A, __B),
107 static __inline__ __m256 __DEFAULT_FN_ATTRS
108 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
109 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
110 (__v8sf)_mm256_andnot_ps(__A, __B),
111 (__v8sf)_mm256_setzero_ps());
/* Masked variants of _mm_andnot_ps(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectps_128.  maskz supplies
 * _mm_setzero_ps() as the last operand; presumably lanes with a clear bit in
 * __U take that operand (__W in the mask form) -- TODO confirm. */
114 static __inline__ __m128 __DEFAULT_FN_ATTRS
115 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
117 (__v4sf)_mm_andnot_ps(__A, __B),
121 static __inline__ __m128 __DEFAULT_FN_ATTRS
122 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
123 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
124 (__v4sf)_mm_andnot_ps(__A, __B),
125 (__v4sf)_mm_setzero_ps());
/* Masked variants of _mm256_and_pd(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectpd_256.  maskz supplies
 * _mm256_setzero_pd() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
128 static __inline__ __m256d __DEFAULT_FN_ATTRS
129 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
130 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
131 (__v4df)_mm256_and_pd(__A, __B),
135 static __inline__ __m256d __DEFAULT_FN_ATTRS
136 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
137 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
138 (__v4df)_mm256_and_pd(__A, __B),
139 (__v4df)_mm256_setzero_pd());
/* Masked variants of _mm_and_pd(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectpd_128.  maskz supplies _mm_setzero_pd() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
142 static __inline__ __m128d __DEFAULT_FN_ATTRS
143 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
145 (__v2df)_mm_and_pd(__A, __B),
149 static __inline__ __m128d __DEFAULT_FN_ATTRS
150 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
151 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
152 (__v2df)_mm_and_pd(__A, __B),
153 (__v2df)_mm_setzero_pd());
/* Masked variants of _mm256_and_ps(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectps_256.  maskz supplies
 * _mm256_setzero_ps() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
156 static __inline__ __m256 __DEFAULT_FN_ATTRS
157 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
158 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
159 (__v8sf)_mm256_and_ps(__A, __B),
163 static __inline__ __m256 __DEFAULT_FN_ATTRS
164 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
165 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
166 (__v8sf)_mm256_and_ps(__A, __B),
167 (__v8sf)_mm256_setzero_ps());
/* Masked variants of _mm_and_ps(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectps_128.  maskz supplies _mm_setzero_ps() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
170 static __inline__ __m128 __DEFAULT_FN_ATTRS
171 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
172 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
173 (__v4sf)_mm_and_ps(__A, __B),
177 static __inline__ __m128 __DEFAULT_FN_ATTRS
178 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
179 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
180 (__v4sf)_mm_and_ps(__A, __B),
181 (__v4sf)_mm_setzero_ps());
/* Masked variants of _mm256_xor_pd(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectpd_256.  maskz supplies
 * _mm256_setzero_pd() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
184 static __inline__ __m256d __DEFAULT_FN_ATTRS
185 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
186 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
187 (__v4df)_mm256_xor_pd(__A, __B),
191 static __inline__ __m256d __DEFAULT_FN_ATTRS
192 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
193 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
194 (__v4df)_mm256_xor_pd(__A, __B),
195 (__v4df)_mm256_setzero_pd());
/* Masked variants of _mm_xor_pd(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectpd_128.  maskz supplies _mm_setzero_pd() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
198 static __inline__ __m128d __DEFAULT_FN_ATTRS
199 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
200 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
201 (__v2df)_mm_xor_pd(__A, __B),
205 static __inline__ __m128d __DEFAULT_FN_ATTRS
206 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
207 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
208 (__v2df)_mm_xor_pd(__A, __B),
209 (__v2df)_mm_setzero_pd());
/* Masked variants of _mm256_xor_ps(__A, __B): each passes __U and the
 * unmasked result to __builtin_ia32_selectps_256.  maskz supplies
 * _mm256_setzero_ps() as the last operand; presumably lanes with a clear bit
 * in __U take that operand (__W in the mask form) -- TODO confirm. */
212 static __inline__ __m256 __DEFAULT_FN_ATTRS
213 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
214 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
215 (__v8sf)_mm256_xor_ps(__A, __B),
219 static __inline__ __m256 __DEFAULT_FN_ATTRS
220 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
221 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
222 (__v8sf)_mm256_xor_ps(__A, __B),
223 (__v8sf)_mm256_setzero_ps());
/* Masked variants of _mm_xor_ps(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectps_128.  maskz supplies _mm_setzero_ps() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
226 static __inline__ __m128 __DEFAULT_FN_ATTRS
227 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
229 (__v4sf)_mm_xor_ps(__A, __B),
233 static __inline__ __m128 __DEFAULT_FN_ATTRS
234 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
235 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
236 (__v4sf)_mm_xor_ps(__A, __B),
237 (__v4sf)_mm_setzero_ps());
/* Masked variants of _mm256_or_pd(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectpd_256.  maskz supplies _mm256_setzero_pd()
 * as the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
240 static __inline__ __m256d __DEFAULT_FN_ATTRS
241 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
242 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
243 (__v4df)_mm256_or_pd(__A, __B),
247 static __inline__ __m256d __DEFAULT_FN_ATTRS
248 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
249 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
250 (__v4df)_mm256_or_pd(__A, __B),
251 (__v4df)_mm256_setzero_pd());
/* Masked variants of _mm_or_pd(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectpd_128.  maskz supplies _mm_setzero_pd() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
254 static __inline__ __m128d __DEFAULT_FN_ATTRS
255 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
256 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
257 (__v2df)_mm_or_pd(__A, __B),
261 static __inline__ __m128d __DEFAULT_FN_ATTRS
262 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
263 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
264 (__v2df)_mm_or_pd(__A, __B),
265 (__v2df)_mm_setzero_pd());
/* Masked variants of _mm256_or_ps(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectps_256.  maskz supplies _mm256_setzero_ps()
 * as the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
268 static __inline__ __m256 __DEFAULT_FN_ATTRS
269 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
271 (__v8sf)_mm256_or_ps(__A, __B),
275 static __inline__ __m256 __DEFAULT_FN_ATTRS
276 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
277 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
278 (__v8sf)_mm256_or_ps(__A, __B),
279 (__v8sf)_mm256_setzero_ps());
/* Masked variants of _mm_or_ps(__A, __B): each passes __U and the unmasked
 * result to __builtin_ia32_selectps_128.  maskz supplies _mm_setzero_ps() as
 * the last operand; presumably lanes with a clear bit in __U take that
 * operand (__W in the mask form) -- TODO confirm. */
282 static __inline__ __m128 __DEFAULT_FN_ATTRS
283 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
284 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
285 (__v4sf)_mm_or_ps(__A, __B),
289 static __inline__ __m128 __DEFAULT_FN_ATTRS
290 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
291 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
292 (__v4sf)_mm_or_ps(__A, __B),
293 (__v4sf)_mm_setzero_ps());
/* _mm_cvtpd_epi64 family: wrappers around __builtin_ia32_cvtpd2qq128_mask on
 * __A cast to __v2df, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
296 static __inline__ __m128i __DEFAULT_FN_ATTRS
297 _mm_cvtpd_epi64 (__m128d __A) {
298 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
299 (__v2di) _mm_setzero_si128(),
303 static __inline__ __m128i __DEFAULT_FN_ATTRS
304 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
305 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
310 static __inline__ __m128i __DEFAULT_FN_ATTRS
311 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
312 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
313 (__v2di) _mm_setzero_si128(),
/* _mm256_cvtpd_epi64 family: wrappers around __builtin_ia32_cvtpd2qq256_mask
 * on __A cast to __v4df, returning __m256i.  The plain and maskz forms pass
 * _mm256_setzero_si256() as the second operand; presumably the mask form
 * passes __W there and the last operand is the write mask -- TODO confirm
 * against the builtin. */
317 static __inline__ __m256i __DEFAULT_FN_ATTRS
318 _mm256_cvtpd_epi64 (__m256d __A) {
319 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
320 (__v4di) _mm256_setzero_si256(),
324 static __inline__ __m256i __DEFAULT_FN_ATTRS
325 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
326 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
331 static __inline__ __m256i __DEFAULT_FN_ATTRS
332 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
333 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
334 (__v4di) _mm256_setzero_si256(),
/* _mm_cvtpd_epu64 family: wrappers around __builtin_ia32_cvtpd2uqq128_mask on
 * __A cast to __v2df, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
338 static __inline__ __m128i __DEFAULT_FN_ATTRS
339 _mm_cvtpd_epu64 (__m128d __A) {
340 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
341 (__v2di) _mm_setzero_si128(),
345 static __inline__ __m128i __DEFAULT_FN_ATTRS
346 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
347 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
352 static __inline__ __m128i __DEFAULT_FN_ATTRS
353 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
354 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
355 (__v2di) _mm_setzero_si128(),
/* _mm256_cvtpd_epu64 family: wrappers around __builtin_ia32_cvtpd2uqq256_mask
 * on __A cast to __v4df, returning __m256i.  The plain and maskz forms pass
 * _mm256_setzero_si256() as the second operand; presumably the mask form
 * passes __W there and the last operand is the write mask -- TODO confirm
 * against the builtin. */
359 static __inline__ __m256i __DEFAULT_FN_ATTRS
360 _mm256_cvtpd_epu64 (__m256d __A) {
361 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
362 (__v4di) _mm256_setzero_si256(),
366 static __inline__ __m256i __DEFAULT_FN_ATTRS
367 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
368 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
373 static __inline__ __m256i __DEFAULT_FN_ATTRS
374 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
375 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
376 (__v4di) _mm256_setzero_si256(),
/* _mm_cvtps_epi64 family: wrappers around __builtin_ia32_cvtps2qq128_mask on
 * __A cast to __v4sf, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
380 static __inline__ __m128i __DEFAULT_FN_ATTRS
381 _mm_cvtps_epi64 (__m128 __A) {
382 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
383 (__v2di) _mm_setzero_si128(),
387 static __inline__ __m128i __DEFAULT_FN_ATTRS
388 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
389 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
394 static __inline__ __m128i __DEFAULT_FN_ATTRS
395 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
396 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
397 (__v2di) _mm_setzero_si128(),
/* _mm256_cvtps_epi64 family: wrappers around __builtin_ia32_cvtps2qq256_mask.
 * The source is a 128-bit __m128 (cast to __v4sf) and the result is __m256i.
 * The plain and maskz forms pass _mm256_setzero_si256() as the second
 * operand; presumably the mask form passes __W there and the last operand is
 * the write mask -- TODO confirm against the builtin. */
401 static __inline__ __m256i __DEFAULT_FN_ATTRS
402 _mm256_cvtps_epi64 (__m128 __A) {
403 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
404 (__v4di) _mm256_setzero_si256(),
408 static __inline__ __m256i __DEFAULT_FN_ATTRS
409 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
410 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
415 static __inline__ __m256i __DEFAULT_FN_ATTRS
416 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
417 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
418 (__v4di) _mm256_setzero_si256(),
/* _mm_cvtps_epu64 family: wrappers around __builtin_ia32_cvtps2uqq128_mask on
 * __A cast to __v4sf, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
422 static __inline__ __m128i __DEFAULT_FN_ATTRS
423 _mm_cvtps_epu64 (__m128 __A) {
424 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
425 (__v2di) _mm_setzero_si128(),
429 static __inline__ __m128i __DEFAULT_FN_ATTRS
430 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
431 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
436 static __inline__ __m128i __DEFAULT_FN_ATTRS
437 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
438 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
439 (__v2di) _mm_setzero_si128(),
/* _mm256_cvtps_epu64 family: wrappers around __builtin_ia32_cvtps2uqq256_mask.
 * The source is a 128-bit __m128 (cast to __v4sf) and the result is __m256i.
 * The plain and maskz forms pass _mm256_setzero_si256() as the second
 * operand; presumably the mask form passes __W there and the last operand is
 * the write mask -- TODO confirm against the builtin. */
443 static __inline__ __m256i __DEFAULT_FN_ATTRS
444 _mm256_cvtps_epu64 (__m128 __A) {
445 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
446 (__v4di) _mm256_setzero_si256(),
450 static __inline__ __m256i __DEFAULT_FN_ATTRS
451 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
452 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
457 static __inline__ __m256i __DEFAULT_FN_ATTRS
458 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
459 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
460 (__v4di) _mm256_setzero_si256(),
/* _mm_cvtepi64_pd family: wrappers around __builtin_ia32_cvtqq2pd128_mask on
 * __A cast to __v2di, returning __m128d.  The plain and maskz forms pass
 * _mm_setzero_pd() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
464 static __inline__ __m128d __DEFAULT_FN_ATTRS
465 _mm_cvtepi64_pd (__m128i __A) {
466 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
467 (__v2df) _mm_setzero_pd(),
471 static __inline__ __m128d __DEFAULT_FN_ATTRS
472 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
473 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
478 static __inline__ __m128d __DEFAULT_FN_ATTRS
479 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
480 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
481 (__v2df) _mm_setzero_pd(),
/* _mm256_cvtepi64_pd family: wrappers around __builtin_ia32_cvtqq2pd256_mask
 * on __A cast to __v4di, returning __m256d.  The plain and maskz forms pass
 * _mm256_setzero_pd() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
485 static __inline__ __m256d __DEFAULT_FN_ATTRS
486 _mm256_cvtepi64_pd (__m256i __A) {
487 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
488 (__v4df) _mm256_setzero_pd(),
492 static __inline__ __m256d __DEFAULT_FN_ATTRS
493 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
494 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
499 static __inline__ __m256d __DEFAULT_FN_ATTRS
500 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
501 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
502 (__v4df) _mm256_setzero_pd(),
/* _mm_cvtepi64_ps family: wrappers around __builtin_ia32_cvtqq2ps128_mask on
 * __A cast to __v2di, returning __m128.  The plain and maskz forms pass
 * _mm_setzero_ps() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
506 static __inline__ __m128 __DEFAULT_FN_ATTRS
507 _mm_cvtepi64_ps (__m128i __A) {
508 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
509 (__v4sf) _mm_setzero_ps(),
513 static __inline__ __m128 __DEFAULT_FN_ATTRS
514 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
515 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
520 static __inline__ __m128 __DEFAULT_FN_ATTRS
521 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
522 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
523 (__v4sf) _mm_setzero_ps(),
/* _mm256_cvtepi64_ps family: wrappers around __builtin_ia32_cvtqq2ps256_mask.
 * The source is a 256-bit __m256i (cast to __v4di) and the result is a
 * 128-bit __m128.  The plain and maskz forms pass _mm_setzero_ps() as the
 * second operand; presumably the mask form passes __W there and the last
 * operand is the write mask -- TODO confirm against the builtin. */
527 static __inline__ __m128 __DEFAULT_FN_ATTRS
528 _mm256_cvtepi64_ps (__m256i __A) {
529 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
530 (__v4sf) _mm_setzero_ps(),
534 static __inline__ __m128 __DEFAULT_FN_ATTRS
535 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
536 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
541 static __inline__ __m128 __DEFAULT_FN_ATTRS
542 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
543 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
544 (__v4sf) _mm_setzero_ps(),
/* _mm_cvttpd_epi64 family: wrappers around __builtin_ia32_cvttpd2qq128_mask
 * on __A cast to __v2df, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
548 static __inline__ __m128i __DEFAULT_FN_ATTRS
549 _mm_cvttpd_epi64 (__m128d __A) {
550 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
551 (__v2di) _mm_setzero_si128(),
555 static __inline__ __m128i __DEFAULT_FN_ATTRS
556 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
557 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
562 static __inline__ __m128i __DEFAULT_FN_ATTRS
563 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
564 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
565 (__v2di) _mm_setzero_si128(),
/* _mm256_cvttpd_epi64 family: wrappers around
 * __builtin_ia32_cvttpd2qq256_mask on __A cast to __v4df, returning __m256i.
 * The plain and maskz forms pass _mm256_setzero_si256() as the second
 * operand; presumably the mask form passes __W there and the last operand is
 * the write mask -- TODO confirm against the builtin. */
569 static __inline__ __m256i __DEFAULT_FN_ATTRS
570 _mm256_cvttpd_epi64 (__m256d __A) {
571 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
572 (__v4di) _mm256_setzero_si256(),
576 static __inline__ __m256i __DEFAULT_FN_ATTRS
577 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
578 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
583 static __inline__ __m256i __DEFAULT_FN_ATTRS
584 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
585 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
586 (__v4di) _mm256_setzero_si256(),
/* _mm_cvttpd_epu64 family: wrappers around __builtin_ia32_cvttpd2uqq128_mask
 * on __A cast to __v2df, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
590 static __inline__ __m128i __DEFAULT_FN_ATTRS
591 _mm_cvttpd_epu64 (__m128d __A) {
592 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
593 (__v2di) _mm_setzero_si128(),
597 static __inline__ __m128i __DEFAULT_FN_ATTRS
598 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
599 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
604 static __inline__ __m128i __DEFAULT_FN_ATTRS
605 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
606 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
607 (__v2di) _mm_setzero_si128(),
/* _mm256_cvttpd_epu64 family: wrappers around
 * __builtin_ia32_cvttpd2uqq256_mask on __A cast to __v4df, returning __m256i.
 * The plain and maskz forms pass _mm256_setzero_si256() as the second
 * operand; presumably the mask form passes __W there and the last operand is
 * the write mask -- TODO confirm against the builtin. */
611 static __inline__ __m256i __DEFAULT_FN_ATTRS
612 _mm256_cvttpd_epu64 (__m256d __A) {
613 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
614 (__v4di) _mm256_setzero_si256(),
618 static __inline__ __m256i __DEFAULT_FN_ATTRS
619 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
620 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
625 static __inline__ __m256i __DEFAULT_FN_ATTRS
626 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
627 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
628 (__v4di) _mm256_setzero_si256(),
/* _mm_cvttps_epi64 family: wrappers around __builtin_ia32_cvttps2qq128_mask
 * on __A cast to __v4sf, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
632 static __inline__ __m128i __DEFAULT_FN_ATTRS
633 _mm_cvttps_epi64 (__m128 __A) {
634 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
635 (__v2di) _mm_setzero_si128(),
639 static __inline__ __m128i __DEFAULT_FN_ATTRS
640 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
641 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
646 static __inline__ __m128i __DEFAULT_FN_ATTRS
647 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
648 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
649 (__v2di) _mm_setzero_si128(),
/* _mm256_cvttps_epi64 family: wrappers around
 * __builtin_ia32_cvttps2qq256_mask.  The source is a 128-bit __m128 (cast to
 * __v4sf) and the result is __m256i.  The plain and maskz forms pass
 * _mm256_setzero_si256() as the second operand; presumably the mask form
 * passes __W there and the last operand is the write mask -- TODO confirm. */
653 static __inline__ __m256i __DEFAULT_FN_ATTRS
654 _mm256_cvttps_epi64 (__m128 __A) {
655 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
656 (__v4di) _mm256_setzero_si256(),
660 static __inline__ __m256i __DEFAULT_FN_ATTRS
661 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
662 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
667 static __inline__ __m256i __DEFAULT_FN_ATTRS
668 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
669 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
670 (__v4di) _mm256_setzero_si256(),
/* _mm_cvttps_epu64 family: wrappers around __builtin_ia32_cvttps2uqq128_mask
 * on __A cast to __v4sf, returning __m128i.  The plain and maskz forms pass
 * _mm_setzero_si128() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
674 static __inline__ __m128i __DEFAULT_FN_ATTRS
675 _mm_cvttps_epu64 (__m128 __A) {
676 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
677 (__v2di) _mm_setzero_si128(),
681 static __inline__ __m128i __DEFAULT_FN_ATTRS
682 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
683 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
689 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
690 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
691 (__v2di) _mm_setzero_si128(),
/* _mm256_cvttps_epu64 family: wrappers around
 * __builtin_ia32_cvttps2uqq256_mask.  The source is a 128-bit __m128 (cast to
 * __v4sf) and the result is __m256i.  The plain and maskz forms pass
 * _mm256_setzero_si256() as the second operand; presumably the mask form
 * passes __W there and the last operand is the write mask -- TODO confirm. */
695 static __inline__ __m256i __DEFAULT_FN_ATTRS
696 _mm256_cvttps_epu64 (__m128 __A) {
697 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
698 (__v4di) _mm256_setzero_si256(),
702 static __inline__ __m256i __DEFAULT_FN_ATTRS
703 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
704 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
709 static __inline__ __m256i __DEFAULT_FN_ATTRS
710 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
711 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
712 (__v4di) _mm256_setzero_si256(),
/* _mm_cvtepu64_pd family: wrappers around __builtin_ia32_cvtuqq2pd128_mask on
 * __A cast to __v2di, returning __m128d.  The plain and maskz forms pass
 * _mm_setzero_pd() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
716 static __inline__ __m128d __DEFAULT_FN_ATTRS
717 _mm_cvtepu64_pd (__m128i __A) {
718 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
719 (__v2df) _mm_setzero_pd(),
723 static __inline__ __m128d __DEFAULT_FN_ATTRS
724 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
725 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
730 static __inline__ __m128d __DEFAULT_FN_ATTRS
731 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
732 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
733 (__v2df) _mm_setzero_pd(),
/* _mm256_cvtepu64_pd family: wrappers around __builtin_ia32_cvtuqq2pd256_mask
 * on __A cast to __v4di, returning __m256d.  The plain and maskz forms pass
 * _mm256_setzero_pd() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
737 static __inline__ __m256d __DEFAULT_FN_ATTRS
738 _mm256_cvtepu64_pd (__m256i __A) {
739 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
740 (__v4df) _mm256_setzero_pd(),
744 static __inline__ __m256d __DEFAULT_FN_ATTRS
745 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
746 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
751 static __inline__ __m256d __DEFAULT_FN_ATTRS
752 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
753 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
754 (__v4df) _mm256_setzero_pd(),
/* _mm_cvtepu64_ps family: wrappers around __builtin_ia32_cvtuqq2ps128_mask on
 * __A cast to __v2di, returning __m128.  The plain and maskz forms pass
 * _mm_setzero_ps() as the second operand; presumably the mask form passes
 * __W there and the last operand is the write mask -- TODO confirm against
 * the builtin. */
758 static __inline__ __m128 __DEFAULT_FN_ATTRS
759 _mm_cvtepu64_ps (__m128i __A) {
760 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
761 (__v4sf) _mm_setzero_ps(),
765 static __inline__ __m128 __DEFAULT_FN_ATTRS
766 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
767 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
772 static __inline__ __m128 __DEFAULT_FN_ATTRS
773 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
774 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
775 (__v4sf) _mm_setzero_ps(),
/* _mm256_cvtepu64_ps family: wrappers around __builtin_ia32_cvtuqq2ps256_mask.
 * The source is a 256-bit __m256i (cast to __v4di) and the result is a
 * 128-bit __m128.  The plain and maskz forms pass _mm_setzero_ps() as the
 * second operand; presumably the mask form passes __W there and the last
 * operand is the write mask -- TODO confirm against the builtin. */
779 static __inline__ __m128 __DEFAULT_FN_ATTRS
780 _mm256_cvtepu64_ps (__m256i __A) {
781 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
782 (__v4sf) _mm_setzero_ps(),
786 static __inline__ __m128 __DEFAULT_FN_ATTRS
787 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
788 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
793 static __inline__ __m128 __DEFAULT_FN_ATTRS
794 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
795 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
796 (__v4sf) _mm_setzero_ps(),
/* _mm_range_pd / _mm_mask_range_pd / _mm_maskz_range_pd: statement-expression
 * macros around __builtin_ia32_rangepd128_mask.  A and B are cast to __v2df
 * and C to int; the fourth operand is _mm_setzero_pd() for the plain and
 * maskz forms and W for the mask form.  Presumably the final operand is the
 * write mask (U where one is given) -- TODO confirm against the builtin. */
800 #define _mm_range_pd(A, B, C) __extension__ ({ \
801 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
802 (__v2df)(__m128d)(B), (int)(C), \
803 (__v2df)_mm_setzero_pd(), \
806 #define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \
807 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
808 (__v2df)(__m128d)(B), (int)(C), \
809 (__v2df)(__m128d)(W), \
812 #define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
813 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
814 (__v2df)(__m128d)(B), (int)(C), \
815 (__v2df)_mm_setzero_pd(), \
/* _mm256_range_pd / _mm256_mask_range_pd / _mm256_maskz_range_pd:
 * statement-expression macros around __builtin_ia32_rangepd256_mask.  A and B
 * are cast to __v4df and C to int; the fourth operand is _mm256_setzero_pd()
 * for the plain and maskz forms and W for the mask form.  Presumably the
 * final operand is the write mask (U where one is given) -- TODO confirm. */
818 #define _mm256_range_pd(A, B, C) __extension__ ({ \
819 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
820 (__v4df)(__m256d)(B), (int)(C), \
821 (__v4df)_mm256_setzero_pd(), \
824 #define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
825 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
826 (__v4df)(__m256d)(B), (int)(C), \
827 (__v4df)(__m256d)(W), \
830 #define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
831 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
832 (__v4df)(__m256d)(B), (int)(C), \
833 (__v4df)_mm256_setzero_pd(), \
/* _mm_range_ps / _mm_mask_range_ps / _mm_maskz_range_ps: statement-expression
 * macros around __builtin_ia32_rangeps128_mask.  A and B are cast to __v4sf
 * and C to int.  The mask form passes W as the fourth operand and
 * (__mmask8)(U) as the fifth; the plain and maskz forms pass _mm_setzero_ps()
 * as the fourth (presumably followed by their own mask -- TODO confirm). */
836 #define _mm_range_ps(A, B, C) __extension__ ({ \
837 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
838 (__v4sf)(__m128)(B), (int)(C), \
839 (__v4sf)_mm_setzero_ps(), \
842 #define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
843 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
844 (__v4sf)(__m128)(B), (int)(C), \
845 (__v4sf)(__m128)(W), (__mmask8)(U)); })
847 #define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
848 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
849 (__v4sf)(__m128)(B), (int)(C), \
850 (__v4sf)_mm_setzero_ps(), \
/* _mm256_range_ps / _mm256_mask_range_ps / _mm256_maskz_range_ps:
 * statement-expression macros around __builtin_ia32_rangeps256_mask.  A and B
 * are cast to __v8sf and C to int.  The mask form passes W as the fourth
 * operand and (__mmask8)(U) as the fifth; the plain and maskz forms pass
 * _mm256_setzero_ps() as the fourth (presumably followed by their own mask
 * -- TODO confirm). */
853 #define _mm256_range_ps(A, B, C) __extension__ ({ \
854 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
855 (__v8sf)(__m256)(B), (int)(C), \
856 (__v8sf)_mm256_setzero_ps(), \
859 #define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
860 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
861 (__v8sf)(__m256)(B), (int)(C), \
862 (__v8sf)(__m256)(W), (__mmask8)(U)); })
864 #define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
865 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
866 (__v8sf)(__m256)(B), (int)(C), \
867 (__v8sf)_mm256_setzero_ps(), \
/* _mm_reduce_pd / _mm_mask_reduce_pd / _mm_maskz_reduce_pd:
 * statement-expression macros around __builtin_ia32_reducepd128_mask.  A is
 * cast to __v2df and B to int; the third operand is _mm_setzero_pd() for the
 * plain and maskz forms and W for the mask form.  Presumably the final
 * operand is the write mask (U where one is given) -- TODO confirm. */
870 #define _mm_reduce_pd(A, B) __extension__ ({ \
871 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
872 (__v2df)_mm_setzero_pd(), \
875 #define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
876 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
877 (__v2df)(__m128d)(W), \
880 #define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
881 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
882 (__v2df)_mm_setzero_pd(), \
/* _mm256_reduce_pd / _mm256_mask_reduce_pd / _mm256_maskz_reduce_pd:
 * statement-expression macros around __builtin_ia32_reducepd256_mask.  A is
 * cast to __v4df and B to int; the third operand is _mm256_setzero_pd() for
 * the plain and maskz forms and W for the mask form.  Presumably the final
 * operand is the write mask (U where one is given) -- TODO confirm. */
885 #define _mm256_reduce_pd(A, B) __extension__ ({ \
886 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
887 (__v4df)_mm256_setzero_pd(), \
890 #define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
891 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
892 (__v4df)(__m256d)(W), \
895 #define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \
896 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
897 (__v4df)_mm256_setzero_pd(), \
/* _mm_reduce_ps / _mm_mask_reduce_ps / _mm_maskz_reduce_ps:
 * statement-expression macros around __builtin_ia32_reduceps128_mask.  A is
 * cast to __v4sf and B to int; the third operand is _mm_setzero_ps() for the
 * plain and maskz forms and W for the mask form.  Presumably the final
 * operand is the write mask (U where one is given) -- TODO confirm. */
900 #define _mm_reduce_ps(A, B) __extension__ ({ \
901 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
902 (__v4sf)_mm_setzero_ps(), \
905 #define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \
906 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
907 (__v4sf)(__m128)(W), \
910 #define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \
911 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
912 (__v4sf)_mm_setzero_ps(), \
/* _mm256_reduce_ps / _mm256_mask_reduce_ps / _mm256_maskz_reduce_ps:
 * statement-expression macros around __builtin_ia32_reduceps256_mask.  A is
 * cast to __v8sf and B to int; the third operand is _mm256_setzero_ps() for
 * the plain and maskz forms and W for the mask form.  Presumably the final
 * operand is the write mask (U where one is given) -- TODO confirm. */
915 #define _mm256_reduce_ps(A, B) __extension__ ({ \
916 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
917 (__v8sf)_mm256_setzero_ps(), \
920 #define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
921 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
922 (__v8sf)(__m256)(W), \
925 #define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \
926 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
927 (__v8sf)_mm256_setzero_ps(), \
/* _mm_movepi32_mask / _mm256_movepi32_mask: return, as __mmask8, the result of
 * __builtin_ia32_cvtd2mask128 / __builtin_ia32_cvtd2mask256 applied to __A
 * viewed as __v4si / __v8si.  Presumably one mask bit per 32-bit lane, taken
 * from that lane's most significant bit -- TODO confirm. */
930 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
931 _mm_movepi32_mask (__m128i __A)
933 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
936 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
937 _mm256_movepi32_mask (__m256i __A)
939 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
/* _mm_movm_epi32 / _mm256_movm_epi32: return the vector produced by
 * __builtin_ia32_cvtmask2d128 / __builtin_ia32_cvtmask2d256 from the mask __A.
 * Presumably each 32-bit lane becomes all-ones or all-zeros according to its
 * mask bit -- TODO confirm. */
942 static __inline__ __m128i __DEFAULT_FN_ATTRS
943 _mm_movm_epi32 (__mmask8 __A)
945 return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
948 static __inline__ __m256i __DEFAULT_FN_ATTRS
949 _mm256_movm_epi32 (__mmask8 __A)
951 return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
/* _mm_movm_epi64 / _mm256_movm_epi64: return the vector produced by
 * __builtin_ia32_cvtmask2q128 / __builtin_ia32_cvtmask2q256 from the mask __A.
 * Presumably each 64-bit lane becomes all-ones or all-zeros according to its
 * mask bit -- TODO confirm. */
954 static __inline__ __m128i __DEFAULT_FN_ATTRS
955 _mm_movm_epi64 (__mmask8 __A)
957 return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
960 static __inline__ __m256i __DEFAULT_FN_ATTRS
961 _mm256_movm_epi64 (__mmask8 __A)
963 return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
/* _mm_movepi64_mask / _mm256_movepi64_mask: return, as __mmask8, the result of
 * __builtin_ia32_cvtq2mask128 / __builtin_ia32_cvtq2mask256 applied to __A
 * viewed as __v2di / __v4di.  Presumably one mask bit per 64-bit lane, taken
 * from that lane's most significant bit -- TODO confirm. */
966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
967 _mm_movepi64_mask (__m128i __A)
969 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
972 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
973 _mm256_movepi64_mask (__m256i __A)
975 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
/* _mm256_broadcast_f32x2 family.
 * The plain form shuffles __A (as __v4sf) with indices 0,1,0,1,0,1,0,1, i.e.
 * it repeats elements 0 and 1 of __A four times; the second shuffle operand
 * (_mm_undefined_ps()) is never indexed.
 * The mask/maskz forms pass __M and that result to
 * __builtin_ia32_selectps_256; maskz supplies _mm256_setzero_ps() as the last
 * operand.  Presumably lanes with a clear bit in __M take that operand
 * (__O in the mask form) -- TODO confirm. */
978 static __inline__ __m256 __DEFAULT_FN_ATTRS
979 _mm256_broadcast_f32x2 (__m128 __A)
981 return (__m256)__builtin_shufflevector((__v4sf)__A,
982 (__v4sf)_mm_undefined_ps(),
983 0, 1, 0, 1, 0, 1, 0, 1);
986 static __inline__ __m256 __DEFAULT_FN_ATTRS
987 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
989 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
990 (__v8sf)_mm256_broadcast_f32x2(__A),
994 static __inline__ __m256 __DEFAULT_FN_ATTRS
995 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
997 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
998 (__v8sf)_mm256_broadcast_f32x2(__A),
999 (__v8sf)_mm256_setzero_ps());
/* _mm256_broadcast_f64x2 family.
 * The plain form builds its __m256d result with __builtin_shufflevector,
 * using __A (as __v2df) for both shuffle operands; presumably the result is
 * __A repeated in both 128-bit halves -- TODO confirm the index list.
 * The mask/maskz forms pass __M and that result to
 * __builtin_ia32_selectpd_256; maskz supplies _mm256_setzero_pd() as the last
 * operand.  Presumably lanes with a clear bit in __M take that operand
 * (__O in the mask form) -- TODO confirm. */
1002 static __inline__ __m256d __DEFAULT_FN_ATTRS
1003 _mm256_broadcast_f64x2(__m128d __A)
1005 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1009 static __inline__ __m256d __DEFAULT_FN_ATTRS
1010 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
1012 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1013 (__v4df)_mm256_broadcast_f64x2(__A),
1017 static __inline__ __m256d __DEFAULT_FN_ATTRS
1018 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1021 (__v4df)_mm256_broadcast_f64x2(__A),
1022 (__v4df)_mm256_setzero_pd());
/* _mm_broadcast_i32x2 family.
 * The plain form builds its __m128i result with __builtin_shufflevector on
 * __A (as __v4si) and _mm_undefined_si128(); presumably it repeats elements
 * 0 and 1 of __A -- TODO confirm the index list.
 * The mask/maskz forms pass __M and that result to
 * __builtin_ia32_selectd_128; maskz supplies _mm_setzero_si128() as the last
 * operand.  Presumably lanes with a clear bit in __M take that operand
 * (__O in the mask form) -- TODO confirm. */
1025 static __inline__ __m128i __DEFAULT_FN_ATTRS
1026 _mm_broadcast_i32x2 (__m128i __A)
1028 return (__m128i)__builtin_shufflevector((__v4si)__A,
1029 (__v4si)_mm_undefined_si128(),
1033 static __inline__ __m128i __DEFAULT_FN_ATTRS
1034 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1037 (__v4si)_mm_broadcast_i32x2(__A),
1041 static __inline__ __m128i __DEFAULT_FN_ATTRS
1042 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1044 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1045 (__v4si)_mm_broadcast_i32x2(__A),
1046 (__v4si)_mm_setzero_si128());
/* Produces a __m256i whose eight 32-bit lanes are elements 0 and 1 of __A
 * (viewed as __v4si) repeated four times: the shuffle indices are
 * 0, 1, 0, 1, 0, 1, 0, 1. The second shuffle operand is
 * _mm_undefined_si128(); every index is below 4, so no lane is taken
 * from it. */
1049 static __inline__ __m256i __DEFAULT_FN_ATTRS
1050 _mm256_broadcast_i32x2 (__m128i __A)
1052 return (__m256i)__builtin_shufflevector((__v4si)__A,
1053 (__v4si)_mm_undefined_si128(),
1054 0, 1, 0, 1, 0, 1, 0, 1);
/* Masked form of _mm256_broadcast_i32x2: passes the mask __M and
 * _mm256_broadcast_i32x2(__A), viewed as __v8si, to
 * __builtin_ia32_selectd_256 and returns the result as __m256i.
 * NOTE(review): the last select operand is not shown in this excerpt;
 * presumably it is the pass-through vector __O -- confirm. */
1057 static __inline__ __m256i __DEFAULT_FN_ATTRS
1058 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1060 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1061 (__v8si)_mm256_broadcast_i32x2(__A),
/* Zeroing form of _mm256_broadcast_i32x2: __builtin_ia32_selectd_256 picks,
 * under mask __M, between _mm256_broadcast_i32x2(__A) and
 * _mm256_setzero_si256(), so lanes not taken from the broadcast are zero.
 * Returns a __m256i. */
1065 static __inline__ __m256i __DEFAULT_FN_ATTRS
1066 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1068 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1069 (__v8si)_mm256_broadcast_i32x2(__A),
1070 (__v8si)_mm256_setzero_si256());
/* Builds a __m256i from the 128-bit input by shuffling __A (viewed as two
 * 64-bit ints, __v2di) with itself through __builtin_shufflevector.
 * NOTE(review): the shuffle index list is on a line not shown in this
 * excerpt; given the name it presumably repeats both elements of __A --
 * confirm. */
1073 static __inline__ __m256i __DEFAULT_FN_ATTRS
1074 _mm256_broadcast_i64x2(__m128i __A)
1076 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
/* Masked form of _mm256_broadcast_i64x2: passes the mask __M and
 * _mm256_broadcast_i64x2(__A), viewed as __v4di, to
 * __builtin_ia32_selectq_256 and returns the result as __m256i.
 * NOTE(review): the last select operand is not shown in this excerpt;
 * presumably it is the pass-through vector __O -- confirm. */
1080 static __inline__ __m256i __DEFAULT_FN_ATTRS
1081 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1083 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1084 (__v4di)_mm256_broadcast_i64x2(__A),
/* Zeroing form of _mm256_broadcast_i64x2: __builtin_ia32_selectq_256 picks,
 * under mask __M, between _mm256_broadcast_i64x2(__A) and
 * _mm256_setzero_si256(), so lanes not taken from the broadcast are zero.
 * Returns a __m256i. */
1088 static __inline__ __m256i __DEFAULT_FN_ATTRS
1089 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1091 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1092 (__v4di)_mm256_broadcast_i64x2(__A),
1093 (__v4di)_mm256_setzero_si256());
/* Yields a __m128d holding two double elements of A (cast to __m256d, then
 * __v4df): elements 2 and 3 when bit 0 of imm is set, elements 0 and 1
 * otherwise. Only bit 0 of imm is tested, and imm is expanded twice because
 * it forms both shuffle indices. The second shuffle operand is
 * _mm256_undefined_pd(); the indices used (0..3) never select from it. */
1096 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
1097 (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
1098 (__v4df)_mm256_undefined_pd(), \
1099 ((imm) & 1) ? 2 : 0, \
1100 ((imm) & 1) ? 3 : 1); })
/* Masked form of _mm256_extractf64x2_pd: passes the mask U and
 * _mm256_extractf64x2_pd(A, imm), viewed as __v2df, to
 * __builtin_ia32_selectpd_128 and casts the result to __m128d.
 * NOTE(review): the last select operand is on a line not shown in this
 * excerpt; presumably it is W, which the visible lines never use --
 * confirm. */
1102 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
1103 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
1104 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
/* Zeroing form of _mm256_extractf64x2_pd: __builtin_ia32_selectpd_128 picks,
 * under mask U, between _mm256_extractf64x2_pd(A, imm) and _mm_setzero_pd(),
 * so lanes not taken from the extracted half are zero. Result is __m128d. */
1107 #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
1108 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
1109 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
1110 (__v2df)_mm_setzero_pd()); })
/* Yields a __m128i holding two 64-bit elements of A (cast to __m256i, then
 * __v4di): elements 2 and 3 when bit 0 of imm is set, elements 0 and 1
 * otherwise. Only bit 0 of imm is tested, and imm is expanded twice because
 * it forms both shuffle indices. The second shuffle operand is
 * _mm256_undefined_si256(); the indices used (0..3) never select from it. */
1112 #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
1113 (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
1114 (__v4di)_mm256_undefined_si256(), \
1115 ((imm) & 1) ? 2 : 0, \
1116 ((imm) & 1) ? 3 : 1); })
/* Masked form of _mm256_extracti64x2_epi64: passes the mask U and
 * _mm256_extracti64x2_epi64(A, imm), viewed as __v2di, to
 * __builtin_ia32_selectq_128 and casts the result to __m128i.
 * NOTE(review): the last select operand is on a line not shown in this
 * excerpt; presumably it is W, which the visible lines never use --
 * confirm. */
1118 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
1119 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
1120 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
/* Zeroing form of _mm256_extracti64x2_epi64: __builtin_ia32_selectq_128
 * picks, under mask U, between _mm256_extracti64x2_epi64(A, imm) and the
 * value of _mm_setzero_di(). Result is __m128i.
 * NOTE(review): the 128-bit maskz broadcast in this file zeroes with
 * _mm_setzero_si128() whereas this macro uses _mm_setzero_di(); presumably
 * both give an all-zero 128-bit vector -- confirm, and consider using one
 * spelling throughout. */
1123 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
1124 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
1125 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
1126 (__v2di)_mm_setzero_di()); })
/* Combines A with a 128-bit vector B into a __m256d. A (as __v4df) supplies
 * shuffle indices 0..3 and C = _mm256_castpd128_pd256(B) supplies 4..7.
 *   bit 0 of imm set:   { A[0], A[1], C[0], C[1] }  (upper half replaced)
 *   bit 0 of imm clear: { C[0], C[1], A[2], A[3] }  (lower half replaced)
 * Only bit 0 of imm is tested; imm is expanded four times, once per index.
 * C[0] and C[1] are expected to be the two elements of B, per the cast
 * intrinsic's contract. */
1128 #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
1129 (__m256d)__builtin_shufflevector((__v4df)(A), \
1130 (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
1131 ((imm) & 0x1) ? 0 : 4, \
1132 ((imm) & 0x1) ? 1 : 5, \
1133 ((imm) & 0x1) ? 4 : 2, \
1134 ((imm) & 0x1) ? 5 : 3); })
/* Masked form of _mm256_insertf64x2: passes the mask U and
 * _mm256_insertf64x2(A, B, imm), viewed as __v4df, to
 * __builtin_ia32_selectpd_256 and casts the result to __m256d.
 * NOTE(review): the last select operand is on a line not shown in this
 * excerpt; presumably it is W, which the visible lines never use --
 * confirm. */
1136 #define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
1137 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
1138 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
/* Zeroing form of _mm256_insertf64x2: __builtin_ia32_selectpd_256 picks,
 * under mask U, between _mm256_insertf64x2(A, B, imm) and
 * _mm256_setzero_pd(), so lanes not taken from the insert result are zero.
 * Result is __m256d. */
1141 #define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
1142 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
1143 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
1144 (__v4df)_mm256_setzero_pd()); })
/* Combines A with a 128-bit vector B into a __m256i. A (as __v4di) supplies
 * shuffle indices 0..3 and C = _mm256_castsi128_si256(B) supplies 4..7.
 *   bit 0 of imm set:   { A[0], A[1], C[0], C[1] }  (upper half replaced)
 *   bit 0 of imm clear: { C[0], C[1], A[2], A[3] }  (lower half replaced)
 * Only bit 0 of imm is tested; imm is expanded four times, once per index.
 * C[0] and C[1] are expected to be the two 64-bit elements of B, per the
 * cast intrinsic's contract. */
1146 #define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
1147 (__m256i)__builtin_shufflevector((__v4di)(A), \
1148 (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
1149 ((imm) & 0x1) ? 0 : 4, \
1150 ((imm) & 0x1) ? 1 : 5, \
1151 ((imm) & 0x1) ? 4 : 2, \
1152 ((imm) & 0x1) ? 5 : 3); })
/* Masked form of _mm256_inserti64x2: passes the mask U and
 * _mm256_inserti64x2(A, B, imm), viewed as __v4di, to
 * __builtin_ia32_selectq_256 and casts the result to __m256i.
 * NOTE(review): the last select operand is on a line not shown in this
 * excerpt; presumably it is W, which the visible lines never use --
 * confirm. */
1154 #define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
1155 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
1156 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
/* Zeroing form of _mm256_inserti64x2: __builtin_ia32_selectq_256 picks,
 * under mask U, between _mm256_inserti64x2(A, B, imm) and
 * _mm256_setzero_si256(), so lanes not taken from the insert result are
 * zero. Result is __m256i. */
1159 #define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
1160 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
1161 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
1162 (__v4di)_mm256_setzero_si256()); })
/* Expands to a call of __builtin_ia32_fpclasspd128_mask on A (cast to
 * __m128d, then __v2df) with imm converted to int; the builtin's result is
 * cast to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; presumably U is passed there as the mask -- confirm. */
1164 #define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
1165 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
/* Unmasked counterpart of _mm_mask_fpclass_pd_mask: calls
 * __builtin_ia32_fpclasspd128_mask on A (cast to __m128d, then __v2df) with
 * imm converted to int and casts the result to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; since this form has no U parameter it presumably passes an
 * all-ones mask -- confirm. */
1168 #define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
1169 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
/* Expands to a call of __builtin_ia32_fpclasspd256_mask on A (cast to
 * __m256d, then __v4df) with imm converted to int; the builtin's result is
 * cast to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; presumably U is passed there as the mask -- confirm. */
1172 #define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
1173 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
/* Unmasked counterpart of _mm256_mask_fpclass_pd_mask: calls
 * __builtin_ia32_fpclasspd256_mask on A (cast to __m256d, then __v4df) with
 * imm converted to int and casts the result to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; since this form has no U parameter it presumably passes an
 * all-ones mask -- confirm. */
1176 #define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
1177 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
/* Expands to a call of __builtin_ia32_fpclassps128_mask on A (cast to
 * __m128, then __v4sf) with imm converted to int; the builtin's result is
 * cast to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; presumably U is passed there as the mask -- confirm. */
1180 #define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
1181 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
/* Unmasked counterpart of _mm_mask_fpclass_ps_mask: calls
 * __builtin_ia32_fpclassps128_mask on A (cast to __m128, then __v4sf) with
 * imm converted to int and casts the result to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; since this form has no U parameter it presumably passes an
 * all-ones mask -- confirm. */
1184 #define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
1185 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
/* Expands to a call of __builtin_ia32_fpclassps256_mask on A (cast to
 * __m256, then __v8sf) with imm converted to int; the builtin's result is
 * cast to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; presumably U is passed there as the mask -- confirm. */
1188 #define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
1189 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
/* Unmasked counterpart of _mm256_mask_fpclass_ps_mask: calls
 * __builtin_ia32_fpclassps256_mask on A (cast to __m256, then __v8sf) with
 * imm converted to int and casts the result to __mmask8.
 * NOTE(review): the builtin's final argument is on a line not shown in this
 * excerpt; since this form has no U parameter it presumably passes an
 * all-ones mask -- confirm. */
1192 #define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
1193 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
/* __DEFAULT_FN_ATTRS is defined near the top of this header for the
 * functions in this file; undefine it here so the name does not remain
 * defined past this point. */
1196 #undef __DEFAULT_FN_ATTRS