1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512DQINTRIN_H
29 #define __AVX512DQINTRIN_H
31 /* Define the default attributes for the functions in this file. */
32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
34 static __inline__ __m512i __DEFAULT_FN_ATTRS
35 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36 return (__m512i) ((__v8du) __A * (__v8du) __B);
39 static __inline__ __m512i __DEFAULT_FN_ATTRS
40 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
42 (__v8di)_mm512_mullo_epi64(__A, __B),
46 static __inline__ __m512i __DEFAULT_FN_ATTRS
47 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
48 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
49 (__v8di)_mm512_mullo_epi64(__A, __B),
50 (__v8di)_mm512_setzero_si512());
53 static __inline__ __m512d __DEFAULT_FN_ATTRS
54 _mm512_xor_pd(__m512d __A, __m512d __B) {
55 return (__m512d)((__v8du)__A ^ (__v8du)__B);
58 static __inline__ __m512d __DEFAULT_FN_ATTRS
59 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
60 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
61 (__v8df)_mm512_xor_pd(__A, __B),
65 static __inline__ __m512d __DEFAULT_FN_ATTRS
66 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
67 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
68 (__v8df)_mm512_xor_pd(__A, __B),
69 (__v8df)_mm512_setzero_pd());
72 static __inline__ __m512 __DEFAULT_FN_ATTRS
73 _mm512_xor_ps (__m512 __A, __m512 __B) {
74 return (__m512)((__v16su)__A ^ (__v16su)__B);
77 static __inline__ __m512 __DEFAULT_FN_ATTRS
78 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
79 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
80 (__v16sf)_mm512_xor_ps(__A, __B),
84 static __inline__ __m512 __DEFAULT_FN_ATTRS
85 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
86 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
87 (__v16sf)_mm512_xor_ps(__A, __B),
88 (__v16sf)_mm512_setzero_ps());
91 static __inline__ __m512d __DEFAULT_FN_ATTRS
92 _mm512_or_pd(__m512d __A, __m512d __B) {
93 return (__m512d)((__v8du)__A | (__v8du)__B);
96 static __inline__ __m512d __DEFAULT_FN_ATTRS
97 _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
98 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
99 (__v8df)_mm512_or_pd(__A, __B),
103 static __inline__ __m512d __DEFAULT_FN_ATTRS
104 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
105 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
106 (__v8df)_mm512_or_pd(__A, __B),
107 (__v8df)_mm512_setzero_pd());
110 static __inline__ __m512 __DEFAULT_FN_ATTRS
111 _mm512_or_ps(__m512 __A, __m512 __B) {
112 return (__m512)((__v16su)__A | (__v16su)__B);
115 static __inline__ __m512 __DEFAULT_FN_ATTRS
116 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
117 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
118 (__v16sf)_mm512_or_ps(__A, __B),
122 static __inline__ __m512 __DEFAULT_FN_ATTRS
123 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
124 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
125 (__v16sf)_mm512_or_ps(__A, __B),
126 (__v16sf)_mm512_setzero_ps());
129 static __inline__ __m512d __DEFAULT_FN_ATTRS
130 _mm512_and_pd(__m512d __A, __m512d __B) {
131 return (__m512d)((__v8du)__A & (__v8du)__B);
134 static __inline__ __m512d __DEFAULT_FN_ATTRS
135 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
136 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
137 (__v8df)_mm512_and_pd(__A, __B),
141 static __inline__ __m512d __DEFAULT_FN_ATTRS
142 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
143 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
144 (__v8df)_mm512_and_pd(__A, __B),
145 (__v8df)_mm512_setzero_pd());
148 static __inline__ __m512 __DEFAULT_FN_ATTRS
149 _mm512_and_ps(__m512 __A, __m512 __B) {
150 return (__m512)((__v16su)__A & (__v16su)__B);
153 static __inline__ __m512 __DEFAULT_FN_ATTRS
154 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
155 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
156 (__v16sf)_mm512_and_ps(__A, __B),
160 static __inline__ __m512 __DEFAULT_FN_ATTRS
161 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
162 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
163 (__v16sf)_mm512_and_ps(__A, __B),
164 (__v16sf)_mm512_setzero_ps());
167 static __inline__ __m512d __DEFAULT_FN_ATTRS
168 _mm512_andnot_pd(__m512d __A, __m512d __B) {
169 return (__m512d)(~(__v8du)__A & (__v8du)__B);
172 static __inline__ __m512d __DEFAULT_FN_ATTRS
173 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
174 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
175 (__v8df)_mm512_andnot_pd(__A, __B),
179 static __inline__ __m512d __DEFAULT_FN_ATTRS
180 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
181 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
182 (__v8df)_mm512_andnot_pd(__A, __B),
183 (__v8df)_mm512_setzero_pd());
186 static __inline__ __m512 __DEFAULT_FN_ATTRS
187 _mm512_andnot_ps(__m512 __A, __m512 __B) {
188 return (__m512)(~(__v16su)__A & (__v16su)__B);
191 static __inline__ __m512 __DEFAULT_FN_ATTRS
192 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
193 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
194 (__v16sf)_mm512_andnot_ps(__A, __B),
198 static __inline__ __m512 __DEFAULT_FN_ATTRS
199 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
200 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
201 (__v16sf)_mm512_andnot_ps(__A, __B),
202 (__v16sf)_mm512_setzero_ps());
205 static __inline__ __m512i __DEFAULT_FN_ATTRS
206 _mm512_cvtpd_epi64 (__m512d __A) {
207 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
208 (__v8di) _mm512_setzero_si512(),
210 _MM_FROUND_CUR_DIRECTION);
213 static __inline__ __m512i __DEFAULT_FN_ATTRS
214 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
215 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
218 _MM_FROUND_CUR_DIRECTION);
221 static __inline__ __m512i __DEFAULT_FN_ATTRS
222 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
223 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
224 (__v8di) _mm512_setzero_si512(),
226 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled double -> signed 64-bit int conversions; R selects
   the rounding mode (e.g. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC). */
#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })
244 static __inline__ __m512i __DEFAULT_FN_ATTRS
245 _mm512_cvtpd_epu64 (__m512d __A) {
246 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
247 (__v8di) _mm512_setzero_si512(),
249 _MM_FROUND_CUR_DIRECTION);
252 static __inline__ __m512i __DEFAULT_FN_ATTRS
253 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
254 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
257 _MM_FROUND_CUR_DIRECTION);
260 static __inline__ __m512i __DEFAULT_FN_ATTRS
261 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
262 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
263 (__v8di) _mm512_setzero_si512(),
265 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled double -> unsigned 64-bit int conversions. */
#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
283 static __inline__ __m512i __DEFAULT_FN_ATTRS
284 _mm512_cvtps_epi64 (__m256 __A) {
285 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
286 (__v8di) _mm512_setzero_si512(),
288 _MM_FROUND_CUR_DIRECTION);
291 static __inline__ __m512i __DEFAULT_FN_ATTRS
292 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
293 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
296 _MM_FROUND_CUR_DIRECTION);
299 static __inline__ __m512i __DEFAULT_FN_ATTRS
300 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
301 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
302 (__v8di) _mm512_setzero_si512(),
304 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled float -> signed 64-bit int conversions. */
#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })
322 static __inline__ __m512i __DEFAULT_FN_ATTRS
323 _mm512_cvtps_epu64 (__m256 __A) {
324 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
325 (__v8di) _mm512_setzero_si512(),
327 _MM_FROUND_CUR_DIRECTION);
330 static __inline__ __m512i __DEFAULT_FN_ATTRS
331 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
332 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
335 _MM_FROUND_CUR_DIRECTION);
338 static __inline__ __m512i __DEFAULT_FN_ATTRS
339 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
340 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
341 (__v8di) _mm512_setzero_si512(),
343 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled float -> unsigned 64-bit int conversions. */
#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
362 static __inline__ __m512d __DEFAULT_FN_ATTRS
363 _mm512_cvtepi64_pd (__m512i __A) {
364 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
365 (__v8df) _mm512_setzero_pd(),
367 _MM_FROUND_CUR_DIRECTION);
370 static __inline__ __m512d __DEFAULT_FN_ATTRS
371 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
372 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
375 _MM_FROUND_CUR_DIRECTION);
378 static __inline__ __m512d __DEFAULT_FN_ATTRS
379 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
380 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
381 (__v8df) _mm512_setzero_pd(),
383 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled signed 64-bit int -> double conversions. */
#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
401 static __inline__ __m256 __DEFAULT_FN_ATTRS
402 _mm512_cvtepi64_ps (__m512i __A) {
403 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
404 (__v8sf) _mm256_setzero_ps(),
406 _MM_FROUND_CUR_DIRECTION);
409 static __inline__ __m256 __DEFAULT_FN_ATTRS
410 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
411 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
414 _MM_FROUND_CUR_DIRECTION);
417 static __inline__ __m256 __DEFAULT_FN_ATTRS
418 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
419 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
420 (__v8sf) _mm256_setzero_ps(),
422 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled signed 64-bit int -> float conversions.  The
   merge-masked variant was truncated and missing its final (int)(R) argument
   and closing tokens; restored. */
#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
441 static __inline__ __m512i __DEFAULT_FN_ATTRS
442 _mm512_cvttpd_epi64 (__m512d __A) {
443 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
444 (__v8di) _mm512_setzero_si512(),
446 _MM_FROUND_CUR_DIRECTION);
449 static __inline__ __m512i __DEFAULT_FN_ATTRS
450 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
451 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
454 _MM_FROUND_CUR_DIRECTION);
457 static __inline__ __m512i __DEFAULT_FN_ATTRS
458 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
459 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
460 (__v8di) _mm512_setzero_si512(),
462 _MM_FROUND_CUR_DIRECTION);
/* Truncating double -> signed 64-bit int conversions; R controls only the
   exception-suppression behavior (_MM_FROUND_NO_EXC). */
#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
480 static __inline__ __m512i __DEFAULT_FN_ATTRS
481 _mm512_cvttpd_epu64 (__m512d __A) {
482 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
483 (__v8di) _mm512_setzero_si512(),
485 _MM_FROUND_CUR_DIRECTION);
488 static __inline__ __m512i __DEFAULT_FN_ATTRS
489 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
490 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
493 _MM_FROUND_CUR_DIRECTION);
496 static __inline__ __m512i __DEFAULT_FN_ATTRS
497 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
498 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
499 (__v8di) _mm512_setzero_si512(),
501 _MM_FROUND_CUR_DIRECTION);
/* Truncating double -> unsigned 64-bit int conversions with exception control. */
#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
519 static __inline__ __m512i __DEFAULT_FN_ATTRS
520 _mm512_cvttps_epi64 (__m256 __A) {
521 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
522 (__v8di) _mm512_setzero_si512(),
524 _MM_FROUND_CUR_DIRECTION);
527 static __inline__ __m512i __DEFAULT_FN_ATTRS
528 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
529 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
532 _MM_FROUND_CUR_DIRECTION);
535 static __inline__ __m512i __DEFAULT_FN_ATTRS
536 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
537 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
538 (__v8di) _mm512_setzero_si512(),
540 _MM_FROUND_CUR_DIRECTION);
/* Truncating float -> signed 64-bit int conversions with exception control. */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
558 static __inline__ __m512i __DEFAULT_FN_ATTRS
559 _mm512_cvttps_epu64 (__m256 __A) {
560 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
561 (__v8di) _mm512_setzero_si512(),
563 _MM_FROUND_CUR_DIRECTION);
566 static __inline__ __m512i __DEFAULT_FN_ATTRS
567 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
568 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
571 _MM_FROUND_CUR_DIRECTION);
574 static __inline__ __m512i __DEFAULT_FN_ATTRS
575 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
576 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
577 (__v8di) _mm512_setzero_si512(),
579 _MM_FROUND_CUR_DIRECTION);
/* Truncating float -> unsigned 64-bit int conversions with exception control. */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
597 static __inline__ __m512d __DEFAULT_FN_ATTRS
598 _mm512_cvtepu64_pd (__m512i __A) {
599 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
600 (__v8df) _mm512_setzero_pd(),
602 _MM_FROUND_CUR_DIRECTION);
605 static __inline__ __m512d __DEFAULT_FN_ATTRS
606 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
607 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
610 _MM_FROUND_CUR_DIRECTION);
613 static __inline__ __m512d __DEFAULT_FN_ATTRS
614 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
615 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
616 (__v8df) _mm512_setzero_pd(),
618 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled unsigned 64-bit int -> double conversions. */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
638 static __inline__ __m256 __DEFAULT_FN_ATTRS
639 _mm512_cvtepu64_ps (__m512i __A) {
640 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
641 (__v8sf) _mm256_setzero_ps(),
643 _MM_FROUND_CUR_DIRECTION);
646 static __inline__ __m256 __DEFAULT_FN_ATTRS
647 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
648 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
651 _MM_FROUND_CUR_DIRECTION);
654 static __inline__ __m256 __DEFAULT_FN_ATTRS
655 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
656 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
657 (__v8sf) _mm256_setzero_ps(),
659 _MM_FROUND_CUR_DIRECTION);
/* Rounding-mode-controlled unsigned 64-bit int -> float conversions.  The
   merge-masked variant was truncated and missing its final (int)(R) argument
   and closing tokens; restored. */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
/* VRANGEPD: select min/max/abs-min/abs-max of packed doubles per imm C.  The
   unmasked and zero-masked forms were missing their mask arguments; restored. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
/* VRANGEPD with explicit rounding/exception control R.  The merge-masked form
   was missing its final (int)(R) argument and closing tokens; restored. */
#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })
/* VRANGEPS: select min/max/abs-min/abs-max of packed floats per imm C.  The
   unmasked and zero-masked forms were missing their mask arguments; restored. */
#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })
/* VRANGEPS with explicit rounding/exception control R.  The merge-masked form
   was missing its final (int)(R) argument and closing tokens; restored. */
#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })
/* Scalar single-precision VRANGESS; each *_round_* form was missing its final
   (int)(R) argument and closing tokens; restored.  The non-round wrappers
   forward with _MM_FROUND_CUR_DIRECTION. */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C),\
                                               (int)(R)); })

#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Merge-masked: a zero bit in U selects the low element of W. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W),\
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)

/* Zero-masked: a zero bit in U zeroes the low element. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
/* Scalar double-precision VRANGESD; each *_round_* form was missing its final
   (int)(R) argument and closing tokens; restored. */
#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C),\
                                                (int)(R)); })

#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Merge-masked: a zero bit in U selects the low element of W. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W),\
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

/* Zero-masked: a zero bit in U zeroes the low element. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
/* VREDUCEPD: extract the reduced argument of packed doubles per imm B.  All
   three forms were missing their mask argument line; restored. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
/* VREDUCEPS: extract the reduced argument of packed floats per imm B.  All
   three forms were missing their mask argument line; restored. */
#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
/* VREDUCEPD/VREDUCEPS with explicit rounding/exception control R. */
#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Merge-masked: lanes with a zero bit in U come from W. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masked: lanes with a zero bit in U are zeroed. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Scalar VREDUCESS on the low float; upper lanes pass through from A. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })

/* Scalar VREDUCESS with explicit rounding/exception control R. */
#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })

/* Scalar VREDUCESD on the low double; upper lane passes through from A. */
#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

/* Scalar VREDUCESD with explicit rounding/exception control R. */
#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R)); })
948 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
949 _mm512_movepi32_mask (__m512i __A)
951 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
954 static __inline__ __m512i __DEFAULT_FN_ATTRS
955 _mm512_movm_epi32 (__mmask16 __A)
957 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
960 static __inline__ __m512i __DEFAULT_FN_ATTRS
961 _mm512_movm_epi64 (__mmask8 __A)
963 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
966 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
967 _mm512_movepi64_mask (__m512i __A)
969 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
973 static __inline__ __m512 __DEFAULT_FN_ATTRS
974 _mm512_broadcast_f32x2 (__m128 __A)
976 return (__m512)__builtin_shufflevector((__v4sf)__A,
977 (__v4sf)_mm_undefined_ps(),
978 0, 1, 0, 1, 0, 1, 0, 1,
979 0, 1, 0, 1, 0, 1, 0, 1);
982 static __inline__ __m512 __DEFAULT_FN_ATTRS
983 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
985 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
986 (__v16sf)_mm512_broadcast_f32x2(__A),
990 static __inline__ __m512 __DEFAULT_FN_ATTRS
991 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
993 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
994 (__v16sf)_mm512_broadcast_f32x2(__A),
995 (__v16sf)_mm512_setzero_ps());
998 static __inline__ __m512 __DEFAULT_FN_ATTRS
999 _mm512_broadcast_f32x8(__m256 __A)
1001 return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1002 0, 1, 2, 3, 4, 5, 6, 7,
1003 0, 1, 2, 3, 4, 5, 6, 7);
1006 static __inline__ __m512 __DEFAULT_FN_ATTRS
1007 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
1009 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M,
1010 (__v16sf)_mm512_broadcast_f32x8(__A),
1014 static __inline__ __m512 __DEFAULT_FN_ATTRS
1015 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
1017 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M,
1018 (__v16sf)_mm512_broadcast_f32x8(__A),
1019 (__v16sf)_mm512_setzero_ps());
1022 static __inline__ __m512d __DEFAULT_FN_ATTRS
1023 _mm512_broadcast_f64x2(__m128d __A)
1025 return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1026 0, 1, 0, 1, 0, 1, 0, 1);
1029 static __inline__ __m512d __DEFAULT_FN_ATTRS
1030 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
1032 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1033 (__v8df)_mm512_broadcast_f64x2(__A),
1037 static __inline__ __m512d __DEFAULT_FN_ATTRS
1038 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
1040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1041 (__v8df)_mm512_broadcast_f64x2(__A),
1042 (__v8df)_mm512_setzero_pd());
1045 static __inline__ __m512i __DEFAULT_FN_ATTRS
1046 _mm512_broadcast_i32x2 (__m128i __A)
1048 return (__m512i)__builtin_shufflevector((__v4si)__A,
1049 (__v4si)_mm_undefined_si128(),
1050 0, 1, 0, 1, 0, 1, 0, 1,
1051 0, 1, 0, 1, 0, 1, 0, 1);
1054 static __inline__ __m512i __DEFAULT_FN_ATTRS
1055 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1057 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1058 (__v16si)_mm512_broadcast_i32x2(__A),
1062 static __inline__ __m512i __DEFAULT_FN_ATTRS
1063 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1065 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1066 (__v16si)_mm512_broadcast_i32x2(__A),
1067 (__v16si)_mm512_setzero_si512());
1070 static __inline__ __m512i __DEFAULT_FN_ATTRS
1071 _mm512_broadcast_i32x8(__m256i __A)
1073 return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1074 0, 1, 2, 3, 4, 5, 6, 7,
1075 0, 1, 2, 3, 4, 5, 6, 7);
1078 static __inline__ __m512i __DEFAULT_FN_ATTRS
1079 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
1081 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M,
1082 (__v16si)_mm512_broadcast_i32x8(__A),
1086 static __inline__ __m512i __DEFAULT_FN_ATTRS
1087 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
1089 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M,
1090 (__v16si)_mm512_broadcast_i32x8(__A),
1091 (__v16si)_mm512_setzero_si512());
1094 static __inline__ __m512i __DEFAULT_FN_ATTRS
1095 _mm512_broadcast_i64x2(__m128i __A)
1097 return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1098 0, 1, 0, 1, 0, 1, 0, 1);
1101 static __inline__ __m512i __DEFAULT_FN_ATTRS
1102 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
1104 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1105 (__v8di)_mm512_broadcast_i64x2(__A),
1109 static __inline__ __m512i __DEFAULT_FN_ATTRS
1110 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
1112 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1113 (__v8di)_mm512_broadcast_i64x2(__A),
1114 (__v8di)_mm512_setzero_si512());
/* VEXTRACTF32X8: extract the 256-bit half of A selected by bit 0 of imm. */
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  ((imm) & 1) ?  8 : 0, \
                                  ((imm) & 1) ?  9 : 1, \
                                  ((imm) & 1) ? 10 : 2, \
                                  ((imm) & 1) ? 11 : 3, \
                                  ((imm) & 1) ? 12 : 4, \
                                  ((imm) & 1) ? 13 : 5, \
                                  ((imm) & 1) ? 14 : 6, \
                                  ((imm) & 1) ? 15 : 7); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)(__m256)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })
/* VEXTRACTF64X2: extract the 128-bit lane of A selected by imm[1:0]. */
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)(__m128d)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)_mm_setzero_pd()); })
/* VEXTRACTI32X8: extract the 256-bit half of A selected by bit 0 of imm. */
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ?  8 : 0, \
                                   ((imm) & 1) ?  9 : 1, \
                                   ((imm) & 1) ? 10 : 2, \
                                   ((imm) & 1) ? 11 : 3, \
                                   ((imm) & 1) ? 12 : 4, \
                                   ((imm) & 1) ? 13 : 5, \
                                   ((imm) & 1) ? 14 : 6, \
                                   ((imm) & 1) ? 15 : 7); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)(__m256i)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })
/* VEXTRACTI64X2: extract the 128-bit lane of A selected by imm[1:0]. */
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })

/* Fix: the masked forms previously cast their results to (__m128d).
   These are integer intrinsics and must yield __m128i; the zero vector is
   produced with the standard _mm_setzero_si128() rather than the
   nonstandard _mm_setzero_di(). */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)(__m128i)(W)); })

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)_mm_setzero_si128()); })
/* VINSERTF32X8: replace the 256-bit half of A selected by bit 0 of imm
   with B.  Implemented as a shufflevector over A and B widened to 512
   bits, where B's elements are indices 16..23. */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps256_ps512((__m256)(B)),\
                                  ((imm) & 0x1) ?  0 : 16, \
                                  ((imm) & 0x1) ?  1 : 17, \
                                  ((imm) & 0x1) ?  2 : 18, \
                                  ((imm) & 0x1) ?  3 : 19, \
                                  ((imm) & 0x1) ?  4 : 20, \
                                  ((imm) & 0x1) ?  5 : 21, \
                                  ((imm) & 0x1) ?  6 : 22, \
                                  ((imm) & 0x1) ?  7 : 23, \
                                  ((imm) & 0x1) ? 16 :  8, \
                                  ((imm) & 0x1) ? 17 :  9, \
                                  ((imm) & 0x1) ? 18 : 10, \
                                  ((imm) & 0x1) ? 19 : 11, \
                                  ((imm) & 0x1) ? 20 : 12, \
                                  ((imm) & 0x1) ? 21 : 13, \
                                  ((imm) & 0x1) ? 22 : 14, \
                                  ((imm) & 0x1) ? 23 : 15); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })
/* VINSERTF64X2: replace the 128-bit lane of A selected by imm[1:0] with
   B (widened to 512 bits; B's two elements are indices 8 and 9). */
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })
/* VINSERTI32X8: replace the 256-bit half of A selected by bit 0 of imm
   with B (widened to 512 bits; B's elements are indices 16..23). */
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_castsi256_si512((__m256i)(B)),\
                                   ((imm) & 0x1) ?  0 : 16, \
                                   ((imm) & 0x1) ?  1 : 17, \
                                   ((imm) & 0x1) ?  2 : 18, \
                                   ((imm) & 0x1) ?  3 : 19, \
                                   ((imm) & 0x1) ?  4 : 20, \
                                   ((imm) & 0x1) ?  5 : 21, \
                                   ((imm) & 0x1) ?  6 : 22, \
                                   ((imm) & 0x1) ?  7 : 23, \
                                   ((imm) & 0x1) ? 16 :  8, \
                                   ((imm) & 0x1) ? 17 :  9, \
                                   ((imm) & 0x1) ? 18 : 10, \
                                   ((imm) & 0x1) ? 19 : 11, \
                                   ((imm) & 0x1) ? 20 : 12, \
                                   ((imm) & 0x1) ? 21 : 13, \
                                   ((imm) & 0x1) ? 22 : 14, \
                                   ((imm) & 0x1) ? 23 : 15); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })
/* VINSERTI64X2: replace the 128-bit lane of A selected by imm[1:0] with
   B (widened to 512 bits; B's two elements are indices 8 and 9). */
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_castsi128_si512((__m128i)(B)),\
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

/* Merge-masked form: elements with a zero bit in U come from W. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked form: elements with a zero bit in U are zeroed. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
/* VFPCLASSPS/VFPCLASSPD/VFPCLASSSS/VFPCLASSSD: classify elements against
   the category set selected by imm (see the Intel SDM for the imm8
   encoding).  Non-mask forms test all elements ((__mmask)-1); mask forms
   test only elements whose bit in U is set. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U)); })

#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1); })

#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U)); })

#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U)); })
1329 #undef __DEFAULT_FN_ATTRS