/* clang lib/Headers/avx512dqintrin.h — vendored in FreeBSD contrib/llvm */
1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23
24 #ifndef __IMMINTRIN_H
25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef __AVX512DQINTRIN_H
29 #define __AVX512DQINTRIN_H
30
31 /* Define the default attributes for the functions in this file. */
32 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
33 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
34
35 static __inline __mmask8 __DEFAULT_FN_ATTRS
36 _knot_mask8(__mmask8 __M)
37 {
38   return __builtin_ia32_knotqi(__M);
39 }
40
/* Bitwise AND of two 8-bit mask values. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}

/* Bitwise AND-NOT (kandn) of two 8-bit mask values. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}

/* Bitwise OR of two 8-bit mask values. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}

/* Bitwise XNOR of two 8-bit mask values. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}

/* Bitwise XOR of two 8-bit mask values. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}
70
/* OR-test of two 8-bit masks: returns the carry result (kortestc). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}

/* OR-test of two 8-bit masks: returns the zero result (kortestz). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

/* Combined OR-test: stores the kortestc result in *__C and returns the
 * kortestz result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}
88
/* ktest of two 8-bit masks: returns the carry result (ktestc). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}

/* ktest of two 8-bit masks: returns the zero result (ktestz). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

/* Combined ktest: stores the ktestc result in *__C and returns the
 * ktestz result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}
106
/* ktest of two 16-bit masks: returns the carry result (ktestc). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}

/* ktest of two 16-bit masks: returns the zero result (ktestz). */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

/* Combined 16-bit ktest: stores the ktestc result in *__C and returns the
 * ktestz result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}
124
/* Adds two 8-bit mask values (kadd). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
}

/* Adds two 16-bit mask values (kadd). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A, __mmask16 __B)
{
  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
}
136
/* Shift an 8-bit mask left/right by the immediate I. Macros (not inline
 * functions) because I must be a compile-time immediate for the builtin. */
#define _kshiftli_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))

#define _kshiftri_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))
142
/* Copy an 8-bit mask into an unsigned int. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
}

/* Copy the low 8 bits of an unsigned int into an 8-bit mask. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A) {
  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
}

/* Load an 8-bit mask from memory. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 *__A) {
  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
}

/* Store an 8-bit mask to memory. */
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 *__A, __mmask8 __B) {
  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}
162
/* Multiply packed 64-bit integers, keeping the low 64 bits of each product.
 * Expressed as a plain vector multiply so the optimizer sees the semantics. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Masked variant: elements where __U is 0 are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
181
/* Bitwise XOR of packed doubles (operates on the raw 64-bit lanes). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise XOR of packed floats (operates on the raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
219
/* Bitwise OR of packed doubles (operates on the raw 64-bit lanes). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise OR of packed floats (operates on the raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
257
/* Bitwise AND of packed doubles (operates on the raw 64-bit lanes). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise AND of packed floats (operates on the raw 32-bit lanes). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
295
/* Bitwise AND-NOT of packed doubles: ~__A & __B on the raw 64-bit lanes.
 * Note the first operand is the one inverted, per Intel's andnot convention. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Bitwise AND-NOT of packed floats: ~__A & __B on the raw 32-bit lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
333
/* Convert packed doubles to packed signed 64-bit ints using the current
 * rounding direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
357
/* Explicit-rounding variants of the double -> signed 64-bit conversion.
 * R must be a compile-time rounding-control immediate, hence macros. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))
372
/* Convert packed doubles to packed unsigned 64-bit ints using the current
 * rounding direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
396
/* Explicit-rounding variants of the double -> unsigned 64-bit conversion. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
411
/* Convert 8 packed floats (256-bit source) to packed signed 64-bit ints
 * using the current rounding direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
435
/* Explicit-rounding variants of the float -> signed 64-bit conversion. */
#define _mm512_cvt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))
450
/* Convert 8 packed floats (256-bit source) to packed unsigned 64-bit ints
 * using the current rounding direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
474
/* Explicit-rounding variants of the float -> unsigned 64-bit conversion. */
#define _mm512_cvt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
489
490
/* Convert packed signed 64-bit ints to packed doubles. Expressed as a
 * generic vector conversion so the compiler handles the lowering. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
509
/* Explicit-rounding variants of the signed 64-bit -> double conversion. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
524
/* Convert packed signed 64-bit ints to 8 packed floats (256-bit result)
 * using the current rounding direction. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) _mm256_setzero_ps(),
               (__mmask8) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) __W,
               (__mmask8) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) _mm256_setzero_ps(),
               (__mmask8) __U,
               _MM_FROUND_CUR_DIRECTION);
}
548
/* Explicit-rounding variants of the signed 64-bit -> float conversion. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
563
564
/* Convert packed doubles to packed signed 64-bit ints with truncation. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
588
/* Exception-control variants of the truncating double -> signed 64-bit
 * conversion. R must be a compile-time immediate. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
603
/* Convert packed doubles to packed unsigned 64-bit ints with truncation. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
627
/* Exception-control variants of the truncating double -> unsigned 64-bit
 * conversion. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))
642
/* Convert 8 packed floats (256-bit source) to packed signed 64-bit ints
 * with truncation. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
666
/* Exception-control variants of the truncating float -> signed 64-bit
 * conversion. */
#define _mm512_cvtt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
681
/* Convert 8 packed floats (256-bit source) to packed unsigned 64-bit ints
 * with truncation. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
705
/* Exception-control variants of the truncating float -> unsigned 64-bit
 * conversion. */
#define _mm512_cvtt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))
720
/* Convert packed unsigned 64-bit ints to packed doubles. Expressed as a
 * generic vector conversion so the compiler handles the lowering. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}

/* Masked variant: elements where __U is 0 come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked variant: elements where __U is 0 are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
739
/* Unsigned 64-bit -> double conversions with an explicit rounding-mode
   immediate R (constant required, hence macros). */
#define _mm512_cvt_roundepu64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
755
756
/* Convert 8 unsigned 64-bit integers in __A to 8 single-precision floats
   (result narrows to a 256-bit vector), using the current rounding
   direction.  Standard unmasked / merge-mask / zero-mask forms. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
780
/* Unsigned 64-bit -> float conversions with an explicit rounding-mode
   immediate R (constant required, hence macros). */
#define _mm512_cvt_roundepu64_ps(A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
795
/* VRANGEPD/VRANGEPS: per-element range operation on packed doubles/floats.
   The immediate C selects the operation and sign control (see Intel's
   VRANGE* documentation); the non-"round" forms pass the current rounding
   direction, the "_round" forms take an explicit R immediate.  Each has
   the usual unmasked / merge-mask / zero-mask variants. */
#define _mm512_range_pd(A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R))

#define _mm512_range_ps(A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_ps(U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R))
871
/* Scalar VRANGESS/VRANGESD: range operation on the low float/double of A
   and B; upper elements pass through from A.  The "_round" forms are the
   primitives; the non-round forms forward _MM_FROUND_CUR_DIRECTION.  Note
   the scalar builtins take the immediate C *after* the mask, unlike the
   packed forms above. */
#define _mm_range_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C),\
                                               (int)(R))

#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W),\
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R))

#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R))

#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C),\
                                                (int)(R))

#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W),\
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R))

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R))

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
925
/* VREDUCEPD/VREDUCEPS: per-element "reduce" transform on packed
   doubles/floats controlled by the immediate B (extracts fractional bits;
   see Intel's VREDUCE* documentation).  Non-"round" forms use the current
   rounding direction; "_round" forms take an explicit R immediate. */
#define _mm512_reduce_pd(A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_pd(W, U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_pd(U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_ps(A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_ps(W, U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_ps(U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_reduce_round_ps(A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))
991
/* Scalar VREDUCESS/VREDUCESD: reduce transform on the low float/double of
   B, with upper elements taken from A.  Immediate C selects the transform;
   non-"round" forms pass the current rounding direction while "_round"
   forms take an explicit R immediate. */
#define _mm_reduce_ss(A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_ss(U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R))

#define _mm_reduce_sd(A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_sd(U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R))
1066
/* Conversions between vectors and mask registers (VPMOVD2M/VPMOVM2D and
   the 64-bit counterparts): movepi*_mask extracts a mask from the vector
   elements, movm_epi* expands a mask into a vector. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
1090
1091
/* Float broadcasts expressed as shuffles: f32x2 repeats the low two floats
   of __A eight times across the 512-bit result; f32x8 repeats the whole
   256-bit vector twice.  The mask/maskz forms blend the broadcast with __O
   or zero via the per-element select builtin. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x8(__A),
                                           (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x8(__A),
                                           (__v16sf)_mm512_setzero_ps());
}
1139
/* f64x2 repeats the 128-bit pair of doubles four times; i32x2 repeats the
   low two 32-bit integers eight times.  Mask/maskz forms blend with __O or
   zero via the select builtins. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x2(__A),
                                            (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x2(__A),
                                            (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}
1186
/* i32x8 repeats a 256-bit block of eight 32-bit integers twice; i64x2
   repeats a 128-bit pair of 64-bit integers four times.  Mask/maskz forms
   blend with __O or zero via the select builtins. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x8(__A),
                                           (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x8(__A),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x2(__A),
                                            (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x2(__A),
                                            (__v8di)_mm512_setzero_si512());
}
1233
/* 256-bit/128-bit lane extraction from 512-bit vectors.  imm selects which
   lane; the unmasked forms pass an *undefined* source vector (elements not
   written by the extract are unspecified), while maskz forms pass zero. */
#define _mm512_extractf32x8_ps(A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U))

#define _mm512_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

#define _mm512_extracti32x8_epi32(A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))
1299
/* 256-bit/128-bit lane insertion into 512-bit vectors at the position
   selected by imm.  The mask/maskz forms are built by composing the plain
   insert with a per-element select against W or zero. */
#define _mm512_insertf32x8(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                     (__v8sf)(__m256)(B), (int)(imm))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps())

#define _mm512_insertf64x2(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd())

#define _mm512_inserti32x8(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                      (__v8si)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512())

#define _mm512_inserti64x2(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512())
1355
1356 #define _mm512_mask_fpclass_ps_mask(U, A, imm) \
1357   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
1358                                               (int)(imm), (__mmask16)(U))
1359
1360 #define _mm512_fpclass_ps_mask(A, imm) \
1361   (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
1362                                               (int)(imm), (__mmask16)-1)
1363
1364 #define _mm512_mask_fpclass_pd_mask(U, A, imm) \
1365   (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
1366                                              (__mmask8)(U))
1367
1368 #define _mm512_fpclass_pd_mask(A, imm) \
1369   (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
1370                                              (__mmask8)-1)
1371
1372 #define _mm_fpclass_sd_mask(A, imm) \
1373   (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
1374                                           (__mmask8)-1)
1375
1376 #define _mm_mask_fpclass_sd_mask(U, A, imm) \
1377   (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
1378                                           (__mmask8)(U))
1379
1380 #define _mm_fpclass_ss_mask(A, imm) \
1381   (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
1382                                           (__mmask8)-1)
1383
1384 #define _mm_mask_fpclass_ss_mask(U, A, imm) \
1385   (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
1386                                           (__mmask8)(U))
1387
1388 #undef __DEFAULT_FN_ATTRS512
1389 #undef __DEFAULT_FN_ATTRS
1390
1391 #endif