CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm-project/clang/lib/Headers/avx512vldqintrin.h
Merge llvm-project main llvmorg-15-init-17485-ga3e38b4a206b
[FreeBSD/FreeBSD.git] / contrib / llvm-project / clang / lib / Headers / avx512vldqintrin.h
1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9
10 #ifndef __IMMINTRIN_H
11 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
12 #endif
13
14 #ifndef __AVX512VLDQINTRIN_H
15 #define __AVX512VLDQINTRIN_H
16
/* Attribute sets applied to every function defined in this file; the two
 * macros differ only in the __min_vector_width__ argument (128 vs 256). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
20
21 static __inline__ __m256i __DEFAULT_FN_ATTRS256
22 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
23   return (__m256i) ((__v4du) __A * (__v4du) __B);
24 }
25
26 static __inline__ __m256i __DEFAULT_FN_ATTRS256
27 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
28   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
29                                              (__v4di)_mm256_mullo_epi64(__A, __B),
30                                              (__v4di)__W);
31 }
32
33 static __inline__ __m256i __DEFAULT_FN_ATTRS256
34 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
35   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
36                                              (__v4di)_mm256_mullo_epi64(__A, __B),
37                                              (__v4di)_mm256_setzero_si256());
38 }
39
40 static __inline__ __m128i __DEFAULT_FN_ATTRS128
41 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
42   return (__m128i) ((__v2du) __A * (__v2du) __B);
43 }
44
45 static __inline__ __m128i __DEFAULT_FN_ATTRS128
46 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
47   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
48                                              (__v2di)_mm_mullo_epi64(__A, __B),
49                                              (__v2di)__W);
50 }
51
52 static __inline__ __m128i __DEFAULT_FN_ATTRS128
53 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
54   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
55                                              (__v2di)_mm_mullo_epi64(__A, __B),
56                                              (__v2di)_mm_setzero_si128());
57 }
58
59 static __inline__ __m256d __DEFAULT_FN_ATTRS256
60 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
61   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
62                                               (__v4df)_mm256_andnot_pd(__A, __B),
63                                               (__v4df)__W);
64 }
65
66 static __inline__ __m256d __DEFAULT_FN_ATTRS256
67 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
68   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
69                                               (__v4df)_mm256_andnot_pd(__A, __B),
70                                               (__v4df)_mm256_setzero_pd());
71 }
72
73 static __inline__ __m128d __DEFAULT_FN_ATTRS128
74 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
75   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
76                                               (__v2df)_mm_andnot_pd(__A, __B),
77                                               (__v2df)__W);
78 }
79
80 static __inline__ __m128d __DEFAULT_FN_ATTRS128
81 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
82   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
83                                               (__v2df)_mm_andnot_pd(__A, __B),
84                                               (__v2df)_mm_setzero_pd());
85 }
86
87 static __inline__ __m256 __DEFAULT_FN_ATTRS256
88 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
89   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
90                                              (__v8sf)_mm256_andnot_ps(__A, __B),
91                                              (__v8sf)__W);
92 }
93
94 static __inline__ __m256 __DEFAULT_FN_ATTRS256
95 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
96   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
97                                              (__v8sf)_mm256_andnot_ps(__A, __B),
98                                              (__v8sf)_mm256_setzero_ps());
99 }
100
101 static __inline__ __m128 __DEFAULT_FN_ATTRS128
102 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
103   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
104                                              (__v4sf)_mm_andnot_ps(__A, __B),
105                                              (__v4sf)__W);
106 }
107
108 static __inline__ __m128 __DEFAULT_FN_ATTRS128
109 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
110   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
111                                              (__v4sf)_mm_andnot_ps(__A, __B),
112                                              (__v4sf)_mm_setzero_ps());
113 }
114
115 static __inline__ __m256d __DEFAULT_FN_ATTRS256
116 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
117   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
118                                               (__v4df)_mm256_and_pd(__A, __B),
119                                               (__v4df)__W);
120 }
121
122 static __inline__ __m256d __DEFAULT_FN_ATTRS256
123 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
124   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
125                                               (__v4df)_mm256_and_pd(__A, __B),
126                                               (__v4df)_mm256_setzero_pd());
127 }
128
129 static __inline__ __m128d __DEFAULT_FN_ATTRS128
130 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
131   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
132                                               (__v2df)_mm_and_pd(__A, __B),
133                                               (__v2df)__W);
134 }
135
136 static __inline__ __m128d __DEFAULT_FN_ATTRS128
137 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
138   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
139                                               (__v2df)_mm_and_pd(__A, __B),
140                                               (__v2df)_mm_setzero_pd());
141 }
142
143 static __inline__ __m256 __DEFAULT_FN_ATTRS256
144 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
145   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
146                                              (__v8sf)_mm256_and_ps(__A, __B),
147                                              (__v8sf)__W);
148 }
149
150 static __inline__ __m256 __DEFAULT_FN_ATTRS256
151 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
152   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
153                                              (__v8sf)_mm256_and_ps(__A, __B),
154                                              (__v8sf)_mm256_setzero_ps());
155 }
156
157 static __inline__ __m128 __DEFAULT_FN_ATTRS128
158 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
159   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
160                                              (__v4sf)_mm_and_ps(__A, __B),
161                                              (__v4sf)__W);
162 }
163
164 static __inline__ __m128 __DEFAULT_FN_ATTRS128
165 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
166   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
167                                              (__v4sf)_mm_and_ps(__A, __B),
168                                              (__v4sf)_mm_setzero_ps());
169 }
170
171 static __inline__ __m256d __DEFAULT_FN_ATTRS256
172 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
173   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
174                                               (__v4df)_mm256_xor_pd(__A, __B),
175                                               (__v4df)__W);
176 }
177
178 static __inline__ __m256d __DEFAULT_FN_ATTRS256
179 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
180   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
181                                               (__v4df)_mm256_xor_pd(__A, __B),
182                                               (__v4df)_mm256_setzero_pd());
183 }
184
185 static __inline__ __m128d __DEFAULT_FN_ATTRS128
186 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
187   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
188                                               (__v2df)_mm_xor_pd(__A, __B),
189                                               (__v2df)__W);
190 }
191
192 static __inline__ __m128d __DEFAULT_FN_ATTRS128
193 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
194   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
195                                               (__v2df)_mm_xor_pd(__A, __B),
196                                               (__v2df)_mm_setzero_pd());
197 }
198
199 static __inline__ __m256 __DEFAULT_FN_ATTRS256
200 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
201   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
202                                              (__v8sf)_mm256_xor_ps(__A, __B),
203                                              (__v8sf)__W);
204 }
205
206 static __inline__ __m256 __DEFAULT_FN_ATTRS256
207 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
208   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
209                                              (__v8sf)_mm256_xor_ps(__A, __B),
210                                              (__v8sf)_mm256_setzero_ps());
211 }
212
213 static __inline__ __m128 __DEFAULT_FN_ATTRS128
214 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
215   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
216                                              (__v4sf)_mm_xor_ps(__A, __B),
217                                              (__v4sf)__W);
218 }
219
220 static __inline__ __m128 __DEFAULT_FN_ATTRS128
221 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
222   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
223                                              (__v4sf)_mm_xor_ps(__A, __B),
224                                              (__v4sf)_mm_setzero_ps());
225 }
226
227 static __inline__ __m256d __DEFAULT_FN_ATTRS256
228 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
229   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
230                                               (__v4df)_mm256_or_pd(__A, __B),
231                                               (__v4df)__W);
232 }
233
234 static __inline__ __m256d __DEFAULT_FN_ATTRS256
235 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
236   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
237                                               (__v4df)_mm256_or_pd(__A, __B),
238                                               (__v4df)_mm256_setzero_pd());
239 }
240
241 static __inline__ __m128d __DEFAULT_FN_ATTRS128
242 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
243   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
244                                               (__v2df)_mm_or_pd(__A, __B),
245                                               (__v2df)__W);
246 }
247
248 static __inline__ __m128d __DEFAULT_FN_ATTRS128
249 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
250   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
251                                               (__v2df)_mm_or_pd(__A, __B),
252                                               (__v2df)_mm_setzero_pd());
253 }
254
255 static __inline__ __m256 __DEFAULT_FN_ATTRS256
256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
257   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
258                                              (__v8sf)_mm256_or_ps(__A, __B),
259                                              (__v8sf)__W);
260 }
261
262 static __inline__ __m256 __DEFAULT_FN_ATTRS256
263 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
264   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
265                                              (__v8sf)_mm256_or_ps(__A, __B),
266                                              (__v8sf)_mm256_setzero_ps());
267 }
268
269 static __inline__ __m128 __DEFAULT_FN_ATTRS128
270 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
271   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
272                                              (__v4sf)_mm_or_ps(__A, __B),
273                                              (__v4sf)__W);
274 }
275
276 static __inline__ __m128 __DEFAULT_FN_ATTRS128
277 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
278   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
279                                              (__v4sf)_mm_or_ps(__A, __B),
280                                              (__v4sf)_mm_setzero_ps());
281 }
282
283 static __inline__ __m128i __DEFAULT_FN_ATTRS128
284 _mm_cvtpd_epi64 (__m128d __A) {
285   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
286                 (__v2di) _mm_setzero_si128(),
287                 (__mmask8) -1);
288 }
289
290 static __inline__ __m128i __DEFAULT_FN_ATTRS128
291 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
292   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
293                 (__v2di) __W,
294                 (__mmask8) __U);
295 }
296
297 static __inline__ __m128i __DEFAULT_FN_ATTRS128
298 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
299   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
300                 (__v2di) _mm_setzero_si128(),
301                 (__mmask8) __U);
302 }
303
304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
305 _mm256_cvtpd_epi64 (__m256d __A) {
306   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
307                 (__v4di) _mm256_setzero_si256(),
308                 (__mmask8) -1);
309 }
310
311 static __inline__ __m256i __DEFAULT_FN_ATTRS256
312 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
313   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
314                 (__v4di) __W,
315                 (__mmask8) __U);
316 }
317
318 static __inline__ __m256i __DEFAULT_FN_ATTRS256
319 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
320   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
321                 (__v4di) _mm256_setzero_si256(),
322                 (__mmask8) __U);
323 }
324
325 static __inline__ __m128i __DEFAULT_FN_ATTRS128
326 _mm_cvtpd_epu64 (__m128d __A) {
327   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
328                 (__v2di) _mm_setzero_si128(),
329                 (__mmask8) -1);
330 }
331
332 static __inline__ __m128i __DEFAULT_FN_ATTRS128
333 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
334   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
335                 (__v2di) __W,
336                 (__mmask8) __U);
337 }
338
339 static __inline__ __m128i __DEFAULT_FN_ATTRS128
340 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
341   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
342                 (__v2di) _mm_setzero_si128(),
343                 (__mmask8) __U);
344 }
345
346 static __inline__ __m256i __DEFAULT_FN_ATTRS256
347 _mm256_cvtpd_epu64 (__m256d __A) {
348   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
349                 (__v4di) _mm256_setzero_si256(),
350                 (__mmask8) -1);
351 }
352
353 static __inline__ __m256i __DEFAULT_FN_ATTRS256
354 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
355   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
356                 (__v4di) __W,
357                 (__mmask8) __U);
358 }
359
360 static __inline__ __m256i __DEFAULT_FN_ATTRS256
361 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
362   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
363                 (__v4di) _mm256_setzero_si256(),
364                 (__mmask8) __U);
365 }
366
367 static __inline__ __m128i __DEFAULT_FN_ATTRS128
368 _mm_cvtps_epi64 (__m128 __A) {
369   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
370                 (__v2di) _mm_setzero_si128(),
371                 (__mmask8) -1);
372 }
373
374 static __inline__ __m128i __DEFAULT_FN_ATTRS128
375 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
376   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
377                 (__v2di) __W,
378                 (__mmask8) __U);
379 }
380
381 static __inline__ __m128i __DEFAULT_FN_ATTRS128
382 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
383   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
384                 (__v2di) _mm_setzero_si128(),
385                 (__mmask8) __U);
386 }
387
388 static __inline__ __m256i __DEFAULT_FN_ATTRS256
389 _mm256_cvtps_epi64 (__m128 __A) {
390   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
391                 (__v4di) _mm256_setzero_si256(),
392                 (__mmask8) -1);
393 }
394
395 static __inline__ __m256i __DEFAULT_FN_ATTRS256
396 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
397   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
398                 (__v4di) __W,
399                 (__mmask8) __U);
400 }
401
402 static __inline__ __m256i __DEFAULT_FN_ATTRS256
403 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
404   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
405                 (__v4di) _mm256_setzero_si256(),
406                 (__mmask8) __U);
407 }
408
409 static __inline__ __m128i __DEFAULT_FN_ATTRS128
410 _mm_cvtps_epu64 (__m128 __A) {
411   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
412                 (__v2di) _mm_setzero_si128(),
413                 (__mmask8) -1);
414 }
415
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
418   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
419                 (__v2di) __W,
420                 (__mmask8) __U);
421 }
422
423 static __inline__ __m128i __DEFAULT_FN_ATTRS128
424 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
425   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
426                 (__v2di) _mm_setzero_si128(),
427                 (__mmask8) __U);
428 }
429
430 static __inline__ __m256i __DEFAULT_FN_ATTRS256
431 _mm256_cvtps_epu64 (__m128 __A) {
432   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
433                 (__v4di) _mm256_setzero_si256(),
434                 (__mmask8) -1);
435 }
436
437 static __inline__ __m256i __DEFAULT_FN_ATTRS256
438 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
439   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
440                 (__v4di) __W,
441                 (__mmask8) __U);
442 }
443
444 static __inline__ __m256i __DEFAULT_FN_ATTRS256
445 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
446   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
447                 (__v4di) _mm256_setzero_si256(),
448                 (__mmask8) __U);
449 }
450
451 static __inline__ __m128d __DEFAULT_FN_ATTRS128
452 _mm_cvtepi64_pd (__m128i __A) {
453   return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
454 }
455
456 static __inline__ __m128d __DEFAULT_FN_ATTRS128
457 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
458   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
459                                               (__v2df)_mm_cvtepi64_pd(__A),
460                                               (__v2df)__W);
461 }
462
463 static __inline__ __m128d __DEFAULT_FN_ATTRS128
464 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
465   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
466                                               (__v2df)_mm_cvtepi64_pd(__A),
467                                               (__v2df)_mm_setzero_pd());
468 }
469
470 static __inline__ __m256d __DEFAULT_FN_ATTRS256
471 _mm256_cvtepi64_pd (__m256i __A) {
472   return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
473 }
474
475 static __inline__ __m256d __DEFAULT_FN_ATTRS256
476 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
477   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
478                                               (__v4df)_mm256_cvtepi64_pd(__A),
479                                               (__v4df)__W);
480 }
481
482 static __inline__ __m256d __DEFAULT_FN_ATTRS256
483 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
484   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
485                                               (__v4df)_mm256_cvtepi64_pd(__A),
486                                               (__v4df)_mm256_setzero_pd());
487 }
488
489 static __inline__ __m128 __DEFAULT_FN_ATTRS128
490 _mm_cvtepi64_ps (__m128i __A) {
491   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
492                 (__v4sf) _mm_setzero_ps(),
493                 (__mmask8) -1);
494 }
495
496 static __inline__ __m128 __DEFAULT_FN_ATTRS128
497 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
498   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
499                 (__v4sf) __W,
500                 (__mmask8) __U);
501 }
502
503 static __inline__ __m128 __DEFAULT_FN_ATTRS128
504 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
505   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
506                 (__v4sf) _mm_setzero_ps(),
507                 (__mmask8) __U);
508 }
509
510 static __inline__ __m128 __DEFAULT_FN_ATTRS256
511 _mm256_cvtepi64_ps (__m256i __A) {
512   return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
513 }
514
515 static __inline__ __m128 __DEFAULT_FN_ATTRS256
516 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
517   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
518                                              (__v4sf)_mm256_cvtepi64_ps(__A),
519                                              (__v4sf)__W);
520 }
521
522 static __inline__ __m128 __DEFAULT_FN_ATTRS256
523 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
524   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
525                                              (__v4sf)_mm256_cvtepi64_ps(__A),
526                                              (__v4sf)_mm_setzero_ps());
527 }
528
529 static __inline__ __m128i __DEFAULT_FN_ATTRS128
530 _mm_cvttpd_epi64 (__m128d __A) {
531   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
532                 (__v2di) _mm_setzero_si128(),
533                 (__mmask8) -1);
534 }
535
536 static __inline__ __m128i __DEFAULT_FN_ATTRS128
537 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
538   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
539                 (__v2di) __W,
540                 (__mmask8) __U);
541 }
542
543 static __inline__ __m128i __DEFAULT_FN_ATTRS128
544 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
545   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
546                 (__v2di) _mm_setzero_si128(),
547                 (__mmask8) __U);
548 }
549
550 static __inline__ __m256i __DEFAULT_FN_ATTRS256
551 _mm256_cvttpd_epi64 (__m256d __A) {
552   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
553                 (__v4di) _mm256_setzero_si256(),
554                 (__mmask8) -1);
555 }
556
557 static __inline__ __m256i __DEFAULT_FN_ATTRS256
558 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
559   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
560                 (__v4di) __W,
561                 (__mmask8) __U);
562 }
563
564 static __inline__ __m256i __DEFAULT_FN_ATTRS256
565 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
566   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
567                 (__v4di) _mm256_setzero_si256(),
568                 (__mmask8) __U);
569 }
570
571 static __inline__ __m128i __DEFAULT_FN_ATTRS128
572 _mm_cvttpd_epu64 (__m128d __A) {
573   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
574                 (__v2di) _mm_setzero_si128(),
575                 (__mmask8) -1);
576 }
577
578 static __inline__ __m128i __DEFAULT_FN_ATTRS128
579 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
580   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
581                 (__v2di) __W,
582                 (__mmask8) __U);
583 }
584
585 static __inline__ __m128i __DEFAULT_FN_ATTRS128
586 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
587   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
588                 (__v2di) _mm_setzero_si128(),
589                 (__mmask8) __U);
590 }
591
592 static __inline__ __m256i __DEFAULT_FN_ATTRS256
593 _mm256_cvttpd_epu64 (__m256d __A) {
594   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
595                 (__v4di) _mm256_setzero_si256(),
596                 (__mmask8) -1);
597 }
598
599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
600 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
601   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
602                 (__v4di) __W,
603                 (__mmask8) __U);
604 }
605
606 static __inline__ __m256i __DEFAULT_FN_ATTRS256
607 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
608   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
609                 (__v4di) _mm256_setzero_si256(),
610                 (__mmask8) __U);
611 }
612
613 static __inline__ __m128i __DEFAULT_FN_ATTRS128
614 _mm_cvttps_epi64 (__m128 __A) {
615   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
616                 (__v2di) _mm_setzero_si128(),
617                 (__mmask8) -1);
618 }
619
620 static __inline__ __m128i __DEFAULT_FN_ATTRS128
621 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
622   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
623                 (__v2di) __W,
624                 (__mmask8) __U);
625 }
626
627 static __inline__ __m128i __DEFAULT_FN_ATTRS128
628 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
629   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
630                 (__v2di) _mm_setzero_si128(),
631                 (__mmask8) __U);
632 }
633
634 static __inline__ __m256i __DEFAULT_FN_ATTRS256
635 _mm256_cvttps_epi64 (__m128 __A) {
636   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
637                 (__v4di) _mm256_setzero_si256(),
638                 (__mmask8) -1);
639 }
640
641 static __inline__ __m256i __DEFAULT_FN_ATTRS256
642 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
643   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
644                 (__v4di) __W,
645                 (__mmask8) __U);
646 }
647
648 static __inline__ __m256i __DEFAULT_FN_ATTRS256
649 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
650   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
651                 (__v4di) _mm256_setzero_si256(),
652                 (__mmask8) __U);
653 }
654
655 static __inline__ __m128i __DEFAULT_FN_ATTRS128
656 _mm_cvttps_epu64 (__m128 __A) {
657   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
658                 (__v2di) _mm_setzero_si128(),
659                 (__mmask8) -1);
660 }
661
662 static __inline__ __m128i __DEFAULT_FN_ATTRS128
663 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
664   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
665                 (__v2di) __W,
666                 (__mmask8) __U);
667 }
668
669 static __inline__ __m128i __DEFAULT_FN_ATTRS128
670 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
671   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
672                 (__v2di) _mm_setzero_si128(),
673                 (__mmask8) __U);
674 }
675
676 static __inline__ __m256i __DEFAULT_FN_ATTRS256
677 _mm256_cvttps_epu64 (__m128 __A) {
678   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
679                 (__v4di) _mm256_setzero_si256(),
680                 (__mmask8) -1);
681 }
682
683 static __inline__ __m256i __DEFAULT_FN_ATTRS256
684 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
685   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
686                 (__v4di) __W,
687                 (__mmask8) __U);
688 }
689
690 static __inline__ __m256i __DEFAULT_FN_ATTRS256
691 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
692   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
693                 (__v4di) _mm256_setzero_si256(),
694                 (__mmask8) __U);
695 }
696
697 static __inline__ __m128d __DEFAULT_FN_ATTRS128
698 _mm_cvtepu64_pd (__m128i __A) {
699   return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
700 }
701
702 static __inline__ __m128d __DEFAULT_FN_ATTRS128
703 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
704   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
705                                               (__v2df)_mm_cvtepu64_pd(__A),
706                                               (__v2df)__W);
707 }
708
709 static __inline__ __m128d __DEFAULT_FN_ATTRS128
710 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
711   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
712                                               (__v2df)_mm_cvtepu64_pd(__A),
713                                               (__v2df)_mm_setzero_pd());
714 }
715
716 static __inline__ __m256d __DEFAULT_FN_ATTRS256
717 _mm256_cvtepu64_pd (__m256i __A) {
718   return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
719 }
720
721 static __inline__ __m256d __DEFAULT_FN_ATTRS256
722 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
723   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
724                                               (__v4df)_mm256_cvtepu64_pd(__A),
725                                               (__v4df)__W);
726 }
727
728 static __inline__ __m256d __DEFAULT_FN_ATTRS256
729 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
730   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
731                                               (__v4df)_mm256_cvtepu64_pd(__A),
732                                               (__v4df)_mm256_setzero_pd());
733 }
734
735 static __inline__ __m128 __DEFAULT_FN_ATTRS128
736 _mm_cvtepu64_ps (__m128i __A) {
737   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
738                 (__v4sf) _mm_setzero_ps(),
739                 (__mmask8) -1);
740 }
741
742 static __inline__ __m128 __DEFAULT_FN_ATTRS128
743 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
744   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
745                 (__v4sf) __W,
746                 (__mmask8) __U);
747 }
748
749 static __inline__ __m128 __DEFAULT_FN_ATTRS128
750 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
751   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
752                 (__v4sf) _mm_setzero_ps(),
753                 (__mmask8) __U);
754 }
755
756 static __inline__ __m128 __DEFAULT_FN_ATTRS256
757 _mm256_cvtepu64_ps (__m256i __A) {
758   return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
759 }
760
761 static __inline__ __m128 __DEFAULT_FN_ATTRS256
762 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
763   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
764                                              (__v4sf)_mm256_cvtepu64_ps(__A),
765                                              (__v4sf)__W);
766 }
767
768 static __inline__ __m128 __DEFAULT_FN_ATTRS256
769 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
770   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
771                                              (__v4sf)_mm256_cvtepu64_ps(__A),
772                                              (__v4sf)_mm_setzero_ps());
773 }
774
/* Unmasked form: calls __builtin_ia32_rangepd128_mask on (A) and (B) with
 * (C) cast to int, an all-zero __v2df vector and an all-ones mask.
 * NOTE(review): (C) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
780
/* Merge-masked form: calls __builtin_ia32_rangepd128_mask on (A) and (B)
 * with (C) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
786
/* Zero-masked form: calls __builtin_ia32_rangepd128_mask on (A) and (B)
 * with (C) cast to int, an all-zero __v2df vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
792
/* Unmasked form: calls __builtin_ia32_rangepd256_mask on (A) and (B) with
 * (C) cast to int, an all-zero __v4df vector and an all-ones mask.
 * NOTE(review): (C) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
798
/* Merge-masked form: calls __builtin_ia32_rangepd256_mask on (A) and (B)
 * with (C) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
804
/* Zero-masked form: calls __builtin_ia32_rangepd256_mask on (A) and (B)
 * with (C) cast to int, an all-zero __v4df vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
810
/* Unmasked form: calls __builtin_ia32_rangeps128_mask on (A) and (B) with
 * (C) cast to int, an all-zero __v4sf vector and an all-ones mask.
 * NOTE(review): (C) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
816
/* Merge-masked form: calls __builtin_ia32_rangeps128_mask on (A) and (B)
 * with (C) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
821
/* Zero-masked form: calls __builtin_ia32_rangeps128_mask on (A) and (B)
 * with (C) cast to int, an all-zero __v4sf vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0f --
 * confirm. */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
827
/* Unmasked form: calls __builtin_ia32_rangeps256_mask on (A) and (B) with
 * (C) cast to int, an all-zero __v8sf vector and an all-ones mask.
 * NOTE(review): (C) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
833
/* Merge-masked form: calls __builtin_ia32_rangeps256_mask on (A) and (B)
 * with (C) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
838
/* Zero-masked form: calls __builtin_ia32_rangeps256_mask on (A) and (B)
 * with (C) cast to int, an all-zero __v8sf vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0f --
 * confirm. */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
844
/* Unmasked form: calls __builtin_ia32_reducepd128_mask on (A) with (B) cast
 * to int, an all-zero __v2df vector and an all-ones mask.
 * NOTE(review): (B) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))
849
/* Merge-masked form: calls __builtin_ia32_reducepd128_mask on (A) with (B)
 * cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
854
/* Zero-masked form: calls __builtin_ia32_reducepd128_mask on (A) with (B)
 * cast to int, an all-zero __v2df vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), (int)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
859
/* Unmasked form: calls __builtin_ia32_reducepd256_mask on (A) with (B) cast
 * to int, an all-zero __v4df vector and an all-ones mask.
 * NOTE(review): (B) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), (int)(B), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))
864
/* Merge-masked form: calls __builtin_ia32_reducepd256_mask on (A) with (B)
 * cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), (int)(B), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))
869
/* Zero-masked form: calls __builtin_ia32_reducepd256_mask on (A) with (B)
 * cast to int, an all-zero __v4df vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), (int)(B), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))
874
/* Unmasked form: calls __builtin_ia32_reduceps128_mask on (A) with (B) cast
 * to int, an all-zero __v4sf vector and an all-ones mask.
 * NOTE(review): (B) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), (int)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))
879
/* Merge-masked form: calls __builtin_ia32_reduceps128_mask on (A) with (B)
 * cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), (int)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U)))
884
/* Zero-masked form: calls __builtin_ia32_reduceps128_mask on (A) with (B)
 * cast to int, an all-zero __v4sf vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0f --
 * confirm. */
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), (int)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)))
889
/* Unmasked form: calls __builtin_ia32_reduceps256_mask on (A) with (B) cast
 * to int, an all-zero __v8sf vector and an all-ones mask.
 * NOTE(review): (B) is presumably an instruction immediate and must be a
 * compile-time constant -- confirm. */
#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), (int)(B), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1))
894
/* Merge-masked form: calls __builtin_ia32_reduceps256_mask on (A) with (B)
 * cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), (int)(B), \
      (__v8sf)(__m256)(W), (__mmask8)(U)))
899
/* Zero-masked form: calls __builtin_ia32_reduceps256_mask on (A) with (B)
 * cast to int, an all-zero __v8sf vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0f --
 * confirm. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), (int)(B), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)))
904
905 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
906 _mm_movepi32_mask (__m128i __A)
907 {
908   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
909 }
910
911 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
912 _mm256_movepi32_mask (__m256i __A)
913 {
914   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
915 }
916
917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
918 _mm_movm_epi32 (__mmask8 __A)
919 {
920   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
921 }
922
923 static __inline__ __m256i __DEFAULT_FN_ATTRS256
924 _mm256_movm_epi32 (__mmask8 __A)
925 {
926   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
927 }
928
929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
930 _mm_movm_epi64 (__mmask8 __A)
931 {
932   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
933 }
934
935 static __inline__ __m256i __DEFAULT_FN_ATTRS256
936 _mm256_movm_epi64 (__mmask8 __A)
937 {
938   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
939 }
940
941 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
942 _mm_movepi64_mask (__m128i __A)
943 {
944   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
945 }
946
947 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
948 _mm256_movepi64_mask (__m256i __A)
949 {
950   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
951 }
952
953 static __inline__ __m256 __DEFAULT_FN_ATTRS256
954 _mm256_broadcast_f32x2 (__m128 __A)
955 {
956   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
957                                          0, 1, 0, 1, 0, 1, 0, 1);
958 }
959
960 static __inline__ __m256 __DEFAULT_FN_ATTRS256
961 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
962 {
963   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
964                                              (__v8sf)_mm256_broadcast_f32x2(__A),
965                                              (__v8sf)__O);
966 }
967
968 static __inline__ __m256 __DEFAULT_FN_ATTRS256
969 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
970 {
971   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
972                                              (__v8sf)_mm256_broadcast_f32x2(__A),
973                                              (__v8sf)_mm256_setzero_ps());
974 }
975
976 static __inline__ __m256d __DEFAULT_FN_ATTRS256
977 _mm256_broadcast_f64x2(__m128d __A)
978 {
979   return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
980                                           0, 1, 0, 1);
981 }
982
983 static __inline__ __m256d __DEFAULT_FN_ATTRS256
984 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
985 {
986   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
987                                             (__v4df)_mm256_broadcast_f64x2(__A),
988                                             (__v4df)__O);
989 }
990
991 static __inline__ __m256d __DEFAULT_FN_ATTRS256
992 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
993 {
994   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
995                                             (__v4df)_mm256_broadcast_f64x2(__A),
996                                             (__v4df)_mm256_setzero_pd());
997 }
998
999 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1000 _mm_broadcast_i32x2 (__m128i __A)
1001 {
1002   return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1003                                           0, 1, 0, 1);
1004 }
1005
1006 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1007 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1008 {
1009   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1010                                              (__v4si)_mm_broadcast_i32x2(__A),
1011                                              (__v4si)__O);
1012 }
1013
1014 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1015 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1016 {
1017   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
1018                                              (__v4si)_mm_broadcast_i32x2(__A),
1019                                              (__v4si)_mm_setzero_si128());
1020 }
1021
1022 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1023 _mm256_broadcast_i32x2 (__m128i __A)
1024 {
1025   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1026                                           0, 1, 0, 1, 0, 1, 0, 1);
1027 }
1028
1029 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1030 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1031 {
1032   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1033                                              (__v8si)_mm256_broadcast_i32x2(__A),
1034                                              (__v8si)__O);
1035 }
1036
1037 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1038 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1039 {
1040   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
1041                                              (__v8si)_mm256_broadcast_i32x2(__A),
1042                                              (__v8si)_mm256_setzero_si256());
1043 }
1044
1045 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1046 _mm256_broadcast_i64x2(__m128i __A)
1047 {
1048   return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1049                                           0, 1, 0, 1);
1050 }
1051
1052 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1053 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1054 {
1055   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1056                                             (__v4di)_mm256_broadcast_i64x2(__A),
1057                                             (__v4di)__O);
1058 }
1059
1060 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1061 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1062 {
1063   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1064                                             (__v4di)_mm256_broadcast_i64x2(__A),
1065                                             (__v4di)_mm256_setzero_si256());
1066 }
1067
/* Unmasked form: calls __builtin_ia32_extractf64x2_256_mask on (A) with
 * (imm) cast to int, the value of _mm_undefined_pd() as the accompanying
 * vector and an all-ones mask.
 * NOTE(review): (imm) presumably selects which 128-bit half of (A) is
 * returned and must be a compile-time constant -- confirm. */
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_undefined_pd(), (__mmask8)-1))
1073
/* Merge-masked form: calls __builtin_ia32_extractf64x2_256_mask on (A) with
 * (imm) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))
1079
/* Zero-masked form: calls __builtin_ia32_extractf64x2_256_mask on (A) with
 * (imm) cast to int, an all-zero __v2df vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))
1085
/* Unmasked form: calls __builtin_ia32_extracti64x2_256_mask on (A) with
 * (imm) cast to int, the value of _mm_undefined_si128() as the accompanying
 * vector and an all-ones mask.
 * NOTE(review): (imm) presumably selects which 128-bit half of (A) is
 * returned and must be a compile-time constant -- confirm. */
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)_mm_undefined_si128(), (__mmask8)-1))
1091
/* Merge-masked form: calls __builtin_ia32_extracti64x2_256_mask on (A) with
 * (imm) cast to int, (W) as the accompanying vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) are taken from
 * (W) -- confirm. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)(__m128i)(W), (__mmask8)(U)))
1097
/* Zero-masked form: calls __builtin_ia32_extracti64x2_256_mask on (A) with
 * (imm) cast to int, an all-zero __v2di vector and (U) as mask.
 * NOTE(review): presumably lanes with a clear bit in (U) become 0 --
 * confirm. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
1103
/* Calls __builtin_ia32_insertf64x2_256 with the 256-bit (A), the 128-bit (B)
 * and (imm) cast to int; the result is a __m256d.
 * NOTE(review): (imm) presumably selects which 128-bit half of (A) is
 * replaced by (B) and must be a compile-time constant -- confirm. */
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256( \
      (__v4df)(__m256d)(A), (__v2df)(__m128d)(B), (int)(imm)))
1107
/* Merge-masked variant: passes the result of _mm256_insertf64x2(A, B, imm)
 * and (W) to __builtin_ia32_selectpd_256 under mask (U).
 * NOTE(review): presumably lanes with a clear bit in (U) keep the value
 * from (W) -- confirm. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))
1112
/* Zero-masked variant: passes the result of _mm256_insertf64x2(A, B, imm)
 * and an all-zero vector to __builtin_ia32_selectpd_256 under mask (U).
 * NOTE(review): presumably lanes with a clear bit in (U) become 0.0 --
 * confirm. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
1117
/* Calls __builtin_ia32_inserti64x2_256 with the 256-bit (A), the 128-bit (B)
 * and (imm) cast to int; the result is a __m256i.
 * NOTE(review): (imm) presumably selects which 128-bit half of (A) is
 * replaced by (B) and must be a compile-time constant -- confirm. */
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256( \
      (__v4di)(__m256i)(A), (__v2di)(__m128i)(B), (int)(imm)))
1121
/* Merge-masked variant: passes the result of _mm256_inserti64x2(A, B, imm)
 * and (W) to __builtin_ia32_selectq_256 under mask (U).
 * NOTE(review): presumably lanes with a clear bit in (U) keep the value
 * from (W) -- confirm. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
1126
/* Zero-masked variant: passes the result of _mm256_inserti64x2(A, B, imm)
 * and an all-zero vector to __builtin_ia32_selectq_256 under mask (U).
 * NOTE(review): presumably lanes with a clear bit in (U) become 0 --
 * confirm. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
1131
/* Masked form: calls __builtin_ia32_fpclasspd128_mask on (A) with (imm) cast
 * to int and (U) as the incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__mmask8)(U)))
1135
/* Unmasked form: calls __builtin_ia32_fpclasspd128_mask on (A) with (imm)
 * cast to int and an all-ones incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), (__mmask8)-1))
1139
/* Masked form: calls __builtin_ia32_fpclasspd256_mask on (A) with (imm) cast
 * to int and (U) as the incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__mmask8)(U)))
1143
/* Unmasked form: calls __builtin_ia32_fpclasspd256_mask on (A) with (imm)
 * cast to int and an all-ones incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), (__mmask8)-1))
1147
/* Masked form: calls __builtin_ia32_fpclassps128_mask on (A) with (imm) cast
 * to int and (U) as the incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__mmask8)(U)))
1151
/* Unmasked form: calls __builtin_ia32_fpclassps128_mask on (A) with (imm)
 * cast to int and an all-ones incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), (__mmask8)-1))
1155
/* Masked form: calls __builtin_ia32_fpclassps256_mask on (A) with (imm) cast
 * to int and (U) as the incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__mmask8)(U)))
1159
/* Unmasked form: calls __builtin_ia32_fpclassps256_mask on (A) with (imm)
 * cast to int and an all-ones incoming mask; the result is an __mmask8.
 * NOTE(review): (imm) presumably selects the floating-point categories to
 * test and must be a compile-time constant -- confirm. */
#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), (__mmask8)-1))
1163
1164 #undef __DEFAULT_FN_ATTRS128
1165 #undef __DEFAULT_FN_ATTRS256
1166
1167 #endif