]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/Headers/avx512vlintrin.h
Merge clang 7.0.1 and several follow-up changes
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / Headers / avx512vlintrin.h
1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23
24 #ifndef __IMMINTRIN_H
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
30
/* Default attribute sets for the functions defined in this header: always
 * inlined, no debug info, "avx512vl" target, and a minimum vector width of
 * 128 or 256 as indicated by the macro name. */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), \
                 __min_vector_width__(256)))

/* Narrow vector types; the __vector_size__ argument is the total vector size
 * in bytes (4, 4 and 2 respectively). */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
37
38 /* Integer compare */
39
/* Predicate-specific shorthands for _mm_cmp_epi32_mask and
 * _mm_mask_cmp_epi32_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(a, b) \
  _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(m, a, b) \
  _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
64
/* Predicate-specific shorthands for _mm256_cmp_epi32_mask and
 * _mm256_mask_cmp_epi32_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(a, b) \
  _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(m, a, b) \
  _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
89
/* Predicate-specific shorthands for _mm_cmp_epu32_mask and
 * _mm_mask_cmp_epu32_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(a, b) \
  _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(m, a, b) \
  _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
114
/* Predicate-specific shorthands for _mm256_cmp_epu32_mask and
 * _mm256_mask_cmp_epu32_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(a, b) \
  _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(m, a, b) \
  _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
139
/* Predicate-specific shorthands for _mm_cmp_epi64_mask and
 * _mm_mask_cmp_epi64_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(a, b) \
  _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(m, a, b) \
  _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
164
/* Predicate-specific shorthands for _mm256_cmp_epi64_mask and
 * _mm256_mask_cmp_epi64_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(a, b) \
  _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(m, a, b) \
  _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
189
/* Predicate-specific shorthands for _mm_cmp_epu64_mask and
 * _mm_mask_cmp_epu64_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(a, b) \
  _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(m, a, b) \
  _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
214
/* Predicate-specific shorthands for _mm256_cmp_epu64_mask and
 * _mm256_mask_cmp_epu64_mask: each one forwards its operands unchanged and
 * supplies a fixed _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(a, b) \
  _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(m, a, b) \
  _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
239
240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
241 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
242 {
243   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244                                              (__v8si)_mm256_add_epi32(__A, __B),
245                                              (__v8si)__W);
246 }
247
248 static __inline__ __m256i __DEFAULT_FN_ATTRS256
249 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
250 {
251   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252                                              (__v8si)_mm256_add_epi32(__A, __B),
253                                              (__v8si)_mm256_setzero_si256());
254 }
255
256 static __inline__ __m256i __DEFAULT_FN_ATTRS256
257 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
258 {
259   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260                                              (__v4di)_mm256_add_epi64(__A, __B),
261                                              (__v4di)__W);
262 }
263
264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
265 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
266 {
267   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268                                              (__v4di)_mm256_add_epi64(__A, __B),
269                                              (__v4di)_mm256_setzero_si256());
270 }
271
272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
273 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
274 {
275   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276                                              (__v8si)_mm256_sub_epi32(__A, __B),
277                                              (__v8si)__W);
278 }
279
280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
281 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
282 {
283   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284                                              (__v8si)_mm256_sub_epi32(__A, __B),
285                                              (__v8si)_mm256_setzero_si256());
286 }
287
288 static __inline__ __m256i __DEFAULT_FN_ATTRS256
289 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
290 {
291   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292                                              (__v4di)_mm256_sub_epi64(__A, __B),
293                                              (__v4di)__W);
294 }
295
296 static __inline__ __m256i __DEFAULT_FN_ATTRS256
297 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
298 {
299   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300                                              (__v4di)_mm256_sub_epi64(__A, __B),
301                                              (__v4di)_mm256_setzero_si256());
302 }
303
304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
305 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
306 {
307   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308                                              (__v4si)_mm_add_epi32(__A, __B),
309                                              (__v4si)__W);
310 }
311
312 static __inline__ __m128i __DEFAULT_FN_ATTRS128
313 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
314 {
315   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316                                              (__v4si)_mm_add_epi32(__A, __B),
317                                              (__v4si)_mm_setzero_si128());
318 }
319
320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
321 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
322 {
323   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324                                              (__v2di)_mm_add_epi64(__A, __B),
325                                              (__v2di)__W);
326 }
327
328 static __inline__ __m128i __DEFAULT_FN_ATTRS128
329 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
330 {
331   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332                                              (__v2di)_mm_add_epi64(__A, __B),
333                                              (__v2di)_mm_setzero_si128());
334 }
335
336 static __inline__ __m128i __DEFAULT_FN_ATTRS128
337 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
338 {
339   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340                                              (__v4si)_mm_sub_epi32(__A, __B),
341                                              (__v4si)__W);
342 }
343
344 static __inline__ __m128i __DEFAULT_FN_ATTRS128
345 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
346 {
347   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348                                              (__v4si)_mm_sub_epi32(__A, __B),
349                                              (__v4si)_mm_setzero_si128());
350 }
351
352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
353 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
354 {
355   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356                                              (__v2di)_mm_sub_epi64(__A, __B),
357                                              (__v2di)__W);
358 }
359
360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
361 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
362 {
363   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364                                              (__v2di)_mm_sub_epi64(__A, __B),
365                                              (__v2di)_mm_setzero_si128());
366 }
367
368 static __inline__ __m256i __DEFAULT_FN_ATTRS256
369 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
370 {
371   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372                                              (__v4di)_mm256_mul_epi32(__X, __Y),
373                                              (__v4di)__W);
374 }
375
376 static __inline__ __m256i __DEFAULT_FN_ATTRS256
377 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
378 {
379   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380                                              (__v4di)_mm256_mul_epi32(__X, __Y),
381                                              (__v4di)_mm256_setzero_si256());
382 }
383
384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
385 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
386 {
387   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388                                              (__v2di)_mm_mul_epi32(__X, __Y),
389                                              (__v2di)__W);
390 }
391
392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
393 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
394 {
395   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396                                              (__v2di)_mm_mul_epi32(__X, __Y),
397                                              (__v2di)_mm_setzero_si128());
398 }
399
400 static __inline__ __m256i __DEFAULT_FN_ATTRS256
401 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
402 {
403   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404                                              (__v4di)_mm256_mul_epu32(__X, __Y),
405                                              (__v4di)__W);
406 }
407
408 static __inline__ __m256i __DEFAULT_FN_ATTRS256
409 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
410 {
411   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412                                              (__v4di)_mm256_mul_epu32(__X, __Y),
413                                              (__v4di)_mm256_setzero_si256());
414 }
415
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
418 {
419   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420                                              (__v2di)_mm_mul_epu32(__X, __Y),
421                                              (__v2di)__W);
422 }
423
424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
425 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
426 {
427   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428                                              (__v2di)_mm_mul_epu32(__X, __Y),
429                                              (__v2di)_mm_setzero_si128());
430 }
431
432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
433 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
434 {
435   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436                                              (__v8si)_mm256_mullo_epi32(__A, __B),
437                                              (__v8si)_mm256_setzero_si256());
438 }
439
440 static __inline__ __m256i __DEFAULT_FN_ATTRS256
441 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
442 {
443   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
444                                              (__v8si)_mm256_mullo_epi32(__A, __B),
445                                              (__v8si)__W);
446 }
447
448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
449 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
450 {
451   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452                                              (__v4si)_mm_mullo_epi32(__A, __B),
453                                              (__v4si)_mm_setzero_si128());
454 }
455
456 static __inline__ __m128i __DEFAULT_FN_ATTRS128
457 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
458 {
459   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
460                                              (__v4si)_mm_mullo_epi32(__A, __B),
461                                              (__v4si)__W);
462 }
463
464 static __inline__ __m256i __DEFAULT_FN_ATTRS256
465 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
466 {
467   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
468                                              (__v8si)_mm256_and_si256(__A, __B),
469                                              (__v8si)__W);
470 }
471
472 static __inline__ __m256i __DEFAULT_FN_ATTRS256
473 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
474 {
475   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
476 }
477
478 static __inline__ __m128i __DEFAULT_FN_ATTRS128
479 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
480 {
481   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
482                                              (__v4si)_mm_and_si128(__A, __B),
483                                              (__v4si)__W);
484 }
485
486 static __inline__ __m128i __DEFAULT_FN_ATTRS128
487 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
488 {
489   return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
490 }
491
492 static __inline__ __m256i __DEFAULT_FN_ATTRS256
493 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
494 {
495   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
496                                           (__v8si)_mm256_andnot_si256(__A, __B),
497                                           (__v8si)__W);
498 }
499
500 static __inline__ __m256i __DEFAULT_FN_ATTRS256
501 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
502 {
503   return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
504                                            __U, __A, __B);
505 }
506
507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
508 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
509 {
510   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
511                                              (__v4si)_mm_andnot_si128(__A, __B),
512                                              (__v4si)__W);
513 }
514
515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
516 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
517 {
518   return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
519 }
520
521 static __inline__ __m256i __DEFAULT_FN_ATTRS256
522 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
523 {
524   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
525                                              (__v8si)_mm256_or_si256(__A, __B),
526                                              (__v8si)__W);
527 }
528
529 static __inline__ __m256i __DEFAULT_FN_ATTRS256
530 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
531 {
532   return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
533 }
534
535 static __inline__ __m128i __DEFAULT_FN_ATTRS128
536 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
537 {
538   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
539                                              (__v4si)_mm_or_si128(__A, __B),
540                                              (__v4si)__W);
541 }
542
543 static __inline__ __m128i __DEFAULT_FN_ATTRS128
544 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
545 {
546   return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
547 }
548
549 static __inline__ __m256i __DEFAULT_FN_ATTRS256
550 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
551 {
552   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
553                                              (__v8si)_mm256_xor_si256(__A, __B),
554                                              (__v8si)__W);
555 }
556
557 static __inline__ __m256i __DEFAULT_FN_ATTRS256
558 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
559 {
560   return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
561 }
562
563 static __inline__ __m128i __DEFAULT_FN_ATTRS128
564 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
565         __m128i __B)
566 {
567   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
568                                              (__v4si)_mm_xor_si128(__A, __B),
569                                              (__v4si)__W);
570 }
571
572 static __inline__ __m128i __DEFAULT_FN_ATTRS128
573 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
574 {
575   return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
576 }
577
578 static __inline__ __m256i __DEFAULT_FN_ATTRS256
579 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
580 {
581   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
582                                              (__v4di)_mm256_and_si256(__A, __B),
583                                              (__v4di)__W);
584 }
585
586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
587 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
588 {
589   return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
590 }
591
592 static __inline__ __m128i __DEFAULT_FN_ATTRS128
593 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
594 {
595   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
596                                              (__v2di)_mm_and_si128(__A, __B),
597                                              (__v2di)__W);
598 }
599
600 static __inline__ __m128i __DEFAULT_FN_ATTRS128
601 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
602 {
603   return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
604 }
605
606 static __inline__ __m256i __DEFAULT_FN_ATTRS256
607 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
608 {
609   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
610                                           (__v4di)_mm256_andnot_si256(__A, __B),
611                                           (__v4di)__W);
612 }
613
614 static __inline__ __m256i __DEFAULT_FN_ATTRS256
615 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
616 {
617   return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
618                                            __U, __A, __B);
619 }
620
621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
622 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
623 {
624   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
625                                              (__v2di)_mm_andnot_si128(__A, __B),
626                                              (__v2di)__W);
627 }
628
629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
630 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
631 {
632   return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
633 }
634
635 static __inline__ __m256i __DEFAULT_FN_ATTRS256
636 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
637 {
638   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
639                                              (__v4di)_mm256_or_si256(__A, __B),
640                                              (__v4di)__W);
641 }
642
643 static __inline__ __m256i __DEFAULT_FN_ATTRS256
644 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
645 {
646   return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
647 }
648
649 static __inline__ __m128i __DEFAULT_FN_ATTRS128
650 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
651 {
652   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
653                                              (__v2di)_mm_or_si128(__A, __B),
654                                              (__v2di)__W);
655 }
656
657 static __inline__ __m128i __DEFAULT_FN_ATTRS128
658 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
659 {
660   return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
661 }
662
663 static __inline__ __m256i __DEFAULT_FN_ATTRS256
664 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
665 {
666   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
667                                              (__v4di)_mm256_xor_si256(__A, __B),
668                                              (__v4di)__W);
669 }
670
671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
672 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
673 {
674   return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
675 }
676
677 static __inline__ __m128i __DEFAULT_FN_ATTRS128
678 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
679         __m128i __B)
680 {
681   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
682                                              (__v2di)_mm_xor_si128(__A, __B),
683                                              (__v2di)__W);
684 }
685
686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
687 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
688 {
689   return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
690 }
691
/* Generic compare of a and b (each converted to __m128i and viewed as
 * __v4si) with predicate p, via __builtin_ia32_cmpd128_mask. The result is
 * cast to __mmask8.
 *   _mm_cmp_epi32_mask      passes (__mmask8)-1 as the mask operand.
 *   _mm_mask_cmp_epi32_mask passes the caller's mask m.
 * The whole expansion is parenthesized so that the leading cast cannot be
 * split from the call by sizeof or a postfix operator at the use site. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
701
/* Calls __builtin_ia32_ucmpd128_mask on a and b (as __v4si) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
706
/* Calls __builtin_ia32_ucmpd128_mask on a and b (as __v4si) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
711
/* Calls __builtin_ia32_cmpd256_mask on a and b (as __v8si) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
716
/* Calls __builtin_ia32_cmpd256_mask on a and b (as __v8si) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
721
/* Calls __builtin_ia32_ucmpd256_mask on a and b (as __v8si) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
726
/* Calls __builtin_ia32_ucmpd256_mask on a and b (as __v8si) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
731
/* Calls __builtin_ia32_cmpq128_mask on a and b (as __v2di) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))
736
/* Calls __builtin_ia32_cmpq128_mask on a and b (as __v2di) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
741
/* Calls __builtin_ia32_ucmpq128_mask on a and b (as __v2di) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
746
/* Calls __builtin_ia32_ucmpq128_mask on a and b (as __v2di) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
751
/* Calls __builtin_ia32_cmpq256_mask on a and b (as __v4di) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
756
/* Calls __builtin_ia32_cmpq256_mask on a and b (as __v4di) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
761
/* Calls __builtin_ia32_ucmpq256_mask on a and b (as __v4di) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
766
/* Calls __builtin_ia32_ucmpq256_mask on a and b (as __v4di) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
771
/* Calls __builtin_ia32_cmpps256_mask on a and b (as __v8sf) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))
776
/* Calls __builtin_ia32_cmpps256_mask on a and b (as __v8sf) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))
781
/* Calls __builtin_ia32_cmppd256_mask on a and b (as __v4df) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm256_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))
786
/* Calls __builtin_ia32_cmppd256_mask on a and b (as __v4df) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
791
/* Calls __builtin_ia32_cmpps128_mask on a and b (as __v4sf) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))
796
/* Calls __builtin_ia32_cmpps128_mask on a and b (as __v4sf) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))
801
/* Calls __builtin_ia32_cmppd128_mask on a and b (as __v2df) with int p and
 * (__mmask8)-1 as mask operand; the expansion is parenthesized as one unit. */
#define _mm_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))
806
/* Calls __builtin_ia32_cmppd128_mask on a and b (as __v2df) with int p and
 * m (as __mmask8) as mask operand; the expansion is parenthesized as one
 * unit. */
#define _mm_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
811
812 static __inline__ __m128d __DEFAULT_FN_ATTRS128
813 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
814 {
815   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
816                     __builtin_ia32_vfmaddpd ((__v2df) __A,
817                                              (__v2df) __B,
818                                              (__v2df) __C),
819                     (__v2df) __A);
820 }
821
822 static __inline__ __m128d __DEFAULT_FN_ATTRS128
823 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
824 {
825   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
826                     __builtin_ia32_vfmaddpd ((__v2df) __A,
827                                              (__v2df) __B,
828                                              (__v2df) __C),
829                     (__v2df) __C);
830 }
831
832 static __inline__ __m128d __DEFAULT_FN_ATTRS128
833 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
834 {
835   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
836                     __builtin_ia32_vfmaddpd ((__v2df) __A,
837                                              (__v2df) __B,
838                                              (__v2df) __C),
839                     (__v2df)_mm_setzero_pd());
840 }
841
842 static __inline__ __m128d __DEFAULT_FN_ATTRS128
843 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
844 {
845   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
846                     __builtin_ia32_vfmaddpd ((__v2df) __A,
847                                              (__v2df) __B,
848                                              -(__v2df) __C),
849                     (__v2df) __A);
850 }
851
852 static __inline__ __m128d __DEFAULT_FN_ATTRS128
853 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
854 {
855   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
856                     __builtin_ia32_vfmaddpd ((__v2df) __A,
857                                              (__v2df) __B,
858                                              -(__v2df) __C),
859                     (__v2df)_mm_setzero_pd());
860 }
861
862 static __inline__ __m128d __DEFAULT_FN_ATTRS128
863 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
864 {
865   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
866                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
867                                              (__v2df) __B,
868                                              (__v2df) __C),
869                     (__v2df) __C);
870 }
871
872 static __inline__ __m128d __DEFAULT_FN_ATTRS128
873 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
874 {
875   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
876                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
877                                              (__v2df) __B,
878                                              (__v2df) __C),
879                     (__v2df)_mm_setzero_pd());
880 }
881
882 static __inline__ __m128d __DEFAULT_FN_ATTRS128
883 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
884 {
885   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
886                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
887                                              (__v2df) __B,
888                                              -(__v2df) __C),
889                     (__v2df)_mm_setzero_pd());
890 }
891
892 static __inline__ __m256d __DEFAULT_FN_ATTRS256
893 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
894 {
895   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
896                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
897                                                 (__v4df) __B,
898                                                 (__v4df) __C),
899                     (__v4df) __A);
900 }
901
902 static __inline__ __m256d __DEFAULT_FN_ATTRS256
903 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
904 {
905   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
906                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
907                                                 (__v4df) __B,
908                                                 (__v4df) __C),
909                     (__v4df) __C);
910 }
911
912 static __inline__ __m256d __DEFAULT_FN_ATTRS256
913 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
914 {
915   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
916                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
917                                                 (__v4df) __B,
918                                                 (__v4df) __C),
919                     (__v4df)_mm256_setzero_pd());
920 }
921
922 static __inline__ __m256d __DEFAULT_FN_ATTRS256
923 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
924 {
925   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
926                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
927                                                 (__v4df) __B,
928                                                 -(__v4df) __C),
929                     (__v4df) __A);
930 }
931
932 static __inline__ __m256d __DEFAULT_FN_ATTRS256
933 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
934 {
935   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
936                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
937                                                 (__v4df) __B,
938                                                 -(__v4df) __C),
939                     (__v4df)_mm256_setzero_pd());
940 }
941
942 static __inline__ __m256d __DEFAULT_FN_ATTRS256
943 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
944 {
945   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
946                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
947                                                 (__v4df) __B,
948                                                 (__v4df) __C),
949                     (__v4df) __C);
950 }
951
952 static __inline__ __m256d __DEFAULT_FN_ATTRS256
953 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
954 {
955   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
956                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
957                                                 (__v4df) __B,
958                                                 (__v4df) __C),
959                     (__v4df)_mm256_setzero_pd());
960 }
961
962 static __inline__ __m256d __DEFAULT_FN_ATTRS256
963 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
964 {
965   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
966                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
967                                                 (__v4df) __B,
968                                                 -(__v4df) __C),
969                     (__v4df)_mm256_setzero_pd());
970 }
971
972 static __inline__ __m128 __DEFAULT_FN_ATTRS128
973 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
974 {
975   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
976                     __builtin_ia32_vfmaddps ((__v4sf) __A,
977                                              (__v4sf) __B,
978                                              (__v4sf) __C),
979                     (__v4sf) __A);
980 }
981
982 static __inline__ __m128 __DEFAULT_FN_ATTRS128
983 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
984 {
985   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
986                     __builtin_ia32_vfmaddps ((__v4sf) __A,
987                                              (__v4sf) __B,
988                                              (__v4sf) __C),
989                     (__v4sf) __C);
990 }
991
992 static __inline__ __m128 __DEFAULT_FN_ATTRS128
993 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
994 {
995   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
996                     __builtin_ia32_vfmaddps ((__v4sf) __A,
997                                              (__v4sf) __B,
998                                              (__v4sf) __C),
999                     (__v4sf)_mm_setzero_ps());
1000 }
1001
1002 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1003 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1004 {
1005   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1006                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1007                                              (__v4sf) __B,
1008                                              -(__v4sf) __C),
1009                     (__v4sf) __A);
1010 }
1011
1012 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1013 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1014 {
1015   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1016                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1017                                              (__v4sf) __B,
1018                                              -(__v4sf) __C),
1019                     (__v4sf)_mm_setzero_ps());
1020 }
1021
1022 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1023 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1024 {
1025   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1026                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1027                                              (__v4sf) __B,
1028                                              (__v4sf) __C),
1029                     (__v4sf) __C);
1030 }
1031
1032 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1033 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1034 {
1035   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1036                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1037                                              (__v4sf) __B,
1038                                              (__v4sf) __C),
1039                     (__v4sf)_mm_setzero_ps());
1040 }
1041
1042 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1043 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1044 {
1045   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1046                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1047                                              (__v4sf) __B,
1048                                              -(__v4sf) __C),
1049                     (__v4sf)_mm_setzero_ps());
1050 }
1051
1052 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1053 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1054 {
1055   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1056                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1057                                                 (__v8sf) __B,
1058                                                 (__v8sf) __C),
1059                     (__v8sf) __A);
1060 }
1061
1062 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1063 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1064 {
1065   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1066                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1067                                                 (__v8sf) __B,
1068                                                 (__v8sf) __C),
1069                     (__v8sf) __C);
1070 }
1071
1072 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1073 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1074 {
1075   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1076                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1077                                                 (__v8sf) __B,
1078                                                 (__v8sf) __C),
1079                     (__v8sf)_mm256_setzero_ps());
1080 }
1081
1082 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1083 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1084 {
1085   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1086                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1087                                                 (__v8sf) __B,
1088                                                 -(__v8sf) __C),
1089                     (__v8sf) __A);
1090 }
1091
1092 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1093 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1094 {
1095   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1096                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1097                                                 (__v8sf) __B,
1098                                                 -(__v8sf) __C),
1099                     (__v8sf)_mm256_setzero_ps());
1100 }
1101
1102 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1103 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1104 {
1105   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1106                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1107                                                 (__v8sf) __B,
1108                                                 (__v8sf) __C),
1109                     (__v8sf) __C);
1110 }
1111
1112 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1113 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1114 {
1115   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1116                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1117                                                 (__v8sf) __B,
1118                                                 (__v8sf) __C),
1119                     (__v8sf)_mm256_setzero_ps());
1120 }
1121
1122 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1123 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1124 {
1125   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1126                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1127                                                 (__v8sf) __B,
1128                                                 -(__v8sf) __C),
1129                     (__v8sf)_mm256_setzero_ps());
1130 }
1131
1132 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1133 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1134 {
1135   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1136                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1137                                                 (__v2df) __B,
1138                                                 (__v2df) __C),
1139                     (__v2df) __A);
1140 }
1141
1142 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1143 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1144 {
1145   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1146                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1147                                                 (__v2df) __B,
1148                                                 (__v2df) __C),
1149                     (__v2df) __C);
1150 }
1151
1152 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1153 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1154 {
1155   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1156                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1157                                                 (__v2df) __B,
1158                                                 (__v2df) __C),
1159                     (__v2df)_mm_setzero_pd());
1160 }
1161
1162 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1163 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1164 {
1165   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1166                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1167                                                 (__v2df) __B,
1168                                                 -(__v2df) __C),
1169                     (__v2df) __A);
1170 }
1171
1172 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1173 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1174 {
1175   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1176                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1177                                                 (__v2df) __B,
1178                                                 -(__v2df) __C),
1179                     (__v2df)_mm_setzero_pd());
1180 }
1181
1182 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1183 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1184 {
1185   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1186                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1187                                                    (__v4df) __B,
1188                                                    (__v4df) __C),
1189                     (__v4df) __A);
1190 }
1191
1192 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1193 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1194 {
1195   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1196                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1197                                                    (__v4df) __B,
1198                                                    (__v4df) __C),
1199                     (__v4df) __C);
1200 }
1201
1202 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1203 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1204 {
1205   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1206                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1207                                                    (__v4df) __B,
1208                                                    (__v4df) __C),
1209                     (__v4df)_mm256_setzero_pd());
1210 }
1211
1212 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1213 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1214 {
1215   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1216                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1217                                                    (__v4df) __B,
1218                                                    -(__v4df) __C),
1219                     (__v4df) __A);
1220 }
1221
1222 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1223 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1224 {
1225   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1226                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1227                                                    (__v4df) __B,
1228                                                    -(__v4df) __C),
1229                     (__v4df)_mm256_setzero_pd());
1230 }
1231
1232 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1233 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1234 {
1235   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1236                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1237                                                 (__v4sf) __B,
1238                                                 (__v4sf) __C),
1239                     (__v4sf) __A);
1240 }
1241
1242 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1243 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1244 {
1245   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1246                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1247                                                 (__v4sf) __B,
1248                                                 (__v4sf) __C),
1249                     (__v4sf) __C);
1250 }
1251
1252 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1253 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1254 {
1255   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1256                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1257                                                 (__v4sf) __B,
1258                                                 (__v4sf) __C),
1259                     (__v4sf)_mm_setzero_ps());
1260 }
1261
1262 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1263 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1264 {
1265   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1266                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1267                                                 (__v4sf) __B,
1268                                                 -(__v4sf) __C),
1269                     (__v4sf) __A);
1270 }
1271
1272 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1273 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1274 {
1275   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1276                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1277                                                 (__v4sf) __B,
1278                                                 -(__v4sf) __C),
1279                     (__v4sf)_mm_setzero_ps());
1280 }
1281
1282 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1283 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1284                          __m256 __C)
1285 {
1286   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1287                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1288                                                    (__v8sf) __B,
1289                                                    (__v8sf) __C),
1290                     (__v8sf) __A);
1291 }
1292
1293 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1294 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1295 {
1296   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1297                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1298                                                    (__v8sf) __B,
1299                                                    (__v8sf) __C),
1300                     (__v8sf) __C);
1301 }
1302
1303 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1304 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1305 {
1306   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1307                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1308                                                    (__v8sf) __B,
1309                                                    (__v8sf) __C),
1310                     (__v8sf)_mm256_setzero_ps());
1311 }
1312
1313 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1314 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1315 {
1316   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1317                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1318                                                    (__v8sf) __B,
1319                                                    -(__v8sf) __C),
1320                     (__v8sf) __A);
1321 }
1322
1323 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1324 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1325 {
1326   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1327                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1328                                                    (__v8sf) __B,
1329                                                    -(__v8sf) __C),
1330                     (__v8sf)_mm256_setzero_ps());
1331 }
1332
1333 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1334 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1335 {
1336   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1337                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1338                                              (__v2df) __B,
1339                                              -(__v2df) __C),
1340                     (__v2df) __C);
1341 }
1342
1343 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1344 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1345 {
1346   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1347                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1348                                                 (__v4df) __B,
1349                                                 -(__v4df) __C),
1350                     (__v4df) __C);
1351 }
1352
1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1355 {
1356   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1358                                              (__v4sf) __B,
1359                                              -(__v4sf) __C),
1360                     (__v4sf) __C);
1361 }
1362
1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1365 {
1366   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1367                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1368                                                 (__v8sf) __B,
1369                                                 -(__v8sf) __C),
1370                     (__v8sf) __C);
1371 }
1372
1373 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1374 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1375 {
1376   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1377                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1378                                                 (__v2df) __B,
1379                                                 -(__v2df) __C),
1380                     (__v2df) __C);
1381 }
1382
1383 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1384 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1385 {
1386   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1387                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1388                                                    (__v4df) __B,
1389                                                    -(__v4df) __C),
1390                     (__v4df) __C);
1391 }
1392
1393 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1394 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1395 {
1396   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1397                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1398                                                 (__v4sf) __B,
1399                                                 -(__v4sf) __C),
1400                     (__v4sf) __C);
1401 }
1402
1403 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1404 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1405 {
1406   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1407                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1408                                                    (__v8sf) __B,
1409                                                    -(__v8sf) __C),
1410                     (__v8sf) __C);
1411 }
1412
1413 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1414 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1415 {
1416   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1417                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1418                                              -(__v2df) __B,
1419                                              (__v2df) __C),
1420                     (__v2df) __A);
1421 }
1422
1423 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1424 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1425 {
1426   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1427                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1428                                                 -(__v4df) __B,
1429                                                 (__v4df) __C),
1430                     (__v4df) __A);
1431 }
1432
1433 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1434 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1435 {
1436   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1437                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1438                                              -(__v4sf) __B,
1439                                              (__v4sf) __C),
1440                     (__v4sf) __A);
1441 }
1442
1443 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1444 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1445 {
1446   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1447                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1448                                                 -(__v8sf) __B,
1449                                                 (__v8sf) __C),
1450                     (__v8sf) __A);
1451 }
1452
1453 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1454 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1455 {
1456   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1457                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1458                                              -(__v2df) __B,
1459                                              -(__v2df) __C),
1460                     (__v2df) __A);
1461 }
1462
1463 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1464 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1465 {
1466   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1467                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1468                                              -(__v2df) __B,
1469                                              -(__v2df) __C),
1470                     (__v2df) __C);
1471 }
1472
1473 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1474 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1475 {
1476   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1477                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1478                                                 -(__v4df) __B,
1479                                                 -(__v4df) __C),
1480                     (__v4df) __A);
1481 }
1482
1483 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1484 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1485 {
1486   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1487                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1488                                                 -(__v4df) __B,
1489                                                 -(__v4df) __C),
1490                     (__v4df) __C);
1491 }
1492
1493 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1494 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1495 {
1496   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1497                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1498                                              -(__v4sf) __B,
1499                                              -(__v4sf) __C),
1500                     (__v4sf) __A);
1501 }
1502
1503 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1504 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1505 {
1506   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1507                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1508                                              -(__v4sf) __B,
1509                                              -(__v4sf) __C),
1510                     (__v4sf) __C);
1511 }
1512
1513 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1514 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1515 {
1516   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1517                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1518                                                 -(__v8sf) __B,
1519                                                 -(__v8sf) __C),
1520                     (__v8sf) __A);
1521 }
1522
1523 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1524 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1525 {
1526   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1527                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1528                                                 -(__v8sf) __B,
1529                                                 -(__v8sf) __C),
1530                     (__v8sf) __C);
1531 }
1532
1533 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1534 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1535   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1536                                               (__v2df)_mm_add_pd(__A, __B),
1537                                               (__v2df)__W);
1538 }
1539
1540 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1541 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1542   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1543                                               (__v2df)_mm_add_pd(__A, __B),
1544                                               (__v2df)_mm_setzero_pd());
1545 }
1546
1547 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1548 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1549   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1550                                               (__v4df)_mm256_add_pd(__A, __B),
1551                                               (__v4df)__W);
1552 }
1553
1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1556   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1557                                               (__v4df)_mm256_add_pd(__A, __B),
1558                                               (__v4df)_mm256_setzero_pd());
1559 }
1560
1561 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1562 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1563   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1564                                              (__v4sf)_mm_add_ps(__A, __B),
1565                                              (__v4sf)__W);
1566 }
1567
1568 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1569 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1570   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1571                                              (__v4sf)_mm_add_ps(__A, __B),
1572                                              (__v4sf)_mm_setzero_ps());
1573 }
1574
1575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1576 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1577   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1578                                              (__v8sf)_mm256_add_ps(__A, __B),
1579                                              (__v8sf)__W);
1580 }
1581
1582 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1583 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1584   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1585                                              (__v8sf)_mm256_add_ps(__A, __B),
1586                                              (__v8sf)_mm256_setzero_ps());
1587 }
1588
1589 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1590 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1591   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1592                 (__v4si) __W,
1593                 (__v4si) __A);
1594 }
1595
1596 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1597 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1598   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1599                 (__v8si) __W,
1600                 (__v8si) __A);
1601 }
1602
1603 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1604 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1605   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1606                  (__v2df) __W,
1607                  (__v2df) __A);
1608 }
1609
1610 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1611 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1612   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1613                  (__v4df) __W,
1614                  (__v4df) __A);
1615 }
1616
1617 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1618 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1619   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1620                 (__v4sf) __W,
1621                 (__v4sf) __A);
1622 }
1623
1624 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1625 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1626   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1627                 (__v8sf) __W,
1628                 (__v8sf) __A);
1629 }
1630
1631 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1632 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1633   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1634                 (__v2di) __W,
1635                 (__v2di) __A);
1636 }
1637
1638 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1640   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1641                 (__v4di) __W,
1642                 (__v4di) __A);
1643 }
1644
1645 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1646 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1647   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1648                   (__v2df) __W,
1649                   (__mmask8) __U);
1650 }
1651
1652 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1653 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1654   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1655                   (__v2df)
1656                   _mm_setzero_pd (),
1657                   (__mmask8) __U);
1658 }
1659
1660 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1661 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1662   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1663                   (__v4df) __W,
1664                   (__mmask8) __U);
1665 }
1666
1667 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1668 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1669   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1670                   (__v4df)
1671                   _mm256_setzero_pd (),
1672                   (__mmask8) __U);
1673 }
1674
1675 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1676 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1677   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1678                   (__v2di) __W,
1679                   (__mmask8) __U);
1680 }
1681
1682 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1683 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1684   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1685                   (__v2di)
1686                   _mm_setzero_si128 (),
1687                   (__mmask8) __U);
1688 }
1689
1690 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1691 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1692   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1693                   (__v4di) __W,
1694                   (__mmask8) __U);
1695 }
1696
1697 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1698 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1699   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1700                   (__v4di)
1701                   _mm256_setzero_si256 (),
1702                   (__mmask8) __U);
1703 }
1704
1705 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1706 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1707   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1708                  (__v4sf) __W,
1709                  (__mmask8) __U);
1710 }
1711
1712 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1713 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1714   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1715                  (__v4sf)
1716                  _mm_setzero_ps (),
1717                  (__mmask8) __U);
1718 }
1719
1720 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1721 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1722   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1723                  (__v8sf) __W,
1724                  (__mmask8) __U);
1725 }
1726
1727 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1728 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1729   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1730                  (__v8sf)
1731                  _mm256_setzero_ps (),
1732                  (__mmask8) __U);
1733 }
1734
1735 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1737   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1738                   (__v4si) __W,
1739                   (__mmask8) __U);
1740 }
1741
1742 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1744   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1745                   (__v4si)
1746                   _mm_setzero_si128 (),
1747                   (__mmask8) __U);
1748 }
1749
1750 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1751 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1752   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1753                   (__v8si) __W,
1754                   (__mmask8) __U);
1755 }
1756
1757 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1758 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1759   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1760                   (__v8si)
1761                   _mm256_setzero_si256 (),
1762                   (__mmask8) __U);
1763 }
1764
1765 static __inline__ void __DEFAULT_FN_ATTRS128
1766 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1767   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1768             (__v2df) __A,
1769             (__mmask8) __U);
1770 }
1771
1772 static __inline__ void __DEFAULT_FN_ATTRS256
1773 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1774   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1775             (__v4df) __A,
1776             (__mmask8) __U);
1777 }
1778
1779 static __inline__ void __DEFAULT_FN_ATTRS128
1780 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1781   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1782             (__v2di) __A,
1783             (__mmask8) __U);
1784 }
1785
1786 static __inline__ void __DEFAULT_FN_ATTRS256
1787 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1788   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1789             (__v4di) __A,
1790             (__mmask8) __U);
1791 }
1792
1793 static __inline__ void __DEFAULT_FN_ATTRS128
1794 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1795   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1796             (__v4sf) __A,
1797             (__mmask8) __U);
1798 }
1799
1800 static __inline__ void __DEFAULT_FN_ATTRS256
1801 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1802   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1803             (__v8sf) __A,
1804             (__mmask8) __U);
1805 }
1806
1807 static __inline__ void __DEFAULT_FN_ATTRS128
1808 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1809   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1810             (__v4si) __A,
1811             (__mmask8) __U);
1812 }
1813
1814 static __inline__ void __DEFAULT_FN_ATTRS256
1815 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1816   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1817             (__v8si) __A,
1818             (__mmask8) __U);
1819 }
1820
1821 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1822 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1823   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1824                                               (__v2df)_mm_cvtepi32_pd(__A),
1825                                               (__v2df)__W);
1826 }
1827
1828 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1829 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1830   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1831                                               (__v2df)_mm_cvtepi32_pd(__A),
1832                                               (__v2df)_mm_setzero_pd());
1833 }
1834
1835 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1836 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1837   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1838                                               (__v4df)_mm256_cvtepi32_pd(__A),
1839                                               (__v4df)__W);
1840 }
1841
1842 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1843 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1844   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1845                                               (__v4df)_mm256_cvtepi32_pd(__A),
1846                                               (__v4df)_mm256_setzero_pd());
1847 }
1848
1849 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1850 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1851   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1852                                              (__v4sf)_mm_cvtepi32_ps(__A),
1853                                              (__v4sf)__W);
1854 }
1855
1856 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1857 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1858   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1859                                              (__v4sf)_mm_cvtepi32_ps(__A),
1860                                              (__v4sf)_mm_setzero_ps());
1861 }
1862
1863 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1864 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1865   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1866                                              (__v8sf)_mm256_cvtepi32_ps(__A),
1867                                              (__v8sf)__W);
1868 }
1869
1870 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1871 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1872   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1873                                              (__v8sf)_mm256_cvtepi32_ps(__A),
1874                                              (__v8sf)_mm256_setzero_ps());
1875 }
1876
1877 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1878 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1879   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1880                 (__v4si) __W,
1881                 (__mmask8) __U);
1882 }
1883
1884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1885 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1886   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1887                 (__v4si)
1888                 _mm_setzero_si128 (),
1889                 (__mmask8) __U);
1890 }
1891
1892 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1893 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1894   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1895                                              (__v4si)_mm256_cvtpd_epi32(__A),
1896                                              (__v4si)__W);
1897 }
1898
1899 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1900 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1901   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1902                                              (__v4si)_mm256_cvtpd_epi32(__A),
1903                                              (__v4si)_mm_setzero_si128());
1904 }
1905
1906 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1907 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1908   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1909             (__v4sf) __W,
1910             (__mmask8) __U);
1911 }
1912
1913 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1914 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1915   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1916             (__v4sf)
1917             _mm_setzero_ps (),
1918             (__mmask8) __U);
1919 }
1920
1921 static __inline__ __m128 __DEFAULT_FN_ATTRS256
1922 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
1923   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1924                                              (__v4sf)_mm256_cvtpd_ps(__A),
1925                                              (__v4sf)__W);
1926 }
1927
1928 static __inline__ __m128 __DEFAULT_FN_ATTRS256
1929 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
1930   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1931                                              (__v4sf)_mm256_cvtpd_ps(__A),
1932                                              (__v4sf)_mm_setzero_ps());
1933 }
1934
1935 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1936 _mm_cvtpd_epu32 (__m128d __A) {
1937   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1938                  (__v4si)
1939                  _mm_setzero_si128 (),
1940                  (__mmask8) -1);
1941 }
1942
1943 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1944 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1945   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1946                  (__v4si) __W,
1947                  (__mmask8) __U);
1948 }
1949
1950 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1951 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
1952   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1953                  (__v4si)
1954                  _mm_setzero_si128 (),
1955                  (__mmask8) __U);
1956 }
1957
1958 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1959 _mm256_cvtpd_epu32 (__m256d __A) {
1960   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1961                  (__v4si)
1962                  _mm_setzero_si128 (),
1963                  (__mmask8) -1);
1964 }
1965
1966 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1967 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
1968   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1969                  (__v4si) __W,
1970                  (__mmask8) __U);
1971 }
1972
1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
1975   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1976                  (__v4si)
1977                  _mm_setzero_si128 (),
1978                  (__mmask8) __U);
1979 }
1980
1981 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1982 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
1983   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1984                                              (__v4si)_mm_cvtps_epi32(__A),
1985                                              (__v4si)__W);
1986 }
1987
1988 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1989 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
1990   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1991                                              (__v4si)_mm_cvtps_epi32(__A),
1992                                              (__v4si)_mm_setzero_si128());
1993 }
1994
1995 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1996 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
1997   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1998                                              (__v8si)_mm256_cvtps_epi32(__A),
1999                                              (__v8si)__W);
2000 }
2001
2002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2003 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2004   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2005                                              (__v8si)_mm256_cvtps_epi32(__A),
2006                                              (__v8si)_mm256_setzero_si256());
2007 }
2008
2009 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2010 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2011   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2012                                               (__v2df)_mm_cvtps_pd(__A),
2013                                               (__v2df)__W);
2014 }
2015
2016 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2017 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2018   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2019                                               (__v2df)_mm_cvtps_pd(__A),
2020                                               (__v2df)_mm_setzero_pd());
2021 }
2022
2023 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2024 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2025   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2026                                               (__v4df)_mm256_cvtps_pd(__A),
2027                                               (__v4df)__W);
2028 }
2029
2030 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2031 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2032   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2033                                               (__v4df)_mm256_cvtps_pd(__A),
2034                                               (__v4df)_mm256_setzero_pd());
2035 }
2036
2037 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2038 _mm_cvtps_epu32 (__m128 __A) {
2039   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2040                  (__v4si)
2041                  _mm_setzero_si128 (),
2042                  (__mmask8) -1);
2043 }
2044
2045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2046 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2047   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2048                  (__v4si) __W,
2049                  (__mmask8) __U);
2050 }
2051
2052 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2054   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2055                  (__v4si)
2056                  _mm_setzero_si128 (),
2057                  (__mmask8) __U);
2058 }
2059
2060 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2061 _mm256_cvtps_epu32 (__m256 __A) {
2062   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2063                  (__v8si)
2064                  _mm256_setzero_si256 (),
2065                  (__mmask8) -1);
2066 }
2067
2068 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2069 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2070   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2071                  (__v8si) __W,
2072                  (__mmask8) __U);
2073 }
2074
2075 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2076 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2077   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2078                  (__v8si)
2079                  _mm256_setzero_si256 (),
2080                  (__mmask8) __U);
2081 }
2082
2083 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2085   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2086                  (__v4si) __W,
2087                  (__mmask8) __U);
2088 }
2089
2090 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2091 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2092   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2093                  (__v4si)
2094                  _mm_setzero_si128 (),
2095                  (__mmask8) __U);
2096 }
2097
2098 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2099 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2100   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2101                                              (__v4si)_mm256_cvttpd_epi32(__A),
2102                                              (__v4si)__W);
2103 }
2104
2105 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2106 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2107   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2108                                              (__v4si)_mm256_cvttpd_epi32(__A),
2109                                              (__v4si)_mm_setzero_si128());
2110 }
2111
2112 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2113 _mm_cvttpd_epu32 (__m128d __A) {
2114   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2115                   (__v4si)
2116                   _mm_setzero_si128 (),
2117                   (__mmask8) -1);
2118 }
2119
2120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2121 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2122   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2123                   (__v4si) __W,
2124                   (__mmask8) __U);
2125 }
2126
2127 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2128 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2129   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2130                   (__v4si)
2131                   _mm_setzero_si128 (),
2132                   (__mmask8) __U);
2133 }
2134
2135 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2136 _mm256_cvttpd_epu32 (__m256d __A) {
2137   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2138                   (__v4si)
2139                   _mm_setzero_si128 (),
2140                   (__mmask8) -1);
2141 }
2142
2143 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2144 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2145   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2146                   (__v4si) __W,
2147                   (__mmask8) __U);
2148 }
2149
2150 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2151 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2152   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2153                   (__v4si)
2154                   _mm_setzero_si128 (),
2155                   (__mmask8) __U);
2156 }
2157
2158 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2159 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2160   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2161                                              (__v4si)_mm_cvttps_epi32(__A),
2162                                              (__v4si)__W);
2163 }
2164
2165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2167   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2168                                              (__v4si)_mm_cvttps_epi32(__A),
2169                                              (__v4si)_mm_setzero_si128());
2170 }
2171
2172 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2173 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2174   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2175                                              (__v8si)_mm256_cvttps_epi32(__A),
2176                                              (__v8si)__W);
2177 }
2178
2179 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2180 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2181   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2182                                              (__v8si)_mm256_cvttps_epi32(__A),
2183                                              (__v8si)_mm256_setzero_si256());
2184 }
2185
2186 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2187 _mm_cvttps_epu32 (__m128 __A) {
2188   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2189                   (__v4si)
2190                   _mm_setzero_si128 (),
2191                   (__mmask8) -1);
2192 }
2193
2194 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2195 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2196   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2197                   (__v4si) __W,
2198                   (__mmask8) __U);
2199 }
2200
2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2203   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2204                   (__v4si)
2205                   _mm_setzero_si128 (),
2206                   (__mmask8) __U);
2207 }
2208
2209 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2210 _mm256_cvttps_epu32 (__m256 __A) {
2211   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2212                   (__v8si)
2213                   _mm256_setzero_si256 (),
2214                   (__mmask8) -1);
2215 }
2216
2217 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2218 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2219   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2220                   (__v8si) __W,
2221                   (__mmask8) __U);
2222 }
2223
2224 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2225 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2226   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2227                   (__v8si)
2228                   _mm256_setzero_si256 (),
2229                   (__mmask8) __U);
2230 }
2231
2232 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2233 _mm_cvtepu32_pd (__m128i __A) {
2234   return (__m128d) __builtin_convertvector(
2235       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2236 }
2237
2238 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2239 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2240   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2241                                               (__v2df)_mm_cvtepu32_pd(__A),
2242                                               (__v2df)__W);
2243 }
2244
2245 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2246 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2247   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2248                                               (__v2df)_mm_cvtepu32_pd(__A),
2249                                               (__v2df)_mm_setzero_pd());
2250 }
2251
2252 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2253 _mm256_cvtepu32_pd (__m128i __A) {
2254   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2255 }
2256
2257 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2258 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2259   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2260                                               (__v4df)_mm256_cvtepu32_pd(__A),
2261                                               (__v4df)__W);
2262 }
2263
2264 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2265 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2266   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2267                                               (__v4df)_mm256_cvtepu32_pd(__A),
2268                                               (__v4df)_mm256_setzero_pd());
2269 }
2270
2271 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2272 _mm_cvtepu32_ps (__m128i __A) {
2273   return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2274 }
2275
2276 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2277 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2278   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2279                                              (__v4sf)_mm_cvtepu32_ps(__A),
2280                                              (__v4sf)__W);
2281 }
2282
2283 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2284 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2285   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2286                                              (__v4sf)_mm_cvtepu32_ps(__A),
2287                                              (__v4sf)_mm_setzero_ps());
2288 }
2289
2290 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2291 _mm256_cvtepu32_ps (__m256i __A) {
2292   return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2293 }
2294
2295 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2296 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2297   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2298                                              (__v8sf)_mm256_cvtepu32_ps(__A),
2299                                              (__v8sf)__W);
2300 }
2301
2302 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2303 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2304   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2305                                              (__v8sf)_mm256_cvtepu32_ps(__A),
2306                                              (__v8sf)_mm256_setzero_ps());
2307 }
2308
2309 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2310 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2311   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2312                                               (__v2df)_mm_div_pd(__A, __B),
2313                                               (__v2df)__W);
2314 }
2315
2316 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2317 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2318   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2319                                               (__v2df)_mm_div_pd(__A, __B),
2320                                               (__v2df)_mm_setzero_pd());
2321 }
2322
2323 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2324 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2325   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2326                                               (__v4df)_mm256_div_pd(__A, __B),
2327                                               (__v4df)__W);
2328 }
2329
2330 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2331 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2332   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2333                                               (__v4df)_mm256_div_pd(__A, __B),
2334                                               (__v4df)_mm256_setzero_pd());
2335 }
2336
2337 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2338 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2339   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2340                                              (__v4sf)_mm_div_ps(__A, __B),
2341                                              (__v4sf)__W);
2342 }
2343
2344 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2345 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2346   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2347                                              (__v4sf)_mm_div_ps(__A, __B),
2348                                              (__v4sf)_mm_setzero_ps());
2349 }
2350
2351 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2352 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2353   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2354                                              (__v8sf)_mm256_div_ps(__A, __B),
2355                                              (__v8sf)__W);
2356 }
2357
2358 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2359 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2360   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2361                                              (__v8sf)_mm256_div_ps(__A, __B),
2362                                              (__v8sf)_mm256_setzero_ps());
2363 }
2364
2365 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2366 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2367   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2368                 (__v2df) __W,
2369                 (__mmask8) __U);
2370 }
2371
2372 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2373 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2374   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2375                  (__v2df)
2376                  _mm_setzero_pd (),
2377                  (__mmask8) __U);
2378 }
2379
2380 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2381 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2382   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2383                 (__v4df) __W,
2384                 (__mmask8) __U);
2385 }
2386
2387 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2388 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2389   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2390                  (__v4df)
2391                  _mm256_setzero_pd (),
2392                  (__mmask8) __U);
2393 }
2394
2395 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2396 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2397   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2398                 (__v2di) __W,
2399                 (__mmask8) __U);
2400 }
2401
2402 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2403 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2404   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2405                  (__v2di)
2406                  _mm_setzero_si128 (),
2407                  (__mmask8) __U);
2408 }
2409
2410 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2411 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2412   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2413                 (__v4di) __W,
2414                 (__mmask8) __U);
2415 }
2416
2417 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2418 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2419   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2420                  (__v4di)
2421                  _mm256_setzero_si256 (),
2422                  (__mmask8) __U);
2423 }
2424
2425 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2426 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2427   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2428               (__v2df) __W,
2429               (__mmask8)
2430               __U);
2431 }
2432
2433 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2434 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2435   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2436                (__v2df)
2437                _mm_setzero_pd (),
2438                (__mmask8)
2439                __U);
2440 }
2441
2442 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2443 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2444   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2445               (__v4df) __W,
2446               (__mmask8)
2447               __U);
2448 }
2449
2450 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2451 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2452   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2453                (__v4df)
2454                _mm256_setzero_pd (),
2455                (__mmask8)
2456                __U);
2457 }
2458
2459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2460 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2461   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2462               (__v2di) __W,
2463               (__mmask8)
2464               __U);
2465 }
2466
2467 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2468 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2469   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2470                (__v2di)
2471                _mm_setzero_si128 (),
2472                (__mmask8)
2473                __U);
2474 }
2475
2476 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2477 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2478              void const *__P) {
2479   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2480               (__v4di) __W,
2481               (__mmask8)
2482               __U);
2483 }
2484
2485 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2486 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2487   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2488                (__v4di)
2489                _mm256_setzero_si256 (),
2490                (__mmask8)
2491                __U);
2492 }
2493
2494 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2495 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2496   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2497                    (__v4sf) __W,
2498                    (__mmask8) __U);
2499 }
2500
2501 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2502 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2503   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2504               (__v4sf)
2505               _mm_setzero_ps (),
2506               (__mmask8)
2507               __U);
2508 }
2509
2510 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2511 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2512   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2513                    (__v8sf) __W,
2514                    (__mmask8) __U);
2515 }
2516
2517 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2518 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2519   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2520               (__v8sf)
2521               _mm256_setzero_ps (),
2522               (__mmask8)
2523               __U);
2524 }
2525
2526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2527 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2528   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2529               (__v4si) __W,
2530               (__mmask8)
2531               __U);
2532 }
2533
2534 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2535 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2536   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2537                (__v4si)
2538                _mm_setzero_si128 (),
2539                (__mmask8)     __U);
2540 }
2541
2542 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2543 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2544              void const *__P) {
2545   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2546               (__v8si) __W,
2547               (__mmask8)
2548               __U);
2549 }
2550
2551 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2552 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2553   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2554                (__v8si)
2555                _mm256_setzero_si256 (),
2556                (__mmask8)
2557                __U);
2558 }
2559
2560 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2561 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2562   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2563                (__v4sf) __W,
2564                (__mmask8) __U);
2565 }
2566
2567 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2568 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2569   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2570                 (__v4sf)
2571                 _mm_setzero_ps (),
2572                 (__mmask8) __U);
2573 }
2574
2575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2576 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2577   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2578                (__v8sf) __W,
2579                (__mmask8) __U);
2580 }
2581
2582 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2583 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2584   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2585                 (__v8sf)
2586                 _mm256_setzero_ps (),
2587                 (__mmask8) __U);
2588 }
2589
2590 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2591 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2592   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2593                 (__v4si) __W,
2594                 (__mmask8) __U);
2595 }
2596
2597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2598 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2599   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2600                  (__v4si)
2601                  _mm_setzero_si128 (),
2602                  (__mmask8) __U);
2603 }
2604
2605 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2606 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2607   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2608                 (__v8si) __W,
2609                 (__mmask8) __U);
2610 }
2611
2612 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2613 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2614   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2615                  (__v8si)
2616                  _mm256_setzero_si256 (),
2617                  (__mmask8) __U);
2618 }
2619
2620 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2621 _mm_getexp_pd (__m128d __A) {
2622   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2623                 (__v2df)
2624                 _mm_setzero_pd (),
2625                 (__mmask8) -1);
2626 }
2627
2628 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2629 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2630   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2631                 (__v2df) __W,
2632                 (__mmask8) __U);
2633 }
2634
2635 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2636 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2637   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2638                 (__v2df)
2639                 _mm_setzero_pd (),
2640                 (__mmask8) __U);
2641 }
2642
2643 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2644 _mm256_getexp_pd (__m256d __A) {
2645   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2646                 (__v4df)
2647                 _mm256_setzero_pd (),
2648                 (__mmask8) -1);
2649 }
2650
2651 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2652 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2653   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2654                 (__v4df) __W,
2655                 (__mmask8) __U);
2656 }
2657
2658 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2659 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2660   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2661                 (__v4df)
2662                 _mm256_setzero_pd (),
2663                 (__mmask8) __U);
2664 }
2665
2666 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2667 _mm_getexp_ps (__m128 __A) {
2668   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2669                (__v4sf)
2670                _mm_setzero_ps (),
2671                (__mmask8) -1);
2672 }
2673
2674 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2675 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2676   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2677                (__v4sf) __W,
2678                (__mmask8) __U);
2679 }
2680
2681 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2682 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2683   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2684                (__v4sf)
2685                _mm_setzero_ps (),
2686                (__mmask8) __U);
2687 }
2688
2689 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2690 _mm256_getexp_ps (__m256 __A) {
2691   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2692                (__v8sf)
2693                _mm256_setzero_ps (),
2694                (__mmask8) -1);
2695 }
2696
2697 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2698 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2699   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2700                (__v8sf) __W,
2701                (__mmask8) __U);
2702 }
2703
2704 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2705 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2706   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2707                (__v8sf)
2708                _mm256_setzero_ps (),
2709                (__mmask8) __U);
2710 }
2711
2712 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2713 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2714   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2715                                               (__v2df)_mm_max_pd(__A, __B),
2716                                               (__v2df)__W);
2717 }
2718
2719 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2720 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2721   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2722                                               (__v2df)_mm_max_pd(__A, __B),
2723                                               (__v2df)_mm_setzero_pd());
2724 }
2725
2726 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2727 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2728   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2729                                               (__v4df)_mm256_max_pd(__A, __B),
2730                                               (__v4df)__W);
2731 }
2732
2733 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2734 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2735   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2736                                               (__v4df)_mm256_max_pd(__A, __B),
2737                                               (__v4df)_mm256_setzero_pd());
2738 }
2739
2740 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2741 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2742   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2743                                              (__v4sf)_mm_max_ps(__A, __B),
2744                                              (__v4sf)__W);
2745 }
2746
2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2749   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2750                                              (__v4sf)_mm_max_ps(__A, __B),
2751                                              (__v4sf)_mm_setzero_ps());
2752 }
2753
2754 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2755 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2756   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2757                                              (__v8sf)_mm256_max_ps(__A, __B),
2758                                              (__v8sf)__W);
2759 }
2760
2761 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2762 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2763   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2764                                              (__v8sf)_mm256_max_ps(__A, __B),
2765                                              (__v8sf)_mm256_setzero_ps());
2766 }
2767
2768 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2769 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2770   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2771                                               (__v2df)_mm_min_pd(__A, __B),
2772                                               (__v2df)__W);
2773 }
2774
2775 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2776 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2777   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2778                                               (__v2df)_mm_min_pd(__A, __B),
2779                                               (__v2df)_mm_setzero_pd());
2780 }
2781
2782 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2783 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2784   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2785                                               (__v4df)_mm256_min_pd(__A, __B),
2786                                               (__v4df)__W);
2787 }
2788
2789 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2790 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2791   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2792                                               (__v4df)_mm256_min_pd(__A, __B),
2793                                               (__v4df)_mm256_setzero_pd());
2794 }
2795
2796 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2797 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2798   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2799                                              (__v4sf)_mm_min_ps(__A, __B),
2800                                              (__v4sf)__W);
2801 }
2802
2803 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2804 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2805   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2806                                              (__v4sf)_mm_min_ps(__A, __B),
2807                                              (__v4sf)_mm_setzero_ps());
2808 }
2809
2810 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2811 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2812   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2813                                              (__v8sf)_mm256_min_ps(__A, __B),
2814                                              (__v8sf)__W);
2815 }
2816
2817 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2818 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2819   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2820                                              (__v8sf)_mm256_min_ps(__A, __B),
2821                                              (__v8sf)_mm256_setzero_ps());
2822 }
2823
2824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2825 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2826   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2827                                               (__v2df)_mm_mul_pd(__A, __B),
2828                                               (__v2df)__W);
2829 }
2830
2831 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2832 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2833   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2834                                               (__v2df)_mm_mul_pd(__A, __B),
2835                                               (__v2df)_mm_setzero_pd());
2836 }
2837
2838 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2839 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2840   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2841                                               (__v4df)_mm256_mul_pd(__A, __B),
2842                                               (__v4df)__W);
2843 }
2844
2845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2846 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2847   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2848                                               (__v4df)_mm256_mul_pd(__A, __B),
2849                                               (__v4df)_mm256_setzero_pd());
2850 }
2851
2852 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2853 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2854   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2855                                              (__v4sf)_mm_mul_ps(__A, __B),
2856                                              (__v4sf)__W);
2857 }
2858
2859 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2860 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2861   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2862                                              (__v4sf)_mm_mul_ps(__A, __B),
2863                                              (__v4sf)_mm_setzero_ps());
2864 }
2865
2866 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2867 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2868   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2869                                              (__v8sf)_mm256_mul_ps(__A, __B),
2870                                              (__v8sf)__W);
2871 }
2872
2873 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2874 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2875   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2876                                              (__v8sf)_mm256_mul_ps(__A, __B),
2877                                              (__v8sf)_mm256_setzero_ps());
2878 }
2879
2880 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2881 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2882   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2883                                              (__v4si)_mm_abs_epi32(__A),
2884                                              (__v4si)__W);
2885 }
2886
2887 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2888 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2889   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2890                                              (__v4si)_mm_abs_epi32(__A),
2891                                              (__v4si)_mm_setzero_si128());
2892 }
2893
2894 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2895 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2896   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2897                                              (__v8si)_mm256_abs_epi32(__A),
2898                                              (__v8si)__W);
2899 }
2900
2901 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2902 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2903   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2904                                              (__v8si)_mm256_abs_epi32(__A),
2905                                              (__v8si)_mm256_setzero_si256());
2906 }
2907
2908 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2909 _mm_abs_epi64 (__m128i __A) {
2910   return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2911 }
2912
2913 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2914 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2915   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2916                                              (__v2di)_mm_abs_epi64(__A),
2917                                              (__v2di)__W);
2918 }
2919
2920 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2921 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
2922   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2923                                              (__v2di)_mm_abs_epi64(__A),
2924                                              (__v2di)_mm_setzero_si128());
2925 }
2926
2927 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2928 _mm256_abs_epi64 (__m256i __A) {
2929   return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
2930 }
2931
2932 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2933 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2934   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2935                                              (__v4di)_mm256_abs_epi64(__A),
2936                                              (__v4di)__W);
2937 }
2938
2939 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2940 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
2941   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2942                                              (__v4di)_mm256_abs_epi64(__A),
2943                                              (__v4di)_mm256_setzero_si256());
2944 }
2945
2946 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2947 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2948   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2949                                              (__v4si)_mm_max_epi32(__A, __B),
2950                                              (__v4si)_mm_setzero_si128());
2951 }
2952
2953 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2954 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2955   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2956                                              (__v4si)_mm_max_epi32(__A, __B),
2957                                              (__v4si)__W);
2958 }
2959
2960 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2961 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2962   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2963                                              (__v8si)_mm256_max_epi32(__A, __B),
2964                                              (__v8si)_mm256_setzero_si256());
2965 }
2966
2967 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2968 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2969   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2970                                              (__v8si)_mm256_max_epi32(__A, __B),
2971                                              (__v8si)__W);
2972 }
2973
2974 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2975 _mm_max_epi64 (__m128i __A, __m128i __B) {
2976   return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
2977 }
2978
2979 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2980 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
2981   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2982                                              (__v2di)_mm_max_epi64(__A, __B),
2983                                              (__v2di)_mm_setzero_si128());
2984 }
2985
2986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2987 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2988   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2989                                              (__v2di)_mm_max_epi64(__A, __B),
2990                                              (__v2di)__W);
2991 }
2992
2993 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2994 _mm256_max_epi64 (__m256i __A, __m256i __B) {
2995   return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
2996 }
2997
2998 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2999 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3000   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3001                                              (__v4di)_mm256_max_epi64(__A, __B),
3002                                              (__v4di)_mm256_setzero_si256());
3003 }
3004
3005 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3006 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3007   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3008                                              (__v4di)_mm256_max_epi64(__A, __B),
3009                                              (__v4di)__W);
3010 }
3011
3012 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3013 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3014   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3015                                              (__v4si)_mm_max_epu32(__A, __B),
3016                                              (__v4si)_mm_setzero_si128());
3017 }
3018
3019 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3020 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3021   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3022                                              (__v4si)_mm_max_epu32(__A, __B),
3023                                              (__v4si)__W);
3024 }
3025
3026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3027 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3028   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3029                                              (__v8si)_mm256_max_epu32(__A, __B),
3030                                              (__v8si)_mm256_setzero_si256());
3031 }
3032
3033 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3034 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3035   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3036                                              (__v8si)_mm256_max_epu32(__A, __B),
3037                                              (__v8si)__W);
3038 }
3039
3040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3041 _mm_max_epu64 (__m128i __A, __m128i __B) {
3042   return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3043 }
3044
3045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3046 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3047   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3048                                              (__v2di)_mm_max_epu64(__A, __B),
3049                                              (__v2di)_mm_setzero_si128());
3050 }
3051
3052 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3053 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3054   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3055                                              (__v2di)_mm_max_epu64(__A, __B),
3056                                              (__v2di)__W);
3057 }
3058
3059 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3060 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3061   return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3062 }
3063
3064 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3065 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3066   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3067                                              (__v4di)_mm256_max_epu64(__A, __B),
3068                                              (__v4di)_mm256_setzero_si256());
3069 }
3070
3071 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3072 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3073   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3074                                              (__v4di)_mm256_max_epu64(__A, __B),
3075                                              (__v4di)__W);
3076 }
3077
3078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3079 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3080   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3081                                              (__v4si)_mm_min_epi32(__A, __B),
3082                                              (__v4si)_mm_setzero_si128());
3083 }
3084
3085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3086 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3087   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3088                                              (__v4si)_mm_min_epi32(__A, __B),
3089                                              (__v4si)__W);
3090 }
3091
3092 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3093 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3094   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3095                                              (__v8si)_mm256_min_epi32(__A, __B),
3096                                              (__v8si)_mm256_setzero_si256());
3097 }
3098
3099 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3100 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3101   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3102                                              (__v8si)_mm256_min_epi32(__A, __B),
3103                                              (__v8si)__W);
3104 }
3105
3106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3107 _mm_min_epi64 (__m128i __A, __m128i __B) {
3108   return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3109 }
3110
3111 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3112 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3113   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3114                                              (__v2di)_mm_min_epi64(__A, __B),
3115                                              (__v2di)__W);
3116 }
3117
3118 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3119 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3120   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3121                                              (__v2di)_mm_min_epi64(__A, __B),
3122                                              (__v2di)_mm_setzero_si128());
3123 }
3124
3125 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3126 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3127   return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3128 }
3129
3130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3131 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3132   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3133                                              (__v4di)_mm256_min_epi64(__A, __B),
3134                                              (__v4di)__W);
3135 }
3136
3137 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3138 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3139   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3140                                              (__v4di)_mm256_min_epi64(__A, __B),
3141                                              (__v4di)_mm256_setzero_si256());
3142 }
3143
3144 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3145 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3146   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3147                                              (__v4si)_mm_min_epu32(__A, __B),
3148                                              (__v4si)_mm_setzero_si128());
3149 }
3150
3151 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3152 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3153   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3154                                              (__v4si)_mm_min_epu32(__A, __B),
3155                                              (__v4si)__W);
3156 }
3157
3158 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3159 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3160   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3161                                              (__v8si)_mm256_min_epu32(__A, __B),
3162                                              (__v8si)_mm256_setzero_si256());
3163 }
3164
3165 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3166 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3167   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3168                                              (__v8si)_mm256_min_epu32(__A, __B),
3169                                              (__v8si)__W);
3170 }
3171
3172 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3173 _mm_min_epu64 (__m128i __A, __m128i __B) {
3174   return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3175 }
3176
3177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3178 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3179   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3180                                              (__v2di)_mm_min_epu64(__A, __B),
3181                                              (__v2di)__W);
3182 }
3183
3184 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3185 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3186   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3187                                              (__v2di)_mm_min_epu64(__A, __B),
3188                                              (__v2di)_mm_setzero_si128());
3189 }
3190
3191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3192 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3193   return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3194 }
3195
3196 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3197 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3198   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3199                                              (__v4di)_mm256_min_epu64(__A, __B),
3200                                              (__v4di)__W);
3201 }
3202
3203 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3204 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3205   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3206                                              (__v4di)_mm256_min_epu64(__A, __B),
3207                                              (__v4di)_mm256_setzero_si256());
3208 }
3209
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm, a
 * zero pass-through vector and the constant mask (__mmask8)-1.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
3215
3216
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm,
 * pass-through vector W and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
3222
3223
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm, a
 * zero pass-through vector and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3229
3230
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm, a
 * zero pass-through vector and the constant mask (__mmask8)-1.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
3236
3237
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm,
 * pass-through vector W and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
3243
3244
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm, a
 * zero pass-through vector and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256d result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_maskz_roundscale_pd(U, A, imm)  \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3250
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm, a
 * zero pass-through vector and the constant mask (__mmask8)-1.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_roundscale_ps(A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
3255
3256
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm,
 * pass-through vector W and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_mask_roundscale_ps(W, U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
3261
3262
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm, a
 * zero pass-through vector and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m128 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm_maskz_roundscale_ps(U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3267
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm, a
 * zero pass-through vector and the constant mask (__mmask8)-1.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_roundscale_ps(A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
3272
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm,
 * pass-through vector W and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_mask_roundscale_ps(W, U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
3277
3278
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm, a
 * zero pass-through vector and mask U.
 * The expansion is fully parenthesized so that a postfix operator or sizeof
 * applied to the invocation acts on the __m256 result instead of binding to
 * the builtin call ahead of the cast. */
#define _mm256_maskz_roundscale_ps(U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3283
3284 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3285 _mm_scalef_pd (__m128d __A, __m128d __B) {
3286   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3287                 (__v2df) __B,
3288                 (__v2df)
3289                 _mm_setzero_pd (),
3290                 (__mmask8) -1);
3291 }
3292
3293 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3294 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3295         __m128d __B) {
3296   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3297                 (__v2df) __B,
3298                 (__v2df) __W,
3299                 (__mmask8) __U);
3300 }
3301
3302 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3303 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3304   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3305                 (__v2df) __B,
3306                 (__v2df)
3307                 _mm_setzero_pd (),
3308                 (__mmask8) __U);
3309 }
3310
3311 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3312 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3313   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3314                 (__v4df) __B,
3315                 (__v4df)
3316                 _mm256_setzero_pd (),
3317                 (__mmask8) -1);
3318 }
3319
3320 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3321 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3322            __m256d __B) {
3323   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3324                 (__v4df) __B,
3325                 (__v4df) __W,
3326                 (__mmask8) __U);
3327 }
3328
3329 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3330 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3331   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3332                 (__v4df) __B,
3333                 (__v4df)
3334                 _mm256_setzero_pd (),
3335                 (__mmask8) __U);
3336 }
3337
3338 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3339 _mm_scalef_ps (__m128 __A, __m128 __B) {
3340   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3341                (__v4sf) __B,
3342                (__v4sf)
3343                _mm_setzero_ps (),
3344                (__mmask8) -1);
3345 }
3346
3347 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3348 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3349   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3350                (__v4sf) __B,
3351                (__v4sf) __W,
3352                (__mmask8) __U);
3353 }
3354
3355 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3356 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3357   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3358                (__v4sf) __B,
3359                (__v4sf)
3360                _mm_setzero_ps (),
3361                (__mmask8) __U);
3362 }
3363
3364 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3365 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3366   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3367                (__v8sf) __B,
3368                (__v8sf)
3369                _mm256_setzero_ps (),
3370                (__mmask8) -1);
3371 }
3372
3373 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3374 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3375            __m256 __B) {
3376   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3377                (__v8sf) __B,
3378                (__v8sf) __W,
3379                (__mmask8) __U);
3380 }
3381
3382 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3383 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3384   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3385                (__v8sf) __B,
3386                (__v8sf)
3387                _mm256_setzero_ps (),
3388                (__mmask8) __U);
3389 }
3390
/* Unmasked form: expands to __builtin_ia32_scatterdiv2df with the constant
 * mask (__mmask8)-1; addr is cast to double *, index to __v2di, v1 to __v2df
 * and scale to int. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3395
/* Masked form: expands to __builtin_ia32_scatterdiv2df with the caller's
 * mask; addr is cast to double *, index to __v2di, v1 to __v2df and scale to
 * int. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3400
/* Unmasked form: expands to __builtin_ia32_scatterdiv2di with the constant
 * mask (__mmask8)-1; addr is cast to long long *, index and v1 to __v2di and
 * scale to int. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3405
/* Masked form: expands to __builtin_ia32_scatterdiv2di with the caller's
 * mask; addr is cast to long long *, index and v1 to __v2di and scale to
 * int. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3410
/* Unmasked form: expands to __builtin_ia32_scatterdiv4df with the constant
 * mask (__mmask8)-1; addr is cast to double *, index to __v4di, v1 to __v4df
 * and scale to int. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3415
/* Masked form: expands to __builtin_ia32_scatterdiv4df with the caller's
 * mask; addr is cast to double *, index to __v4di, v1 to __v4df and scale to
 * int. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3420
/* Unmasked form: expands to __builtin_ia32_scatterdiv4di with the constant
 * mask (__mmask8)-1; addr is cast to long long *, index and v1 to __v4di and
 * scale to int. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3425
/* Masked form: expands to __builtin_ia32_scatterdiv4di with the caller's
 * mask; addr is cast to long long *, index and v1 to __v4di and scale to
 * int. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3430
/* Unmasked form: expands to __builtin_ia32_scatterdiv4sf with the constant
 * mask (__mmask8)-1; addr is cast to float *, index to __v2di, v1 to __v4sf
 * and scale to int. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3435
/* Masked form: expands to __builtin_ia32_scatterdiv4sf with the caller's
 * mask; addr is cast to float *, index to __v2di, v1 to __v4sf and scale to
 * int. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3440
/* Unmasked form: expands to __builtin_ia32_scatterdiv4si with the constant
 * mask (__mmask8)-1; addr is cast to int *, index to __v2di, v1 to __v4si and
 * scale to int. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3445
/* Masked form: expands to __builtin_ia32_scatterdiv4si with the caller's
 * mask; addr is cast to int *, index to __v2di, v1 to __v4si and scale to
 * int. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3450
/* Unmasked form: expands to __builtin_ia32_scatterdiv8sf with the constant
 * mask (__mmask8)-1; addr is cast to float *, index to __v4di, v1 to the
 * 128-bit __v4sf and scale to int. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3455
/* Masked form: expands to __builtin_ia32_scatterdiv8sf with the caller's
 * mask; addr is cast to float *, index to __v4di, v1 to the 128-bit __v4sf
 * and scale to int. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3460
/* Unmasked form: expands to __builtin_ia32_scatterdiv8si with the constant
 * mask (__mmask8)-1; addr is cast to int *, index to __v4di, v1 to the
 * 128-bit __v4si and scale to int. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3465
/* Masked form: expands to __builtin_ia32_scatterdiv8si with the caller's
 * mask; addr is cast to int *, index to __v4di, v1 to the 128-bit __v4si and
 * scale to int. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3470
/* Unmasked form: expands to __builtin_ia32_scattersiv2df with the constant
 * mask (__mmask8)-1; addr is cast to double *, index to __v4si, v1 to __v2df
 * and scale to int. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3475
/* Masked form: expands to __builtin_ia32_scattersiv2df with the caller's
 * mask; addr is cast to double *, index to __v4si, v1 to __v2df and scale to
 * int. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3480
/* Unmasked form: expands to __builtin_ia32_scattersiv2di with the constant
 * mask (__mmask8)-1; addr is cast to long long *, index to __v4si, v1 to
 * __v2di and scale to int. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3485
/* Masked form: expands to __builtin_ia32_scattersiv2di with the caller's
 * mask; addr is cast to long long *, index to __v4si, v1 to __v2di and scale
 * to int. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3490
/* Unmasked form: expands to __builtin_ia32_scattersiv4df with the constant
 * mask (__mmask8)-1; addr is cast to double *, index to the 128-bit __v4si,
 * v1 to __v4df and scale to int. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3495
/* Masked form: expands to __builtin_ia32_scattersiv4df with the caller's
 * mask; addr is cast to double *, index to the 128-bit __v4si, v1 to __v4df
 * and scale to int. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3500
/* Unmasked form: expands to __builtin_ia32_scattersiv4di with the constant
 * mask (__mmask8)-1; addr is cast to long long *, index to the 128-bit
 * __v4si, v1 to __v4di and scale to int. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3505
/* Masked form: expands to __builtin_ia32_scattersiv4di with the caller's
 * mask; addr is cast to long long *, index to the 128-bit __v4si, v1 to
 * __v4di and scale to int. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3510
/* Unmasked form: expands to __builtin_ia32_scattersiv4sf with the constant
 * mask (__mmask8)-1; addr is cast to float *, index to __v4si, v1 to __v4sf
 * and scale to int. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3515
/* Masked form: expands to __builtin_ia32_scattersiv4sf with the caller's
 * mask; addr is cast to float *, index to __v4si, v1 to __v4sf and scale to
 * int. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3520
/* Same expansion as _mm_mask_i32scatter_epi32 with the mask fixed to
 * (__mmask8)-1. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  _mm_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3525
/* Expands to __builtin_ia32_scattersiv4si with addr cast to int *,
 * mask to __mmask8, index and v1 to __v4si, and scale to int. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3530
/* Same expansion as _mm256_mask_i32scatter_ps with the mask fixed to
 * (__mmask8)-1. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  _mm256_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), (scale))
3535
/* Expands to __builtin_ia32_scattersiv8sf with addr cast to float *,
 * mask to __mmask8, index to __v8si, v1 to __v8sf and scale to int. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale))
3540
/* Same expansion as _mm256_mask_i32scatter_epi32 with the mask fixed to
 * (__mmask8)-1. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  _mm256_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), (scale))
3545
/* Expands to __builtin_ia32_scattersiv8si with addr cast to int *,
 * mask to __mmask8, index and v1 to __v8si, and scale to int. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale))
3550
3551   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3552   _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3553     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3554                                                 (__v2df)_mm_sqrt_pd(__A),
3555                                                 (__v2df)__W);
3556   }
3557
3558   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3559   _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3560     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3561                                                 (__v2df)_mm_sqrt_pd(__A),
3562                                                 (__v2df)_mm_setzero_pd());
3563   }
3564
3565   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3566   _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3567     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3568                                                 (__v4df)_mm256_sqrt_pd(__A),
3569                                                 (__v4df)__W);
3570   }
3571
3572   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3573   _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3574     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3575                                                 (__v4df)_mm256_sqrt_pd(__A),
3576                                                 (__v4df)_mm256_setzero_pd());
3577   }
3578
3579   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3580   _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3581     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3582                                                (__v4sf)_mm_sqrt_ps(__A),
3583                                                (__v4sf)__W);
3584   }
3585
3586   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3587   _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3588     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3589                                                (__v4sf)_mm_sqrt_ps(__A),
3590                                                (__v4sf)_mm_setzero_ps());
3591   }
3592
3593   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3594   _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3595     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3596                                                (__v8sf)_mm256_sqrt_ps(__A),
3597                                                (__v8sf)__W);
3598   }
3599
3600   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3601   _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3602     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3603                                                (__v8sf)_mm256_sqrt_ps(__A),
3604                                                (__v8sf)_mm256_setzero_ps());
3605   }
3606
3607   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3608   _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3609     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3610                                                 (__v2df)_mm_sub_pd(__A, __B),
3611                                                 (__v2df)__W);
3612   }
3613
3614   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3615   _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3616     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3617                                                 (__v2df)_mm_sub_pd(__A, __B),
3618                                                 (__v2df)_mm_setzero_pd());
3619   }
3620
3621   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3622   _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3623     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3624                                                 (__v4df)_mm256_sub_pd(__A, __B),
3625                                                 (__v4df)__W);
3626   }
3627
3628   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3629   _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3630     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3631                                                 (__v4df)_mm256_sub_pd(__A, __B),
3632                                                 (__v4df)_mm256_setzero_pd());
3633   }
3634
3635   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3636   _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3637     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3638                                                (__v4sf)_mm_sub_ps(__A, __B),
3639                                                (__v4sf)__W);
3640   }
3641
3642   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3643   _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3644     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3645                                                (__v4sf)_mm_sub_ps(__A, __B),
3646                                                (__v4sf)_mm_setzero_ps());
3647   }
3648
3649   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3650   _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3651     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3652                                                (__v8sf)_mm256_sub_ps(__A, __B),
3653                                                (__v8sf)__W);
3654   }
3655
3656   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3657   _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3658     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3659                                                (__v8sf)_mm256_sub_ps(__A, __B),
3660                                                (__v8sf)_mm256_setzero_ps());
3661   }
3662
3663   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3664   _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3665     return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3666                                                   (__v4si)__B);
3667   }
3668
3669   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3670   _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3671                               __m128i __B) {
3672     return (__m128i)__builtin_ia32_selectd_128(__U,
3673                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3674                                     (__v4si)__A);
3675   }
3676
3677   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3678   _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3679                                __m128i __B) {
3680     return (__m128i)__builtin_ia32_selectd_128(__U,
3681                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3682                                     (__v4si)__I);
3683   }
3684
3685   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3686   _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3687                                __m128i __B) {
3688     return (__m128i)__builtin_ia32_selectd_128(__U,
3689                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3690                                     (__v4si)_mm_setzero_si128());
3691   }
3692
3693   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3694   _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3695     return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3696                                                   (__v8si) __B);
3697   }
3698
3699   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3700   _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3701                                  __m256i __B) {
3702     return (__m256i)__builtin_ia32_selectd_256(__U,
3703                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3704                                  (__v8si)__A);
3705   }
3706
3707   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3708   _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3709                                   __m256i __B) {
3710     return (__m256i)__builtin_ia32_selectd_256(__U,
3711                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3712                                  (__v8si)__I);
3713   }
3714
3715   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3716   _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3717                                   __m256i __B) {
3718     return (__m256i)__builtin_ia32_selectd_256(__U,
3719                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3720                                  (__v8si)_mm256_setzero_si256());
3721   }
3722
3723   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3724   _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3725     return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3726                                                    (__v2df)__B);
3727   }
3728
3729   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3730   _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3731     return (__m128d)__builtin_ia32_selectpd_128(__U,
3732                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3733                                        (__v2df)__A);
3734   }
3735
3736   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3737   _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3738     return (__m128d)__builtin_ia32_selectpd_128(__U,
3739                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3740                                        (__v2df)(__m128d)__I);
3741   }
3742
3743   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3744   _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3745     return (__m128d)__builtin_ia32_selectpd_128(__U,
3746                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3747                                        (__v2df)_mm_setzero_pd());
3748   }
3749
3750   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3751   _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3752     return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3753                                                    (__v4df)__B);
3754   }
3755
3756   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3757   _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3758                               __m256d __B) {
3759     return (__m256d)__builtin_ia32_selectpd_256(__U,
3760                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3761                                     (__v4df)__A);
3762   }
3763
3764   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3765   _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3766                                __m256d __B) {
3767     return (__m256d)__builtin_ia32_selectpd_256(__U,
3768                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3769                                     (__v4df)(__m256d)__I);
3770   }
3771
3772   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3773   _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3774                                __m256d __B) {
3775     return (__m256d)__builtin_ia32_selectpd_256(__U,
3776                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3777                                     (__v4df)_mm256_setzero_pd());
3778   }
3779
3780   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3781   _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3782     return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3783                                                   (__v4sf)__B);
3784   }
3785
3786   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3787   _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3788     return (__m128)__builtin_ia32_selectps_128(__U,
3789                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3790                                        (__v4sf)__A);
3791   }
3792
3793   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3794   _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3795     return (__m128)__builtin_ia32_selectps_128(__U,
3796                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3797                                        (__v4sf)(__m128)__I);
3798   }
3799
3800   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3801   _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3802     return (__m128)__builtin_ia32_selectps_128(__U,
3803                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3804                                        (__v4sf)_mm_setzero_ps());
3805   }
3806
3807   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3808   _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3809     return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3810                                                   (__v8sf) __B);
3811   }
3812
3813   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3814   _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3815     return (__m256)__builtin_ia32_selectps_256(__U,
3816                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3817                                     (__v8sf)__A);
3818   }
3819
3820   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3821   _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3822                                __m256 __B) {
3823     return (__m256)__builtin_ia32_selectps_256(__U,
3824                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3825                                     (__v8sf)(__m256)__I);
3826   }
3827
3828   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3829   _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3830                                __m256 __B) {
3831     return (__m256)__builtin_ia32_selectps_256(__U,
3832                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3833                                     (__v8sf)_mm256_setzero_ps());
3834   }
3835
3836   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3837   _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3838     return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3839                                                   (__v2di)__B);
3840   }
3841
3842   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3843   _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3844                               __m128i __B) {
3845     return (__m128i)__builtin_ia32_selectq_128(__U,
3846                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3847                                     (__v2di)__A);
3848   }
3849
3850   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3851   _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3852                                __m128i __B) {
3853     return (__m128i)__builtin_ia32_selectq_128(__U,
3854                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3855                                     (__v2di)__I);
3856   }
3857
3858   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3859   _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3860                                __m128i __B) {
3861     return (__m128i)__builtin_ia32_selectq_128(__U,
3862                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3863                                     (__v2di)_mm_setzero_si128());
3864   }
3865
3866
3867   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3868   _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3869     return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3870                                                   (__v4di) __B);
3871   }
3872
3873   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3874   _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3875                                  __m256i __B) {
3876     return (__m256i)__builtin_ia32_selectq_256(__U,
3877                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3878                                  (__v4di)__A);
3879   }
3880
3881   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3882   _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3883                                   __m256i __B) {
3884     return (__m256i)__builtin_ia32_selectq_256(__U,
3885                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3886                                  (__v4di)__I);
3887   }
3888
3889   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3890   _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3891                                   __m256i __B) {
3892     return (__m256i)__builtin_ia32_selectq_256(__U,
3893                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3894                                  (__v4di)_mm256_setzero_si256());
3895   }
3896
3897   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3898   _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3899   {
3900     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3901                                                (__v4si)_mm_cvtepi8_epi32(__A),
3902                                                (__v4si)__W);
3903   }
3904
3905   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3906   _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3907   {
3908     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3909                                                (__v4si)_mm_cvtepi8_epi32(__A),
3910                                                (__v4si)_mm_setzero_si128());
3911   }
3912
3913   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3914   _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3915   {
3916     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3917                                                (__v8si)_mm256_cvtepi8_epi32(__A),
3918                                                (__v8si)__W);
3919   }
3920
3921   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3922   _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
3923   {
3924     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3925                                                (__v8si)_mm256_cvtepi8_epi32(__A),
3926                                                (__v8si)_mm256_setzero_si256());
3927   }
3928
3929   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3930   _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3931   {
3932     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3933                                                (__v2di)_mm_cvtepi8_epi64(__A),
3934                                                (__v2di)__W);
3935   }
3936
3937   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938   _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
3939   {
3940     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3941                                                (__v2di)_mm_cvtepi8_epi64(__A),
3942                                                (__v2di)_mm_setzero_si128());
3943   }
3944
3945   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3946   _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3947   {
3948     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3949                                                (__v4di)_mm256_cvtepi8_epi64(__A),
3950                                                (__v4di)__W);
3951   }
3952
3953   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3954   _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
3955   {
3956     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3957                                                (__v4di)_mm256_cvtepi8_epi64(__A),
3958                                                (__v4di)_mm256_setzero_si256());
3959   }
3960
3961   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3962   _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
3963   {
3964     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3965                                                (__v2di)_mm_cvtepi32_epi64(__X),
3966                                                (__v2di)__W);
3967   }
3968
3969   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3970   _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
3971   {
3972     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3973                                                (__v2di)_mm_cvtepi32_epi64(__X),
3974                                                (__v2di)_mm_setzero_si128());
3975   }
3976
3977   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3978   _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
3979   {
3980     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3981                                                (__v4di)_mm256_cvtepi32_epi64(__X),
3982                                                (__v4di)__W);
3983   }
3984
3985   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3986   _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
3987   {
3988     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3989                                                (__v4di)_mm256_cvtepi32_epi64(__X),
3990                                                (__v4di)_mm256_setzero_si256());
3991   }
3992
3993   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3994   _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3995   {
3996     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3997                                                (__v4si)_mm_cvtepi16_epi32(__A),
3998                                                (__v4si)__W);
3999   }
4000
4001   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4002   _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4003   {
4004     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4005                                                (__v4si)_mm_cvtepi16_epi32(__A),
4006                                                (__v4si)_mm_setzero_si128());
4007   }
4008
4009   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4010   _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4011   {
4012     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4013                                                (__v8si)_mm256_cvtepi16_epi32(__A),
4014                                                (__v8si)__W);
4015   }
4016
4017   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4018   _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4019   {
4020     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4021                                                (__v8si)_mm256_cvtepi16_epi32(__A),
4022                                                (__v8si)_mm256_setzero_si256());
4023   }
4024
4025   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4026   _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4027   {
4028     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4029                                                (__v2di)_mm_cvtepi16_epi64(__A),
4030                                                (__v2di)__W);
4031   }
4032
4033   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4034   _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4035   {
4036     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4037                                                (__v2di)_mm_cvtepi16_epi64(__A),
4038                                                (__v2di)_mm_setzero_si128());
4039   }
4040
4041   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4042   _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4043   {
4044     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4045                                                (__v4di)_mm256_cvtepi16_epi64(__A),
4046                                                (__v4di)__W);
4047   }
4048
4049   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4050   _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4051   {
4052     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4053                                                (__v4di)_mm256_cvtepi16_epi64(__A),
4054                                                (__v4di)_mm256_setzero_si256());
4055   }
4056
4057
4058   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4059   _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4060   {
4061     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4062                                                (__v4si)_mm_cvtepu8_epi32(__A),
4063                                                (__v4si)__W);
4064   }
4065
4066   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4067   _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4068   {
4069     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4070                                                (__v4si)_mm_cvtepu8_epi32(__A),
4071                                                (__v4si)_mm_setzero_si128());
4072   }
4073
4074   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4075   _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4076   {
4077     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4078                                                (__v8si)_mm256_cvtepu8_epi32(__A),
4079                                                (__v8si)__W);
4080   }
4081
4082   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4083   _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4084   {
4085     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4086                                                (__v8si)_mm256_cvtepu8_epi32(__A),
4087                                                (__v8si)_mm256_setzero_si256());
4088   }
4089
4090   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4091   _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4092   {
4093     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4094                                                (__v2di)_mm_cvtepu8_epi64(__A),
4095                                                (__v2di)__W);
4096   }
4097
4098   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4099   _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4100   {
4101     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4102                                                (__v2di)_mm_cvtepu8_epi64(__A),
4103                                                (__v2di)_mm_setzero_si128());
4104   }
4105
4106   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4107   _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4108   {
4109     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4110                                                (__v4di)_mm256_cvtepu8_epi64(__A),
4111                                                (__v4di)__W);
4112   }
4113
4114   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4115   _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4116   {
4117     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4118                                                (__v4di)_mm256_cvtepu8_epi64(__A),
4119                                                (__v4di)_mm256_setzero_si256());
4120   }
4121
4122   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4123   _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4124   {
4125     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4126                                                (__v2di)_mm_cvtepu32_epi64(__X),
4127                                                (__v2di)__W);
4128   }
4129
4130   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4131   _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4132   {
4133     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4134                                                (__v2di)_mm_cvtepu32_epi64(__X),
4135                                                (__v2di)_mm_setzero_si128());
4136   }
4137
4138   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4139   _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4140   {
4141     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4142                                                (__v4di)_mm256_cvtepu32_epi64(__X),
4143                                                (__v4di)__W);
4144   }
4145
4146   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4147   _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4148   {
4149     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4150                                                (__v4di)_mm256_cvtepu32_epi64(__X),
4151                                                (__v4di)_mm256_setzero_si256());
4152   }
4153
4154   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4155   _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4156   {
4157     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4158                                                (__v4si)_mm_cvtepu16_epi32(__A),
4159                                                (__v4si)__W);
4160   }
4161
4162   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4163   _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4164   {
4165     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4166                                                (__v4si)_mm_cvtepu16_epi32(__A),
4167                                                (__v4si)_mm_setzero_si128());
4168   }
4169
4170   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4171   _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4172   {
4173     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4174                                                (__v8si)_mm256_cvtepu16_epi32(__A),
4175                                                (__v8si)__W);
4176   }
4177
4178   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4179   _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4180   {
4181     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4182                                                (__v8si)_mm256_cvtepu16_epi32(__A),
4183                                                (__v8si)_mm256_setzero_si256());
4184   }
4185
4186   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4187   _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4188   {
4189     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4190                                                (__v2di)_mm_cvtepu16_epi64(__A),
4191                                                (__v2di)__W);
4192   }
4193
4194   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4195   _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4196   {
4197     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4198                                                (__v2di)_mm_cvtepu16_epi64(__A),
4199                                                (__v2di)_mm_setzero_si128());
4200   }
4201
4202   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4203   _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4204   {
4205     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4206                                                (__v4di)_mm256_cvtepu16_epi64(__A),
4207                                                (__v4di)__W);
4208   }
4209
4210   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4211   _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4212   {
4213     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4214                                                (__v4di)_mm256_cvtepu16_epi64(__A),
4215                                                (__v4di)_mm256_setzero_si256());
4216   }
4217
4218
/* rol_epi32 macro family (128- and 256-bit vectors).
 *
 * The plain forms forward vector a (viewed as __v4si / __v8si) and the count
 * b (converted to int) to the prold128 / prold256 builtins.  The mask and
 * maskz forms pass mask u, that result, and either w or a setzero vector to
 * the selectd builtins.
 *
 * Every expansion is wrapped in one outer pair of parentheses.  Without them
 * the expansion is a bare cast expression, so an operator at the use site
 * that binds tighter than a cast (subscripting, sizeof, ...) would attach to
 * the builtin call rather than to the __m128i / __m256i result. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))

#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))

#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))

#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))

#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))

#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4244
/* rol_epi64 macro family (128- and 256-bit vectors).
 *
 * The plain forms forward vector a (viewed as __v2di / __v4di) and the count
 * b (converted to int) to the prolq128 / prolq256 builtins.  The mask and
 * maskz forms pass mask u, that result, and either w or a setzero vector to
 * the selectq builtins.
 *
 * Every expansion is wrapped in one outer pair of parentheses.  Without them
 * the expansion is a bare cast expression, so an operator at the use site
 * that binds tighter than a cast (subscripting, sizeof, ...) would attach to
 * the builtin call rather than to the __m128i / __m256i result. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))

#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))

#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))

#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))

#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))

#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4270
4271 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4272 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4273 {
4274   return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4275 }
4276
4277 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4278 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4279 {
4280   return (__m128i)__builtin_ia32_selectd_128(__U,
4281                                              (__v4si)_mm_rolv_epi32(__A, __B),
4282                                              (__v4si)__W);
4283 }
4284
4285 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4286 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4287 {
4288   return (__m128i)__builtin_ia32_selectd_128(__U,
4289                                              (__v4si)_mm_rolv_epi32(__A, __B),
4290                                              (__v4si)_mm_setzero_si128());
4291 }
4292
4293 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4294 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4295 {
4296   return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4297 }
4298
4299 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4300 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4301 {
4302   return (__m256i)__builtin_ia32_selectd_256(__U,
4303                                             (__v8si)_mm256_rolv_epi32(__A, __B),
4304                                             (__v8si)__W);
4305 }
4306
4307 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4308 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4309 {
4310   return (__m256i)__builtin_ia32_selectd_256(__U,
4311                                             (__v8si)_mm256_rolv_epi32(__A, __B),
4312                                             (__v8si)_mm256_setzero_si256());
4313 }
4314
4315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4316 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4317 {
4318   return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4319 }
4320
4321 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4322 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4323 {
4324   return (__m128i)__builtin_ia32_selectq_128(__U,
4325                                              (__v2di)_mm_rolv_epi64(__A, __B),
4326                                              (__v2di)__W);
4327 }
4328
4329 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4330 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4331 {
4332   return (__m128i)__builtin_ia32_selectq_128(__U,
4333                                              (__v2di)_mm_rolv_epi64(__A, __B),
4334                                              (__v2di)_mm_setzero_si128());
4335 }
4336
4337 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4338 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4339 {
4340   return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4341 }
4342
4343 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4344 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4345 {
4346   return (__m256i)__builtin_ia32_selectq_256(__U,
4347                                             (__v4di)_mm256_rolv_epi64(__A, __B),
4348                                             (__v4di)__W);
4349 }
4350
4351 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4352 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4353 {
4354   return (__m256i)__builtin_ia32_selectq_256(__U,
4355                                             (__v4di)_mm256_rolv_epi64(__A, __B),
4356                                             (__v4di)_mm256_setzero_si256());
4357 }
4358
/* ror_epi32 macro family (128- and 256-bit vectors).
 *
 * The plain forms forward vector a (viewed as __v4si / __v8si) and the count
 * b (converted to int) to the prord128 / prord256 builtins.  The mask and
 * maskz forms pass mask u, that result, and either w or a setzero vector to
 * the selectd builtins.
 *
 * Every expansion is wrapped in one outer pair of parentheses.  Without them
 * the expansion is a bare cast expression, so an operator at the use site
 * that binds tighter than a cast (subscripting, sizeof, ...) would attach to
 * the builtin call rather than to the __m128i / __m256i result. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))

#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))

#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))

#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))

#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))

#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4384
/* ror_epi64 macro family (128- and 256-bit vectors).
 *
 * The plain forms forward vector a (viewed as __v2di / __v4di) and the count
 * b (converted to int) to the prorq128 / prorq256 builtins.  The mask and
 * maskz forms pass mask u, that result, and either w or a setzero vector to
 * the selectq builtins.
 *
 * Every expansion is wrapped in one outer pair of parentheses.  Without them
 * the expansion is a bare cast expression, so an operator at the use site
 * that binds tighter than a cast (subscripting, sizeof, ...) would attach to
 * the builtin call rather than to the __m128i / __m256i result. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))

#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))

#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))

#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))

#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))

#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4410
4411 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4412 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4413 {
4414   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4415                                              (__v4si)_mm_sll_epi32(__A, __B),
4416                                              (__v4si)__W);
4417 }
4418
4419 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4420 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4421 {
4422   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4423                                              (__v4si)_mm_sll_epi32(__A, __B),
4424                                              (__v4si)_mm_setzero_si128());
4425 }
4426
4427 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4428 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4429 {
4430   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4431                                              (__v8si)_mm256_sll_epi32(__A, __B),
4432                                              (__v8si)__W);
4433 }
4434
4435 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4436 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4437 {
4438   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4439                                              (__v8si)_mm256_sll_epi32(__A, __B),
4440                                              (__v8si)_mm256_setzero_si256());
4441 }
4442
4443 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4444 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4445 {
4446   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4447                                              (__v4si)_mm_slli_epi32(__A, __B),
4448                                              (__v4si)__W);
4449 }
4450
4451 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4452 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4453 {
4454   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4455                                              (__v4si)_mm_slli_epi32(__A, __B),
4456                                              (__v4si)_mm_setzero_si128());
4457 }
4458
4459 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4460 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4461 {
4462   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4463                                              (__v8si)_mm256_slli_epi32(__A, __B),
4464                                              (__v8si)__W);
4465 }
4466
4467 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4468 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4469 {
4470   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4471                                              (__v8si)_mm256_slli_epi32(__A, __B),
4472                                              (__v8si)_mm256_setzero_si256());
4473 }
4474
4475 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4476 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4477 {
4478   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4479                                              (__v2di)_mm_sll_epi64(__A, __B),
4480                                              (__v2di)__W);
4481 }
4482
4483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4484 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4485 {
4486   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4487                                              (__v2di)_mm_sll_epi64(__A, __B),
4488                                              (__v2di)_mm_setzero_si128());
4489 }
4490
4491 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4492 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4493 {
4494   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4495                                              (__v4di)_mm256_sll_epi64(__A, __B),
4496                                              (__v4di)__W);
4497 }
4498
4499 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4500 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4501 {
4502   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4503                                              (__v4di)_mm256_sll_epi64(__A, __B),
4504                                              (__v4di)_mm256_setzero_si256());
4505 }
4506
4507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4508 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4509 {
4510   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4511                                              (__v2di)_mm_slli_epi64(__A, __B),
4512                                              (__v2di)__W);
4513 }
4514
4515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4516 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4517 {
4518   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4519                                              (__v2di)_mm_slli_epi64(__A, __B),
4520                                              (__v2di)_mm_setzero_si128());
4521 }
4522
4523 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4524 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4525 {
4526   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4527                                              (__v4di)_mm256_slli_epi64(__A, __B),
4528                                              (__v4di)__W);
4529 }
4530
4531 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4532 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4533 {
4534   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4535                                              (__v4di)_mm256_slli_epi64(__A, __B),
4536                                              (__v4di)_mm256_setzero_si256());
4537 }
4538
4539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4540 _mm_rorv_epi32 (__m128i __A, __m128i __B)
4541 {
4542   return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4543 }
4544
4545 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4546 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4547 {
4548   return (__m128i)__builtin_ia32_selectd_128(__U,
4549                                              (__v4si)_mm_rorv_epi32(__A, __B),
4550                                              (__v4si)__W);
4551 }
4552
4553 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4554 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4555 {
4556   return (__m128i)__builtin_ia32_selectd_128(__U,
4557                                              (__v4si)_mm_rorv_epi32(__A, __B),
4558                                              (__v4si)_mm_setzero_si128());
4559 }
4560
4561 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4562 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
4563 {
4564   return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4565 }
4566
4567 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4568 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4569 {
4570   return (__m256i)__builtin_ia32_selectd_256(__U,
4571                                             (__v8si)_mm256_rorv_epi32(__A, __B),
4572                                             (__v8si)__W);
4573 }
4574
4575 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4576 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4577 {
4578   return (__m256i)__builtin_ia32_selectd_256(__U,
4579                                             (__v8si)_mm256_rorv_epi32(__A, __B),
4580                                             (__v8si)_mm256_setzero_si256());
4581 }
4582
4583 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4584 _mm_rorv_epi64 (__m128i __A, __m128i __B)
4585 {
4586   return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4587 }
4588
4589 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4590 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4591 {
4592   return (__m128i)__builtin_ia32_selectq_128(__U,
4593                                              (__v2di)_mm_rorv_epi64(__A, __B),
4594                                              (__v2di)__W);
4595 }
4596
4597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4598 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4599 {
4600   return (__m128i)__builtin_ia32_selectq_128(__U,
4601                                              (__v2di)_mm_rorv_epi64(__A, __B),
4602                                              (__v2di)_mm_setzero_si128());
4603 }
4604
4605 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4606 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
4607 {
4608   return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4609 }
4610
4611 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4612 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4613 {
4614   return (__m256i)__builtin_ia32_selectq_256(__U,
4615                                             (__v4di)_mm256_rorv_epi64(__A, __B),
4616                                             (__v4di)__W);
4617 }
4618
4619 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4620 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4621 {
4622   return (__m256i)__builtin_ia32_selectq_256(__U,
4623                                             (__v4di)_mm256_rorv_epi64(__A, __B),
4624                                             (__v4di)_mm256_setzero_si256());
4625 }
4626
4627 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4628 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4629 {
4630   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4631                                              (__v2di)_mm_sllv_epi64(__X, __Y),
4632                                              (__v2di)__W);
4633 }
4634
4635 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4636 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4637 {
4638   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4639                                              (__v2di)_mm_sllv_epi64(__X, __Y),
4640                                              (__v2di)_mm_setzero_si128());
4641 }
4642
4643 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4644 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4645 {
4646   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4647                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
4648                                             (__v4di)__W);
4649 }
4650
4651 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4652 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4653 {
4654   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4655                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
4656                                             (__v4di)_mm256_setzero_si256());
4657 }
4658
4659 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4660 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4661 {
4662   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4663                                              (__v4si)_mm_sllv_epi32(__X, __Y),
4664                                              (__v4si)__W);
4665 }
4666
4667 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4668 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4669 {
4670   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4671                                              (__v4si)_mm_sllv_epi32(__X, __Y),
4672                                              (__v4si)_mm_setzero_si128());
4673 }
4674
4675 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4676 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4677 {
4678   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4679                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
4680                                             (__v8si)__W);
4681 }
4682
4683 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4684 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4685 {
4686   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4687                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
4688                                             (__v8si)_mm256_setzero_si256());
4689 }
4690
4691 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4692 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4693 {
4694   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4695                                              (__v2di)_mm_srlv_epi64(__X, __Y),
4696                                              (__v2di)__W);
4697 }
4698
4699 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4700 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4701 {
4702   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4703                                              (__v2di)_mm_srlv_epi64(__X, __Y),
4704                                              (__v2di)_mm_setzero_si128());
4705 }
4706
4707 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4708 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4709 {
4710   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4711                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
4712                                             (__v4di)__W);
4713 }
4714
4715 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4716 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4717 {
4718   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4719                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
4720                                             (__v4di)_mm256_setzero_si256());
4721 }
4722
4723 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4724 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4725 {
4726   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4727                                             (__v4si)_mm_srlv_epi32(__X, __Y),
4728                                             (__v4si)__W);
4729 }
4730
4731 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4732 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4733 {
4734   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4735                                             (__v4si)_mm_srlv_epi32(__X, __Y),
4736                                             (__v4si)_mm_setzero_si128());
4737 }
4738
4739 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4740 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4741 {
4742   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4743                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
4744                                             (__v8si)__W);
4745 }
4746
4747 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4748 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4749 {
4750   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4751                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
4752                                             (__v8si)_mm256_setzero_si256());
4753 }
4754
4755 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4756 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4757 {
4758   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4759                                              (__v4si)_mm_srl_epi32(__A, __B),
4760                                              (__v4si)__W);
4761 }
4762
4763 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4764 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4765 {
4766   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4767                                              (__v4si)_mm_srl_epi32(__A, __B),
4768                                              (__v4si)_mm_setzero_si128());
4769 }
4770
4771 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4772 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4773 {
4774   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4775                                              (__v8si)_mm256_srl_epi32(__A, __B),
4776                                              (__v8si)__W);
4777 }
4778
4779 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4780 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4781 {
4782   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4783                                              (__v8si)_mm256_srl_epi32(__A, __B),
4784                                              (__v8si)_mm256_setzero_si256());
4785 }
4786
4787 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4788 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4789 {
4790   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4791                                              (__v4si)_mm_srli_epi32(__A, __B),
4792                                              (__v4si)__W);
4793 }
4794
4795 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4796 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4797 {
4798   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4799                                              (__v4si)_mm_srli_epi32(__A, __B),
4800                                              (__v4si)_mm_setzero_si128());
4801 }
4802
4803 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4804 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4805 {
4806   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4807                                              (__v8si)_mm256_srli_epi32(__A, __B),
4808                                              (__v8si)__W);
4809 }
4810
4811 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4812 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4813 {
4814   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4815                                              (__v8si)_mm256_srli_epi32(__A, __B),
4816                                              (__v8si)_mm256_setzero_si256());
4817 }
4818
4819 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4820 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4821 {
4822   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4823                                              (__v2di)_mm_srl_epi64(__A, __B),
4824                                              (__v2di)__W);
4825 }
4826
4827 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4828 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4829 {
4830   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4831                                              (__v2di)_mm_srl_epi64(__A, __B),
4832                                              (__v2di)_mm_setzero_si128());
4833 }
4834
4835 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4836 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4837 {
4838   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4839                                              (__v4di)_mm256_srl_epi64(__A, __B),
4840                                              (__v4di)__W);
4841 }
4842
4843 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4844 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4845 {
4846   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4847                                              (__v4di)_mm256_srl_epi64(__A, __B),
4848                                              (__v4di)_mm256_setzero_si256());
4849 }
4850
4851 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4852 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4853 {
4854   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4855                                              (__v2di)_mm_srli_epi64(__A, __B),
4856                                              (__v2di)__W);
4857 }
4858
4859 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4860 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
4861 {
4862   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4863                                              (__v2di)_mm_srli_epi64(__A, __B),
4864                                              (__v2di)_mm_setzero_si128());
4865 }
4866
4867 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4868 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4869 {
4870   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4871                                              (__v4di)_mm256_srli_epi64(__A, __B),
4872                                              (__v4di)__W);
4873 }
4874
4875 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4876 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
4877 {
4878   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4879                                              (__v4di)_mm256_srli_epi64(__A, __B),
4880                                              (__v4di)_mm256_setzero_si256());
4881 }
4882
4883 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4884 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4885 {
4886   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4887                                             (__v4si)_mm_srav_epi32(__X, __Y),
4888                                             (__v4si)__W);
4889 }
4890
4891 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4892 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4893 {
4894   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4895                                             (__v4si)_mm_srav_epi32(__X, __Y),
4896                                             (__v4si)_mm_setzero_si128());
4897 }
4898
4899 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4900 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4901 {
4902   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4903                                             (__v8si)_mm256_srav_epi32(__X, __Y),
4904                                             (__v8si)__W);
4905 }
4906
4907 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4908 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4909 {
4910   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4911                                             (__v8si)_mm256_srav_epi32(__X, __Y),
4912                                             (__v8si)_mm256_setzero_si256());
4913 }
4914
4915 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4916 _mm_srav_epi64(__m128i __X, __m128i __Y)
4917 {
4918   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
4919 }
4920
4921 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4922 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4923 {
4924   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4925                                              (__v2di)_mm_srav_epi64(__X, __Y),
4926                                              (__v2di)__W);
4927 }
4928
4929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4930 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4931 {
4932   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4933                                              (__v2di)_mm_srav_epi64(__X, __Y),
4934                                              (__v2di)_mm_setzero_si128());
4935 }
4936
4937 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4938 _mm256_srav_epi64(__m256i __X, __m256i __Y)
4939 {
4940   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
4941 }
4942
4943 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4944 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4945 {
4946   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4947                                              (__v4di)_mm256_srav_epi64(__X, __Y),
4948                                              (__v4di)__W);
4949 }
4950
4951 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4952 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
4953 {
4954   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4955                                              (__v4di)_mm256_srav_epi64(__X, __Y),
4956                                              (__v4di)_mm256_setzero_si256());
4957 }
4958
4959 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4960 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4961 {
4962   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4963                  (__v4si) __A,
4964                  (__v4si) __W);
4965 }
4966
4967 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4968 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
4969 {
4970   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4971                  (__v4si) __A,
4972                  (__v4si) _mm_setzero_si128 ());
4973 }
4974
4975
4976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4977 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
4978 {
4979   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4980                  (__v8si) __A,
4981                  (__v8si) __W);
4982 }
4983
4984 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4985 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
4986 {
4987   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4988                  (__v8si) __A,
4989                  (__v8si) _mm256_setzero_si256 ());
4990 }
4991
4992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4993 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
4994 {
4995   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
4996               (__v4si) __W,
4997               (__mmask8)
4998               __U);
4999 }
5000
5001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5002 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5003 {
5004   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5005               (__v4si)
5006               _mm_setzero_si128 (),
5007               (__mmask8)
5008               __U);
5009 }
5010
5011 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5012 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5013 {
5014   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5015               (__v8si) __W,
5016               (__mmask8)
5017               __U);
5018 }
5019
5020 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5021 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5022 {
5023   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5024               (__v8si)
5025               _mm256_setzero_si256 (),
5026               (__mmask8)
5027               __U);
5028 }
5029
5030 static __inline__ void __DEFAULT_FN_ATTRS128
5031 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5032 {
5033   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5034           (__v4si) __A,
5035           (__mmask8) __U);
5036 }
5037
5038 static __inline__ void __DEFAULT_FN_ATTRS256
5039 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5040 {
5041   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5042           (__v8si) __A,
5043           (__mmask8) __U);
5044 }
5045
5046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5047 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5048 {
5049   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5050                  (__v2di) __A,
5051                  (__v2di) __W);
5052 }
5053
5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5056 {
5057   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5058                  (__v2di) __A,
5059                  (__v2di) _mm_setzero_si128 ());
5060 }
5061
5062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5063 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5064 {
5065   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5066                  (__v4di) __A,
5067                  (__v4di) __W);
5068 }
5069
5070 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5071 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5072 {
5073   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5074                  (__v4di) __A,
5075                  (__v4di) _mm256_setzero_si256 ());
5076 }
5077
5078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5079 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5080 {
5081   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5082               (__v2di) __W,
5083               (__mmask8)
5084               __U);
5085 }
5086
5087 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5088 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5089 {
5090   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5091               (__v2di)
5092               _mm_setzero_si128 (),
5093               (__mmask8)
5094               __U);
5095 }
5096
5097 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5098 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5099 {
5100   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5101               (__v4di) __W,
5102               (__mmask8)
5103               __U);
5104 }
5105
5106 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5107 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5108 {
5109   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5110               (__v4di)
5111               _mm256_setzero_si256 (),
5112               (__mmask8)
5113               __U);
5114 }
5115
5116 static __inline__ void __DEFAULT_FN_ATTRS128
5117 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5118 {
5119   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5120           (__v2di) __A,
5121           (__mmask8) __U);
5122 }
5123
5124 static __inline__ void __DEFAULT_FN_ATTRS256
5125 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5126 {
5127   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5128           (__v4di) __A,
5129           (__mmask8) __U);
5130 }
5131
5132 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5133 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5134 {
5135   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5136                                               (__v2df)_mm_movedup_pd(__A),
5137                                               (__v2df)__W);
5138 }
5139
5140 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5141 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5142 {
5143   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5144                                               (__v2df)_mm_movedup_pd(__A),
5145                                               (__v2df)_mm_setzero_pd());
5146 }
5147
5148 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5149 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5150 {
5151   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5152                                               (__v4df)_mm256_movedup_pd(__A),
5153                                               (__v4df)__W);
5154 }
5155
5156 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5157 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5158 {
5159   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5160                                               (__v4df)_mm256_movedup_pd(__A),
5161                                               (__v4df)_mm256_setzero_pd());
5162 }
5163
5164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5165 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5166 {
5167    return (__m128i)__builtin_ia32_selectd_128(__M,
5168                                               (__v4si) _mm_set1_epi32(__A),
5169                                               (__v4si)__O);
5170 }
5171
5172 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5173 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
5174 {
5175    return (__m128i)__builtin_ia32_selectd_128(__M,
5176                                               (__v4si) _mm_set1_epi32(__A),
5177                                               (__v4si)_mm_setzero_si128());
5178 }
5179
5180 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5181 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5182 {
5183    return (__m256i)__builtin_ia32_selectd_256(__M,
5184                                               (__v8si) _mm256_set1_epi32(__A),
5185                                               (__v8si)__O);
5186 }
5187
5188 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5189 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5190 {
5191    return (__m256i)__builtin_ia32_selectd_256(__M,
5192                                               (__v8si) _mm256_set1_epi32(__A),
5193                                               (__v8si)_mm256_setzero_si256());
5194 }
5195
5196
5197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5198 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5199 {
5200   return (__m128i) __builtin_ia32_selectq_128(__M,
5201                                               (__v2di) _mm_set1_epi64x(__A),
5202                                               (__v2di) __O);
5203 }
5204
5205 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5206 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5207 {
5208   return (__m128i) __builtin_ia32_selectq_128(__M,
5209                                               (__v2di) _mm_set1_epi64x(__A),
5210                                               (__v2di) _mm_setzero_si128());
5211 }
5212
5213 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5214 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5215 {
5216   return (__m256i) __builtin_ia32_selectq_256(__M,
5217                                               (__v4di) _mm256_set1_epi64x(__A),
5218                                               (__v4di) __O) ;
5219 }
5220
5221 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5222 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5223 {
5224    return (__m256i) __builtin_ia32_selectq_256(__M,
5225                                                (__v4di) _mm256_set1_epi64x(__A),
5226                                                (__v4di) _mm256_setzero_si256());
5227 }
5228
/* Unmasked fixupimm on __m128d: forwards A, B (as __v2df), C (as __v2di) and
 * the immediate to __builtin_ia32_fixupimmpd128_mask with an all-ones mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)-1))
5234
/* Masked fixupimm on __m128d: forwards A, B (as __v2df), C (as __v2di), the
 * immediate and mask U to __builtin_ia32_fixupimmpd128_mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5240
/* Zero-masked fixupimm on __m128d: forwards A, B (as __v2df), C (as __v2di),
 * the immediate and mask U to __builtin_ia32_fixupimmpd128_maskz.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5246
/* Unmasked fixupimm on __m256d: forwards A, B (as __v4df), C (as __v4di) and
 * the immediate to __builtin_ia32_fixupimmpd256_mask with an all-ones mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)-1))
5252
/* Masked fixupimm on __m256d: forwards A, B (as __v4df), C (as __v4di), the
 * immediate and mask U to __builtin_ia32_fixupimmpd256_mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5258
/* Zero-masked fixupimm on __m256d: forwards A, B (as __v4df), C (as __v4di),
 * the immediate and mask U to __builtin_ia32_fixupimmpd256_maskz.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5264
/* Unmasked fixupimm on __m128: forwards A, B (as __v4sf), C (as __v4si) and
 * the immediate to __builtin_ia32_fixupimmps128_mask with an all-ones mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (int)(imm), (__mmask8)-1))
5270
/* Masked fixupimm on __m128: forwards A, B (as __v4sf), C (as __v4si), the
 * immediate and mask U to __builtin_ia32_fixupimmps128_mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (int)(imm), (__mmask8)(U)))
5276
/* Zero-masked fixupimm on __m128: forwards A, B (as __v4sf), C (as __v4si),
 * the immediate and mask U to __builtin_ia32_fixupimmps128_maskz.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5282
/* Unmasked fixupimm on __m256: forwards A, B (as __v8sf), C (as __v8si) and
 * the immediate to __builtin_ia32_fixupimmps256_mask with an all-ones mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (int)(imm), (__mmask8)-1))
5288
/* Masked fixupimm on __m256: forwards A, B (as __v8sf), C (as __v8si), the
 * immediate and mask U to __builtin_ia32_fixupimmps256_mask.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (int)(imm), (__mmask8)(U)))
5294
/* Zero-masked fixupimm on __m256: forwards A, B (as __v8sf), C (as __v8si),
 * the immediate and mask U to __builtin_ia32_fixupimmps256_maskz.
 * The whole expansion is parenthesized so that a postfix operator applied at
 * the use site binds to the cast result rather than to the builtin call. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)))
5300
5301 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5302 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5303 {
5304   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5305                (__v2df) __W,
5306                (__mmask8) __U);
5307 }
5308
5309 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5310 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5311 {
5312   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5313                (__v2df)
5314                _mm_setzero_pd (),
5315                (__mmask8) __U);
5316 }
5317
5318 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5319 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5320 {
5321   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5322                (__v4df) __W,
5323                (__mmask8) __U);
5324 }
5325
5326 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5327 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5328 {
5329   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5330                (__v4df)
5331                _mm256_setzero_pd (),
5332                (__mmask8) __U);
5333 }
5334
5335 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5336 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5337 {
5338   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5339               (__v4sf) __W,
5340               (__mmask8) __U);
5341 }
5342
5343 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5344 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5345 {
5346   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5347               (__v4sf)
5348               _mm_setzero_ps (),
5349               (__mmask8) __U);
5350 }
5351
5352 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5353 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5354 {
5355   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5356               (__v8sf) __W,
5357               (__mmask8) __U);
5358 }
5359
5360 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5361 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5362 {
5363   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5364               (__v8sf)
5365               _mm256_setzero_ps (),
5366               (__mmask8) __U);
5367 }
5368
5369 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5370 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5371 {
5372   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5373                  (__v2di) __W,
5374                  (__mmask8) __U);
5375 }
5376
5377 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5378 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5379 {
5380   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5381                  (__v2di)
5382                  _mm_setzero_si128 (),
5383                  (__mmask8) __U);
5384 }
5385
5386 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5387 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5388 {
5389   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5390                  (__v4di) __W,
5391                  (__mmask8) __U);
5392 }
5393
5394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5395 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5396 {
5397   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5398                  (__v4di)
5399                  _mm256_setzero_si256 (),
5400                  (__mmask8) __U);
5401 }
5402
5403 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5404 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5405 {
5406   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5407                  (__v4si) __W,
5408                  (__mmask8) __U);
5409 }
5410
5411 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5412 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5413 {
5414   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5415                  (__v4si)
5416                  _mm_setzero_si128 (),
5417                  (__mmask8) __U);
5418 }
5419
5420 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5421 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5422 {
5423   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5424                  (__v8si) __W,
5425                  (__mmask8) __U);
5426 }
5427
5428 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5429 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5430 {
5431   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5432                  (__v8si)
5433                  _mm256_setzero_si256 (),
5434                  (__mmask8) __U);
5435 }
5436
5437 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5438 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5439 {
5440   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5441                (__v2df) __W,
5442                (__mmask8) __U);
5443 }
5444
5445 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5446 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5447 {
5448   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5449                (__v2df)
5450                _mm_setzero_pd (),
5451                (__mmask8) __U);
5452 }
5453
5454 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5455 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5456 {
5457   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5458                (__v4df) __W,
5459                (__mmask8) __U);
5460 }
5461
5462 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5463 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5464 {
5465   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5466                (__v4df)
5467                _mm256_setzero_pd (),
5468                (__mmask8) __U);
5469 }
5470
5471 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5472 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5473 {
5474   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5475               (__v4sf) __W,
5476               (__mmask8) __U);
5477 }
5478
5479 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5480 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5481 {
5482   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5483               (__v4sf)
5484               _mm_setzero_ps (),
5485               (__mmask8) __U);
5486 }
5487
5488 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5489 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5490 {
5491   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5492               (__v8sf) __W,
5493               (__mmask8) __U);
5494 }
5495
5496 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5497 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5498 {
5499   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5500               (__v8sf)
5501               _mm256_setzero_ps (),
5502               (__mmask8) __U);
5503 }
5504
5505 static __inline__ void __DEFAULT_FN_ATTRS128
5506 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5507 {
5508   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5509            (__v2df) __A,
5510            (__mmask8) __U);
5511 }
5512
5513 static __inline__ void __DEFAULT_FN_ATTRS256
5514 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5515 {
5516   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5517            (__v4df) __A,
5518            (__mmask8) __U);
5519 }
5520
5521 static __inline__ void __DEFAULT_FN_ATTRS128
5522 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5523 {
5524   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5525            (__v4sf) __A,
5526            (__mmask8) __U);
5527 }
5528
5529 static __inline__ void __DEFAULT_FN_ATTRS256
5530 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5531 {
5532   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5533            (__v8sf) __A,
5534            (__mmask8) __U);
5535 }
5536
5537 static __inline__ void __DEFAULT_FN_ATTRS128
5538 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5539 {
5540   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5541              (__v2di) __A,
5542              (__mmask8) __U);
5543 }
5544
5545 static __inline__ void __DEFAULT_FN_ATTRS256
5546 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5547 {
5548   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5549              (__v4di) __A,
5550              (__mmask8) __U);
5551 }
5552
5553 static __inline__ void __DEFAULT_FN_ATTRS128
5554 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5555 {
5556   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5557              (__v4si) __A,
5558              (__mmask8) __U);
5559 }
5560
5561 static __inline__ void __DEFAULT_FN_ATTRS256
5562 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5563 {
5564   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5565              (__v8si) __A,
5566              (__mmask8) __U);
5567 }
5568
5569 static __inline__ void __DEFAULT_FN_ATTRS128
5570 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5571 {
5572   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5573            (__v2df) __A,
5574            (__mmask8) __U);
5575 }
5576
5577 static __inline__ void __DEFAULT_FN_ATTRS256
5578 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5579 {
5580   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5581            (__v4df) __A,
5582            (__mmask8) __U);
5583 }
5584
5585 static __inline__ void __DEFAULT_FN_ATTRS128
5586 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5587 {
5588   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5589            (__v4sf) __A,
5590            (__mmask8) __U);
5591 }
5592
5593 static __inline__ void __DEFAULT_FN_ATTRS256
5594 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5595 {
5596   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5597            (__v8sf) __A,
5598            (__mmask8) __U);
5599 }
5600
5601
5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5604 {
5605   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5606                                               (__v2df)_mm_unpackhi_pd(__A, __B),
5607                                               (__v2df)__W);
5608 }
5609
5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5612 {
5613   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5614                                               (__v2df)_mm_unpackhi_pd(__A, __B),
5615                                               (__v2df)_mm_setzero_pd());
5616 }
5617
5618 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5619 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5620 {
5621   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5622                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
5623                                            (__v4df)__W);
5624 }
5625
5626 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5627 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5628 {
5629   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5630                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
5631                                            (__v4df)_mm256_setzero_pd());
5632 }
5633
5634 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5635 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5636 {
5637   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5638                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
5639                                              (__v4sf)__W);
5640 }
5641
5642 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5643 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5644 {
5645   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5646                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
5647                                              (__v4sf)_mm_setzero_ps());
5648 }
5649
5650 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5651 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5652 {
5653   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5654                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
5655                                            (__v8sf)__W);
5656 }
5657
5658 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5659 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5660 {
5661   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5662                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
5663                                            (__v8sf)_mm256_setzero_ps());
5664 }
5665
5666 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5667 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5668 {
5669   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5670                                               (__v2df)_mm_unpacklo_pd(__A, __B),
5671                                               (__v2df)__W);
5672 }
5673
5674 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5675 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5676 {
5677   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5678                                               (__v2df)_mm_unpacklo_pd(__A, __B),
5679                                               (__v2df)_mm_setzero_pd());
5680 }
5681
5682 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5683 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5684 {
5685   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5686                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
5687                                            (__v4df)__W);
5688 }
5689
5690 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5691 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5692 {
5693   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5694                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
5695                                            (__v4df)_mm256_setzero_pd());
5696 }
5697
5698 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5699 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5700 {
5701   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5702                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
5703                                              (__v4sf)__W);
5704 }
5705
5706 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5707 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5708 {
5709   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5710                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
5711                                              (__v4sf)_mm_setzero_ps());
5712 }
5713
5714 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5715 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5716 {
5717   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5718                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
5719                                            (__v8sf)__W);
5720 }
5721
5722 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5723 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5724 {
5725   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5726                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
5727                                            (__v8sf)_mm256_setzero_ps());
5728 }
5729
5730 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5731 _mm_rcp14_pd (__m128d __A)
5732 {
5733   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5734                 (__v2df)
5735                 _mm_setzero_pd (),
5736                 (__mmask8) -1);
5737 }
5738
5739 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5740 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5741 {
5742   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5743                 (__v2df) __W,
5744                 (__mmask8) __U);
5745 }
5746
5747 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5748 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5749 {
5750   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5751                 (__v2df)
5752                 _mm_setzero_pd (),
5753                 (__mmask8) __U);
5754 }
5755
5756 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5757 _mm256_rcp14_pd (__m256d __A)
5758 {
5759   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5760                 (__v4df)
5761                 _mm256_setzero_pd (),
5762                 (__mmask8) -1);
5763 }
5764
5765 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5766 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5767 {
5768   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5769                 (__v4df) __W,
5770                 (__mmask8) __U);
5771 }
5772
5773 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5774 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5775 {
5776   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5777                 (__v4df)
5778                 _mm256_setzero_pd (),
5779                 (__mmask8) __U);
5780 }
5781
5782 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5783 _mm_rcp14_ps (__m128 __A)
5784 {
5785   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5786                (__v4sf)
5787                _mm_setzero_ps (),
5788                (__mmask8) -1);
5789 }
5790
5791 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5792 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5793 {
5794   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5795                (__v4sf) __W,
5796                (__mmask8) __U);
5797 }
5798
5799 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5800 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
5801 {
5802   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5803                (__v4sf)
5804                _mm_setzero_ps (),
5805                (__mmask8) __U);
5806 }
5807
5808 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5809 _mm256_rcp14_ps (__m256 __A)
5810 {
5811   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5812                (__v8sf)
5813                _mm256_setzero_ps (),
5814                (__mmask8) -1);
5815 }
5816
5817 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5818 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
5819 {
5820   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5821                (__v8sf) __W,
5822                (__mmask8) __U);
5823 }
5824
5825 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5826 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
5827 {
5828   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5829                (__v8sf)
5830                _mm256_setzero_ps (),
5831                (__mmask8) __U);
5832 }
5833
/* Passes mask U, _mm_permute_pd(X, C) and W to the 128-bit pd select builtin.
 * The expansion is fully parenthesized so that postfix operators written
 * after the macro call apply to the cast result, not to the builtin call. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
5838
/* Passes mask U, _mm_permute_pd(X, C) and a zero vector to the 128-bit pd
 * select builtin.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
5843
/* Passes mask U, _mm256_permute_pd(X, C) and W to the 256-bit pd select
 * builtin.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
5848
/* Passes mask U, _mm256_permute_pd(X, C) and a zero vector to the 256-bit pd
 * select builtin.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
5853
/* Passes mask U, _mm_permute_ps(X, C) and W to the 128-bit ps select builtin.
 * The expansion is fully parenthesized so that postfix operators apply to the
 * cast result, not to the builtin call. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
5858
/* Passes mask U, _mm_permute_ps(X, C) and a zero vector to the 128-bit ps
 * select builtin.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
5863
/* Passes mask U, _mm256_permute_ps(X, C) and W to the 256-bit ps select
 * builtin.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
5868
/* Passes mask U, _mm256_permute_ps(X, C) and a zero vector to the 256-bit ps
 * select builtin.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
5873
5874 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5875 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
5876 {
5877   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5878                                             (__v2df)_mm_permutevar_pd(__A, __C),
5879                                             (__v2df)__W);
5880 }
5881
5882 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5883 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
5884 {
5885   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5886                                             (__v2df)_mm_permutevar_pd(__A, __C),
5887                                             (__v2df)_mm_setzero_pd());
5888 }
5889
5890 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5891 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
5892 {
5893   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5894                                          (__v4df)_mm256_permutevar_pd(__A, __C),
5895                                          (__v4df)__W);
5896 }
5897
5898 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5899 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
5900 {
5901   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5902                                          (__v4df)_mm256_permutevar_pd(__A, __C),
5903                                          (__v4df)_mm256_setzero_pd());
5904 }
5905
5906 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5907 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
5908 {
5909   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5910                                             (__v4sf)_mm_permutevar_ps(__A, __C),
5911                                             (__v4sf)__W);
5912 }
5913
5914 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5915 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
5916 {
5917   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5918                                             (__v4sf)_mm_permutevar_ps(__A, __C),
5919                                             (__v4sf)_mm_setzero_ps());
5920 }
5921
5922 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5923 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
5924 {
5925   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5926                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
5927                                           (__v8sf)__W);
5928 }
5929
5930 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5931 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
5932 {
5933   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5934                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
5935                                           (__v8sf)_mm256_setzero_ps());
5936 }
5937
5938 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5939 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5940 {
5941   return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
5942 }
5943
5944 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5945 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5946 {
5947   return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5948                                      _mm_setzero_si128());
5949 }
5950
5951 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5952 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5953 {
5954   return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
5955                                    _mm256_setzero_si256());
5956 }
5957
5958 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5959 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5960 {
5961   return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5962                                         _mm256_setzero_si256());
5963 }
5964
5965 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5966 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5967 {
5968   return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
5969 }
5970
5971 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5972 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5973 {
5974   return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5975                                      _mm_setzero_si128());
5976 }
5977
5978 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5979 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5980 {
5981   return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
5982                                    _mm256_setzero_si256());
5983 }
5984
5985 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5986 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5987 {
5988   return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
5989                                         _mm256_setzero_si256());
5990 }
5991
5992 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5993 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5994 {
5995   return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
5996 }
5997
5998 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5999 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6000 {
6001   return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6002                                     _mm_setzero_si128());
6003 }
6004
6005 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6006 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6007 {
6008   return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6009                                   _mm256_setzero_si256());
6010 }
6011
6012 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6013 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6014 {
6015   return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6016                                        _mm256_setzero_si256());
6017 }
6018
6019 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6020 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6021 {
6022   return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6023 }
6024
6025 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6026 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6027 {
6028   return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6029                                     _mm_setzero_si128());
6030 }
6031
6032 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6033 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6034 {
6035   return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6036                                   _mm256_setzero_si256());
6037 }
6038
6039 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6040 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6041 {
6042   return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6043                                        _mm256_setzero_si256());
6044 }
6045
6046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6047 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6048 {
6049   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6050                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
6051                                            (__v4si)__W);
6052 }
6053
6054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6055 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6056 {
6057   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6058                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
6059                                            (__v4si)_mm_setzero_si128());
6060 }
6061
6062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6063 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6064 {
6065   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6066                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
6067                                         (__v8si)__W);
6068 }
6069
6070 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6071 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6072 {
6073   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6074                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
6075                                         (__v8si)_mm256_setzero_si256());
6076 }
6077
6078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6079 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6080 {
6081   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6082                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
6083                                            (__v2di)__W);
6084 }
6085
6086 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6087 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6088 {
6089   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6090                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
6091                                            (__v2di)_mm_setzero_si128());
6092 }
6093
6094 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6095 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6096 {
6097   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6098                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
6099                                         (__v4di)__W);
6100 }
6101
6102 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6103 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6104 {
6105   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6106                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
6107                                         (__v4di)_mm256_setzero_si256());
6108 }
6109
6110 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6111 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6112 {
6113   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6114                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
6115                                            (__v4si)__W);
6116 }
6117
6118 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6119 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6120 {
6121   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6122                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
6123                                            (__v4si)_mm_setzero_si128());
6124 }
6125
6126 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6127 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6128 {
6129   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6130                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
6131                                         (__v8si)__W);
6132 }
6133
6134 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6135 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6136 {
6137   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6138                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
6139                                         (__v8si)_mm256_setzero_si256());
6140 }
6141
6142 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6143 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6144 {
6145   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6146                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
6147                                            (__v2di)__W);
6148 }
6149
6150 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6151 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6152 {
6153   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6154                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
6155                                            (__v2di)_mm_setzero_si128());
6156 }
6157
6158 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6159 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6160 {
6161   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6162                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
6163                                         (__v4di)__W);
6164 }
6165
6166 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6167 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6168 {
6169   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6170                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
6171                                         (__v4di)_mm256_setzero_si256());
6172 }
6173
6174 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6175 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6176 {
6177   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6178                                              (__v4si)_mm_sra_epi32(__A, __B),
6179                                              (__v4si)__W);
6180 }
6181
6182 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6183 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6184 {
6185   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6186                                              (__v4si)_mm_sra_epi32(__A, __B),
6187                                              (__v4si)_mm_setzero_si128());
6188 }
6189
6190 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6191 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6192 {
6193   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6194                                              (__v8si)_mm256_sra_epi32(__A, __B),
6195                                              (__v8si)__W);
6196 }
6197
6198 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6199 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6200 {
6201   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6202                                              (__v8si)_mm256_sra_epi32(__A, __B),
6203                                              (__v8si)_mm256_setzero_si256());
6204 }
6205
6206 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6207 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6208 {
6209   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6210                                              (__v4si)_mm_srai_epi32(__A, __B),
6211                                              (__v4si)__W);
6212 }
6213
6214 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6215 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6216 {
6217   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6218                                              (__v4si)_mm_srai_epi32(__A, __B),
6219                                              (__v4si)_mm_setzero_si128());
6220 }
6221
6222 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6223 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6224 {
6225   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6226                                              (__v8si)_mm256_srai_epi32(__A, __B),
6227                                              (__v8si)__W);
6228 }
6229
6230 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6231 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6232 {
6233   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6234                                              (__v8si)_mm256_srai_epi32(__A, __B),
6235                                              (__v8si)_mm256_setzero_si256());
6236 }
6237
6238 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6239 _mm_sra_epi64(__m128i __A, __m128i __B)
6240 {
6241   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6242 }
6243
6244 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6245 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6246 {
6247   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6248                                              (__v2di)_mm_sra_epi64(__A, __B), \
6249                                              (__v2di)__W);
6250 }
6251
6252 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6253 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6254 {
6255   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6256                                              (__v2di)_mm_sra_epi64(__A, __B), \
6257                                              (__v2di)_mm_setzero_si128());
6258 }
6259
6260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6261 _mm256_sra_epi64(__m256i __A, __m128i __B)
6262 {
6263   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6264 }
6265
6266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6267 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6268 {
6269   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6270                                            (__v4di)_mm256_sra_epi64(__A, __B), \
6271                                            (__v4di)__W);
6272 }
6273
6274 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6275 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6276 {
6277   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6278                                            (__v4di)_mm256_sra_epi64(__A, __B), \
6279                                            (__v4di)_mm256_setzero_si256());
6280 }
6281
6282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6283 _mm_srai_epi64(__m128i __A, int __imm)
6284 {
6285   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6286 }
6287
6288 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6289 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6290 {
6291   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6292                                            (__v2di)_mm_srai_epi64(__A, __imm), \
6293                                            (__v2di)__W);
6294 }
6295
6296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6297 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6298 {
6299   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6300                                            (__v2di)_mm_srai_epi64(__A, __imm), \
6301                                            (__v2di)_mm_setzero_si128());
6302 }
6303
6304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6305 _mm256_srai_epi64(__m256i __A, int __imm)
6306 {
6307   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6308 }
6309
6310 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6311 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6312 {
6313   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6314                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
6315                                         (__v4di)__W);
6316 }
6317
6318 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6319 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6320 {
6321   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6322                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
6323                                         (__v4di)_mm256_setzero_si256());
6324 }
6325
/* Expands to the pternlogd128 mask builtin on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6331
/* Expands to the pternlogd128 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6337
/* Expands to the pternlogd128 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6343
/* Expands to the pternlogd256 mask builtin on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6349
/* Expands to the pternlogd256 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6355
/* Expands to the pternlogd256 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6361
/* Expands to the pternlogq128 mask builtin on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6367
/* Expands to the pternlogq128 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6373
/* Expands to the pternlogq128 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6379
/* Expands to the pternlogq256 mask builtin on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6385
/* Expands to the pternlogq256 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6391
/* Expands to the pternlogq256 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so that postfix operators
 * apply to the cast result, not to the builtin call. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6397
6398
6399
/* Expands to the shuf_f32x4_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so that postfix operators apply to the
 * cast result, not to the builtin call. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
6403
/* Passes mask U, _mm256_shuffle_f32x4(A, B, imm) and W to the 256-bit ps
 * select builtin.  The expansion is fully parenthesized so that postfix
 * operators apply to the cast result, not to the builtin call. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))
6408
/* Passes mask U, _mm256_shuffle_f32x4(A, B, imm) and a zero vector to the
 * 256-bit ps select builtin.  The expansion is fully parenthesized so that
 * postfix operators apply to the cast result, not to the builtin call. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
6413
/* _mm256_shuffle_f64x2 family.
 * The base macro forwards A, B (as __v4df) and the immediate imm to
 * __builtin_ia32_shuf_f64x2_256.  The mask forms pass that result, together
 * with W or an all-zero vector, to __builtin_ia32_selectpd_256 under mask U.
 * Replacement lists are fully parenthesized so that postfix operators applied
 * to a macro call bind to the cast result. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
6427
/* _mm256_shuffle_i32x4 family.
 * The base macro forwards A, B (as __v8si) and the immediate imm to
 * __builtin_ia32_shuf_i32x4_256.  The mask forms pass that result, together
 * with W or an all-zero vector, to __builtin_ia32_selectd_256 under mask U.
 * Replacement lists are fully parenthesized so that postfix operators applied
 * to a macro call bind to the cast result. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
6441
/* _mm256_shuffle_i64x2 family.
 * The base macro forwards A, B (as __v4di) and the immediate imm to
 * __builtin_ia32_shuf_i64x2_256.  The mask forms pass that result, together
 * with W or an all-zero vector, to __builtin_ia32_selectq_256 under mask U.
 * Replacement lists are fully parenthesized so that postfix operators applied
 * to a macro call bind to the cast result. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
6456
/* Masked forms of _mm_shuffle_pd / _mm256_shuffle_pd.
 * Each macro evaluates the unmasked shuffle on (A, B, M) and passes the
 * result, together with W or an all-zero vector, to the matching
 * __builtin_ia32_selectpd_* builtin under mask U.
 * Replacement lists are fully parenthesized so that postfix operators applied
 * to a macro call bind to the cast result. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))

#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))

#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
6476
/* Masked forms of _mm_shuffle_ps / _mm256_shuffle_ps.
 * Each macro evaluates the unmasked shuffle on (A, B, M) and passes the
 * result, together with W or an all-zero vector, to the matching
 * __builtin_ia32_selectps_* builtin under mask U.
 * Replacement lists are fully parenthesized so that postfix operators applied
 * to a macro call bind to the cast result. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))

#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))

#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))

#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6496
6497 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6498 _mm_rsqrt14_pd (__m128d __A)
6499 {
6500   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6501                  (__v2df)
6502                  _mm_setzero_pd (),
6503                  (__mmask8) -1);
6504 }
6505
6506 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6507 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6508 {
6509   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6510                  (__v2df) __W,
6511                  (__mmask8) __U);
6512 }
6513
6514 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6515 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6516 {
6517   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6518                  (__v2df)
6519                  _mm_setzero_pd (),
6520                  (__mmask8) __U);
6521 }
6522
6523 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6524 _mm256_rsqrt14_pd (__m256d __A)
6525 {
6526   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6527                  (__v4df)
6528                  _mm256_setzero_pd (),
6529                  (__mmask8) -1);
6530 }
6531
6532 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6533 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6534 {
6535   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6536                  (__v4df) __W,
6537                  (__mmask8) __U);
6538 }
6539
6540 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6541 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6542 {
6543   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6544                  (__v4df)
6545                  _mm256_setzero_pd (),
6546                  (__mmask8) __U);
6547 }
6548
6549 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6550 _mm_rsqrt14_ps (__m128 __A)
6551 {
6552   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6553                 (__v4sf)
6554                 _mm_setzero_ps (),
6555                 (__mmask8) -1);
6556 }
6557
6558 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6559 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6560 {
6561   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6562                 (__v4sf) __W,
6563                 (__mmask8) __U);
6564 }
6565
6566 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6567 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6568 {
6569   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6570                 (__v4sf)
6571                 _mm_setzero_ps (),
6572                 (__mmask8) __U);
6573 }
6574
6575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6576 _mm256_rsqrt14_ps (__m256 __A)
6577 {
6578   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6579                 (__v8sf)
6580                 _mm256_setzero_ps (),
6581                 (__mmask8) -1);
6582 }
6583
6584 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6585 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6586 {
6587   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6588                 (__v8sf) __W,
6589                 (__mmask8) __U);
6590 }
6591
6592 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6593 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6594 {
6595   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6596                 (__v8sf)
6597                 _mm256_setzero_ps (),
6598                 (__mmask8) __U);
6599 }
6600
6601 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6602 _mm256_broadcast_f32x4(__m128 __A)
6603 {
6604   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6605                                          0, 1, 2, 3, 0, 1, 2, 3);
6606 }
6607
6608 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6609 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6610 {
6611   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6612                                             (__v8sf)_mm256_broadcast_f32x4(__A),
6613                                             (__v8sf)__O);
6614 }
6615
6616 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6617 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6618 {
6619   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6620                                             (__v8sf)_mm256_broadcast_f32x4(__A),
6621                                             (__v8sf)_mm256_setzero_ps());
6622 }
6623
6624 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6625 _mm256_broadcast_i32x4(__m128i __A)
6626 {
6627   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6628                                           0, 1, 2, 3, 0, 1, 2, 3);
6629 }
6630
6631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6632 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6633 {
6634   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6635                                             (__v8si)_mm256_broadcast_i32x4(__A),
6636                                             (__v8si)__O);
6637 }
6638
6639 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6640 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6641 {
6642   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6643                                             (__v8si)_mm256_broadcast_i32x4(__A),
6644                                             (__v8si)_mm256_setzero_si256());
6645 }
6646
6647 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6648 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6649 {
6650   return (__m256d)__builtin_ia32_selectpd_256(__M,
6651                                               (__v4df) _mm256_broadcastsd_pd(__A),
6652                                               (__v4df) __O);
6653 }
6654
6655 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6656 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6657 {
6658   return (__m256d)__builtin_ia32_selectpd_256(__M,
6659                                               (__v4df) _mm256_broadcastsd_pd(__A),
6660                                               (__v4df) _mm256_setzero_pd());
6661 }
6662
6663 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6664 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6665 {
6666   return (__m128)__builtin_ia32_selectps_128(__M,
6667                                              (__v4sf) _mm_broadcastss_ps(__A),
6668                                              (__v4sf) __O);
6669 }
6670
6671 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6672 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6673 {
6674   return (__m128)__builtin_ia32_selectps_128(__M,
6675                                              (__v4sf) _mm_broadcastss_ps(__A),
6676                                              (__v4sf) _mm_setzero_ps());
6677 }
6678
6679 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6680 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6681 {
6682   return (__m256)__builtin_ia32_selectps_256(__M,
6683                                              (__v8sf) _mm256_broadcastss_ps(__A),
6684                                              (__v8sf) __O);
6685 }
6686
6687 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6688 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6689 {
6690   return (__m256)__builtin_ia32_selectps_256(__M,
6691                                              (__v8sf) _mm256_broadcastss_ps(__A),
6692                                              (__v8sf) _mm256_setzero_ps());
6693 }
6694
6695 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6696 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6697 {
6698   return (__m128i)__builtin_ia32_selectd_128(__M,
6699                                              (__v4si) _mm_broadcastd_epi32(__A),
6700                                              (__v4si) __O);
6701 }
6702
6703 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6704 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6705 {
6706   return (__m128i)__builtin_ia32_selectd_128(__M,
6707                                              (__v4si) _mm_broadcastd_epi32(__A),
6708                                              (__v4si) _mm_setzero_si128());
6709 }
6710
6711 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6712 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6713 {
6714   return (__m256i)__builtin_ia32_selectd_256(__M,
6715                                              (__v8si) _mm256_broadcastd_epi32(__A),
6716                                              (__v8si) __O);
6717 }
6718
6719 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6720 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6721 {
6722   return (__m256i)__builtin_ia32_selectd_256(__M,
6723                                              (__v8si) _mm256_broadcastd_epi32(__A),
6724                                              (__v8si) _mm256_setzero_si256());
6725 }
6726
6727 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6728 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6729 {
6730   return (__m128i)__builtin_ia32_selectq_128(__M,
6731                                              (__v2di) _mm_broadcastq_epi64(__A),
6732                                              (__v2di) __O);
6733 }
6734
6735 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6736 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6737 {
6738   return (__m128i)__builtin_ia32_selectq_128(__M,
6739                                              (__v2di) _mm_broadcastq_epi64(__A),
6740                                              (__v2di) _mm_setzero_si128());
6741 }
6742
6743 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6744 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6745 {
6746   return (__m256i)__builtin_ia32_selectq_256(__M,
6747                                              (__v4di) _mm256_broadcastq_epi64(__A),
6748                                              (__v4di) __O);
6749 }
6750
6751 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6752 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6753 {
6754   return (__m256i)__builtin_ia32_selectq_256(__M,
6755                                              (__v4di) _mm256_broadcastq_epi64(__A),
6756                                              (__v4di) _mm256_setzero_si256());
6757 }
6758
6759 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6760 _mm_cvtsepi32_epi8 (__m128i __A)
6761 {
6762   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6763                (__v16qi)_mm_undefined_si128(),
6764                (__mmask8) -1);
6765 }
6766
6767 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6768 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6769 {
6770   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6771                (__v16qi) __O, __M);
6772 }
6773
6774 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6775 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6776 {
6777   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6778                (__v16qi) _mm_setzero_si128 (),
6779                __M);
6780 }
6781
6782 static __inline__ void __DEFAULT_FN_ATTRS128
6783 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6784 {
6785   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6786 }
6787
6788 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6789 _mm256_cvtsepi32_epi8 (__m256i __A)
6790 {
6791   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6792                (__v16qi)_mm_undefined_si128(),
6793                (__mmask8) -1);
6794 }
6795
6796 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6797 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6798 {
6799   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6800                (__v16qi) __O, __M);
6801 }
6802
6803 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6804 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
6805 {
6806   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6807                (__v16qi) _mm_setzero_si128 (),
6808                __M);
6809 }
6810
6811 static __inline__ void __DEFAULT_FN_ATTRS128
6812 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
6813 {
6814   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
6815 }
6816
6817 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6818 _mm_cvtsepi32_epi16 (__m128i __A)
6819 {
6820   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6821                (__v8hi)_mm_setzero_si128 (),
6822                (__mmask8) -1);
6823 }
6824
6825 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6826 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6827 {
6828   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6829                (__v8hi)__O,
6830                __M);
6831 }
6832
6833 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6834 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
6835 {
6836   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6837                (__v8hi) _mm_setzero_si128 (),
6838                __M);
6839 }
6840
6841 static __inline__ void __DEFAULT_FN_ATTRS128
6842 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
6843 {
6844   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
6845 }
6846
6847 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6848 _mm256_cvtsepi32_epi16 (__m256i __A)
6849 {
6850   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6851                (__v8hi)_mm_undefined_si128(),
6852                (__mmask8) -1);
6853 }
6854
6855 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6856 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6857 {
6858   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6859                (__v8hi) __O, __M);
6860 }
6861
6862 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6863 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
6864 {
6865   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6866                (__v8hi) _mm_setzero_si128 (),
6867                __M);
6868 }
6869
6870 static __inline__ void __DEFAULT_FN_ATTRS256
6871 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
6872 {
6873   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
6874 }
6875
6876 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6877 _mm_cvtsepi64_epi8 (__m128i __A)
6878 {
6879   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6880                (__v16qi)_mm_undefined_si128(),
6881                (__mmask8) -1);
6882 }
6883
6884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6885 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6886 {
6887   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6888                (__v16qi) __O, __M);
6889 }
6890
6891 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6892 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
6893 {
6894   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6895                (__v16qi) _mm_setzero_si128 (),
6896                __M);
6897 }
6898
6899 static __inline__ void __DEFAULT_FN_ATTRS128
6900 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6901 {
6902   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
6903 }
6904
6905 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6906 _mm256_cvtsepi64_epi8 (__m256i __A)
6907 {
6908   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6909                (__v16qi)_mm_undefined_si128(),
6910                (__mmask8) -1);
6911 }
6912
6913 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6914 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6915 {
6916   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6917                (__v16qi) __O, __M);
6918 }
6919
6920 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6921 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
6922 {
6923   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6924                (__v16qi) _mm_setzero_si128 (),
6925                __M);
6926 }
6927
6928 static __inline__ void __DEFAULT_FN_ATTRS256
6929 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
6930 {
6931   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
6932 }
6933
6934 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6935 _mm_cvtsepi64_epi32 (__m128i __A)
6936 {
6937   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6938                (__v4si)_mm_undefined_si128(),
6939                (__mmask8) -1);
6940 }
6941
6942 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6943 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6944 {
6945   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6946                (__v4si) __O, __M);
6947 }
6948
6949 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6950 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
6951 {
6952   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6953                (__v4si) _mm_setzero_si128 (),
6954                __M);
6955 }
6956
6957 static __inline__ void __DEFAULT_FN_ATTRS128
6958 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
6959 {
6960   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
6961 }
6962
6963 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6964 _mm256_cvtsepi64_epi32 (__m256i __A)
6965 {
6966   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6967                (__v4si)_mm_undefined_si128(),
6968                (__mmask8) -1);
6969 }
6970
6971 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6972 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
6973 {
6974   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6975                (__v4si)__O,
6976                __M);
6977 }
6978
6979 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6980 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
6981 {
6982   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6983                (__v4si) _mm_setzero_si128 (),
6984                __M);
6985 }
6986
6987 static __inline__ void __DEFAULT_FN_ATTRS256
6988 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
6989 {
6990   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
6991 }
6992
6993 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6994 _mm_cvtsepi64_epi16 (__m128i __A)
6995 {
6996   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6997                (__v8hi)_mm_undefined_si128(),
6998                (__mmask8) -1);
6999 }
7000
7001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7002 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7003 {
7004   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7005                (__v8hi) __O, __M);
7006 }
7007
7008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7009 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7010 {
7011   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7012                (__v8hi) _mm_setzero_si128 (),
7013                __M);
7014 }
7015
7016 static __inline__ void __DEFAULT_FN_ATTRS128
7017 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7018 {
7019   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7020 }
7021
7022 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7023 _mm256_cvtsepi64_epi16 (__m256i __A)
7024 {
7025   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7026                (__v8hi)_mm_undefined_si128(),
7027                (__mmask8) -1);
7028 }
7029
7030 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7031 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7032 {
7033   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7034                (__v8hi) __O, __M);
7035 }
7036
7037 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7038 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7039 {
7040   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7041                (__v8hi) _mm_setzero_si128 (),
7042                __M);
7043 }
7044
7045 static __inline__ void __DEFAULT_FN_ATTRS256
7046 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7047 {
7048   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7049 }
7050
7051 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7052 _mm_cvtusepi32_epi8 (__m128i __A)
7053 {
7054   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7055                 (__v16qi)_mm_undefined_si128(),
7056                 (__mmask8) -1);
7057 }
7058
7059 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7060 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7061 {
7062   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7063                 (__v16qi) __O,
7064                 __M);
7065 }
7066
7067 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7068 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7069 {
7070   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7071                 (__v16qi) _mm_setzero_si128 (),
7072                 __M);
7073 }
7074
7075 static __inline__ void __DEFAULT_FN_ATTRS128
7076 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7077 {
7078   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7079 }
7080
7081 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7082 _mm256_cvtusepi32_epi8 (__m256i __A)
7083 {
7084   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7085                 (__v16qi)_mm_undefined_si128(),
7086                 (__mmask8) -1);
7087 }
7088
7089 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7090 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7091 {
7092   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7093                 (__v16qi) __O,
7094                 __M);
7095 }
7096
7097 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7098 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7099 {
7100   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7101                 (__v16qi) _mm_setzero_si128 (),
7102                 __M);
7103 }
7104
7105 static __inline__ void __DEFAULT_FN_ATTRS256
7106 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7107 {
7108   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7109 }
7110
7111 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7112 _mm_cvtusepi32_epi16 (__m128i __A)
7113 {
7114   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7115                 (__v8hi)_mm_undefined_si128(),
7116                 (__mmask8) -1);
7117 }
7118
7119 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7120 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7121 {
7122   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7123                 (__v8hi) __O, __M);
7124 }
7125
7126 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7127 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7128 {
7129   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7130                 (__v8hi) _mm_setzero_si128 (),
7131                 __M);
7132 }
7133
7134 static __inline__ void __DEFAULT_FN_ATTRS128
7135 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7136 {
7137   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7138 }
7139
7140 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7141 _mm256_cvtusepi32_epi16 (__m256i __A)
7142 {
7143   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7144                 (__v8hi) _mm_undefined_si128(),
7145                 (__mmask8) -1);
7146 }
7147
7148 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7149 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7150 {
7151   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7152                 (__v8hi) __O, __M);
7153 }
7154
7155 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7156 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7157 {
7158   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7159                 (__v8hi) _mm_setzero_si128 (),
7160                 __M);
7161 }
7162
7163 static __inline__ void __DEFAULT_FN_ATTRS256
7164 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7165 {
7166   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7167 }
7168
7169 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7170 _mm_cvtusepi64_epi8 (__m128i __A)
7171 {
7172   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7173                 (__v16qi)_mm_undefined_si128(),
7174                 (__mmask8) -1);
7175 }
7176
7177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7178 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7179 {
7180   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7181                 (__v16qi) __O,
7182                 __M);
7183 }
7184
7185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7186 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7187 {
7188   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7189                 (__v16qi) _mm_setzero_si128 (),
7190                 __M);
7191 }
7192
7193 static __inline__ void __DEFAULT_FN_ATTRS128
7194 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7195 {
7196   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7197 }
7198
7199 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7200 _mm256_cvtusepi64_epi8 (__m256i __A)
7201 {
7202   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7203                 (__v16qi)_mm_undefined_si128(),
7204                 (__mmask8) -1);
7205 }
7206
7207 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7208 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7209 {
7210   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7211                 (__v16qi) __O,
7212                 __M);
7213 }
7214
7215 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7216 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7217 {
7218   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7219                 (__v16qi) _mm_setzero_si128 (),
7220                 __M);
7221 }
7222
7223 static __inline__ void __DEFAULT_FN_ATTRS256
7224 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7225 {
7226   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7227 }
7228
7229 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7230 _mm_cvtusepi64_epi32 (__m128i __A)
7231 {
7232   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7233                 (__v4si)_mm_undefined_si128(),
7234                 (__mmask8) -1);
7235 }
7236
7237 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7238 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7239 {
7240   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7241                 (__v4si) __O, __M);
7242 }
7243
7244 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7245 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7246 {
7247   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7248                 (__v4si) _mm_setzero_si128 (),
7249                 __M);
7250 }
7251
7252 static __inline__ void __DEFAULT_FN_ATTRS128
7253 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7254 {
7255   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7256 }
7257
7258 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7259 _mm256_cvtusepi64_epi32 (__m256i __A)
7260 {
7261   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7262                 (__v4si)_mm_undefined_si128(),
7263                 (__mmask8) -1);
7264 }
7265
7266 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7267 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7268 {
7269   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7270                 (__v4si) __O, __M);
7271 }
7272
7273 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7274 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7275 {
7276   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7277                 (__v4si) _mm_setzero_si128 (),
7278                 __M);
7279 }
7280
7281 static __inline__ void __DEFAULT_FN_ATTRS256
7282 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7283 {
7284   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7285 }
7286
7287 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7288 _mm_cvtusepi64_epi16 (__m128i __A)
7289 {
7290   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7291                 (__v8hi)_mm_undefined_si128(),
7292                 (__mmask8) -1);
7293 }
7294
7295 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7296 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7297 {
7298   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7299                 (__v8hi) __O, __M);
7300 }
7301
7302 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7303 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7304 {
7305   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7306                 (__v8hi) _mm_setzero_si128 (),
7307                 __M);
7308 }
7309
7310 static __inline__ void __DEFAULT_FN_ATTRS128
7311 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7312 {
7313   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7314 }
7315
7316 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7317 _mm256_cvtusepi64_epi16 (__m256i __A)
7318 {
7319   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7320                 (__v8hi)_mm_undefined_si128(),
7321                 (__mmask8) -1);
7322 }
7323
7324 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7325 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7326 {
7327   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7328                 (__v8hi) __O, __M);
7329 }
7330
7331 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7332 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7333 {
7334   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7335                 (__v8hi) _mm_setzero_si128 (),
7336                 __M);
7337 }
7338
7339 static __inline__ void __DEFAULT_FN_ATTRS256
7340 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7341 {
7342   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7343 }
7344
7345 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7346 _mm_cvtepi32_epi8 (__m128i __A)
7347 {
7348   return (__m128i)__builtin_shufflevector(
7349       __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7350       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7351 }
7352
7353 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7354 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7355 {
7356   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7357               (__v16qi) __O, __M);
7358 }
7359
7360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7361 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7362 {
7363   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7364               (__v16qi)
7365               _mm_setzero_si128 (),
7366               __M);
7367 }
7368
7369 static __inline__ void __DEFAULT_FN_ATTRS256
7370 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7371 {
7372   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7373 }
7374
7375 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7376 _mm256_cvtepi32_epi8 (__m256i __A)
7377 {
7378   return (__m128i)__builtin_shufflevector(
7379       __builtin_convertvector((__v8si)__A, __v8qi),
7380       (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7381       12, 13, 14, 15);
7382 }
7383
7384 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7385 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7386 {
7387   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7388               (__v16qi) __O, __M);
7389 }
7390
7391 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7392 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7393 {
7394   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7395               (__v16qi) _mm_setzero_si128 (),
7396               __M);
7397 }
7398
7399 static __inline__ void __DEFAULT_FN_ATTRS256
7400 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7401 {
7402   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7403 }
7404
7405 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7406 _mm_cvtepi32_epi16 (__m128i __A)
7407 {
7408   return (__m128i)__builtin_shufflevector(
7409       __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7410       2, 3, 4, 5, 6, 7);
7411 }
7412
7413 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7414 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7415 {
7416   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7417               (__v8hi) __O, __M);
7418 }
7419
7420 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7421 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7422 {
7423   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7424               (__v8hi) _mm_setzero_si128 (),
7425               __M);
7426 }
7427
7428 static __inline__ void __DEFAULT_FN_ATTRS128
7429 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7430 {
7431   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7432 }
7433
7434 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7435 _mm256_cvtepi32_epi16 (__m256i __A)
7436 {
7437   return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7438 }
7439
7440 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7441 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7442 {
7443   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7444               (__v8hi) __O, __M);
7445 }
7446
7447 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7448 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7449 {
7450   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7451               (__v8hi) _mm_setzero_si128 (),
7452               __M);
7453 }
7454
7455 static __inline__ void __DEFAULT_FN_ATTRS256
7456 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
7457 {
7458   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7459 }
7460
7461 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7462 _mm_cvtepi64_epi8 (__m128i __A)
7463 {
7464   return (__m128i)__builtin_shufflevector(
7465       __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7466       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7467 }
7468
7469 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7470 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7471 {
7472   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7473               (__v16qi) __O, __M);
7474 }
7475
7476 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7477 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7478 {
7479   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7480               (__v16qi) _mm_setzero_si128 (),
7481               __M);
7482 }
7483
7484 static __inline__ void __DEFAULT_FN_ATTRS128
7485 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7486 {
7487   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7488 }
7489
7490 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7491 _mm256_cvtepi64_epi8 (__m256i __A)
7492 {
7493   return (__m128i)__builtin_shufflevector(
7494       __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7495       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7496 }
7497
7498 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7499 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7500 {
7501   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7502               (__v16qi) __O, __M);
7503 }
7504
7505 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7506 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7507 {
7508   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7509               (__v16qi) _mm_setzero_si128 (),
7510               __M);
7511 }
7512
7513 static __inline__ void __DEFAULT_FN_ATTRS256
7514 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7515 {
7516   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7517 }
7518
7519 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7520 _mm_cvtepi64_epi32 (__m128i __A)
7521 {
7522   return (__m128i)__builtin_shufflevector(
7523       __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7524 }
7525
7526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7527 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7528 {
7529   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7530               (__v4si) __O, __M);
7531 }
7532
7533 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7534 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7535 {
7536   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7537               (__v4si) _mm_setzero_si128 (),
7538               __M);
7539 }
7540
7541 static __inline__ void __DEFAULT_FN_ATTRS128
7542 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7543 {
7544   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7545 }
7546
7547 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7548 _mm256_cvtepi64_epi32 (__m256i __A)
7549 {
7550   return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7551 }
7552
7553 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7554 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7555 {
7556   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7557                                              (__v4si)_mm256_cvtepi64_epi32(__A),
7558                                              (__v4si)__O);
7559 }
7560
7561 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7562 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7563 {
7564   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7565                                              (__v4si)_mm256_cvtepi64_epi32(__A),
7566                                              (__v4si)_mm_setzero_si128());
7567 }
7568
7569 static __inline__ void __DEFAULT_FN_ATTRS256
7570 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7571 {
7572   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7573 }
7574
7575 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7576 _mm_cvtepi64_epi16 (__m128i __A)
7577 {
7578   return (__m128i)__builtin_shufflevector(
7579       __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7580       3, 3, 3, 3);
7581 }
7582
7583 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7584 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7585 {
7586   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7587               (__v8hi)__O,
7588               __M);
7589 }
7590
7591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7592 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7593 {
7594   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7595               (__v8hi) _mm_setzero_si128 (),
7596               __M);
7597 }
7598
7599 static __inline__ void __DEFAULT_FN_ATTRS128
7600 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7601 {
7602   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7603 }
7604
7605 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7606 _mm256_cvtepi64_epi16 (__m256i __A)
7607 {
7608   return (__m128i)__builtin_shufflevector(
7609       __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7610       2, 3, 4, 5, 6, 7);
7611 }
7612
7613 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7614 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7615 {
7616   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7617               (__v8hi) __O, __M);
7618 }
7619
7620 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7621 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7622 {
7623   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7624               (__v8hi) _mm_setzero_si128 (),
7625               __M);
7626 }
7627
7628 static __inline__ void __DEFAULT_FN_ATTRS256
7629 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7630 {
7631   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7632 }
7633
/* Expands to __builtin_ia32_extractf32x4_256_mask on A with immediate imm,
   an undefined __v4sf third operand and an all-ones mask.  The expansion is
   fully parenthesized so the __m128 cast is applied before any operator
   written after the macro invocation. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
7639
/* Expands to __builtin_ia32_extractf32x4_256_mask on A with immediate imm,
   W as the third operand and U as the mask.  The expansion is fully
   parenthesized so the __m128 cast is applied before any operator written
   after the macro invocation. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
7645
/* Expands to __builtin_ia32_extractf32x4_256_mask on A with immediate imm,
   an all-zero third operand and U as the mask.  The expansion is fully
   parenthesized so the __m128 cast is applied before any operator written
   after the macro invocation. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
7651
/* Expands to __builtin_ia32_extracti32x4_256_mask on A with immediate imm,
   an undefined __v4si third operand and an all-ones mask.  The expansion is
   fully parenthesized so the __m128i cast is applied before any operator
   written after the macro invocation. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
7657
/* Expands to __builtin_ia32_extracti32x4_256_mask on A with immediate imm,
   W as the third operand and U as the mask.  The expansion is fully
   parenthesized so the __m128i cast is applied before any operator written
   after the macro invocation. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
7663
/* Expands to __builtin_ia32_extracti32x4_256_mask on A with immediate imm,
   an all-zero third operand and U as the mask.  The expansion is fully
   parenthesized so the __m128i cast is applied before any operator written
   after the macro invocation. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
7669
/* Expands to __builtin_ia32_insertf32x4_256(A, B, imm) with the operands
   cast to __v8sf, __v4sf and int.  The expansion is fully parenthesized so
   the __m256 cast is applied before any operator written after the macro
   invocation. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))
7673
/* Expands to __builtin_ia32_selectps_256 with mask U, the result of
   _mm256_insertf32x4(A, B, imm), and W.  The expansion is fully
   parenthesized so the __m256 cast is applied before any operator written
   after the macro invocation. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))
7678
/* Expands to __builtin_ia32_selectps_256 with mask U, the result of
   _mm256_insertf32x4(A, B, imm), and an all-zero vector.  The expansion is
   fully parenthesized so the __m256 cast is applied before any operator
   written after the macro invocation. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
7683
/* Expands to __builtin_ia32_inserti32x4_256(A, B, imm) with the operands
   cast to __v8si, __v4si and int.  The expansion is fully parenthesized so
   the __m256i cast is applied before any operator written after the macro
   invocation. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))
7687
/* Expands to __builtin_ia32_selectd_256 with mask U, the result of
   _mm256_inserti32x4(A, B, imm), and W.  The expansion is fully
   parenthesized so the __m256i cast is applied before any operator written
   after the macro invocation. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))
7692
/* Expands to __builtin_ia32_selectd_256 with mask U, the result of
   _mm256_inserti32x4(A, B, imm), and an all-zero vector.  The expansion is
   fully parenthesized so the __m256i cast is applied before any operator
   written after the macro invocation. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7697
/* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and an all-ones mask.  The
   expansion is fully parenthesized so the __m128d cast is applied before
   any operator written after the macro invocation. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
7703
/* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
   ((C) << 2) | (B), W as the third operand and U as the mask.  The
   expansion is fully parenthesized so the __m128d cast is applied before
   any operator written after the macro invocation. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
7709
/* Expands to __builtin_ia32_getmantpd128_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and U as the mask.  The
   expansion is fully parenthesized so the __m128d cast is applied before
   any operator written after the macro invocation. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
7715
/* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and an all-ones mask.  The
   expansion is fully parenthesized so the __m256d cast is applied before
   any operator written after the macro invocation. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
7721
/* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
   ((C) << 2) | (B), W as the third operand and U as the mask.  The
   expansion is fully parenthesized so the __m256d cast is applied before
   any operator written after the macro invocation. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
7727
/* Expands to __builtin_ia32_getmantpd256_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and U as the mask.  The
   expansion is fully parenthesized so the __m256d cast is applied before
   any operator written after the macro invocation. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
7733
/* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and an all-ones mask.  The
   expansion is fully parenthesized so the __m128 cast is applied before any
   operator written after the macro invocation. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))
7739
/* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
   ((C) << 2) | (B), W as the third operand and U as the mask.  The
   expansion is fully parenthesized so the __m128 cast is applied before any
   operator written after the macro invocation. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))
7745
/* Expands to __builtin_ia32_getmantps128_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and U as the mask.  The
   expansion is fully parenthesized so the __m128 cast is applied before any
   operator written after the macro invocation. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
7751
/* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and an all-ones mask.  The
   expansion is fully parenthesized so the __m256 cast is applied before any
   operator written after the macro invocation. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))
7757
/* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
   ((C) << 2) | (B), W as the third operand and U as the mask.  The
   expansion is fully parenthesized so the __m256 cast is applied before any
   operator written after the macro invocation. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
7763
/* Expands to __builtin_ia32_getmantps256_mask on A with the immediate
   ((C) << 2) | (B), an all-zero third operand and U as the mask.  The
   expansion is fully parenthesized so the __m256 cast is applied before any
   operator written after the macro invocation. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
7769
/* Expands to __builtin_ia32_gather3div2df(v1_old, addr, index, mask, scale)
   with index cast to __v2di and addr to double const *.  The expansion is
   fully parenthesized so the __m128d cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (double const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7775
/* Expands to __builtin_ia32_gather3div2di(v1_old, addr, index, mask, scale)
   with index cast to __v2di and addr to long long const *.  The expansion
   is fully parenthesized so the __m128i cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (long long const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7781
/* Expands to __builtin_ia32_gather3div4df(v1_old, addr, index, mask, scale)
   with index cast to __v4di and addr to double const *.  The expansion is
   fully parenthesized so the __m256d cast is applied before any operator
   written after the macro invocation. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (double const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7787
/* Expands to __builtin_ia32_gather3div4di(v1_old, addr, index, mask, scale)
   with index cast to __v4di and addr to long long const *.  The expansion
   is fully parenthesized so the __m256i cast is applied before any operator
   written after the macro invocation. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (long long const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7793
/* Expands to __builtin_ia32_gather3div4sf(v1_old, addr, index, mask, scale)
   with index cast to __v2di and addr to float const *.  The expansion is
   fully parenthesized so the __m128 cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (float const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7799
/* Expands to __builtin_ia32_gather3div4si(v1_old, addr, index, mask, scale)
   with index cast to __v2di and addr to int const *.  The expansion is
   fully parenthesized so the __m128i cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7805
/* Expands to __builtin_ia32_gather3div8sf(v1_old, addr, index, mask, scale)
   with index cast to __v4di and addr to float const *; v1_old and the
   result are 128-bit (__m128).  The expansion is fully parenthesized so the
   __m128 cast is applied before any operator written after the macro
   invocation. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (float const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7811
/* Expands to __builtin_ia32_gather3div8si(v1_old, addr, index, mask, scale)
   with index cast to __v4di and addr to int const *; v1_old and the result
   are 128-bit (__m128i).  The expansion is fully parenthesized so the
   __m128i cast is applied before any operator written after the macro
   invocation. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7817
/* Expands to __builtin_ia32_gather3siv2df(v1_old, addr, index, mask, scale)
   with index cast to __v4si and addr to double const *.  The expansion is
   fully parenthesized so the __m128d cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (double const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7823
/* Expands to __builtin_ia32_gather3siv2di(v1_old, addr, index, mask, scale)
   with index cast to __v4si and addr to long long const *.  The expansion
   is fully parenthesized so the __m128i cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (long long const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7829
/* Expands to __builtin_ia32_gather3siv4df(v1_old, addr, index, mask, scale)
   with index cast to __v4si (a 128-bit vector) and addr to double const *.
   The expansion is fully parenthesized so the __m256d cast is applied
   before any operator written after the macro invocation. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (double const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7835
/* Expands to __builtin_ia32_gather3siv4di(v1_old, addr, index, mask, scale)
   with index cast to __v4si (a 128-bit vector) and addr to
   long long const *.  The expansion is fully parenthesized so the __m256i
   cast is applied before any operator written after the macro invocation. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (long long const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7841
/* Expands to __builtin_ia32_gather3siv4sf(v1_old, addr, index, mask, scale)
   with index cast to __v4si and addr to float const *.  The expansion is
   fully parenthesized so the __m128 cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (float const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7847
/* Expands to __builtin_ia32_gather3siv4si(v1_old, addr, index, mask, scale)
   with index cast to __v4si and addr to int const *.  The expansion is
   fully parenthesized so the __m128i cast is applied before any operator
   written after the macro invocation. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7853
/* Expands to __builtin_ia32_gather3siv8sf(v1_old, addr, index, mask, scale)
   with index cast to __v8si and addr to float const *.  The expansion is
   fully parenthesized so the __m256 cast is applied before any operator
   written after the macro invocation. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (float const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7859
/* Expands to __builtin_ia32_gather3siv8si(v1_old, addr, index, mask, scale)
   with index cast to __v8si and addr to int const *.  The expansion is
   fully parenthesized so the __m256i cast is applied before any operator
   written after the macro invocation. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (int const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7865
/* Expands to __builtin_ia32_permdf256(X, C) with X cast to __v4df and C to
   int.  The expansion is fully parenthesized so the __m256d cast is applied
   before any operator written after the macro invocation. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
7868
/* Expands to __builtin_ia32_selectpd_256 with mask U, the result of
   _mm256_permutex_pd(X, C), and W.  The expansion is fully parenthesized so
   the __m256d cast is applied before any operator written after the macro
   invocation. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
7873
/* Expands to __builtin_ia32_selectpd_256 with mask U, the result of
   _mm256_permutex_pd(X, C), and an all-zero vector.  The expansion is fully
   parenthesized so the __m256d cast is applied before any operator written
   after the macro invocation. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
7878
/* Expands to __builtin_ia32_permdi256(X, C) with X cast to __v4di and C to
   int.  The expansion is fully parenthesized so the __m256i cast is applied
   before any operator written after the macro invocation. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
7881
/* Expands to __builtin_ia32_selectq_256 with mask U, the result of
   _mm256_permutex_epi64(X, C), and W.  The expansion is fully parenthesized
   so the __m256i cast is applied before any operator written after the
   macro invocation. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)(__m256i)(W)))
7886
/* Expands to __builtin_ia32_selectq_256 with mask U, the result of
   _mm256_permutex_epi64(X, C), and an all-zero vector.  The expansion is
   fully parenthesized so the __m256i cast is applied before any operator
   written after the macro invocation. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
7891
7892 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7893 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
7894 {
7895   return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
7896 }
7897
7898 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7899 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
7900           __m256d __Y)
7901 {
7902   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7903                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
7904                                         (__v4df)__W);
7905 }
7906
7907 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7908 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
7909 {
7910   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7911                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
7912                                         (__v4df)_mm256_setzero_pd());
7913 }
7914
7915 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7916 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
7917 {
7918   return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
7919 }
7920
7921 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7922 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
7923 {
7924   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7925                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7926                                      (__v4di)_mm256_setzero_si256());
7927 }
7928
7929 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7930 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
7931              __m256i __Y)
7932 {
7933   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7934                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7935                                      (__v4di)__W);
7936 }
7937
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped. */
#define _mm256_permutexvar_ps(idx, a) _mm256_permutevar8x32_ps((a), (idx))
7939
7940 static __inline__ __m256 __DEFAULT_FN_ATTRS256
7941 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
7942 {
7943   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7944                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7945                                         (__v8sf)__W);
7946 }
7947
7948 static __inline__ __m256 __DEFAULT_FN_ATTRS256
7949 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
7950 {
7951   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7952                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7953                                         (__v8sf)_mm256_setzero_ps());
7954 }
7955
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped. */
#define _mm256_permutexvar_epi32(idx, a) _mm256_permutevar8x32_epi32((a), (idx))
7957
7958 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7959 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
7960                               __m256i __Y)
7961 {
7962   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7963                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7964                                      (__v8si)__W);
7965 }
7966
7967 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7968 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
7969 {
7970   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7971                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7972                                      (__v8si)_mm256_setzero_si256());
7973 }
7974
/* Expands to __builtin_ia32_alignd128 on A and B (viewed as __v4si) with imm
 * forwarded as an int; the result is cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro binds to the
 * cast result rather than to the builtin call. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))
7978
/* mask form of _mm_alignr_epi32: hands the alignr result and W to
 * __builtin_ia32_selectd_128 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))
7983
/* maskz form of _mm_alignr_epi32: hands the alignr result and the
 * _mm_setzero_si128() vector to __builtin_ia32_selectd_128 under mask U. The
 * whole expansion is parenthesized so postfix operators bind to the cast
 * result. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))
7988
/* Expands to __builtin_ia32_alignd256 on A and B (viewed as __v8si) with imm
 * forwarded as an int; the result is cast to __m256i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro binds to the
 * cast result rather than to the builtin call. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))
7992
/* mask form of _mm256_alignr_epi32: hands the alignr result and W to
 * __builtin_ia32_selectd_256 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))
7997
/* maskz form of _mm256_alignr_epi32: hands the alignr result and the
 * _mm256_setzero_si256() vector to __builtin_ia32_selectd_256 under mask U.
 * The whole expansion is parenthesized so postfix operators bind to the cast
 * result. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
8002
/* Expands to __builtin_ia32_alignq128 on A and B (viewed as __v2di) with imm
 * forwarded as an int; the result is cast to __m128i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro binds to the
 * cast result rather than to the builtin call. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))
8006
/* mask form of _mm_alignr_epi64: hands the alignr result and W to
 * __builtin_ia32_selectq_128 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))
8011
/* maskz form of _mm_alignr_epi64: hands the alignr result and the
 * _mm_setzero_si128() vector to __builtin_ia32_selectq_128 under mask U. The
 * whole expansion is parenthesized so postfix operators bind to the cast
 * result. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
8016
/* Expands to __builtin_ia32_alignq256 on A and B (viewed as __v4di) with imm
 * forwarded as an int; the result is cast to __m256i. The whole expansion is
 * parenthesized so that a postfix operator applied to the macro binds to the
 * cast result rather than to the builtin call. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))
8020
/* mask form of _mm256_alignr_epi64: hands the alignr result and W to
 * __builtin_ia32_selectq_256 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))
8025
/* maskz form of _mm256_alignr_epi64: hands the alignr result and the
 * _mm256_setzero_si256() vector to __builtin_ia32_selectq_256 under mask U.
 * The whole expansion is parenthesized so postfix operators bind to the cast
 * result. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8030
8031 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8032 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8033 {
8034   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8035                                              (__v4sf)_mm_movehdup_ps(__A),
8036                                              (__v4sf)__W);
8037 }
8038
8039 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8040 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8041 {
8042   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8043                                              (__v4sf)_mm_movehdup_ps(__A),
8044                                              (__v4sf)_mm_setzero_ps());
8045 }
8046
8047 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8048 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8049 {
8050   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8051                                              (__v8sf)_mm256_movehdup_ps(__A),
8052                                              (__v8sf)__W);
8053 }
8054
8055 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8056 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8057 {
8058   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8059                                              (__v8sf)_mm256_movehdup_ps(__A),
8060                                              (__v8sf)_mm256_setzero_ps());
8061 }
8062
8063 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8064 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8065 {
8066   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8067                                              (__v4sf)_mm_moveldup_ps(__A),
8068                                              (__v4sf)__W);
8069 }
8070
8071 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8072 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8073 {
8074   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8075                                              (__v4sf)_mm_moveldup_ps(__A),
8076                                              (__v4sf)_mm_setzero_ps());
8077 }
8078
8079 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8080 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8081 {
8082   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8083                                              (__v8sf)_mm256_moveldup_ps(__A),
8084                                              (__v8sf)__W);
8085 }
8086
8087 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8088 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8089 {
8090   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8091                                              (__v8sf)_mm256_moveldup_ps(__A),
8092                                              (__v8sf)_mm256_setzero_ps());
8093 }
8094
/* mask form of _mm256_shuffle_epi32: hands _mm256_shuffle_epi32(A, I) and W
 * to __builtin_ia32_selectd_256 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))
8099
/* maskz form of _mm256_shuffle_epi32: hands _mm256_shuffle_epi32(A, I) and
 * the _mm256_setzero_si256() vector to __builtin_ia32_selectd_256 under mask
 * U. The whole expansion is parenthesized so postfix operators bind to the
 * cast result. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
8104
/* mask form of _mm_shuffle_epi32: hands _mm_shuffle_epi32(A, I) and W to
 * __builtin_ia32_selectd_128 under mask U. The whole expansion is
 * parenthesized so postfix operators bind to the cast result. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))
8109
/* maskz form of _mm_shuffle_epi32: hands _mm_shuffle_epi32(A, I) and the
 * _mm_setzero_si128() vector to __builtin_ia32_selectd_128 under mask U. The
 * whole expansion is parenthesized so postfix operators bind to the cast
 * result. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8114
8115 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8116 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8117 {
8118   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8119               (__v2df) __A,
8120               (__v2df) __W);
8121 }
8122
8123 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8124 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8125 {
8126   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8127               (__v2df) __A,
8128               (__v2df) _mm_setzero_pd ());
8129 }
8130
8131 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8132 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8133 {
8134   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8135               (__v4df) __A,
8136               (__v4df) __W);
8137 }
8138
8139 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8140 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8141 {
8142   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8143               (__v4df) __A,
8144               (__v4df) _mm256_setzero_pd ());
8145 }
8146
8147 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8148 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8149 {
8150   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8151              (__v4sf) __A,
8152              (__v4sf) __W);
8153 }
8154
8155 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8156 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8157 {
8158   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8159              (__v4sf) __A,
8160              (__v4sf) _mm_setzero_ps ());
8161 }
8162
8163 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8164 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8165 {
8166   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8167              (__v8sf) __A,
8168              (__v8sf) __W);
8169 }
8170
8171 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8172 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8173 {
8174   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8175              (__v8sf) __A,
8176              (__v8sf) _mm256_setzero_ps ());
8177 }
8178
8179 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8180 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8181 {
8182   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8183              (__v4sf) __W,
8184              (__mmask8) __U);
8185 }
8186
8187 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8188 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8189 {
8190   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8191              (__v4sf)
8192              _mm_setzero_ps (),
8193              (__mmask8) __U);
8194 }
8195
8196 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8197 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8198 {
8199   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8200                 (__v8sf) __W,
8201                 (__mmask8) __U);
8202 }
8203
8204 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8205 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8206 {
8207   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8208                 (__v8sf)
8209                 _mm256_setzero_ps (),
8210                 (__mmask8) __U);
8211 }
8212
8213 static __inline __m128i __DEFAULT_FN_ATTRS128
8214 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8215 {
8216   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8217                                                   (__v8hi) __W,
8218                                                   (__mmask8) __U);
8219 }
8220
8221 static __inline __m128i __DEFAULT_FN_ATTRS128
8222 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8223 {
8224   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8225                                                   (__v8hi) _mm_setzero_si128 (),
8226                                                   (__mmask8) __U);
8227 }
8228
/* Expands to __builtin_ia32_vcvtps2ph_mask with A viewed as __v4sf, I
 * forwarded as an int, W as the third operand and U as the mask. The whole
 * expansion is parenthesized so postfix operators bind to the cast result. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
8233
/* Expands to __builtin_ia32_vcvtps2ph_mask with A viewed as __v4sf, I
 * forwarded as an int, the _mm_setzero_si128() vector as the third operand
 * and U as the mask. The whole expansion is parenthesized so postfix
 * operators bind to the cast result. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
8238
8239 static __inline __m128i __DEFAULT_FN_ATTRS256
8240 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8241 {
8242   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8243                                                       (__v8hi) __W,
8244                                                       (__mmask8) __U);
8245 }
8246
8247 static __inline __m128i __DEFAULT_FN_ATTRS256
8248 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8249 {
8250   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8251                                                       (__v8hi) _mm_setzero_si128(),
8252                                                       (__mmask8) __U);
8253 }
/* Expands to __builtin_ia32_vcvtps2ph256_mask with A viewed as __v8sf, I
 * forwarded as an int, W as the third operand and U as the mask. The whole
 * expansion is parenthesized so postfix operators bind to the cast result. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
8258
/* Expands to __builtin_ia32_vcvtps2ph256_mask with A viewed as __v8sf, I
 * forwarded as an int, the _mm_setzero_si128() vector as the third operand
 * and U as the mask. The whole expansion is parenthesized so postfix
 * operators bind to the cast result. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
8263
8264
8265 #undef __DEFAULT_FN_ATTRS128
8266 #undef __DEFAULT_FN_ATTRS256
8267
8268 #endif /* __AVX512VLINTRIN_H */