]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/Headers/avx512vlintrin.h
Vendor import of clang trunk r351319 (just before the release_80 branch
[FreeBSD/FreeBSD.git] / lib / Headers / avx512vlintrin.h
1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23
24 #ifndef __IMMINTRIN_H
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
30
/* Attribute sets applied to every intrinsic defined in this header: force
 * inlining, omit debug info, enable the "avx512vl" target feature, and record
 * the minimum vector width (128 or 256 bits) the function needs. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"),      \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"),      \
                 __min_vector_width__(256)))
33
/* Narrow integer vector types (2 and 4 bytes wide) declared for use by this
 * header. */
typedef char __v2qi __attribute__((__vector_size__(2)));  /* 2-byte vector of char  */
typedef char __v4qi __attribute__((__vector_size__(4)));  /* 4-byte vector of char  */
typedef short __v2hi __attribute__((__vector_size__(4))); /* 4-byte vector of short */
37
38 /* Integer compare */
39
/* Named shorthands for _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask, each
 * fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
64
/* Named shorthands for _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask,
 * each fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
89
/* Named shorthands for _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask, each
 * fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
114
/* Named shorthands for _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask,
 * each fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
139
/* Named shorthands for _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask, each
 * fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
164
/* Named shorthands for _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask,
 * each fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
189
/* Named shorthands for _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask, each
 * fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
214
/* Named shorthands for _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask,
 * each fixing the predicate argument to one _MM_CMPINT_* constant. */

/* Unmasked forms. */
#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Forms taking an input mask k, forwarded as the first argument. */
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
239
240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
241 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
242 {
243   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244                                              (__v8si)_mm256_add_epi32(__A, __B),
245                                              (__v8si)__W);
246 }
247
248 static __inline__ __m256i __DEFAULT_FN_ATTRS256
249 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
250 {
251   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252                                              (__v8si)_mm256_add_epi32(__A, __B),
253                                              (__v8si)_mm256_setzero_si256());
254 }
255
256 static __inline__ __m256i __DEFAULT_FN_ATTRS256
257 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
258 {
259   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260                                              (__v4di)_mm256_add_epi64(__A, __B),
261                                              (__v4di)__W);
262 }
263
264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
265 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
266 {
267   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268                                              (__v4di)_mm256_add_epi64(__A, __B),
269                                              (__v4di)_mm256_setzero_si256());
270 }
271
272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
273 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
274 {
275   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276                                              (__v8si)_mm256_sub_epi32(__A, __B),
277                                              (__v8si)__W);
278 }
279
280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
281 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
282 {
283   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284                                              (__v8si)_mm256_sub_epi32(__A, __B),
285                                              (__v8si)_mm256_setzero_si256());
286 }
287
288 static __inline__ __m256i __DEFAULT_FN_ATTRS256
289 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
290 {
291   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292                                              (__v4di)_mm256_sub_epi64(__A, __B),
293                                              (__v4di)__W);
294 }
295
296 static __inline__ __m256i __DEFAULT_FN_ATTRS256
297 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
298 {
299   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300                                              (__v4di)_mm256_sub_epi64(__A, __B),
301                                              (__v4di)_mm256_setzero_si256());
302 }
303
304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
305 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
306 {
307   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308                                              (__v4si)_mm_add_epi32(__A, __B),
309                                              (__v4si)__W);
310 }
311
312 static __inline__ __m128i __DEFAULT_FN_ATTRS128
313 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
314 {
315   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316                                              (__v4si)_mm_add_epi32(__A, __B),
317                                              (__v4si)_mm_setzero_si128());
318 }
319
320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
321 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
322 {
323   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324                                              (__v2di)_mm_add_epi64(__A, __B),
325                                              (__v2di)__W);
326 }
327
328 static __inline__ __m128i __DEFAULT_FN_ATTRS128
329 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
330 {
331   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332                                              (__v2di)_mm_add_epi64(__A, __B),
333                                              (__v2di)_mm_setzero_si128());
334 }
335
336 static __inline__ __m128i __DEFAULT_FN_ATTRS128
337 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
338 {
339   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340                                              (__v4si)_mm_sub_epi32(__A, __B),
341                                              (__v4si)__W);
342 }
343
344 static __inline__ __m128i __DEFAULT_FN_ATTRS128
345 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
346 {
347   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348                                              (__v4si)_mm_sub_epi32(__A, __B),
349                                              (__v4si)_mm_setzero_si128());
350 }
351
352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
353 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
354 {
355   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356                                              (__v2di)_mm_sub_epi64(__A, __B),
357                                              (__v2di)__W);
358 }
359
360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
361 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
362 {
363   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364                                              (__v2di)_mm_sub_epi64(__A, __B),
365                                              (__v2di)_mm_setzero_si128());
366 }
367
368 static __inline__ __m256i __DEFAULT_FN_ATTRS256
369 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
370 {
371   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372                                              (__v4di)_mm256_mul_epi32(__X, __Y),
373                                              (__v4di)__W);
374 }
375
376 static __inline__ __m256i __DEFAULT_FN_ATTRS256
377 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
378 {
379   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380                                              (__v4di)_mm256_mul_epi32(__X, __Y),
381                                              (__v4di)_mm256_setzero_si256());
382 }
383
384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
385 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
386 {
387   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388                                              (__v2di)_mm_mul_epi32(__X, __Y),
389                                              (__v2di)__W);
390 }
391
392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
393 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
394 {
395   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396                                              (__v2di)_mm_mul_epi32(__X, __Y),
397                                              (__v2di)_mm_setzero_si128());
398 }
399
400 static __inline__ __m256i __DEFAULT_FN_ATTRS256
401 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
402 {
403   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404                                              (__v4di)_mm256_mul_epu32(__X, __Y),
405                                              (__v4di)__W);
406 }
407
408 static __inline__ __m256i __DEFAULT_FN_ATTRS256
409 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
410 {
411   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412                                              (__v4di)_mm256_mul_epu32(__X, __Y),
413                                              (__v4di)_mm256_setzero_si256());
414 }
415
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
418 {
419   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420                                              (__v2di)_mm_mul_epu32(__X, __Y),
421                                              (__v2di)__W);
422 }
423
424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
425 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
426 {
427   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428                                              (__v2di)_mm_mul_epu32(__X, __Y),
429                                              (__v2di)_mm_setzero_si128());
430 }
431
432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
433 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
434 {
435   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436                                              (__v8si)_mm256_mullo_epi32(__A, __B),
437                                              (__v8si)_mm256_setzero_si256());
438 }
439
440 static __inline__ __m256i __DEFAULT_FN_ATTRS256
441 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
442 {
443   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
444                                              (__v8si)_mm256_mullo_epi32(__A, __B),
445                                              (__v8si)__W);
446 }
447
448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
449 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
450 {
451   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452                                              (__v4si)_mm_mullo_epi32(__A, __B),
453                                              (__v4si)_mm_setzero_si128());
454 }
455
456 static __inline__ __m128i __DEFAULT_FN_ATTRS128
457 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
458 {
459   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
460                                              (__v4si)_mm_mullo_epi32(__A, __B),
461                                              (__v4si)__W);
462 }
463
464 static __inline__ __m256i __DEFAULT_FN_ATTRS256
465 _mm256_and_epi32(__m256i __a, __m256i __b)
466 {
467   return (__m256i)((__v8su)__a & (__v8su)__b);
468 }
469
470 static __inline__ __m256i __DEFAULT_FN_ATTRS256
471 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
472 {
473   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
474                                              (__v8si)_mm256_and_epi32(__A, __B),
475                                              (__v8si)__W);
476 }
477
478 static __inline__ __m256i __DEFAULT_FN_ATTRS256
479 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
480 {
481   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
482 }
483
484 static __inline__ __m128i __DEFAULT_FN_ATTRS128
485 _mm_and_epi32(__m128i __a, __m128i __b)
486 {
487   return (__m128i)((__v4su)__a & (__v4su)__b);
488 }
489
490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
491 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
492 {
493   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
494                                              (__v4si)_mm_and_epi32(__A, __B),
495                                              (__v4si)__W);
496 }
497
498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
499 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
500 {
501   return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
502 }
503
504 static __inline__ __m256i __DEFAULT_FN_ATTRS256
505 _mm256_andnot_epi32(__m256i __A, __m256i __B)
506 {
507   return (__m256i)(~(__v8su)__A & (__v8su)__B);
508 }
509
510 static __inline__ __m256i __DEFAULT_FN_ATTRS256
511 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
512 {
513   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
514                                           (__v8si)_mm256_andnot_epi32(__A, __B),
515                                           (__v8si)__W);
516 }
517
518 static __inline__ __m256i __DEFAULT_FN_ATTRS256
519 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
520 {
521   return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
522                                            __U, __A, __B);
523 }
524
525 static __inline__ __m128i __DEFAULT_FN_ATTRS128
526 _mm_andnot_epi32(__m128i __A, __m128i __B)
527 {
528   return (__m128i)(~(__v4su)__A & (__v4su)__B);
529 }
530
531 static __inline__ __m128i __DEFAULT_FN_ATTRS128
532 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
533 {
534   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
535                                              (__v4si)_mm_andnot_epi32(__A, __B),
536                                              (__v4si)__W);
537 }
538
539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
540 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
541 {
542   return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
543 }
544
545 static __inline__ __m256i __DEFAULT_FN_ATTRS256
546 _mm256_or_epi32(__m256i __a, __m256i __b)
547 {
548   return (__m256i)((__v8su)__a | (__v8su)__b);
549 }
550
551 static __inline__ __m256i __DEFAULT_FN_ATTRS256
552 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
553 {
554   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
555                                              (__v8si)_mm256_or_epi32(__A, __B),
556                                              (__v8si)__W);
557 }
558
559 static __inline__ __m256i __DEFAULT_FN_ATTRS256
560 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
561 {
562   return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
563 }
564
565 static __inline__ __m128i __DEFAULT_FN_ATTRS128
566 _mm_or_epi32(__m128i __a, __m128i __b)
567 {
568   return (__m128i)((__v4su)__a | (__v4su)__b);
569 }
570
571 static __inline__ __m128i __DEFAULT_FN_ATTRS128
572 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
573 {
574   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
575                                              (__v4si)_mm_or_epi32(__A, __B),
576                                              (__v4si)__W);
577 }
578
579 static __inline__ __m128i __DEFAULT_FN_ATTRS128
580 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
581 {
582   return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
583 }
584
585 static __inline__ __m256i __DEFAULT_FN_ATTRS256
586 _mm256_xor_epi32(__m256i __a, __m256i __b)
587 {
588   return (__m256i)((__v8su)__a ^ (__v8su)__b);
589 }
590
591 static __inline__ __m256i __DEFAULT_FN_ATTRS256
592 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
593 {
594   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
595                                              (__v8si)_mm256_xor_epi32(__A, __B),
596                                              (__v8si)__W);
597 }
598
599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
600 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
601 {
602   return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
603 }
604
605 static __inline__ __m128i __DEFAULT_FN_ATTRS128
606 _mm_xor_epi32(__m128i __a, __m128i __b)
607 {
608   return (__m128i)((__v4su)__a ^ (__v4su)__b);
609 }
610
611 static __inline__ __m128i __DEFAULT_FN_ATTRS128
612 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
613 {
614   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
615                                              (__v4si)_mm_xor_epi32(__A, __B),
616                                              (__v4si)__W);
617 }
618
619 static __inline__ __m128i __DEFAULT_FN_ATTRS128
620 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
621 {
622   return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
623 }
624
625 static __inline__ __m256i __DEFAULT_FN_ATTRS256
626 _mm256_and_epi64(__m256i __a, __m256i __b)
627 {
628   return (__m256i)((__v4du)__a & (__v4du)__b);
629 }
630
631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
632 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
633 {
634   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
635                                              (__v4di)_mm256_and_epi64(__A, __B),
636                                              (__v4di)__W);
637 }
638
639 static __inline__ __m256i __DEFAULT_FN_ATTRS256
640 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
641 {
642   return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
643 }
644
645 static __inline__ __m128i __DEFAULT_FN_ATTRS128
646 _mm_and_epi64(__m128i __a, __m128i __b)
647 {
648   return (__m128i)((__v2du)__a & (__v2du)__b);
649 }
650
651 static __inline__ __m128i __DEFAULT_FN_ATTRS128
652 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
653 {
654   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
655                                              (__v2di)_mm_and_epi64(__A, __B),
656                                              (__v2di)__W);
657 }
658
659 static __inline__ __m128i __DEFAULT_FN_ATTRS128
660 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
661 {
662   return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
663 }
664
665 static __inline__ __m256i __DEFAULT_FN_ATTRS256
666 _mm256_andnot_epi64(__m256i __A, __m256i __B)
667 {
668   return (__m256i)(~(__v4du)__A & (__v4du)__B);
669 }
670
671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
672 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
673 {
674   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
675                                           (__v4di)_mm256_andnot_epi64(__A, __B),
676                                           (__v4di)__W);
677 }
678
679 static __inline__ __m256i __DEFAULT_FN_ATTRS256
680 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
681 {
682   return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
683                                            __U, __A, __B);
684 }
685
686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
687 _mm_andnot_epi64(__m128i __A, __m128i __B)
688 {
689   return (__m128i)(~(__v2du)__A & (__v2du)__B);
690 }
691
692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
693 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
694 {
695   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
696                                              (__v2di)_mm_andnot_epi64(__A, __B),
697                                              (__v2di)__W);
698 }
699
700 static __inline__ __m128i __DEFAULT_FN_ATTRS128
701 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
702 {
703   return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
704 }
705
706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
707 _mm256_or_epi64(__m256i __a, __m256i __b)
708 {
709   return (__m256i)((__v4du)__a | (__v4du)__b);
710 }
711
712 static __inline__ __m256i __DEFAULT_FN_ATTRS256
713 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
714 {
715   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
716                                              (__v4di)_mm256_or_epi64(__A, __B),
717                                              (__v4di)__W);
718 }
719
720 static __inline__ __m256i __DEFAULT_FN_ATTRS256
721 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
722 {
723   return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
724 }
725
726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
727 _mm_or_epi64(__m128i __a, __m128i __b)
728 {
729   return (__m128i)((__v2du)__a | (__v2du)__b);
730 }
731
732 static __inline__ __m128i __DEFAULT_FN_ATTRS128
733 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
734 {
735   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
736                                              (__v2di)_mm_or_epi64(__A, __B),
737                                              (__v2di)__W);
738 }
739
740 static __inline__ __m128i __DEFAULT_FN_ATTRS128
741 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
742 {
743   return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
744 }
745
746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
747 _mm256_xor_epi64(__m256i __a, __m256i __b)
748 {
749   return (__m256i)((__v4du)__a ^ (__v4du)__b);
750 }
751
752 static __inline__ __m256i __DEFAULT_FN_ATTRS256
753 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
754 {
755   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
756                                              (__v4di)_mm256_xor_epi64(__A, __B),
757                                              (__v4di)__W);
758 }
759
760 static __inline__ __m256i __DEFAULT_FN_ATTRS256
761 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
762 {
763   return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
764 }
765
766 static __inline__ __m128i __DEFAULT_FN_ATTRS128
767 _mm_xor_epi64(__m128i __a, __m128i __b)
768 {
769   return (__m128i)((__v2du)__a ^ (__v2du)__b);
770 }
771
772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
773 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
774         __m128i __B)
775 {
776   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
777                                              (__v2di)_mm_xor_epi64(__A, __B),
778                                              (__v2di)__W);
779 }
780
781 static __inline__ __m128i __DEFAULT_FN_ATTRS128
782 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
783 {
784   return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
785 }
786
/* Expands to __builtin_ia32_cmpd128_mask on a and b (cast to __v4si) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))
791
/* Expands to __builtin_ia32_cmpd128_mask on a and b (cast to __v4si) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
796
/* Expands to __builtin_ia32_ucmpd128_mask on a and b (cast to __v4si) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
801
/* Expands to __builtin_ia32_ucmpd128_mask on a and b (cast to __v4si) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
806
/* Expands to __builtin_ia32_cmpd256_mask on a and b (cast to __v8si) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
811
/* Expands to __builtin_ia32_cmpd256_mask on a and b (cast to __v8si) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
816
/* Expands to __builtin_ia32_ucmpd256_mask on a and b (cast to __v8si) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
821
/* Expands to __builtin_ia32_ucmpd256_mask on a and b (cast to __v8si) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
826
/* Expands to __builtin_ia32_cmpq128_mask on a and b (cast to __v2di) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))
831
/* Expands to __builtin_ia32_cmpq128_mask on a and b (cast to __v2di) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
836
/* Expands to __builtin_ia32_ucmpq128_mask on a and b (cast to __v2di) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))
841
/* Expands to __builtin_ia32_ucmpq128_mask on a and b (cast to __v2di) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
846
/* Expands to __builtin_ia32_cmpq256_mask on a and b (cast to __v4di) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))
851
/* Expands to __builtin_ia32_cmpq256_mask on a and b (cast to __v4di) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
856
/* Expands to __builtin_ia32_ucmpq256_mask on a and b (cast to __v4di) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))
861
/* Expands to __builtin_ia32_ucmpq256_mask on a and b (cast to __v4di) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
866
/* Expands to __builtin_ia32_cmpps256_mask on a and b (cast to __v8sf) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))
871
/* Expands to __builtin_ia32_cmpps256_mask on a and b (cast to __v8sf) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))
876
/* Expands to __builtin_ia32_cmppd256_mask on a and b (cast to __v4df) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))
881
/* Expands to __builtin_ia32_cmppd256_mask on a and b (cast to __v4df) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
886
/* Expands to __builtin_ia32_cmpps128_mask on a and b (cast to __v4sf) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))
891
/* Expands to __builtin_ia32_cmpps128_mask on a and b (cast to __v4sf) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))
896
/* Expands to __builtin_ia32_cmppd128_mask on a and b (cast to __v2df) with
 * predicate p and (__mmask8)-1 as the mask operand; result cast to __mmask8.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))
901
/* Expands to __builtin_ia32_cmppd128_mask on a and b (cast to __v2df) with
 * predicate p and m (cast to __mmask8) as the mask operand.
 * The expansion is wrapped in parentheses so operators at the use site
 * cannot bind inside it. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
906
907 static __inline__ __m128d __DEFAULT_FN_ATTRS128
908 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
909 {
910   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
911                     __builtin_ia32_vfmaddpd ((__v2df) __A,
912                                              (__v2df) __B,
913                                              (__v2df) __C),
914                     (__v2df) __A);
915 }
916
917 static __inline__ __m128d __DEFAULT_FN_ATTRS128
918 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
919 {
920   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
921                     __builtin_ia32_vfmaddpd ((__v2df) __A,
922                                              (__v2df) __B,
923                                              (__v2df) __C),
924                     (__v2df) __C);
925 }
926
927 static __inline__ __m128d __DEFAULT_FN_ATTRS128
928 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
929 {
930   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
931                     __builtin_ia32_vfmaddpd ((__v2df) __A,
932                                              (__v2df) __B,
933                                              (__v2df) __C),
934                     (__v2df)_mm_setzero_pd());
935 }
936
937 static __inline__ __m128d __DEFAULT_FN_ATTRS128
938 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
939 {
940   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
941                     __builtin_ia32_vfmaddpd ((__v2df) __A,
942                                              (__v2df) __B,
943                                              -(__v2df) __C),
944                     (__v2df) __A);
945 }
946
947 static __inline__ __m128d __DEFAULT_FN_ATTRS128
948 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
949 {
950   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
951                     __builtin_ia32_vfmaddpd ((__v2df) __A,
952                                              (__v2df) __B,
953                                              -(__v2df) __C),
954                     (__v2df)_mm_setzero_pd());
955 }
956
957 static __inline__ __m128d __DEFAULT_FN_ATTRS128
958 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
959 {
960   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
961                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
962                                              (__v2df) __B,
963                                              (__v2df) __C),
964                     (__v2df) __C);
965 }
966
967 static __inline__ __m128d __DEFAULT_FN_ATTRS128
968 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
969 {
970   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
971                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
972                                              (__v2df) __B,
973                                              (__v2df) __C),
974                     (__v2df)_mm_setzero_pd());
975 }
976
977 static __inline__ __m128d __DEFAULT_FN_ATTRS128
978 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
979 {
980   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
981                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
982                                              (__v2df) __B,
983                                              -(__v2df) __C),
984                     (__v2df)_mm_setzero_pd());
985 }
986
987 static __inline__ __m256d __DEFAULT_FN_ATTRS256
988 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
989 {
990   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
991                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
992                                                 (__v4df) __B,
993                                                 (__v4df) __C),
994                     (__v4df) __A);
995 }
996
997 static __inline__ __m256d __DEFAULT_FN_ATTRS256
998 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
999 {
1000   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1001                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1002                                                 (__v4df) __B,
1003                                                 (__v4df) __C),
1004                     (__v4df) __C);
1005 }
1006
1007 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1008 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1009 {
1010   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1011                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1012                                                 (__v4df) __B,
1013                                                 (__v4df) __C),
1014                     (__v4df)_mm256_setzero_pd());
1015 }
1016
1017 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1018 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1019 {
1020   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1021                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1022                                                 (__v4df) __B,
1023                                                 -(__v4df) __C),
1024                     (__v4df) __A);
1025 }
1026
1027 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1028 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1029 {
1030   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1031                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1032                                                 (__v4df) __B,
1033                                                 -(__v4df) __C),
1034                     (__v4df)_mm256_setzero_pd());
1035 }
1036
1037 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1038 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1039 {
1040   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1041                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1042                                                 (__v4df) __B,
1043                                                 (__v4df) __C),
1044                     (__v4df) __C);
1045 }
1046
1047 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1048 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1049 {
1050   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1051                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1052                                                 (__v4df) __B,
1053                                                 (__v4df) __C),
1054                     (__v4df)_mm256_setzero_pd());
1055 }
1056
1057 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1058 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1059 {
1060   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1061                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1062                                                 (__v4df) __B,
1063                                                 -(__v4df) __C),
1064                     (__v4df)_mm256_setzero_pd());
1065 }
1066
1067 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1068 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1069 {
1070   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1071                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1072                                              (__v4sf) __B,
1073                                              (__v4sf) __C),
1074                     (__v4sf) __A);
1075 }
1076
1077 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1078 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1079 {
1080   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1081                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1082                                              (__v4sf) __B,
1083                                              (__v4sf) __C),
1084                     (__v4sf) __C);
1085 }
1086
1087 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1088 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1089 {
1090   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1091                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1092                                              (__v4sf) __B,
1093                                              (__v4sf) __C),
1094                     (__v4sf)_mm_setzero_ps());
1095 }
1096
1097 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1098 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1099 {
1100   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1101                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1102                                              (__v4sf) __B,
1103                                              -(__v4sf) __C),
1104                     (__v4sf) __A);
1105 }
1106
1107 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1108 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1109 {
1110   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1111                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1112                                              (__v4sf) __B,
1113                                              -(__v4sf) __C),
1114                     (__v4sf)_mm_setzero_ps());
1115 }
1116
1117 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1118 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1119 {
1120   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1121                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1122                                              (__v4sf) __B,
1123                                              (__v4sf) __C),
1124                     (__v4sf) __C);
1125 }
1126
1127 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1129 {
1130   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1131                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1132                                              (__v4sf) __B,
1133                                              (__v4sf) __C),
1134                     (__v4sf)_mm_setzero_ps());
1135 }
1136
1137 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1138 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1139 {
1140   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1141                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
1142                                              (__v4sf) __B,
1143                                              -(__v4sf) __C),
1144                     (__v4sf)_mm_setzero_ps());
1145 }
1146
1147 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1148 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1149 {
1150   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1151                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1152                                                 (__v8sf) __B,
1153                                                 (__v8sf) __C),
1154                     (__v8sf) __A);
1155 }
1156
1157 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1158 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1159 {
1160   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1161                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1162                                                 (__v8sf) __B,
1163                                                 (__v8sf) __C),
1164                     (__v8sf) __C);
1165 }
1166
1167 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1168 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1169 {
1170   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1171                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1172                                                 (__v8sf) __B,
1173                                                 (__v8sf) __C),
1174                     (__v8sf)_mm256_setzero_ps());
1175 }
1176
1177 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1178 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1179 {
1180   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1181                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1182                                                 (__v8sf) __B,
1183                                                 -(__v8sf) __C),
1184                     (__v8sf) __A);
1185 }
1186
1187 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1188 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1189 {
1190   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1191                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1192                                                 (__v8sf) __B,
1193                                                 -(__v8sf) __C),
1194                     (__v8sf)_mm256_setzero_ps());
1195 }
1196
1197 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1198 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1199 {
1200   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1201                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1202                                                 (__v8sf) __B,
1203                                                 (__v8sf) __C),
1204                     (__v8sf) __C);
1205 }
1206
1207 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1208 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1209 {
1210   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1211                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1212                                                 (__v8sf) __B,
1213                                                 (__v8sf) __C),
1214                     (__v8sf)_mm256_setzero_ps());
1215 }
1216
1217 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1218 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1219 {
1220   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1221                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1222                                                 (__v8sf) __B,
1223                                                 -(__v8sf) __C),
1224                     (__v8sf)_mm256_setzero_ps());
1225 }
1226
1227 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1228 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1229 {
1230   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1231                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1232                                                 (__v2df) __B,
1233                                                 (__v2df) __C),
1234                     (__v2df) __A);
1235 }
1236
1237 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1238 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1239 {
1240   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1241                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1242                                                 (__v2df) __B,
1243                                                 (__v2df) __C),
1244                     (__v2df) __C);
1245 }
1246
1247 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1248 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1249 {
1250   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1251                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1252                                                 (__v2df) __B,
1253                                                 (__v2df) __C),
1254                     (__v2df)_mm_setzero_pd());
1255 }
1256
1257 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1258 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1259 {
1260   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1261                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1262                                                 (__v2df) __B,
1263                                                 -(__v2df) __C),
1264                     (__v2df) __A);
1265 }
1266
1267 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1268 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1269 {
1270   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1271                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1272                                                 (__v2df) __B,
1273                                                 -(__v2df) __C),
1274                     (__v2df)_mm_setzero_pd());
1275 }
1276
1277 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1278 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1279 {
1280   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1281                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1282                                                    (__v4df) __B,
1283                                                    (__v4df) __C),
1284                     (__v4df) __A);
1285 }
1286
1287 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1288 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1289 {
1290   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1291                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1292                                                    (__v4df) __B,
1293                                                    (__v4df) __C),
1294                     (__v4df) __C);
1295 }
1296
1297 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1298 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1299 {
1300   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1301                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1302                                                    (__v4df) __B,
1303                                                    (__v4df) __C),
1304                     (__v4df)_mm256_setzero_pd());
1305 }
1306
1307 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1308 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1309 {
1310   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1311                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1312                                                    (__v4df) __B,
1313                                                    -(__v4df) __C),
1314                     (__v4df) __A);
1315 }
1316
1317 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1318 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1319 {
1320   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1321                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1322                                                    (__v4df) __B,
1323                                                    -(__v4df) __C),
1324                     (__v4df)_mm256_setzero_pd());
1325 }
1326
1327 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1328 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1329 {
1330   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1331                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1332                                                 (__v4sf) __B,
1333                                                 (__v4sf) __C),
1334                     (__v4sf) __A);
1335 }
1336
1337 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1338 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1339 {
1340   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1341                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1342                                                 (__v4sf) __B,
1343                                                 (__v4sf) __C),
1344                     (__v4sf) __C);
1345 }
1346
1347 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1348 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1349 {
1350   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1351                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1352                                                 (__v4sf) __B,
1353                                                 (__v4sf) __C),
1354                     (__v4sf)_mm_setzero_ps());
1355 }
1356
1357 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1358 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1359 {
1360   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1361                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1362                                                 (__v4sf) __B,
1363                                                 -(__v4sf) __C),
1364                     (__v4sf) __A);
1365 }
1366
1367 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1368 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1369 {
1370   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1371                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1372                                                 (__v4sf) __B,
1373                                                 -(__v4sf) __C),
1374                     (__v4sf)_mm_setzero_ps());
1375 }
1376
1377 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1378 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1379                          __m256 __C)
1380 {
1381   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1382                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1383                                                    (__v8sf) __B,
1384                                                    (__v8sf) __C),
1385                     (__v8sf) __A);
1386 }
1387
1388 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1389 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1390 {
1391   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1392                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1393                                                    (__v8sf) __B,
1394                                                    (__v8sf) __C),
1395                     (__v8sf) __C);
1396 }
1397
1398 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1399 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1400 {
1401   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1402                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1403                                                    (__v8sf) __B,
1404                                                    (__v8sf) __C),
1405                     (__v8sf)_mm256_setzero_ps());
1406 }
1407
1408 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1409 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1410 {
1411   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1412                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1413                                                    (__v8sf) __B,
1414                                                    -(__v8sf) __C),
1415                     (__v8sf) __A);
1416 }
1417
1418 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1419 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1420 {
1421   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1422                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1423                                                    (__v8sf) __B,
1424                                                    -(__v8sf) __C),
1425                     (__v8sf)_mm256_setzero_ps());
1426 }
1427
1428 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1429 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1430 {
1431   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1432                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1433                                              (__v2df) __B,
1434                                              -(__v2df) __C),
1435                     (__v2df) __C);
1436 }
1437
1438 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1439 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1440 {
1441   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1442                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1443                                                 (__v4df) __B,
1444                                                 -(__v4df) __C),
1445                     (__v4df) __C);
1446 }
1447
1448 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1449 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1450 {
1451   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1452                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1453                                              (__v4sf) __B,
1454                                              -(__v4sf) __C),
1455                     (__v4sf) __C);
1456 }
1457
1458 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1459 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1460 {
1461   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1462                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1463                                                 (__v8sf) __B,
1464                                                 -(__v8sf) __C),
1465                     (__v8sf) __C);
1466 }
1467
1468 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1469 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1470 {
1471   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1472                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1473                                                 (__v2df) __B,
1474                                                 -(__v2df) __C),
1475                     (__v2df) __C);
1476 }
1477
1478 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1479 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1480 {
1481   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1482                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1483                                                    (__v4df) __B,
1484                                                    -(__v4df) __C),
1485                     (__v4df) __C);
1486 }
1487
1488 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1489 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1490 {
1491   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1492                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1493                                                 (__v4sf) __B,
1494                                                 -(__v4sf) __C),
1495                     (__v4sf) __C);
1496 }
1497
1498 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1499 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1500 {
1501   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1502                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1503                                                    (__v8sf) __B,
1504                                                    -(__v8sf) __C),
1505                     (__v8sf) __C);
1506 }
1507
1508 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1509 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1510 {
1511   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1512                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1513                                              -(__v2df) __B,
1514                                              (__v2df) __C),
1515                     (__v2df) __A);
1516 }
1517
1518 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1519 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1520 {
1521   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1522                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1523                                                 -(__v4df) __B,
1524                                                 (__v4df) __C),
1525                     (__v4df) __A);
1526 }
1527
1528 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1529 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1530 {
1531   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1532                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1533                                              -(__v4sf) __B,
1534                                              (__v4sf) __C),
1535                     (__v4sf) __A);
1536 }
1537
1538 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1539 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1540 {
1541   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1542                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1543                                                 -(__v8sf) __B,
1544                                                 (__v8sf) __C),
1545                     (__v8sf) __A);
1546 }
1547
1548 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1549 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1550 {
1551   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1552                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1553                                              -(__v2df) __B,
1554                                              -(__v2df) __C),
1555                     (__v2df) __A);
1556 }
1557
1558 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1559 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1560 {
1561   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1562                     __builtin_ia32_vfmaddpd ((__v2df) __A,
1563                                              -(__v2df) __B,
1564                                              -(__v2df) __C),
1565                     (__v2df) __C);
1566 }
1567
1568 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1569 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1570 {
1571   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1572                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1573                                                 -(__v4df) __B,
1574                                                 -(__v4df) __C),
1575                     (__v4df) __A);
1576 }
1577
1578 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1579 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1580 {
1581   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1582                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1583                                                 -(__v4df) __B,
1584                                                 -(__v4df) __C),
1585                     (__v4df) __C);
1586 }
1587
1588 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1589 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1590 {
1591   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1592                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1593                                              -(__v4sf) __B,
1594                                              -(__v4sf) __C),
1595                     (__v4sf) __A);
1596 }
1597
1598 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1599 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1600 {
1601   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1602                     __builtin_ia32_vfmaddps ((__v4sf) __A,
1603                                              -(__v4sf) __B,
1604                                              -(__v4sf) __C),
1605                     (__v4sf) __C);
1606 }
1607
1608 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1609 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1610 {
1611   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1612                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1613                                                 -(__v8sf) __B,
1614                                                 -(__v8sf) __C),
1615                     (__v8sf) __A);
1616 }
1617
1618 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1619 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1620 {
1621   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1622                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1623                                                 -(__v8sf) __B,
1624                                                 -(__v8sf) __C),
1625                     (__v8sf) __C);
1626 }
1627
1628 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1629 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1630   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1631                                               (__v2df)_mm_add_pd(__A, __B),
1632                                               (__v2df)__W);
1633 }
1634
1635 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1636 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1637   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1638                                               (__v2df)_mm_add_pd(__A, __B),
1639                                               (__v2df)_mm_setzero_pd());
1640 }
1641
1642 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1643 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1644   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1645                                               (__v4df)_mm256_add_pd(__A, __B),
1646                                               (__v4df)__W);
1647 }
1648
1649 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1650 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1651   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1652                                               (__v4df)_mm256_add_pd(__A, __B),
1653                                               (__v4df)_mm256_setzero_pd());
1654 }
1655
1656 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1657 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1658   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1659                                              (__v4sf)_mm_add_ps(__A, __B),
1660                                              (__v4sf)__W);
1661 }
1662
1663 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1664 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1665   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1666                                              (__v4sf)_mm_add_ps(__A, __B),
1667                                              (__v4sf)_mm_setzero_ps());
1668 }
1669
1670 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1671 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1672   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1673                                              (__v8sf)_mm256_add_ps(__A, __B),
1674                                              (__v8sf)__W);
1675 }
1676
1677 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1678 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1679   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1680                                              (__v8sf)_mm256_add_ps(__A, __B),
1681                                              (__v8sf)_mm256_setzero_ps());
1682 }
1683
1684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1685 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1686   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1687                 (__v4si) __W,
1688                 (__v4si) __A);
1689 }
1690
1691 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1692 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1693   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1694                 (__v8si) __W,
1695                 (__v8si) __A);
1696 }
1697
1698 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1699 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1700   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1701                  (__v2df) __W,
1702                  (__v2df) __A);
1703 }
1704
1705 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1706 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1707   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1708                  (__v4df) __W,
1709                  (__v4df) __A);
1710 }
1711
1712 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1713 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1714   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1715                 (__v4sf) __W,
1716                 (__v4sf) __A);
1717 }
1718
1719 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1720 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1721   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1722                 (__v8sf) __W,
1723                 (__v8sf) __A);
1724 }
1725
1726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1727 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1728   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1729                 (__v2di) __W,
1730                 (__v2di) __A);
1731 }
1732
1733 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1734 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1735   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1736                 (__v4di) __W,
1737                 (__v4di) __A);
1738 }
1739
1740 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1741 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1742   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1743                   (__v2df) __W,
1744                   (__mmask8) __U);
1745 }
1746
1747 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1748 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1749   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1750                   (__v2df)
1751                   _mm_setzero_pd (),
1752                   (__mmask8) __U);
1753 }
1754
1755 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1756 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1757   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1758                   (__v4df) __W,
1759                   (__mmask8) __U);
1760 }
1761
1762 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1763 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1764   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1765                   (__v4df)
1766                   _mm256_setzero_pd (),
1767                   (__mmask8) __U);
1768 }
1769
1770 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1771 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1772   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1773                   (__v2di) __W,
1774                   (__mmask8) __U);
1775 }
1776
1777 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1778 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1779   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1780                   (__v2di)
1781                   _mm_setzero_si128 (),
1782                   (__mmask8) __U);
1783 }
1784
1785 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1786 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1787   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1788                   (__v4di) __W,
1789                   (__mmask8) __U);
1790 }
1791
1792 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1793 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1794   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1795                   (__v4di)
1796                   _mm256_setzero_si256 (),
1797                   (__mmask8) __U);
1798 }
1799
1800 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1801 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1802   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1803                  (__v4sf) __W,
1804                  (__mmask8) __U);
1805 }
1806
1807 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1808 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1809   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1810                  (__v4sf)
1811                  _mm_setzero_ps (),
1812                  (__mmask8) __U);
1813 }
1814
1815 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1816 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1817   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1818                  (__v8sf) __W,
1819                  (__mmask8) __U);
1820 }
1821
1822 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1823 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1824   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1825                  (__v8sf)
1826                  _mm256_setzero_ps (),
1827                  (__mmask8) __U);
1828 }
1829
1830 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1831 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1832   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1833                   (__v4si) __W,
1834                   (__mmask8) __U);
1835 }
1836
1837 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1838 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1839   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1840                   (__v4si)
1841                   _mm_setzero_si128 (),
1842                   (__mmask8) __U);
1843 }
1844
1845 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1846 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1847   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1848                   (__v8si) __W,
1849                   (__mmask8) __U);
1850 }
1851
1852 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1853 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1854   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1855                   (__v8si)
1856                   _mm256_setzero_si256 (),
1857                   (__mmask8) __U);
1858 }
1859
1860 static __inline__ void __DEFAULT_FN_ATTRS128
1861 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1862   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1863             (__v2df) __A,
1864             (__mmask8) __U);
1865 }
1866
1867 static __inline__ void __DEFAULT_FN_ATTRS256
1868 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1869   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1870             (__v4df) __A,
1871             (__mmask8) __U);
1872 }
1873
1874 static __inline__ void __DEFAULT_FN_ATTRS128
1875 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1876   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1877             (__v2di) __A,
1878             (__mmask8) __U);
1879 }
1880
1881 static __inline__ void __DEFAULT_FN_ATTRS256
1882 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1883   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1884             (__v4di) __A,
1885             (__mmask8) __U);
1886 }
1887
1888 static __inline__ void __DEFAULT_FN_ATTRS128
1889 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1890   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1891             (__v4sf) __A,
1892             (__mmask8) __U);
1893 }
1894
1895 static __inline__ void __DEFAULT_FN_ATTRS256
1896 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1897   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1898             (__v8sf) __A,
1899             (__mmask8) __U);
1900 }
1901
1902 static __inline__ void __DEFAULT_FN_ATTRS128
1903 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1904   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1905             (__v4si) __A,
1906             (__mmask8) __U);
1907 }
1908
1909 static __inline__ void __DEFAULT_FN_ATTRS256
1910 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1911   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1912             (__v8si) __A,
1913             (__mmask8) __U);
1914 }
1915
1916 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1918   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1919                                               (__v2df)_mm_cvtepi32_pd(__A),
1920                                               (__v2df)__W);
1921 }
1922
1923 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1924 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1925   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1926                                               (__v2df)_mm_cvtepi32_pd(__A),
1927                                               (__v2df)_mm_setzero_pd());
1928 }
1929
1930 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1931 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1932   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1933                                               (__v4df)_mm256_cvtepi32_pd(__A),
1934                                               (__v4df)__W);
1935 }
1936
1937 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1938 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1939   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1940                                               (__v4df)_mm256_cvtepi32_pd(__A),
1941                                               (__v4df)_mm256_setzero_pd());
1942 }
1943
1944 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1945 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1946   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1947                                              (__v4sf)_mm_cvtepi32_ps(__A),
1948                                              (__v4sf)__W);
1949 }
1950
1951 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1952 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1953   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1954                                              (__v4sf)_mm_cvtepi32_ps(__A),
1955                                              (__v4sf)_mm_setzero_ps());
1956 }
1957
1958 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1959 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1960   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1961                                              (__v8sf)_mm256_cvtepi32_ps(__A),
1962                                              (__v8sf)__W);
1963 }
1964
1965 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1966 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1967   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1968                                              (__v8sf)_mm256_cvtepi32_ps(__A),
1969                                              (__v8sf)_mm256_setzero_ps());
1970 }
1971
1972 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1973 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1974   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1975                 (__v4si) __W,
1976                 (__mmask8) __U);
1977 }
1978
1979 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1981   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1982                 (__v4si)
1983                 _mm_setzero_si128 (),
1984                 (__mmask8) __U);
1985 }
1986
1987 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1988 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1989   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1990                                              (__v4si)_mm256_cvtpd_epi32(__A),
1991                                              (__v4si)__W);
1992 }
1993
1994 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1995 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1996   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1997                                              (__v4si)_mm256_cvtpd_epi32(__A),
1998                                              (__v4si)_mm_setzero_si128());
1999 }
2000
2001 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2002 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
2003   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2004             (__v4sf) __W,
2005             (__mmask8) __U);
2006 }
2007
2008 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2009 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2010   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2011             (__v4sf)
2012             _mm_setzero_ps (),
2013             (__mmask8) __U);
2014 }
2015
2016 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2017 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2018   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2019                                              (__v4sf)_mm256_cvtpd_ps(__A),
2020                                              (__v4sf)__W);
2021 }
2022
2023 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2024 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2025   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2026                                              (__v4sf)_mm256_cvtpd_ps(__A),
2027                                              (__v4sf)_mm_setzero_ps());
2028 }
2029
2030 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031 _mm_cvtpd_epu32 (__m128d __A) {
2032   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033                  (__v4si)
2034                  _mm_setzero_si128 (),
2035                  (__mmask8) -1);
2036 }
2037
2038 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2039 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2040   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2041                  (__v4si) __W,
2042                  (__mmask8) __U);
2043 }
2044
2045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2046 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2047   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2048                  (__v4si)
2049                  _mm_setzero_si128 (),
2050                  (__mmask8) __U);
2051 }
2052
2053 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054 _mm256_cvtpd_epu32 (__m256d __A) {
2055   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056                  (__v4si)
2057                  _mm_setzero_si128 (),
2058                  (__mmask8) -1);
2059 }
2060
2061 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2062 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2063   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2064                  (__v4si) __W,
2065                  (__mmask8) __U);
2066 }
2067
2068 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2069 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2070   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2071                  (__v4si)
2072                  _mm_setzero_si128 (),
2073                  (__mmask8) __U);
2074 }
2075
2076 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2078   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2079                                              (__v4si)_mm_cvtps_epi32(__A),
2080                                              (__v4si)__W);
2081 }
2082
2083 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2085   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2086                                              (__v4si)_mm_cvtps_epi32(__A),
2087                                              (__v4si)_mm_setzero_si128());
2088 }
2089
2090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2091 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2092   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2093                                              (__v8si)_mm256_cvtps_epi32(__A),
2094                                              (__v8si)__W);
2095 }
2096
2097 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2098 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2099   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2100                                              (__v8si)_mm256_cvtps_epi32(__A),
2101                                              (__v8si)_mm256_setzero_si256());
2102 }
2103
2104 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2105 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2106   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2107                                               (__v2df)_mm_cvtps_pd(__A),
2108                                               (__v2df)__W);
2109 }
2110
2111 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2112 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2113   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2114                                               (__v2df)_mm_cvtps_pd(__A),
2115                                               (__v2df)_mm_setzero_pd());
2116 }
2117
2118 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2119 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2120   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2121                                               (__v4df)_mm256_cvtps_pd(__A),
2122                                               (__v4df)__W);
2123 }
2124
2125 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2126 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2127   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2128                                               (__v4df)_mm256_cvtps_pd(__A),
2129                                               (__v4df)_mm256_setzero_pd());
2130 }
2131
2132 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133 _mm_cvtps_epu32 (__m128 __A) {
2134   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135                  (__v4si)
2136                  _mm_setzero_si128 (),
2137                  (__mmask8) -1);
2138 }
2139
2140 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2141 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2142   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2143                  (__v4si) __W,
2144                  (__mmask8) __U);
2145 }
2146
2147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2148 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2149   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2150                  (__v4si)
2151                  _mm_setzero_si128 (),
2152                  (__mmask8) __U);
2153 }
2154
2155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156 _mm256_cvtps_epu32 (__m256 __A) {
2157   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158                  (__v8si)
2159                  _mm256_setzero_si256 (),
2160                  (__mmask8) -1);
2161 }
2162
2163 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2165   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2166                  (__v8si) __W,
2167                  (__mmask8) __U);
2168 }
2169
2170 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2171 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2172   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2173                  (__v8si)
2174                  _mm256_setzero_si256 (),
2175                  (__mmask8) __U);
2176 }
2177
2178 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2179 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2180   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2181                  (__v4si) __W,
2182                  (__mmask8) __U);
2183 }
2184
2185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2186 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2187   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2188                  (__v4si)
2189                  _mm_setzero_si128 (),
2190                  (__mmask8) __U);
2191 }
2192
2193 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2194 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2195   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2196                                              (__v4si)_mm256_cvttpd_epi32(__A),
2197                                              (__v4si)__W);
2198 }
2199
2200 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2201 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2202   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2203                                              (__v4si)_mm256_cvttpd_epi32(__A),
2204                                              (__v4si)_mm_setzero_si128());
2205 }
2206
2207 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208 _mm_cvttpd_epu32 (__m128d __A) {
2209   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210                   (__v4si)
2211                   _mm_setzero_si128 (),
2212                   (__mmask8) -1);
2213 }
2214
2215 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2216 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2217   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2218                   (__v4si) __W,
2219                   (__mmask8) __U);
2220 }
2221
2222 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2223 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2224   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2225                   (__v4si)
2226                   _mm_setzero_si128 (),
2227                   (__mmask8) __U);
2228 }
2229
2230 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231 _mm256_cvttpd_epu32 (__m256d __A) {
2232   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233                   (__v4si)
2234                   _mm_setzero_si128 (),
2235                   (__mmask8) -1);
2236 }
2237
2238 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2239 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2240   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2241                   (__v4si) __W,
2242                   (__mmask8) __U);
2243 }
2244
2245 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2246 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2247   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2248                   (__v4si)
2249                   _mm_setzero_si128 (),
2250                   (__mmask8) __U);
2251 }
2252
2253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2254 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2255   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2256                                              (__v4si)_mm_cvttps_epi32(__A),
2257                                              (__v4si)__W);
2258 }
2259
2260 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2261 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2262   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2263                                              (__v4si)_mm_cvttps_epi32(__A),
2264                                              (__v4si)_mm_setzero_si128());
2265 }
2266
2267 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2268 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2269   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2270                                              (__v8si)_mm256_cvttps_epi32(__A),
2271                                              (__v8si)__W);
2272 }
2273
2274 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2275 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2276   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2277                                              (__v8si)_mm256_cvttps_epi32(__A),
2278                                              (__v8si)_mm256_setzero_si256());
2279 }
2280
2281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282 _mm_cvttps_epu32 (__m128 __A) {
2283   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284                   (__v4si)
2285                   _mm_setzero_si128 (),
2286                   (__mmask8) -1);
2287 }
2288
2289 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2290 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2291   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2292                   (__v4si) __W,
2293                   (__mmask8) __U);
2294 }
2295
2296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2297 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2298   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2299                   (__v4si)
2300                   _mm_setzero_si128 (),
2301                   (__mmask8) __U);
2302 }
2303
2304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305 _mm256_cvttps_epu32 (__m256 __A) {
2306   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307                   (__v8si)
2308                   _mm256_setzero_si256 (),
2309                   (__mmask8) -1);
2310 }
2311
2312 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2313 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2314   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2315                   (__v8si) __W,
2316                   (__mmask8) __U);
2317 }
2318
2319 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2320 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2321   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2322                   (__v8si)
2323                   _mm256_setzero_si256 (),
2324                   (__mmask8) __U);
2325 }
2326
2327 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2328 _mm_cvtepu32_pd (__m128i __A) {
2329   return (__m128d) __builtin_convertvector(
2330       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2331 }
2332
2333 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2334 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2335   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2336                                               (__v2df)_mm_cvtepu32_pd(__A),
2337                                               (__v2df)__W);
2338 }
2339
2340 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2341 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2342   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2343                                               (__v2df)_mm_cvtepu32_pd(__A),
2344                                               (__v2df)_mm_setzero_pd());
2345 }
2346
2347 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2348 _mm256_cvtepu32_pd (__m128i __A) {
2349   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2350 }
2351
2352 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2353 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2354   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2355                                               (__v4df)_mm256_cvtepu32_pd(__A),
2356                                               (__v4df)__W);
2357 }
2358
2359 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2360 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2361   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2362                                               (__v4df)_mm256_cvtepu32_pd(__A),
2363                                               (__v4df)_mm256_setzero_pd());
2364 }
2365
2366 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2367 _mm_cvtepu32_ps (__m128i __A) {
2368   return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2369 }
2370
2371 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2372 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2373   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2374                                              (__v4sf)_mm_cvtepu32_ps(__A),
2375                                              (__v4sf)__W);
2376 }
2377
2378 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2379 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2380   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2381                                              (__v4sf)_mm_cvtepu32_ps(__A),
2382                                              (__v4sf)_mm_setzero_ps());
2383 }
2384
2385 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2386 _mm256_cvtepu32_ps (__m256i __A) {
2387   return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2388 }
2389
2390 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2391 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2392   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2393                                              (__v8sf)_mm256_cvtepu32_ps(__A),
2394                                              (__v8sf)__W);
2395 }
2396
2397 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2398 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2399   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2400                                              (__v8sf)_mm256_cvtepu32_ps(__A),
2401                                              (__v8sf)_mm256_setzero_ps());
2402 }
2403
2404 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2405 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2406   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2407                                               (__v2df)_mm_div_pd(__A, __B),
2408                                               (__v2df)__W);
2409 }
2410
2411 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2412 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2413   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2414                                               (__v2df)_mm_div_pd(__A, __B),
2415                                               (__v2df)_mm_setzero_pd());
2416 }
2417
2418 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2419 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2420   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2421                                               (__v4df)_mm256_div_pd(__A, __B),
2422                                               (__v4df)__W);
2423 }
2424
2425 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2426 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2427   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2428                                               (__v4df)_mm256_div_pd(__A, __B),
2429                                               (__v4df)_mm256_setzero_pd());
2430 }
2431
2432 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2433 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2434   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2435                                              (__v4sf)_mm_div_ps(__A, __B),
2436                                              (__v4sf)__W);
2437 }
2438
2439 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2440 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2441   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2442                                              (__v4sf)_mm_div_ps(__A, __B),
2443                                              (__v4sf)_mm_setzero_ps());
2444 }
2445
2446 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2447 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2448   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2449                                              (__v8sf)_mm256_div_ps(__A, __B),
2450                                              (__v8sf)__W);
2451 }
2452
2453 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2454 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2455   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2456                                              (__v8sf)_mm256_div_ps(__A, __B),
2457                                              (__v8sf)_mm256_setzero_ps());
2458 }
2459
2460 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2461 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2462   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2463                 (__v2df) __W,
2464                 (__mmask8) __U);
2465 }
2466
2467 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2468 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2469   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2470                  (__v2df)
2471                  _mm_setzero_pd (),
2472                  (__mmask8) __U);
2473 }
2474
2475 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2476 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2477   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2478                 (__v4df) __W,
2479                 (__mmask8) __U);
2480 }
2481
2482 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2483 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2484   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2485                  (__v4df)
2486                  _mm256_setzero_pd (),
2487                  (__mmask8) __U);
2488 }
2489
2490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2491 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2492   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2493                 (__v2di) __W,
2494                 (__mmask8) __U);
2495 }
2496
2497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2498 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2499   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2500                  (__v2di)
2501                  _mm_setzero_si128 (),
2502                  (__mmask8) __U);
2503 }
2504
2505 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2506 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2507   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2508                 (__v4di) __W,
2509                 (__mmask8) __U);
2510 }
2511
2512 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2513 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2514   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2515                  (__v4di)
2516                  _mm256_setzero_si256 (),
2517                  (__mmask8) __U);
2518 }
2519
2520 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2521 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2522   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2523               (__v2df) __W,
2524               (__mmask8)
2525               __U);
2526 }
2527
2528 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2529 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2530   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2531                (__v2df)
2532                _mm_setzero_pd (),
2533                (__mmask8)
2534                __U);
2535 }
2536
2537 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2538 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2539   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2540               (__v4df) __W,
2541               (__mmask8)
2542               __U);
2543 }
2544
2545 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2546 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2547   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2548                (__v4df)
2549                _mm256_setzero_pd (),
2550                (__mmask8)
2551                __U);
2552 }
2553
2554 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2555 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2556   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2557               (__v2di) __W,
2558               (__mmask8)
2559               __U);
2560 }
2561
2562 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2563 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2564   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2565                (__v2di)
2566                _mm_setzero_si128 (),
2567                (__mmask8)
2568                __U);
2569 }
2570
2571 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2572 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2573              void const *__P) {
2574   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2575               (__v4di) __W,
2576               (__mmask8)
2577               __U);
2578 }
2579
2580 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2581 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2582   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2583                (__v4di)
2584                _mm256_setzero_si256 (),
2585                (__mmask8)
2586                __U);
2587 }
2588
2589 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2590 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2591   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2592                    (__v4sf) __W,
2593                    (__mmask8) __U);
2594 }
2595
2596 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2597 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2598   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2599               (__v4sf)
2600               _mm_setzero_ps (),
2601               (__mmask8)
2602               __U);
2603 }
2604
2605 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2606 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2607   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2608                    (__v8sf) __W,
2609                    (__mmask8) __U);
2610 }
2611
2612 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2613 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2614   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2615               (__v8sf)
2616               _mm256_setzero_ps (),
2617               (__mmask8)
2618               __U);
2619 }
2620
2621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2623   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2624               (__v4si) __W,
2625               (__mmask8)
2626               __U);
2627 }
2628
2629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2631   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2632                (__v4si)
2633                _mm_setzero_si128 (),
2634                (__mmask8)     __U);
2635 }
2636
2637 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2639              void const *__P) {
2640   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2641               (__v8si) __W,
2642               (__mmask8)
2643               __U);
2644 }
2645
2646 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2647 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2648   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2649                (__v8si)
2650                _mm256_setzero_si256 (),
2651                (__mmask8)
2652                __U);
2653 }
2654
2655 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2656 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2657   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2658                (__v4sf) __W,
2659                (__mmask8) __U);
2660 }
2661
2662 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2663 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2664   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2665                 (__v4sf)
2666                 _mm_setzero_ps (),
2667                 (__mmask8) __U);
2668 }
2669
2670 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2671 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2672   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2673                (__v8sf) __W,
2674                (__mmask8) __U);
2675 }
2676
2677 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2678 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2679   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2680                 (__v8sf)
2681                 _mm256_setzero_ps (),
2682                 (__mmask8) __U);
2683 }
2684
2685 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2686 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2687   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2688                 (__v4si) __W,
2689                 (__mmask8) __U);
2690 }
2691
2692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2693 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2694   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2695                  (__v4si)
2696                  _mm_setzero_si128 (),
2697                  (__mmask8) __U);
2698 }
2699
2700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2701 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2702   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2703                 (__v8si) __W,
2704                 (__mmask8) __U);
2705 }
2706
2707 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2708 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2709   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2710                  (__v8si)
2711                  _mm256_setzero_si256 (),
2712                  (__mmask8) __U);
2713 }
2714
2715 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716 _mm_getexp_pd (__m128d __A) {
2717   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718                 (__v2df)
2719                 _mm_setzero_pd (),
2720                 (__mmask8) -1);
2721 }
2722
2723 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2724 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2725   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2726                 (__v2df) __W,
2727                 (__mmask8) __U);
2728 }
2729
2730 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2731 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2732   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2733                 (__v2df)
2734                 _mm_setzero_pd (),
2735                 (__mmask8) __U);
2736 }
2737
2738 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739 _mm256_getexp_pd (__m256d __A) {
2740   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741                 (__v4df)
2742                 _mm256_setzero_pd (),
2743                 (__mmask8) -1);
2744 }
2745
2746 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2747 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2748   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2749                 (__v4df) __W,
2750                 (__mmask8) __U);
2751 }
2752
2753 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2754 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2755   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2756                 (__v4df)
2757                 _mm256_setzero_pd (),
2758                 (__mmask8) __U);
2759 }
2760
2761 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762 _mm_getexp_ps (__m128 __A) {
2763   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764                (__v4sf)
2765                _mm_setzero_ps (),
2766                (__mmask8) -1);
2767 }
2768
2769 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2770 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2771   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2772                (__v4sf) __W,
2773                (__mmask8) __U);
2774 }
2775
2776 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2777 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2778   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2779                (__v4sf)
2780                _mm_setzero_ps (),
2781                (__mmask8) __U);
2782 }
2783
2784 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785 _mm256_getexp_ps (__m256 __A) {
2786   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787                (__v8sf)
2788                _mm256_setzero_ps (),
2789                (__mmask8) -1);
2790 }
2791
2792 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2793 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2794   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2795                (__v8sf) __W,
2796                (__mmask8) __U);
2797 }
2798
2799 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2800 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2801   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2802                (__v8sf)
2803                _mm256_setzero_ps (),
2804                (__mmask8) __U);
2805 }
2806
2807 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2808 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2809   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2810                                               (__v2df)_mm_max_pd(__A, __B),
2811                                               (__v2df)__W);
2812 }
2813
2814 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2815 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2816   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2817                                               (__v2df)_mm_max_pd(__A, __B),
2818                                               (__v2df)_mm_setzero_pd());
2819 }
2820
2821 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2822 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2823   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2824                                               (__v4df)_mm256_max_pd(__A, __B),
2825                                               (__v4df)__W);
2826 }
2827
2828 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2829 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2830   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2831                                               (__v4df)_mm256_max_pd(__A, __B),
2832                                               (__v4df)_mm256_setzero_pd());
2833 }
2834
2835 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2836 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2837   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2838                                              (__v4sf)_mm_max_ps(__A, __B),
2839                                              (__v4sf)__W);
2840 }
2841
2842 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2843 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2844   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2845                                              (__v4sf)_mm_max_ps(__A, __B),
2846                                              (__v4sf)_mm_setzero_ps());
2847 }
2848
2849 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2850 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2851   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2852                                              (__v8sf)_mm256_max_ps(__A, __B),
2853                                              (__v8sf)__W);
2854 }
2855
2856 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2857 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2858   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2859                                              (__v8sf)_mm256_max_ps(__A, __B),
2860                                              (__v8sf)_mm256_setzero_ps());
2861 }
2862
2863 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2864 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2865   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2866                                               (__v2df)_mm_min_pd(__A, __B),
2867                                               (__v2df)__W);
2868 }
2869
2870 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2871 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2872   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2873                                               (__v2df)_mm_min_pd(__A, __B),
2874                                               (__v2df)_mm_setzero_pd());
2875 }
2876
2877 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2878 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2879   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2880                                               (__v4df)_mm256_min_pd(__A, __B),
2881                                               (__v4df)__W);
2882 }
2883
2884 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2885 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2886   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2887                                               (__v4df)_mm256_min_pd(__A, __B),
2888                                               (__v4df)_mm256_setzero_pd());
2889 }
2890
2891 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2892 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2893   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2894                                              (__v4sf)_mm_min_ps(__A, __B),
2895                                              (__v4sf)__W);
2896 }
2897
2898 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2899 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2900   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2901                                              (__v4sf)_mm_min_ps(__A, __B),
2902                                              (__v4sf)_mm_setzero_ps());
2903 }
2904
2905 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2906 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2907   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2908                                              (__v8sf)_mm256_min_ps(__A, __B),
2909                                              (__v8sf)__W);
2910 }
2911
2912 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2913 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2914   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2915                                              (__v8sf)_mm256_min_ps(__A, __B),
2916                                              (__v8sf)_mm256_setzero_ps());
2917 }
2918
2919 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2920 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2921   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2922                                               (__v2df)_mm_mul_pd(__A, __B),
2923                                               (__v2df)__W);
2924 }
2925
2926 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2927 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2928   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2929                                               (__v2df)_mm_mul_pd(__A, __B),
2930                                               (__v2df)_mm_setzero_pd());
2931 }
2932
2933 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2934 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2935   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2936                                               (__v4df)_mm256_mul_pd(__A, __B),
2937                                               (__v4df)__W);
2938 }
2939
2940 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2941 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2942   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2943                                               (__v4df)_mm256_mul_pd(__A, __B),
2944                                               (__v4df)_mm256_setzero_pd());
2945 }
2946
2947 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2948 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2949   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2950                                              (__v4sf)_mm_mul_ps(__A, __B),
2951                                              (__v4sf)__W);
2952 }
2953
2954 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2955 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2956   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2957                                              (__v4sf)_mm_mul_ps(__A, __B),
2958                                              (__v4sf)_mm_setzero_ps());
2959 }
2960
2961 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2962 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2963   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2964                                              (__v8sf)_mm256_mul_ps(__A, __B),
2965                                              (__v8sf)__W);
2966 }
2967
2968 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2969 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2970   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2971                                              (__v8sf)_mm256_mul_ps(__A, __B),
2972                                              (__v8sf)_mm256_setzero_ps());
2973 }
2974
2975 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2976 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2977   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2978                                              (__v4si)_mm_abs_epi32(__A),
2979                                              (__v4si)__W);
2980 }
2981
2982 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2983 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2984   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2985                                              (__v4si)_mm_abs_epi32(__A),
2986                                              (__v4si)_mm_setzero_si128());
2987 }
2988
2989 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2990 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2991   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2992                                              (__v8si)_mm256_abs_epi32(__A),
2993                                              (__v8si)__W);
2994 }
2995
2996 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2997 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2998   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2999                                              (__v8si)_mm256_abs_epi32(__A),
3000                                              (__v8si)_mm256_setzero_si256());
3001 }
3002
3003 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3004 _mm_abs_epi64 (__m128i __A) {
3005   return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
3006 }
3007
3008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3009 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3010   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3011                                              (__v2di)_mm_abs_epi64(__A),
3012                                              (__v2di)__W);
3013 }
3014
3015 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3016 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3017   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3018                                              (__v2di)_mm_abs_epi64(__A),
3019                                              (__v2di)_mm_setzero_si128());
3020 }
3021
3022 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3023 _mm256_abs_epi64 (__m256i __A) {
3024   return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3025 }
3026
3027 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3028 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3029   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3030                                              (__v4di)_mm256_abs_epi64(__A),
3031                                              (__v4di)__W);
3032 }
3033
3034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3035 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3036   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3037                                              (__v4di)_mm256_abs_epi64(__A),
3038                                              (__v4di)_mm256_setzero_si256());
3039 }
3040
3041 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3042 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3043   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3044                                              (__v4si)_mm_max_epi32(__A, __B),
3045                                              (__v4si)_mm_setzero_si128());
3046 }
3047
3048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3049 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3050   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3051                                              (__v4si)_mm_max_epi32(__A, __B),
3052                                              (__v4si)__W);
3053 }
3054
3055 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3056 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3057   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3058                                              (__v8si)_mm256_max_epi32(__A, __B),
3059                                              (__v8si)_mm256_setzero_si256());
3060 }
3061
3062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3063 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3064   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3065                                              (__v8si)_mm256_max_epi32(__A, __B),
3066                                              (__v8si)__W);
3067 }
3068
3069 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3070 _mm_max_epi64 (__m128i __A, __m128i __B) {
3071   return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3072 }
3073
3074 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3075 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3076   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3077                                              (__v2di)_mm_max_epi64(__A, __B),
3078                                              (__v2di)_mm_setzero_si128());
3079 }
3080
3081 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3082 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3083   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3084                                              (__v2di)_mm_max_epi64(__A, __B),
3085                                              (__v2di)__W);
3086 }
3087
3088 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3089 _mm256_max_epi64 (__m256i __A, __m256i __B) {
3090   return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3091 }
3092
3093 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3094 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3095   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3096                                              (__v4di)_mm256_max_epi64(__A, __B),
3097                                              (__v4di)_mm256_setzero_si256());
3098 }
3099
3100 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3101 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3102   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3103                                              (__v4di)_mm256_max_epi64(__A, __B),
3104                                              (__v4di)__W);
3105 }
3106
3107 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3108 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3109   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3110                                              (__v4si)_mm_max_epu32(__A, __B),
3111                                              (__v4si)_mm_setzero_si128());
3112 }
3113
3114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3115 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3116   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3117                                              (__v4si)_mm_max_epu32(__A, __B),
3118                                              (__v4si)__W);
3119 }
3120
3121 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3122 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3123   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3124                                              (__v8si)_mm256_max_epu32(__A, __B),
3125                                              (__v8si)_mm256_setzero_si256());
3126 }
3127
3128 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3129 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3130   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3131                                              (__v8si)_mm256_max_epu32(__A, __B),
3132                                              (__v8si)__W);
3133 }
3134
3135 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3136 _mm_max_epu64 (__m128i __A, __m128i __B) {
3137   return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3138 }
3139
3140 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3141 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3142   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3143                                              (__v2di)_mm_max_epu64(__A, __B),
3144                                              (__v2di)_mm_setzero_si128());
3145 }
3146
3147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3148 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3149   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3150                                              (__v2di)_mm_max_epu64(__A, __B),
3151                                              (__v2di)__W);
3152 }
3153
3154 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3155 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3156   return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3157 }
3158
3159 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3160 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3161   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3162                                              (__v4di)_mm256_max_epu64(__A, __B),
3163                                              (__v4di)_mm256_setzero_si256());
3164 }
3165
3166 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3167 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3168   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3169                                              (__v4di)_mm256_max_epu64(__A, __B),
3170                                              (__v4di)__W);
3171 }
3172
3173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3174 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3175   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3176                                              (__v4si)_mm_min_epi32(__A, __B),
3177                                              (__v4si)_mm_setzero_si128());
3178 }
3179
3180 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3181 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3182   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3183                                              (__v4si)_mm_min_epi32(__A, __B),
3184                                              (__v4si)__W);
3185 }
3186
3187 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3188 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3189   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3190                                              (__v8si)_mm256_min_epi32(__A, __B),
3191                                              (__v8si)_mm256_setzero_si256());
3192 }
3193
3194 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3195 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3196   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3197                                              (__v8si)_mm256_min_epi32(__A, __B),
3198                                              (__v8si)__W);
3199 }
3200
3201 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3202 _mm_min_epi64 (__m128i __A, __m128i __B) {
3203   return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3204 }
3205
3206 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3207 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3208   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3209                                              (__v2di)_mm_min_epi64(__A, __B),
3210                                              (__v2di)__W);
3211 }
3212
3213 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3214 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3215   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3216                                              (__v2di)_mm_min_epi64(__A, __B),
3217                                              (__v2di)_mm_setzero_si128());
3218 }
3219
3220 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3221 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3222   return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3223 }
3224
3225 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3226 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3227   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3228                                              (__v4di)_mm256_min_epi64(__A, __B),
3229                                              (__v4di)__W);
3230 }
3231
3232 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3233 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3234   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3235                                              (__v4di)_mm256_min_epi64(__A, __B),
3236                                              (__v4di)_mm256_setzero_si256());
3237 }
3238
3239 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3240 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3241   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3242                                              (__v4si)_mm_min_epu32(__A, __B),
3243                                              (__v4si)_mm_setzero_si128());
3244 }
3245
3246 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3247 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3248   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3249                                              (__v4si)_mm_min_epu32(__A, __B),
3250                                              (__v4si)__W);
3251 }
3252
3253 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3254 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3255   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3256                                              (__v8si)_mm256_min_epu32(__A, __B),
3257                                              (__v8si)_mm256_setzero_si256());
3258 }
3259
3260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3261 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3262   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3263                                              (__v8si)_mm256_min_epu32(__A, __B),
3264                                              (__v8si)__W);
3265 }
3266
3267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3268 _mm_min_epu64 (__m128i __A, __m128i __B) {
3269   return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3270 }
3271
3272 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3273 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3274   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3275                                              (__v2di)_mm_min_epu64(__A, __B),
3276                                              (__v2di)__W);
3277 }
3278
3279 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3280 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3281   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3282                                              (__v2di)_mm_min_epu64(__A, __B),
3283                                              (__v2di)_mm_setzero_si128());
3284 }
3285
3286 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3287 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3288   return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3289 }
3290
3291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3292 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3293   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3294                                              (__v4di)_mm256_min_epu64(__A, __B),
3295                                              (__v4di)__W);
3296 }
3297
3298 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3299 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3300   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3301                                              (__v4di)_mm256_min_epu64(__A, __B),
3302                                              (__v4di)_mm256_setzero_si256());
3303 }
3304
/* Expands to __builtin_ia32_rndscalepd_128_mask on (A, imm) with
 * _mm_setzero_pd() as the third operand and (__mmask8)-1 as the mask.
 * The whole expansion is parenthesized so the leading (__m128d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
3310
3311
/* Expands to __builtin_ia32_rndscalepd_128_mask on (A, imm) with W as the
 * third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m128d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
3317
3318
/* Expands to __builtin_ia32_rndscalepd_128_mask on (A, imm) with
 * _mm_setzero_pd() as the third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m128d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3324
3325
/* Expands to __builtin_ia32_rndscalepd_256_mask on (A, imm) with
 * _mm256_setzero_pd() as the third operand and (__mmask8)-1 as the mask.
 * The whole expansion is parenthesized so the leading (__m256d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
3331
3332
/* Expands to __builtin_ia32_rndscalepd_256_mask on (A, imm) with W as the
 * third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m256d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
3338
3339
/* Expands to __builtin_ia32_rndscalepd_256_mask on (A, imm) with
 * _mm256_setzero_pd() as the third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m256d) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_maskz_roundscale_pd(U, A, imm)  \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3345
/* Expands to __builtin_ia32_rndscaleps_128_mask on (A, imm) with
 * _mm_setzero_ps() as the third operand and (__mmask8)-1 as the mask.
 * The whole expansion is parenthesized so the leading (__m128) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_roundscale_ps(A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
3350
3351
/* Expands to __builtin_ia32_rndscaleps_128_mask on (A, imm) with W as the
 * third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m128) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_mask_roundscale_ps(W, U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
3356
3357
/* Expands to __builtin_ia32_rndscaleps_128_mask on (A, imm) with
 * _mm_setzero_ps() as the third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m128) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm_maskz_roundscale_ps(U, A, imm)  \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3362
/* Expands to __builtin_ia32_rndscaleps_256_mask on (A, imm) with
 * _mm256_setzero_ps() as the third operand and (__mmask8)-1 as the mask.
 * The whole expansion is parenthesized so the leading (__m256) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_roundscale_ps(A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
3367
/* Expands to __builtin_ia32_rndscaleps_256_mask on (A, imm) with W as the
 * third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m256) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_mask_roundscale_ps(W, U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
3372
3373
/* Expands to __builtin_ia32_rndscaleps_256_mask on (A, imm) with
 * _mm256_setzero_ps() as the third operand and U as the mask.
 * The whole expansion is parenthesized so the leading (__m256) cast applies
 * to the call's result even when the macro use is followed by a postfix
 * operator such as a subscript. */
#define _mm256_maskz_roundscale_ps(U, A, imm)  \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3378
3379 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3380 _mm_scalef_pd (__m128d __A, __m128d __B) {
3381   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3382                 (__v2df) __B,
3383                 (__v2df)
3384                 _mm_setzero_pd (),
3385                 (__mmask8) -1);
3386 }
3387
3388 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3389 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3390         __m128d __B) {
3391   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3392                 (__v2df) __B,
3393                 (__v2df) __W,
3394                 (__mmask8) __U);
3395 }
3396
3397 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3398 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3399   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3400                 (__v2df) __B,
3401                 (__v2df)
3402                 _mm_setzero_pd (),
3403                 (__mmask8) __U);
3404 }
3405
3406 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3407 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3408   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3409                 (__v4df) __B,
3410                 (__v4df)
3411                 _mm256_setzero_pd (),
3412                 (__mmask8) -1);
3413 }
3414
3415 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3416 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3417            __m256d __B) {
3418   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3419                 (__v4df) __B,
3420                 (__v4df) __W,
3421                 (__mmask8) __U);
3422 }
3423
3424 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3425 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3426   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3427                 (__v4df) __B,
3428                 (__v4df)
3429                 _mm256_setzero_pd (),
3430                 (__mmask8) __U);
3431 }
3432
3433 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3434 _mm_scalef_ps (__m128 __A, __m128 __B) {
3435   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3436                (__v4sf) __B,
3437                (__v4sf)
3438                _mm_setzero_ps (),
3439                (__mmask8) -1);
3440 }
3441
3442 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3444   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3445                (__v4sf) __B,
3446                (__v4sf) __W,
3447                (__mmask8) __U);
3448 }
3449
3450 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3451 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3452   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3453                (__v4sf) __B,
3454                (__v4sf)
3455                _mm_setzero_ps (),
3456                (__mmask8) __U);
3457 }
3458
3459 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3460 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3461   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3462                (__v8sf) __B,
3463                (__v8sf)
3464                _mm256_setzero_ps (),
3465                (__mmask8) -1);
3466 }
3467
3468 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3469 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3470            __m256 __B) {
3471   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3472                (__v8sf) __B,
3473                (__v8sf) __W,
3474                (__mmask8) __U);
3475 }
3476
3477 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3478 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3479   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3480                (__v8sf) __B,
3481                (__v8sf)
3482                _mm256_setzero_ps (),
3483                (__mmask8) __U);
3484 }
3485
/* Expands to __builtin_ia32_scatterdiv2df with mask (__mmask8)-1; index is
 * passed as __v2di and v1 as __v2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3490
/* Expands to __builtin_ia32_scatterdiv2df with the caller's mask; index is
 * passed as __v2di and v1 as __v2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3495
/* Expands to __builtin_ia32_scatterdiv2di with mask (__mmask8)-1; index and
 * v1 are both passed as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3500
/* Expands to __builtin_ia32_scatterdiv2di with the caller's mask; index and
 * v1 are both passed as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3505
/* Expands to __builtin_ia32_scatterdiv4df with mask (__mmask8)-1; index is
 * passed as __v4di and v1 as __v4df. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3510
/* Expands to __builtin_ia32_scatterdiv4df with the caller's mask; index is
 * passed as __v4di and v1 as __v4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3515
/* Expands to __builtin_ia32_scatterdiv4di with mask (__mmask8)-1; index and
 * v1 are both passed as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3520
/* Expands to __builtin_ia32_scatterdiv4di with the caller's mask; index and
 * v1 are both passed as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3525
/* Expands to __builtin_ia32_scatterdiv4sf with mask (__mmask8)-1; index is
 * passed as __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3530
/* Expands to __builtin_ia32_scatterdiv4sf with the caller's mask; index is
 * passed as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3535
/* Expands to __builtin_ia32_scatterdiv4si with mask (__mmask8)-1; index is
 * passed as __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3540
/* Expands to __builtin_ia32_scatterdiv4si with the caller's mask; index is
 * passed as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3545
/* Expands to __builtin_ia32_scatterdiv8sf with mask (__mmask8)-1; index is
 * passed as __v4di (from __m256i) and v1 as __v4sf (from __m128). */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3550
/* Expands to __builtin_ia32_scatterdiv8sf with the caller's mask; index is
 * passed as __v4di (from __m256i) and v1 as __v4sf (from __m128). */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3555
/* Expands to __builtin_ia32_scatterdiv8si with mask (__mmask8)-1; index is
 * passed as __v4di (from __m256i) and v1 as __v4si (from __m128i). */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3560
/* Expands to __builtin_ia32_scatterdiv8si with the caller's mask; index is
 * passed as __v4di (from __m256i) and v1 as __v4si (from __m128i). */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3565
/* Expands to __builtin_ia32_scattersiv2df with mask (__mmask8)-1; index is
 * passed as __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3570
3571 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3572   __builtin_ia32_scattersiv2df( /* operands: addr, mask, index, v1, scale */ \
3573       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3574       (__v2df)(__m128d)(v1), (int)(scale))
3575
3576 /* Same expansion as _mm_mask_i32scatter_epi64 with mask (__mmask8)-1. */
3577 #define _mm_i32scatter_epi64(addr, index, v1, scale) \
3578   _mm_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), \
3579                             (scale))
3580
3581 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3582   __builtin_ia32_scattersiv2di( /* operands: addr, mask, index, v1, scale */ \
3583       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3584       (__v2di)(__m128i)(v1), (int)(scale))
3585
3586 /* Same expansion as _mm256_mask_i32scatter_pd with mask (__mmask8)-1. */
3587 #define _mm256_i32scatter_pd(addr, index, v1, scale) \
3588   _mm256_mask_i32scatter_pd((addr), (__mmask8)-1, (index), (v1), \
3589                             (scale))
3590
3591 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3592   __builtin_ia32_scattersiv4df( /* operands: addr, mask, index, v1, scale */ \
3593       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3594       (__v4df)(__m256d)(v1), (int)(scale))
3595
3596 /* Same expansion as _mm256_mask_i32scatter_epi64 with mask (__mmask8)-1. */
3597 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
3598   _mm256_mask_i32scatter_epi64((addr), (__mmask8)-1, (index), (v1), \
3599                                (scale))
3600
3601 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3602   __builtin_ia32_scattersiv4di( /* operands: addr, mask, index, v1, scale */ \
3603       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3604       (__v4di)(__m256i)(v1), (int)(scale))
3605
3606 /* Same expansion as _mm_mask_i32scatter_ps with mask (__mmask8)-1. */
3607 #define _mm_i32scatter_ps(addr, index, v1, scale) \
3608   _mm_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), \
3609                          (scale))
3610
3611 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3612   __builtin_ia32_scattersiv4sf( /* operands: addr, mask, index, v1, scale */ \
3613       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3614       (__v4sf)(__m128)(v1), (int)(scale))
3615
3616 /* Same expansion as _mm_mask_i32scatter_epi32 with mask (__mmask8)-1. */
3617 #define _mm_i32scatter_epi32(addr, index, v1, scale) \
3618   _mm_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), \
3619                             (scale))
3620
3621 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3622   __builtin_ia32_scattersiv4si( /* operands: addr, mask, index, v1, scale */ \
3623       (void *)(addr), (__mmask8)(mask), (__v4si)(__m128i)(index), \
3624       (__v4si)(__m128i)(v1), (int)(scale))
3625
3626 /* Same expansion as _mm256_mask_i32scatter_ps with mask (__mmask8)-1. */
3627 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
3628   _mm256_mask_i32scatter_ps((addr), (__mmask8)-1, (index), (v1), \
3629                             (scale))
3630
3631 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3632   __builtin_ia32_scattersiv8sf( /* operands: addr, mask, index, v1, scale */ \
3633       (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
3634       (__v8sf)(__m256)(v1), (int)(scale))
3635
3636 /* Same expansion as _mm256_mask_i32scatter_epi32 with mask (__mmask8)-1. */
3637 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \
3638   _mm256_mask_i32scatter_epi32((addr), (__mmask8)-1, (index), (v1), \
3639                                (scale))
3640
3641 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3642   __builtin_ia32_scattersiv8si( /* operands: addr, mask, index, v1, scale */ \
3643       (void *)(addr), (__mmask8)(mask), (__v8si)(__m256i)(index), \
3644       (__v8si)(__m256i)(v1), (int)(scale))
3645
3646   /* __U-masked select between _mm_sqrt_pd(__A) and __W. */
3647   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3648   _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3649     __v2df __root = (__v2df)_mm_sqrt_pd(__A);
3650     return (__m128d)__builtin_ia32_selectpd_128(__U, __root, (__v2df)__W);
3651   }
3652
3653   /* __U-masked select between _mm_sqrt_pd(__A) and _mm_setzero_pd(). */
3654   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3655   _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3656     __v2df __root = (__v2df)_mm_sqrt_pd(__A);
3657     return (__m128d)__builtin_ia32_selectpd_128(__U, __root, (__v2df)_mm_setzero_pd());
3658   }
3659
3660   /* __U-masked select between _mm256_sqrt_pd(__A) and __W. */
3661   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3662   _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3663     __v4df __root = (__v4df)_mm256_sqrt_pd(__A);
3664     return (__m256d)__builtin_ia32_selectpd_256(__U, __root, (__v4df)__W);
3665   }
3666
3667   /* __U-masked select between _mm256_sqrt_pd(__A) and _mm256_setzero_pd(). */
3668   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3669   _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3670     __v4df __root = (__v4df)_mm256_sqrt_pd(__A);
3671     return (__m256d)__builtin_ia32_selectpd_256(__U, __root, (__v4df)_mm256_setzero_pd());
3672   }
3673
3674   /* __U-masked select between _mm_sqrt_ps(__A) and __W. */
3675   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3676   _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3677     __v4sf __root = (__v4sf)_mm_sqrt_ps(__A);
3678     return (__m128)__builtin_ia32_selectps_128(__U, __root, (__v4sf)__W);
3679   }
3680
3681   /* __U-masked select between _mm_sqrt_ps(__A) and _mm_setzero_ps(). */
3682   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3683   _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3684     __v4sf __root = (__v4sf)_mm_sqrt_ps(__A);
3685     return (__m128)__builtin_ia32_selectps_128(__U, __root, (__v4sf)_mm_setzero_ps());
3686   }
3687
3688   /* __U-masked select between _mm256_sqrt_ps(__A) and __W. */
3689   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3690   _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3691     __v8sf __root = (__v8sf)_mm256_sqrt_ps(__A);
3692     return (__m256)__builtin_ia32_selectps_256(__U, __root, (__v8sf)__W);
3693   }
3694
3695   /* __U-masked select between _mm256_sqrt_ps(__A) and _mm256_setzero_ps(). */
3696   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3697   _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3698     __v8sf __root = (__v8sf)_mm256_sqrt_ps(__A);
3699     return (__m256)__builtin_ia32_selectps_256(__U, __root, (__v8sf)_mm256_setzero_ps());
3700   }
3701
3702   /* __U-masked select between _mm_sub_pd(__A, __B) and __W. */
3703   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3704   _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3705     __v2df __diff = (__v2df)_mm_sub_pd(__A, __B);
3706     return (__m128d)__builtin_ia32_selectpd_128(__U, __diff, (__v2df)__W);
3707   }
3708
3709   /* __U-masked select between _mm_sub_pd(__A, __B) and _mm_setzero_pd(). */
3710   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3711   _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3712     __v2df __diff = (__v2df)_mm_sub_pd(__A, __B);
3713     return (__m128d)__builtin_ia32_selectpd_128(__U, __diff, (__v2df)_mm_setzero_pd());
3714   }
3715
3716   /* __U-masked select between _mm256_sub_pd(__A, __B) and __W. */
3717   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3718   _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3719     __v4df __diff = (__v4df)_mm256_sub_pd(__A, __B);
3720     return (__m256d)__builtin_ia32_selectpd_256(__U, __diff, (__v4df)__W);
3721   }
3722
3723   /* __U-masked select between _mm256_sub_pd(__A, __B) and _mm256_setzero_pd(). */
3724   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3725   _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3726     __v4df __diff = (__v4df)_mm256_sub_pd(__A, __B);
3727     return (__m256d)__builtin_ia32_selectpd_256(__U, __diff, (__v4df)_mm256_setzero_pd());
3728   }
3729
3730   /* __U-masked select between _mm_sub_ps(__A, __B) and __W. */
3731   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3732   _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3733     __v4sf __diff = (__v4sf)_mm_sub_ps(__A, __B);
3734     return (__m128)__builtin_ia32_selectps_128(__U, __diff, (__v4sf)__W);
3735   }
3736
3737   /* __U-masked select between _mm_sub_ps(__A, __B) and _mm_setzero_ps(). */
3738   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3739   _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3740     __v4sf __diff = (__v4sf)_mm_sub_ps(__A, __B);
3741     return (__m128)__builtin_ia32_selectps_128(__U, __diff, (__v4sf)_mm_setzero_ps());
3742   }
3743
3744   /* __U-masked select between _mm256_sub_ps(__A, __B) and __W. */
3745   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3746   _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3747     __v8sf __diff = (__v8sf)_mm256_sub_ps(__A, __B);
3748     return (__m256)__builtin_ia32_selectps_256(__U, __diff, (__v8sf)__W);
3749   }
3750
3751   /* __U-masked select between _mm256_sub_ps(__A, __B) and _mm256_setzero_ps(). */
3752   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3753   _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3754     __v8sf __diff = (__v8sf)_mm256_sub_ps(__A, __B);
3755     return (__m256)__builtin_ia32_selectps_256(__U, __diff, (__v8sf)_mm256_setzero_ps());
3756   }
3757
3758   /* Casts __A, __I, __B to __v4si and calls the vpermi2vard128 builtin. */
3759   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3760   _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3761     return (__m128i)__builtin_ia32_vpermi2vard128((__v4si)__A, (__v4si)__I, (__v4si)__B);
3762   }
3763
3764   /* __U-masked select between _mm_permutex2var_epi32(__A, __I, __B) and __A. */
3765   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3766   _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3767                               __m128i __B) {
3768     __v4si __perm = (__v4si)_mm_permutex2var_epi32(__A, __I, __B);
3769     return (__m128i)__builtin_ia32_selectd_128(__U, __perm, (__v4si)__A);
3770   }
3771
3772   /* __U-masked select between _mm_permutex2var_epi32(__A, __I, __B) and __I. */
3773   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3774   _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3775                                __m128i __B) {
3776     __v4si __perm = (__v4si)_mm_permutex2var_epi32(__A, __I, __B);
3777     return (__m128i)__builtin_ia32_selectd_128(__U, __perm, (__v4si)__I);
3778   }
3779
3780   /* __U-masked select between the permutex2var result and _mm_setzero_si128(). */
3781   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3782   _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3783                                __m128i __B) {
3784     __v4si __perm = (__v4si)_mm_permutex2var_epi32(__A, __I, __B);
3785     return (__m128i)__builtin_ia32_selectd_128(__U, __perm, (__v4si)_mm_setzero_si128());
3786   }
3787
3788   /* Casts __A, __I, __B to __v8si and calls the vpermi2vard256 builtin. */
3789   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3790   _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3791     return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si)__I, (__v8si)__B);
3792   }
3793
3794   /* __U-masked select between _mm256_permutex2var_epi32(__A, __I, __B) and __A. */
3795   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3796   _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3797                                  __m256i __B) {
3798     __v8si __perm = (__v8si)_mm256_permutex2var_epi32(__A, __I, __B);
3799     return (__m256i)__builtin_ia32_selectd_256(__U, __perm, (__v8si)__A);
3800   }
3801
3802   /* __U-masked select between _mm256_permutex2var_epi32(__A, __I, __B) and __I. */
3803   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3804   _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3805                                   __m256i __B) {
3806     __v8si __perm = (__v8si)_mm256_permutex2var_epi32(__A, __I, __B);
3807     return (__m256i)__builtin_ia32_selectd_256(__U, __perm, (__v8si)__I);
3808   }
3809
3810   /* __U-masked select between the permutex2var result and _mm256_setzero_si256(). */
3811   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3812   _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3813                                   __m256i __B) {
3814     __v8si __perm = (__v8si)_mm256_permutex2var_epi32(__A, __I, __B);
3815     return (__m256i)__builtin_ia32_selectd_256(__U, __perm, (__v8si)_mm256_setzero_si256());
3816   }
3817
3818   /* Casts __A/__B to __v2df and __I to __v2di, then calls vpermi2varpd128. */
3819   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3820   _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3821     return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, (__v2df)__B);
3822   }
3823
3824   /* __U-masked select between _mm_permutex2var_pd(__A, __I, __B) and __A. */
3825   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3826   _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3827     __v2df __perm = (__v2df)_mm_permutex2var_pd(__A, __I, __B);
3828     return (__m128d)__builtin_ia32_selectpd_128(__U, __perm, (__v2df)__A);
3829   }
3830
3831   /* __U-masked select between the permutex2var result and __I cast to __m128d. */
3832   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3833   _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3834     __v2df __perm = (__v2df)_mm_permutex2var_pd(__A, __I, __B);
3835     return (__m128d)__builtin_ia32_selectpd_128(__U, __perm, (__v2df)(__m128d)__I);
3836   }
3837
3838   /* __U-masked select between the permutex2var result and _mm_setzero_pd(). */
3839   static __inline__ __m128d __DEFAULT_FN_ATTRS128
3840   _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3841     __v2df __perm = (__v2df)_mm_permutex2var_pd(__A, __I, __B);
3842     return (__m128d)__builtin_ia32_selectpd_128(__U, __perm, (__v2df)_mm_setzero_pd());
3843   }
3844
3845   /* Casts __A/__B to __v4df and __I to __v4di, then calls vpermi2varpd256. */
3846   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3847   _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3848     return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, (__v4df)__B);
3849   }
3850
3851   /* __U-masked select between _mm256_permutex2var_pd(__A, __I, __B) and __A. */
3852   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3853   _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3854                               __m256d __B) {
3855     __v4df __perm = (__v4df)_mm256_permutex2var_pd(__A, __I, __B);
3856     return (__m256d)__builtin_ia32_selectpd_256(__U, __perm, (__v4df)__A);
3857   }
3858
3859   /* __U-masked select between the permutex2var result and __I cast to __m256d. */
3860   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3861   _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3862                                __m256d __B) {
3863     __v4df __perm = (__v4df)_mm256_permutex2var_pd(__A, __I, __B);
3864     return (__m256d)__builtin_ia32_selectpd_256(__U, __perm, (__v4df)(__m256d)__I);
3865   }
3866
3867   /* __U-masked select between the permutex2var result and _mm256_setzero_pd(). */
3868   static __inline__ __m256d __DEFAULT_FN_ATTRS256
3869   _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3870                                __m256d __B) {
3871     __v4df __perm = (__v4df)_mm256_permutex2var_pd(__A, __I, __B);
3872     return (__m256d)__builtin_ia32_selectpd_256(__U, __perm, (__v4df)_mm256_setzero_pd());
3873   }
3874
3875   /* Casts __A/__B to __v4sf and __I to __v4si, then calls vpermi2varps128. */
3876   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3877   _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3878     return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, (__v4sf)__B);
3879   }
3880
3881   /* __U-masked select between _mm_permutex2var_ps(__A, __I, __B) and __A. */
3882   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3883   _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3884     __v4sf __perm = (__v4sf)_mm_permutex2var_ps(__A, __I, __B);
3885     return (__m128)__builtin_ia32_selectps_128(__U, __perm, (__v4sf)__A);
3886   }
3887
3888   /* __U-masked select between the permutex2var result and __I cast to __m128. */
3889   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3890   _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3891     __v4sf __perm = (__v4sf)_mm_permutex2var_ps(__A, __I, __B);
3892     return (__m128)__builtin_ia32_selectps_128(__U, __perm, (__v4sf)(__m128)__I);
3893   }
3894
3895   /* __U-masked select between the permutex2var result and _mm_setzero_ps(). */
3896   static __inline__ __m128 __DEFAULT_FN_ATTRS128
3897   _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3898     __v4sf __perm = (__v4sf)_mm_permutex2var_ps(__A, __I, __B);
3899     return (__m128)__builtin_ia32_selectps_128(__U, __perm, (__v4sf)_mm_setzero_ps());
3900   }
3901
3902   /* Casts __A/__B to __v8sf and __I to __v8si, then calls vpermi2varps256. */
3903   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3904   _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3905     return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, (__v8sf)__B);
3906   }
3907
3908   /* __U-masked select between _mm256_permutex2var_ps(__A, __I, __B) and __A. */
3909   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910   _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3911     __v8sf __perm = (__v8sf)_mm256_permutex2var_ps(__A, __I, __B);
3912     return (__m256)__builtin_ia32_selectps_256(__U, __perm, (__v8sf)__A);
3913   }
3914
3915   /* __U-masked select between the permutex2var result and __I cast to __m256. */
3916   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3917   _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3918                                __m256 __B) {
3919     __v8sf __perm = (__v8sf)_mm256_permutex2var_ps(__A, __I, __B);
3920     return (__m256)__builtin_ia32_selectps_256(__U, __perm, (__v8sf)(__m256)__I);
3921   }
3922
3923   /* __U-masked select between the permutex2var result and _mm256_setzero_ps(). */
3924   static __inline__ __m256 __DEFAULT_FN_ATTRS256
3925   _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3926                                __m256 __B) {
3927     __v8sf __perm = (__v8sf)_mm256_permutex2var_ps(__A, __I, __B);
3928     return (__m256)__builtin_ia32_selectps_256(__U, __perm, (__v8sf)_mm256_setzero_ps());
3929   }
3930
3931   /* Casts __A, __I, __B to __v2di and calls the vpermi2varq128 builtin. */
3932   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3933   _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3934     return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, (__v2di)__B);
3935   }
3936
3937   /* __U-masked select between _mm_permutex2var_epi64(__A, __I, __B) and __A. */
3938   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3939   _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3940                               __m128i __B) {
3941     __v2di __perm = (__v2di)_mm_permutex2var_epi64(__A, __I, __B);
3942     return (__m128i)__builtin_ia32_selectq_128(__U, __perm, (__v2di)__A);
3943   }
3944
3945   /* __U-masked select between _mm_permutex2var_epi64(__A, __I, __B) and __I. */
3946   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3947   _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3948                                __m128i __B) {
3949     __v2di __perm = (__v2di)_mm_permutex2var_epi64(__A, __I, __B);
3950     return (__m128i)__builtin_ia32_selectq_128(__U, __perm, (__v2di)__I);
3951   }
3952
3953   /* __U-masked select between the permutex2var result and _mm_setzero_si128(). */
3954   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3955   _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3956                                __m128i __B) {
3957     __v2di __perm = (__v2di)_mm_permutex2var_epi64(__A, __I, __B);
3958     return (__m128i)__builtin_ia32_selectq_128(__U, __perm, (__v2di)_mm_setzero_si128());
3959   }
3960
3961
3962   /* Casts __A, __I, __B to __v4di and calls the vpermi2varq256 builtin. */
3963   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3964   _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3965     return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di)__I, (__v4di)__B);
3966   }
3967
3968   /* __U-masked select between _mm256_permutex2var_epi64(__A, __I, __B) and __A. */
3969   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3970   _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3971                                  __m256i __B) {
3972     __v4di __perm = (__v4di)_mm256_permutex2var_epi64(__A, __I, __B);
3973     return (__m256i)__builtin_ia32_selectq_256(__U, __perm, (__v4di)__A);
3974   }
3975
3976   /* __U-masked select between _mm256_permutex2var_epi64(__A, __I, __B) and __I. */
3977   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3978   _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3979                                   __m256i __B) {
3980     __v4di __perm = (__v4di)_mm256_permutex2var_epi64(__A, __I, __B);
3981     return (__m256i)__builtin_ia32_selectq_256(__U, __perm, (__v4di)__I);
3982   }
3983
3984   /* __U-masked select between the permutex2var result and _mm256_setzero_si256(). */
3985   static __inline__ __m256i __DEFAULT_FN_ATTRS256
3986   _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3987                                   __m256i __B) {
3988     __v4di __perm = (__v4di)_mm256_permutex2var_epi64(__A, __I, __B);
3989     return (__m256i)__builtin_ia32_selectq_256(__U, __perm, (__v4di)_mm256_setzero_si256());
3990   }
3991
3992   /* __U-masked select between _mm_cvtepi8_epi32(__A) and __W. */
3993   static __inline__ __m128i __DEFAULT_FN_ATTRS128
3994   _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3995   {
3996     __v4si __cvt = (__v4si)_mm_cvtepi8_epi32(__A);
3997     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)__W);
3998   }
3999
4000   /* __U-masked select between _mm_cvtepi8_epi32(__A) and _mm_setzero_si128(). */
4001   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4002   _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4003   {
4004     __v4si __cvt = (__v4si)_mm_cvtepi8_epi32(__A);
4005     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)_mm_setzero_si128());
4006   }
4007
4008   /* __U-masked select between _mm256_cvtepi8_epi32(__A) and __W. */
4009   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4010   _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4011   {
4012     __v8si __cvt = (__v8si)_mm256_cvtepi8_epi32(__A);
4013     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)__W);
4014   }
4015
4016   /* __U-masked select between _mm256_cvtepi8_epi32(__A) and _mm256_setzero_si256(). */
4017   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4018   _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4019   {
4020     __v8si __cvt = (__v8si)_mm256_cvtepi8_epi32(__A);
4021     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)_mm256_setzero_si256());
4022   }
4023
4024   /* __U-masked select between _mm_cvtepi8_epi64(__A) and __W. */
4025   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4026   _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4027   {
4028     __v2di __cvt = (__v2di)_mm_cvtepi8_epi64(__A);
4029     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)__W);
4030   }
4031
4032   /* __U-masked select between _mm_cvtepi8_epi64(__A) and _mm_setzero_si128(). */
4033   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4034   _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4035   {
4036     __v2di __cvt = (__v2di)_mm_cvtepi8_epi64(__A);
4037     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)_mm_setzero_si128());
4038   }
4039
4040   /* __U-masked select between _mm256_cvtepi8_epi64(__A) and __W. */
4041   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4042   _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4043   {
4044     __v4di __cvt = (__v4di)_mm256_cvtepi8_epi64(__A);
4045     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)__W);
4046   }
4047
4048   /* __U-masked select between _mm256_cvtepi8_epi64(__A) and _mm256_setzero_si256(). */
4049   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4050   _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4051   {
4052     __v4di __cvt = (__v4di)_mm256_cvtepi8_epi64(__A);
4053     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)_mm256_setzero_si256());
4054   }
4055
4056   /* __U-masked select between _mm_cvtepi32_epi64(__X) and __W. */
4057   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4058   _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4059   {
4060     __v2di __cvt = (__v2di)_mm_cvtepi32_epi64(__X);
4061     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)__W);
4062   }
4063
4064   /* __U-masked select between _mm_cvtepi32_epi64(__X) and _mm_setzero_si128(). */
4065   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4066   _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4067   {
4068     __v2di __cvt = (__v2di)_mm_cvtepi32_epi64(__X);
4069     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)_mm_setzero_si128());
4070   }
4071
4072   /* __U-masked select between _mm256_cvtepi32_epi64(__X) and __W. */
4073   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4074   _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4075   {
4076     __v4di __cvt = (__v4di)_mm256_cvtepi32_epi64(__X);
4077     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)__W);
4078   }
4079
4080   /* __U-masked select between _mm256_cvtepi32_epi64(__X) and _mm256_setzero_si256(). */
4081   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4082   _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4083   {
4084     __v4di __cvt = (__v4di)_mm256_cvtepi32_epi64(__X);
4085     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)_mm256_setzero_si256());
4086   }
4087
4088   /* __U-masked select between _mm_cvtepi16_epi32(__A) and __W. */
4089   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4090   _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4091   {
4092     __v4si __cvt = (__v4si)_mm_cvtepi16_epi32(__A);
4093     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)__W);
4094   }
4095
4096   /* __U-masked select between _mm_cvtepi16_epi32(__A) and _mm_setzero_si128(). */
4097   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4098   _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4099   {
4100     __v4si __cvt = (__v4si)_mm_cvtepi16_epi32(__A);
4101     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)_mm_setzero_si128());
4102   }
4103
4104   /* __U-masked select between _mm256_cvtepi16_epi32(__A) and __W. */
4105   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4106   _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4107   {
4108     __v8si __cvt = (__v8si)_mm256_cvtepi16_epi32(__A);
4109     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)__W);
4110   }
4111
4112   /* __U-masked select between _mm256_cvtepi16_epi32(__A) and _mm256_setzero_si256(). */
4113   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4114   _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4115   {
4116     __v8si __cvt = (__v8si)_mm256_cvtepi16_epi32(__A);
4117     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)_mm256_setzero_si256());
4118   }
4119
4120   /* __U-masked select between _mm_cvtepi16_epi64(__A) and __W. */
4121   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4122   _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4123   {
4124     __v2di __cvt = (__v2di)_mm_cvtepi16_epi64(__A);
4125     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)__W);
4126   }
4127
4128   /* __U-masked select between _mm_cvtepi16_epi64(__A) and _mm_setzero_si128(). */
4129   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4130   _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4131   {
4132     __v2di __cvt = (__v2di)_mm_cvtepi16_epi64(__A);
4133     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)_mm_setzero_si128());
4134   }
4135
4136   /* __U-masked select between _mm256_cvtepi16_epi64(__A) and __W. */
4137   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4138   _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4139   {
4140     __v4di __cvt = (__v4di)_mm256_cvtepi16_epi64(__A);
4141     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)__W);
4142   }
4143
4144   /* __U-masked select between _mm256_cvtepi16_epi64(__A) and _mm256_setzero_si256(). */
4145   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4146   _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4147   {
4148     __v4di __cvt = (__v4di)_mm256_cvtepi16_epi64(__A);
4149     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)_mm256_setzero_si256());
4150   }
4151
4152
4153   /* __U-masked select between _mm_cvtepu8_epi32(__A) and __W. */
4154   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4155   _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4156   {
4157     __v4si __cvt = (__v4si)_mm_cvtepu8_epi32(__A);
4158     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)__W);
4159   }
4160
4161   /* __U-masked select between _mm_cvtepu8_epi32(__A) and _mm_setzero_si128(). */
4162   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4163   _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4164   {
4165     __v4si __cvt = (__v4si)_mm_cvtepu8_epi32(__A);
4166     return (__m128i)__builtin_ia32_selectd_128(__U, __cvt, (__v4si)_mm_setzero_si128());
4167   }
4168
4169   /* __U-masked select between _mm256_cvtepu8_epi32(__A) and __W. */
4170   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4171   _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4172   {
4173     __v8si __cvt = (__v8si)_mm256_cvtepu8_epi32(__A);
4174     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)__W);
4175   }
4176
4177   /* __U-masked select between _mm256_cvtepu8_epi32(__A) and _mm256_setzero_si256(). */
4178   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4179   _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4180   {
4181     __v8si __cvt = (__v8si)_mm256_cvtepu8_epi32(__A);
4182     return (__m256i)__builtin_ia32_selectd_256(__U, __cvt, (__v8si)_mm256_setzero_si256());
4183   }
4184
4185   /* __U-masked select between _mm_cvtepu8_epi64(__A) and __W. */
4186   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4187   _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4188   {
4189     __v2di __cvt = (__v2di)_mm_cvtepu8_epi64(__A);
4190     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)__W);
4191   }
4192
4193   /* __U-masked select between _mm_cvtepu8_epi64(__A) and _mm_setzero_si128(). */
4194   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4195   _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4196   {
4197     __v2di __cvt = (__v2di)_mm_cvtepu8_epi64(__A);
4198     return (__m128i)__builtin_ia32_selectq_128(__U, __cvt, (__v2di)_mm_setzero_si128());
4199   }
4200
4201   /* __U-masked select between _mm256_cvtepu8_epi64(__A) and __W. */
4202   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4203   _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4204   {
4205     __v4di __cvt = (__v4di)_mm256_cvtepu8_epi64(__A);
4206     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)__W);
4207   }
4208
4209   /* __U-masked select between _mm256_cvtepu8_epi64(__A) and _mm256_setzero_si256(). */
4210   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4211   _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4212   {
4213     __v4di __cvt = (__v4di)_mm256_cvtepu8_epi64(__A);
4214     return (__m256i)__builtin_ia32_selectq_256(__U, __cvt, (__v4di)_mm256_setzero_si256());
4215   }
4216
4217   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4218   _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4219   {
4220     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221                                                (__v2di)_mm_cvtepu32_epi64(__X),
4222                                                (__v2di)__W);
4223   }
4224
4225   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4226   _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4227   {
4228     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4229                                                (__v2di)_mm_cvtepu32_epi64(__X),
4230                                                (__v2di)_mm_setzero_si128());
4231   }
4232
4233   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4234   _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4235   {
4236     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237                                                (__v4di)_mm256_cvtepu32_epi64(__X),
4238                                                (__v4di)__W);
4239   }
4240
4241   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4242   _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4243   {
4244     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4245                                                (__v4di)_mm256_cvtepu32_epi64(__X),
4246                                                (__v4di)_mm256_setzero_si256());
4247   }
4248
4249   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4250   _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4251   {
4252     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253                                                (__v4si)_mm_cvtepu16_epi32(__A),
4254                                                (__v4si)__W);
4255   }
4256
4257   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4258   _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4259   {
4260     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4261                                                (__v4si)_mm_cvtepu16_epi32(__A),
4262                                                (__v4si)_mm_setzero_si128());
4263   }
4264
4265   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4266   _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4267   {
4268     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269                                                (__v8si)_mm256_cvtepu16_epi32(__A),
4270                                                (__v8si)__W);
4271   }
4272
4273   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4274   _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4275   {
4276     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4277                                                (__v8si)_mm256_cvtepu16_epi32(__A),
4278                                                (__v8si)_mm256_setzero_si256());
4279   }
4280
4281   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4282   _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4283   {
4284     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285                                                (__v2di)_mm_cvtepu16_epi64(__A),
4286                                                (__v2di)__W);
4287   }
4288
4289   static __inline__ __m128i __DEFAULT_FN_ATTRS128
4290   _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4291   {
4292     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4293                                                (__v2di)_mm_cvtepu16_epi64(__A),
4294                                                (__v2di)_mm_setzero_si128());
4295   }
4296
4297   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4298   _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4299   {
4300     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301                                                (__v4di)_mm256_cvtepu16_epi64(__A),
4302                                                (__v4di)__W);
4303   }
4304
4305   static __inline__ __m256i __DEFAULT_FN_ATTRS256
4306   _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4307   {
4308     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4309                                                (__v4di)_mm256_cvtepu16_epi64(__A),
4310                                                (__v4di)_mm256_setzero_si256());
4311   }
4312
4313
/* Forwards a (viewed as __v4si) and b (as int) to __builtin_ia32_prold128.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m128i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4316
/* Masked form of _mm_rol_epi32: selects between _mm_rol_epi32(a, b) and w via
   __builtin_ia32_selectd_128 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4321
/* Zeroing form of _mm_rol_epi32: selects between _mm_rol_epi32(a, b) and
   _mm_setzero_si128() via __builtin_ia32_selectd_128 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4326
/* Forwards a (viewed as __v8si) and b (as int) to __builtin_ia32_prold256.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m256i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4329
/* Masked form of _mm256_rol_epi32: selects between _mm256_rol_epi32(a, b) and
   w via __builtin_ia32_selectd_256 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4334
/* Zeroing form of _mm256_rol_epi32: selects between _mm256_rol_epi32(a, b)
   and _mm256_setzero_si256() via __builtin_ia32_selectd_256 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4339
/* Forwards a (viewed as __v2di) and b (as int) to __builtin_ia32_prolq128.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m128i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4342
/* Masked form of _mm_rol_epi64: selects between _mm_rol_epi64(a, b) and w via
   __builtin_ia32_selectq_128 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4347
/* Zeroing form of _mm_rol_epi64: selects between _mm_rol_epi64(a, b) and
   _mm_setzero_si128() via __builtin_ia32_selectq_128 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4352
/* Forwards a (viewed as __v4di) and b (as int) to __builtin_ia32_prolq256.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m256i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4355
/* Masked form of _mm256_rol_epi64: selects between _mm256_rol_epi64(a, b) and
   w via __builtin_ia32_selectq_256 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4360
/* Zeroing form of _mm256_rol_epi64: selects between _mm256_rol_epi64(a, b)
   and _mm256_setzero_si256() via __builtin_ia32_selectq_256 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4365
4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4368 {
4369   return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4370 }
4371
4372 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4374 {
4375   return (__m128i)__builtin_ia32_selectd_128(__U,
4376                                              (__v4si)_mm_rolv_epi32(__A, __B),
4377                                              (__v4si)__W);
4378 }
4379
4380 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4381 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4382 {
4383   return (__m128i)__builtin_ia32_selectd_128(__U,
4384                                              (__v4si)_mm_rolv_epi32(__A, __B),
4385                                              (__v4si)_mm_setzero_si128());
4386 }
4387
4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4390 {
4391   return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4392 }
4393
4394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4396 {
4397   return (__m256i)__builtin_ia32_selectd_256(__U,
4398                                             (__v8si)_mm256_rolv_epi32(__A, __B),
4399                                             (__v8si)__W);
4400 }
4401
4402 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4403 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4404 {
4405   return (__m256i)__builtin_ia32_selectd_256(__U,
4406                                             (__v8si)_mm256_rolv_epi32(__A, __B),
4407                                             (__v8si)_mm256_setzero_si256());
4408 }
4409
4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4412 {
4413   return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4414 }
4415
4416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4418 {
4419   return (__m128i)__builtin_ia32_selectq_128(__U,
4420                                              (__v2di)_mm_rolv_epi64(__A, __B),
4421                                              (__v2di)__W);
4422 }
4423
4424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4425 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4426 {
4427   return (__m128i)__builtin_ia32_selectq_128(__U,
4428                                              (__v2di)_mm_rolv_epi64(__A, __B),
4429                                              (__v2di)_mm_setzero_si128());
4430 }
4431
4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4434 {
4435   return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4436 }
4437
4438 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4440 {
4441   return (__m256i)__builtin_ia32_selectq_256(__U,
4442                                             (__v4di)_mm256_rolv_epi64(__A, __B),
4443                                             (__v4di)__W);
4444 }
4445
4446 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4447 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4448 {
4449   return (__m256i)__builtin_ia32_selectq_256(__U,
4450                                             (__v4di)_mm256_rolv_epi64(__A, __B),
4451                                             (__v4di)_mm256_setzero_si256());
4452 }
4453
/* Forwards a (viewed as __v4si) and b (as int) to __builtin_ia32_prord128.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m128i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4456
/* Masked form of _mm_ror_epi32: selects between _mm_ror_epi32(a, b) and w via
   __builtin_ia32_selectd_128 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4461
/* Zeroing form of _mm_ror_epi32: selects between _mm_ror_epi32(a, b) and
   _mm_setzero_si128() via __builtin_ia32_selectd_128 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4466
/* Forwards a (viewed as __v8si) and b (as int) to __builtin_ia32_prord256.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m256i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4469
/* Masked form of _mm256_ror_epi32: selects between _mm256_ror_epi32(a, b) and
   w via __builtin_ia32_selectd_256 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4474
/* Zeroing form of _mm256_ror_epi32: selects between _mm256_ror_epi32(a, b)
   and _mm256_setzero_si256() via __builtin_ia32_selectd_256 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4479
/* Forwards a (viewed as __v2di) and b (as int) to __builtin_ia32_prorq128.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m128i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4482
/* Masked form of _mm_ror_epi64: selects between _mm_ror_epi64(a, b) and w via
   __builtin_ia32_selectq_128 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4487
/* Zeroing form of _mm_ror_epi64: selects between _mm_ror_epi64(a, b) and
   _mm_setzero_si128() via __builtin_ia32_selectq_128 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4492
/* Forwards a (viewed as __v4di) and b (as int) to __builtin_ia32_prorq256.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro call applies to the __m256i result instead of binding to
   the builtin call ahead of the cast. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4495
/* Masked form of _mm256_ror_epi64: selects between _mm256_ror_epi64(a, b) and
   w via __builtin_ia32_selectq_256 with mask u.  The expansion is wrapped in
   parentheses so a postfix operator at the use site cannot bind ahead of the
   leading cast. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4500
/* Zeroing form of _mm256_ror_epi64: selects between _mm256_ror_epi64(a, b)
   and _mm256_setzero_si256() via __builtin_ia32_selectq_256 with mask u.  The
   expansion is wrapped in parentheses so a postfix operator at the use site
   cannot bind ahead of the leading cast. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4505
4506 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4508 {
4509   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510                                              (__v4si)_mm_sll_epi32(__A, __B),
4511                                              (__v4si)__W);
4512 }
4513
4514 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4515 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4516 {
4517   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4518                                              (__v4si)_mm_sll_epi32(__A, __B),
4519                                              (__v4si)_mm_setzero_si128());
4520 }
4521
4522 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4524 {
4525   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526                                              (__v8si)_mm256_sll_epi32(__A, __B),
4527                                              (__v8si)__W);
4528 }
4529
4530 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4531 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4532 {
4533   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4534                                              (__v8si)_mm256_sll_epi32(__A, __B),
4535                                              (__v8si)_mm256_setzero_si256());
4536 }
4537
4538 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4540 {
4541   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542                                              (__v4si)_mm_slli_epi32(__A, __B),
4543                                              (__v4si)__W);
4544 }
4545
4546 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4547 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4548 {
4549   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4550                                              (__v4si)_mm_slli_epi32(__A, __B),
4551                                              (__v4si)_mm_setzero_si128());
4552 }
4553
4554 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4556 {
4557   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558                                              (__v8si)_mm256_slli_epi32(__A, __B),
4559                                              (__v8si)__W);
4560 }
4561
4562 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4563 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4564 {
4565   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4566                                              (__v8si)_mm256_slli_epi32(__A, __B),
4567                                              (__v8si)_mm256_setzero_si256());
4568 }
4569
4570 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4572 {
4573   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574                                              (__v2di)_mm_sll_epi64(__A, __B),
4575                                              (__v2di)__W);
4576 }
4577
4578 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4579 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4580 {
4581   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4582                                              (__v2di)_mm_sll_epi64(__A, __B),
4583                                              (__v2di)_mm_setzero_si128());
4584 }
4585
4586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4588 {
4589   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590                                              (__v4di)_mm256_sll_epi64(__A, __B),
4591                                              (__v4di)__W);
4592 }
4593
4594 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4595 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4596 {
4597   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4598                                              (__v4di)_mm256_sll_epi64(__A, __B),
4599                                              (__v4di)_mm256_setzero_si256());
4600 }
4601
4602 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4604 {
4605   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606                                              (__v2di)_mm_slli_epi64(__A, __B),
4607                                              (__v2di)__W);
4608 }
4609
4610 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4611 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4612 {
4613   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4614                                              (__v2di)_mm_slli_epi64(__A, __B),
4615                                              (__v2di)_mm_setzero_si128());
4616 }
4617
4618 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4620 {
4621   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622                                              (__v4di)_mm256_slli_epi64(__A, __B),
4623                                              (__v4di)__W);
4624 }
4625
4626 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4627 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4628 {
4629   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4630                                              (__v4di)_mm256_slli_epi64(__A, __B),
4631                                              (__v4di)_mm256_setzero_si256());
4632 }
4633
4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635 _mm_rorv_epi32 (__m128i __A, __m128i __B)
4636 {
4637   return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4638 }
4639
4640 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4642 {
4643   return (__m128i)__builtin_ia32_selectd_128(__U,
4644                                              (__v4si)_mm_rorv_epi32(__A, __B),
4645                                              (__v4si)__W);
4646 }
4647
4648 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4649 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4650 {
4651   return (__m128i)__builtin_ia32_selectd_128(__U,
4652                                              (__v4si)_mm_rorv_epi32(__A, __B),
4653                                              (__v4si)_mm_setzero_si128());
4654 }
4655
4656 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
4658 {
4659   return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4660 }
4661
4662 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4664 {
4665   return (__m256i)__builtin_ia32_selectd_256(__U,
4666                                             (__v8si)_mm256_rorv_epi32(__A, __B),
4667                                             (__v8si)__W);
4668 }
4669
4670 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4671 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4672 {
4673   return (__m256i)__builtin_ia32_selectd_256(__U,
4674                                             (__v8si)_mm256_rorv_epi32(__A, __B),
4675                                             (__v8si)_mm256_setzero_si256());
4676 }
4677
4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679 _mm_rorv_epi64 (__m128i __A, __m128i __B)
4680 {
4681   return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4682 }
4683
4684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4686 {
4687   return (__m128i)__builtin_ia32_selectq_128(__U,
4688                                              (__v2di)_mm_rorv_epi64(__A, __B),
4689                                              (__v2di)__W);
4690 }
4691
4692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4693 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4694 {
4695   return (__m128i)__builtin_ia32_selectq_128(__U,
4696                                              (__v2di)_mm_rorv_epi64(__A, __B),
4697                                              (__v2di)_mm_setzero_si128());
4698 }
4699
4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
4702 {
4703   return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4704 }
4705
4706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4708 {
4709   return (__m256i)__builtin_ia32_selectq_256(__U,
4710                                             (__v4di)_mm256_rorv_epi64(__A, __B),
4711                                             (__v4di)__W);
4712 }
4713
4714 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4715 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4716 {
4717   return (__m256i)__builtin_ia32_selectq_256(__U,
4718                                             (__v4di)_mm256_rorv_epi64(__A, __B),
4719                                             (__v4di)_mm256_setzero_si256());
4720 }
4721
4722 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4724 {
4725   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726                                              (__v2di)_mm_sllv_epi64(__X, __Y),
4727                                              (__v2di)__W);
4728 }
4729
4730 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4731 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4732 {
4733   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4734                                              (__v2di)_mm_sllv_epi64(__X, __Y),
4735                                              (__v2di)_mm_setzero_si128());
4736 }
4737
4738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4740 {
4741   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
4743                                             (__v4di)__W);
4744 }
4745
4746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4747 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4748 {
4749   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4750                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
4751                                             (__v4di)_mm256_setzero_si256());
4752 }
4753
4754 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4756 {
4757   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758                                              (__v4si)_mm_sllv_epi32(__X, __Y),
4759                                              (__v4si)__W);
4760 }
4761
4762 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4763 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4764 {
4765   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4766                                              (__v4si)_mm_sllv_epi32(__X, __Y),
4767                                              (__v4si)_mm_setzero_si128());
4768 }
4769
4770 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4772 {
4773   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
4775                                             (__v8si)__W);
4776 }
4777
4778 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4779 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4780 {
4781   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4782                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
4783                                             (__v8si)_mm256_setzero_si256());
4784 }
4785
4786 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4788 {
4789   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790                                              (__v2di)_mm_srlv_epi64(__X, __Y),
4791                                              (__v2di)__W);
4792 }
4793
4794 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4795 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4796 {
4797   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4798                                              (__v2di)_mm_srlv_epi64(__X, __Y),
4799                                              (__v2di)_mm_setzero_si128());
4800 }
4801
4802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4804 {
4805   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
4807                                             (__v4di)__W);
4808 }
4809
4810 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4811 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4812 {
4813   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4814                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
4815                                             (__v4di)_mm256_setzero_si256());
4816 }
4817
4818 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4820 {
4821   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822                                             (__v4si)_mm_srlv_epi32(__X, __Y),
4823                                             (__v4si)__W);
4824 }
4825
4826 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4827 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4828 {
4829   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4830                                             (__v4si)_mm_srlv_epi32(__X, __Y),
4831                                             (__v4si)_mm_setzero_si128());
4832 }
4833
4834 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4836 {
4837   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
4839                                             (__v8si)__W);
4840 }
4841
4842 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4843 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4844 {
4845   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4846                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
4847                                             (__v8si)_mm256_setzero_si256());
4848 }
4849
4850 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4852 {
4853   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854                                              (__v4si)_mm_srl_epi32(__A, __B),
4855                                              (__v4si)__W);
4856 }
4857
4858 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4859 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4860 {
4861   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4862                                              (__v4si)_mm_srl_epi32(__A, __B),
4863                                              (__v4si)_mm_setzero_si128());
4864 }
4865
4866 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4868 {
4869   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870                                              (__v8si)_mm256_srl_epi32(__A, __B),
4871                                              (__v8si)__W);
4872 }
4873
4874 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4875 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4876 {
4877   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4878                                              (__v8si)_mm256_srl_epi32(__A, __B),
4879                                              (__v8si)_mm256_setzero_si256());
4880 }
4881
4882 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4884 {
4885   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886                                              (__v4si)_mm_srli_epi32(__A, __B),
4887                                              (__v4si)__W);
4888 }
4889
4890 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4891 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4892 {
4893   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4894                                              (__v4si)_mm_srli_epi32(__A, __B),
4895                                              (__v4si)_mm_setzero_si128());
4896 }
4897
4898 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4900 {
4901   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902                                              (__v8si)_mm256_srli_epi32(__A, __B),
4903                                              (__v8si)__W);
4904 }
4905
4906 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4907 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4908 {
4909   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4910                                              (__v8si)_mm256_srli_epi32(__A, __B),
4911                                              (__v8si)_mm256_setzero_si256());
4912 }
4913
4914 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4916 {
4917   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918                                              (__v2di)_mm_srl_epi64(__A, __B),
4919                                              (__v2di)__W);
4920 }
4921
4922 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4923 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4924 {
4925   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4926                                              (__v2di)_mm_srl_epi64(__A, __B),
4927                                              (__v2di)_mm_setzero_si128());
4928 }
4929
4930 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4932 {
4933   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934                                              (__v4di)_mm256_srl_epi64(__A, __B),
4935                                              (__v4di)__W);
4936 }
4937
4938 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4939 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4940 {
4941   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4942                                              (__v4di)_mm256_srl_epi64(__A, __B),
4943                                              (__v4di)_mm256_setzero_si256());
4944 }
4945
4946 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4948 {
4949   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950                                              (__v2di)_mm_srli_epi64(__A, __B),
4951                                              (__v2di)__W);
4952 }
4953
4954 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4955 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
4956 {
4957   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4958                                              (__v2di)_mm_srli_epi64(__A, __B),
4959                                              (__v2di)_mm_setzero_si128());
4960 }
4961
4962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4964 {
4965   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966                                              (__v4di)_mm256_srli_epi64(__A, __B),
4967                                              (__v4di)__W);
4968 }
4969
4970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4971 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
4972 {
4973   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4974                                              (__v4di)_mm256_srli_epi64(__A, __B),
4975                                              (__v4di)_mm256_setzero_si256());
4976 }
4977
4978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4980 {
4981   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982                                             (__v4si)_mm_srav_epi32(__X, __Y),
4983                                             (__v4si)__W);
4984 }
4985
4986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4987 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4988 {
4989   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4990                                             (__v4si)_mm_srav_epi32(__X, __Y),
4991                                             (__v4si)_mm_setzero_si128());
4992 }
4993
4994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4996 {
4997   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998                                             (__v8si)_mm256_srav_epi32(__X, __Y),
4999                                             (__v8si)__W);
5000 }
5001
5002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5003 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5004 {
5005   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5006                                             (__v8si)_mm256_srav_epi32(__X, __Y),
5007                                             (__v8si)_mm256_setzero_si256());
5008 }
5009
5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011 _mm_srav_epi64(__m128i __X, __m128i __Y)
5012 {
5013   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5014 }
5015
5016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5018 {
5019   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020                                              (__v2di)_mm_srav_epi64(__X, __Y),
5021                                              (__v2di)__W);
5022 }
5023
5024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5025 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5026 {
5027   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5028                                              (__v2di)_mm_srav_epi64(__X, __Y),
5029                                              (__v2di)_mm_setzero_si128());
5030 }
5031
5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033 _mm256_srav_epi64(__m256i __X, __m256i __Y)
5034 {
5035   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5036 }
5037
5038 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5039 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5040 {
5041   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042                                              (__v4di)_mm256_srav_epi64(__X, __Y),
5043                                              (__v4di)__W);
5044 }
5045
5046 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5047 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5048 {
5049   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5050                                              (__v4di)_mm256_srav_epi64(__X, __Y),
5051                                              (__v4di)_mm256_setzero_si256());
5052 }
5053
5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5056 {
5057   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5058                  (__v4si) __A,
5059                  (__v4si) __W);
5060 }
5061
5062 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5063 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5064 {
5065   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5066                  (__v4si) __A,
5067                  (__v4si) _mm_setzero_si128 ());
5068 }
5069
5070
5071 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5072 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5073 {
5074   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5075                  (__v8si) __A,
5076                  (__v8si) __W);
5077 }
5078
5079 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5080 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5081 {
5082   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5083                  (__v8si) __A,
5084                  (__v8si) _mm256_setzero_si256 ());
5085 }
5086
5087 static __inline __m128i __DEFAULT_FN_ATTRS128
5088 _mm_load_epi32 (void const *__P)
5089 {
5090   return *(__m128i *) __P;
5091 }
5092
5093 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5094 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5095 {
5096   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5097               (__v4si) __W,
5098               (__mmask8)
5099               __U);
5100 }
5101
5102 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5103 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5104 {
5105   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5106               (__v4si)
5107               _mm_setzero_si128 (),
5108               (__mmask8)
5109               __U);
5110 }
5111
5112 static __inline __m256i __DEFAULT_FN_ATTRS256
5113 _mm256_load_epi32 (void const *__P)
5114 {
5115   return *(__m256i *) __P;
5116 }
5117
5118 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5119 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5120 {
5121   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5122               (__v8si) __W,
5123               (__mmask8)
5124               __U);
5125 }
5126
5127 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5128 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5129 {
5130   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5131               (__v8si)
5132               _mm256_setzero_si256 (),
5133               (__mmask8)
5134               __U);
5135 }
5136
5137 static __inline void __DEFAULT_FN_ATTRS128
5138 _mm_store_epi32 (void *__P, __m128i __A)
5139 {
5140   *(__m128i *) __P = __A;
5141 }
5142
5143 static __inline__ void __DEFAULT_FN_ATTRS128
5144 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5145 {
5146   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5147           (__v4si) __A,
5148           (__mmask8) __U);
5149 }
5150
5151 static __inline void __DEFAULT_FN_ATTRS256
5152 _mm256_store_epi32 (void *__P, __m256i __A)
5153 {
5154   *(__m256i *) __P = __A;
5155 }
5156
5157 static __inline__ void __DEFAULT_FN_ATTRS256
5158 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5159 {
5160   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5161           (__v8si) __A,
5162           (__mmask8) __U);
5163 }
5164
5165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5166 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5167 {
5168   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5169                  (__v2di) __A,
5170                  (__v2di) __W);
5171 }
5172
5173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5174 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5175 {
5176   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5177                  (__v2di) __A,
5178                  (__v2di) _mm_setzero_si128 ());
5179 }
5180
5181 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5182 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5183 {
5184   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5185                  (__v4di) __A,
5186                  (__v4di) __W);
5187 }
5188
5189 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5190 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5191 {
5192   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5193                  (__v4di) __A,
5194                  (__v4di) _mm256_setzero_si256 ());
5195 }
5196
5197 static __inline __m128i __DEFAULT_FN_ATTRS128
5198 _mm_load_epi64 (void const *__P)
5199 {
5200   return *(__m128i *) __P;
5201 }
5202
5203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5204 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5205 {
5206   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5207               (__v2di) __W,
5208               (__mmask8)
5209               __U);
5210 }
5211
5212 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5213 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5214 {
5215   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5216               (__v2di)
5217               _mm_setzero_si128 (),
5218               (__mmask8)
5219               __U);
5220 }
5221
5222 static __inline __m256i __DEFAULT_FN_ATTRS256
5223 _mm256_load_epi64 (void const *__P)
5224 {
5225   return *(__m256i *) __P;
5226 }
5227
5228 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5229 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5230 {
5231   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5232               (__v4di) __W,
5233               (__mmask8)
5234               __U);
5235 }
5236
5237 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5238 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5239 {
5240   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5241               (__v4di)
5242               _mm256_setzero_si256 (),
5243               (__mmask8)
5244               __U);
5245 }
5246
5247 static __inline void __DEFAULT_FN_ATTRS128
5248 _mm_store_epi64 (void *__P, __m128i __A)
5249 {
5250   *(__m128i *) __P = __A;
5251 }
5252
5253 static __inline__ void __DEFAULT_FN_ATTRS128
5254 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5255 {
5256   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5257           (__v2di) __A,
5258           (__mmask8) __U);
5259 }
5260
5261 static __inline void __DEFAULT_FN_ATTRS256
5262 _mm256_store_epi64 (void *__P, __m256i __A)
5263 {
5264   *(__m256i *) __P = __A;
5265 }
5266
5267 static __inline__ void __DEFAULT_FN_ATTRS256
5268 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5269 {
5270   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5271           (__v4di) __A,
5272           (__mmask8) __U);
5273 }
5274
5275 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5276 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5277 {
5278   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5279                                               (__v2df)_mm_movedup_pd(__A),
5280                                               (__v2df)__W);
5281 }
5282
5283 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5284 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5285 {
5286   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5287                                               (__v2df)_mm_movedup_pd(__A),
5288                                               (__v2df)_mm_setzero_pd());
5289 }
5290
5291 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5292 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5293 {
5294   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5295                                               (__v4df)_mm256_movedup_pd(__A),
5296                                               (__v4df)__W);
5297 }
5298
5299 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5300 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5301 {
5302   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5303                                               (__v4df)_mm256_movedup_pd(__A),
5304                                               (__v4df)_mm256_setzero_pd());
5305 }
5306
5307 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5308 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5309 {
5310    return (__m128i)__builtin_ia32_selectd_128(__M,
5311                                               (__v4si) _mm_set1_epi32(__A),
5312                                               (__v4si)__O);
5313 }
5314
5315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5316 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
5317 {
5318    return (__m128i)__builtin_ia32_selectd_128(__M,
5319                                               (__v4si) _mm_set1_epi32(__A),
5320                                               (__v4si)_mm_setzero_si128());
5321 }
5322
5323 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5324 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5325 {
5326    return (__m256i)__builtin_ia32_selectd_256(__M,
5327                                               (__v8si) _mm256_set1_epi32(__A),
5328                                               (__v8si)__O);
5329 }
5330
5331 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5332 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5333 {
5334    return (__m256i)__builtin_ia32_selectd_256(__M,
5335                                               (__v8si) _mm256_set1_epi32(__A),
5336                                               (__v8si)_mm256_setzero_si256());
5337 }
5338
5339
5340 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5341 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5342 {
5343   return (__m128i) __builtin_ia32_selectq_128(__M,
5344                                               (__v2di) _mm_set1_epi64x(__A),
5345                                               (__v2di) __O);
5346 }
5347
5348 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5349 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5350 {
5351   return (__m128i) __builtin_ia32_selectq_128(__M,
5352                                               (__v2di) _mm_set1_epi64x(__A),
5353                                               (__v2di) _mm_setzero_si128());
5354 }
5355
5356 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5357 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5358 {
5359   return (__m256i) __builtin_ia32_selectq_256(__M,
5360                                               (__v4di) _mm256_set1_epi64x(__A),
5361                                               (__v4di) __O) ;
5362 }
5363
5364 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5365 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5366 {
5367    return (__m256i) __builtin_ia32_selectq_256(__M,
5368                                                (__v4di) _mm256_set1_epi64x(__A),
5369                                                (__v4di) _mm256_setzero_si256());
5370 }
5371
/* Calls __builtin_ia32_fixupimmpd128_mask on A, B, C with immediate imm and
 * the mask argument (__mmask8)-1.  The whole expansion is parenthesized so
 * the leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)-1))
5377
/* Calls __builtin_ia32_fixupimmpd128_mask on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5383
/* Calls __builtin_ia32_fixupimmpd128_maskz on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5389
/* Calls __builtin_ia32_fixupimmpd256_mask on A, B, C with immediate imm and
 * the mask argument (__mmask8)-1.  The whole expansion is parenthesized so
 * the leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)-1))
5395
/* Calls __builtin_ia32_fixupimmpd256_mask on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5401
/* Calls __builtin_ia32_fixupimmpd256_maskz on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5407
/* Calls __builtin_ia32_fixupimmps128_mask on A, B, C with immediate imm and
 * the mask argument (__mmask8)-1.  The whole expansion is parenthesized so
 * the leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
5413
/* Calls __builtin_ia32_fixupimmps128_mask on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5419
/* Calls __builtin_ia32_fixupimmps128_maskz on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5425
/* Calls __builtin_ia32_fixupimmps256_mask on A, B, C with immediate imm and
 * the mask argument (__mmask8)-1.  The whole expansion is parenthesized so
 * the leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
5431
/* Calls __builtin_ia32_fixupimmps256_mask on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5437
/* Calls __builtin_ia32_fixupimmps256_maskz on A, B, C with immediate imm and
 * caller-supplied mask U.  The whole expansion is parenthesized so the
 * leading cast still applies to the complete call when the macro is
 * followed by a postfix operator. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5443
5444 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5445 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5446 {
5447   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5448                (__v2df) __W,
5449                (__mmask8) __U);
5450 }
5451
5452 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5453 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5454 {
5455   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5456                (__v2df)
5457                _mm_setzero_pd (),
5458                (__mmask8) __U);
5459 }
5460
5461 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5462 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5463 {
5464   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5465                (__v4df) __W,
5466                (__mmask8) __U);
5467 }
5468
5469 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5470 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5471 {
5472   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5473                (__v4df)
5474                _mm256_setzero_pd (),
5475                (__mmask8) __U);
5476 }
5477
5478 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5479 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5480 {
5481   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5482               (__v4sf) __W,
5483               (__mmask8) __U);
5484 }
5485
5486 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5487 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5488 {
5489   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5490               (__v4sf)
5491               _mm_setzero_ps (),
5492               (__mmask8) __U);
5493 }
5494
5495 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5496 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5497 {
5498   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5499               (__v8sf) __W,
5500               (__mmask8) __U);
5501 }
5502
5503 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5504 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5505 {
5506   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5507               (__v8sf)
5508               _mm256_setzero_ps (),
5509               (__mmask8) __U);
5510 }
5511
5512 static __inline __m128i __DEFAULT_FN_ATTRS128
5513 _mm_loadu_epi64 (void const *__P)
5514 {
5515   struct __loadu_epi64 {
5516     __m128i __v;
5517   } __attribute__((__packed__, __may_alias__));
5518   return ((struct __loadu_epi64*)__P)->__v;
5519 }
5520
5521 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5522 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5523 {
5524   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5525                  (__v2di) __W,
5526                  (__mmask8) __U);
5527 }
5528
5529 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5530 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5531 {
5532   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5533                  (__v2di)
5534                  _mm_setzero_si128 (),
5535                  (__mmask8) __U);
5536 }
5537
5538 static __inline __m256i __DEFAULT_FN_ATTRS256
5539 _mm256_loadu_epi64 (void const *__P)
5540 {
5541   struct __loadu_epi64 {
5542     __m256i __v;
5543   } __attribute__((__packed__, __may_alias__));
5544   return ((struct __loadu_epi64*)__P)->__v;
5545 }
5546
5547 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5548 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5549 {
5550   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5551                  (__v4di) __W,
5552                  (__mmask8) __U);
5553 }
5554
5555 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5556 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5557 {
5558   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5559                  (__v4di)
5560                  _mm256_setzero_si256 (),
5561                  (__mmask8) __U);
5562 }
5563
5564 static __inline __m128i __DEFAULT_FN_ATTRS128
5565 _mm_loadu_epi32 (void const *__P)
5566 {
5567   struct __loadu_epi32 {
5568     __m128i __v;
5569   } __attribute__((__packed__, __may_alias__));
5570   return ((struct __loadu_epi32*)__P)->__v;
5571 }
5572
5573 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5574 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5575 {
5576   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5577                  (__v4si) __W,
5578                  (__mmask8) __U);
5579 }
5580
5581 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5582 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5583 {
5584   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5585                  (__v4si)
5586                  _mm_setzero_si128 (),
5587                  (__mmask8) __U);
5588 }
5589
5590 static __inline __m256i __DEFAULT_FN_ATTRS256
5591 _mm256_loadu_epi32 (void const *__P)
5592 {
5593   struct __loadu_epi32 {
5594     __m256i __v;
5595   } __attribute__((__packed__, __may_alias__));
5596   return ((struct __loadu_epi32*)__P)->__v;
5597 }
5598
5599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5600 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5601 {
5602   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5603                  (__v8si) __W,
5604                  (__mmask8) __U);
5605 }
5606
5607 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5608 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5609 {
5610   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5611                  (__v8si)
5612                  _mm256_setzero_si256 (),
5613                  (__mmask8) __U);
5614 }
5615
5616 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5617 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5618 {
5619   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5620                (__v2df) __W,
5621                (__mmask8) __U);
5622 }
5623
5624 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5625 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5626 {
5627   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5628                (__v2df)
5629                _mm_setzero_pd (),
5630                (__mmask8) __U);
5631 }
5632
5633 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5634 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5635 {
5636   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5637                (__v4df) __W,
5638                (__mmask8) __U);
5639 }
5640
5641 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5642 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5643 {
5644   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5645                (__v4df)
5646                _mm256_setzero_pd (),
5647                (__mmask8) __U);
5648 }
5649
5650 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5651 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5652 {
5653   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5654               (__v4sf) __W,
5655               (__mmask8) __U);
5656 }
5657
5658 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5659 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5660 {
5661   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5662               (__v4sf)
5663               _mm_setzero_ps (),
5664               (__mmask8) __U);
5665 }
5666
5667 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5668 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5669 {
5670   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5671               (__v8sf) __W,
5672               (__mmask8) __U);
5673 }
5674
5675 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5676 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5677 {
5678   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5679               (__v8sf)
5680               _mm256_setzero_ps (),
5681               (__mmask8) __U);
5682 }
5683
5684 static __inline__ void __DEFAULT_FN_ATTRS128
5685 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5686 {
5687   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5688            (__v2df) __A,
5689            (__mmask8) __U);
5690 }
5691
5692 static __inline__ void __DEFAULT_FN_ATTRS256
5693 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5694 {
5695   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5696            (__v4df) __A,
5697            (__mmask8) __U);
5698 }
5699
5700 static __inline__ void __DEFAULT_FN_ATTRS128
5701 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5702 {
5703   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5704            (__v4sf) __A,
5705            (__mmask8) __U);
5706 }
5707
5708 static __inline__ void __DEFAULT_FN_ATTRS256
5709 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5710 {
5711   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5712            (__v8sf) __A,
5713            (__mmask8) __U);
5714 }
5715
5716 static __inline void __DEFAULT_FN_ATTRS128
5717 _mm_storeu_epi64 (void *__P, __m128i __A)
5718 {
5719   struct __storeu_epi64 {
5720     __m128i __v;
5721   } __attribute__((__packed__, __may_alias__));
5722   ((struct __storeu_epi64*)__P)->__v = __A;
5723 }
5724
5725 static __inline__ void __DEFAULT_FN_ATTRS128
5726 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5727 {
5728   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5729              (__v2di) __A,
5730              (__mmask8) __U);
5731 }
5732
5733 static __inline void __DEFAULT_FN_ATTRS256
5734 _mm256_storeu_epi64 (void *__P, __m256i __A)
5735 {
5736   struct __storeu_epi64 {
5737     __m256i __v;
5738   } __attribute__((__packed__, __may_alias__));
5739   ((struct __storeu_epi64*)__P)->__v = __A;
5740 }
5741
5742 static __inline__ void __DEFAULT_FN_ATTRS256
5743 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5744 {
5745   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5746              (__v4di) __A,
5747              (__mmask8) __U);
5748 }
5749
5750 static __inline void __DEFAULT_FN_ATTRS128
5751 _mm_storeu_epi32 (void *__P, __m128i __A)
5752 {
5753   struct __storeu_epi32 {
5754     __m128i __v;
5755   } __attribute__((__packed__, __may_alias__));
5756   ((struct __storeu_epi32*)__P)->__v = __A;
5757 }
5758
5759 static __inline__ void __DEFAULT_FN_ATTRS128
5760 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5761 {
5762   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5763              (__v4si) __A,
5764              (__mmask8) __U);
5765 }
5766
5767 static __inline void __DEFAULT_FN_ATTRS256
5768 _mm256_storeu_epi32 (void *__P, __m256i __A)
5769 {
5770   struct __storeu_epi32 {
5771     __m256i __v;
5772   } __attribute__((__packed__, __may_alias__));
5773   ((struct __storeu_epi32*)__P)->__v = __A;
5774 }
5775
5776 static __inline__ void __DEFAULT_FN_ATTRS256
5777 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5778 {
5779   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5780              (__v8si) __A,
5781              (__mmask8) __U);
5782 }
5783
5784 static __inline__ void __DEFAULT_FN_ATTRS128
5785 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5786 {
5787   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5788            (__v2df) __A,
5789            (__mmask8) __U);
5790 }
5791
5792 static __inline__ void __DEFAULT_FN_ATTRS256
5793 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5794 {
5795   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5796            (__v4df) __A,
5797            (__mmask8) __U);
5798 }
5799
5800 static __inline__ void __DEFAULT_FN_ATTRS128
5801 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5802 {
5803   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5804            (__v4sf) __A,
5805            (__mmask8) __U);
5806 }
5807
5808 static __inline__ void __DEFAULT_FN_ATTRS256
5809 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5810 {
5811   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5812            (__v8sf) __A,
5813            (__mmask8) __U);
5814 }
5815
5816
5817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5818 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5819 {
5820   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5821                                               (__v2df)_mm_unpackhi_pd(__A, __B),
5822                                               (__v2df)__W);
5823 }
5824
5825 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5826 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5827 {
5828   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5829                                               (__v2df)_mm_unpackhi_pd(__A, __B),
5830                                               (__v2df)_mm_setzero_pd());
5831 }
5832
5833 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5834 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5835 {
5836   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5837                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
5838                                            (__v4df)__W);
5839 }
5840
5841 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5842 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5843 {
5844   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5845                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
5846                                            (__v4df)_mm256_setzero_pd());
5847 }
5848
5849 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5850 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5851 {
5852   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5853                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
5854                                              (__v4sf)__W);
5855 }
5856
5857 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5858 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5859 {
5860   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5861                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
5862                                              (__v4sf)_mm_setzero_ps());
5863 }
5864
5865 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5866 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5867 {
5868   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5869                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
5870                                            (__v8sf)__W);
5871 }
5872
5873 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5874 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5875 {
5876   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5877                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
5878                                            (__v8sf)_mm256_setzero_ps());
5879 }
5880
5881 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5882 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5883 {
5884   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5885                                               (__v2df)_mm_unpacklo_pd(__A, __B),
5886                                               (__v2df)__W);
5887 }
5888
5889 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5890 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5891 {
5892   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5893                                               (__v2df)_mm_unpacklo_pd(__A, __B),
5894                                               (__v2df)_mm_setzero_pd());
5895 }
5896
5897 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5898 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5899 {
5900   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5901                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
5902                                            (__v4df)__W);
5903 }
5904
5905 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5906 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5907 {
5908   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5909                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
5910                                            (__v4df)_mm256_setzero_pd());
5911 }
5912
5913 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5914 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5915 {
5916   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5917                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
5918                                              (__v4sf)__W);
5919 }
5920
5921 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5922 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5923 {
5924   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5925                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
5926                                              (__v4sf)_mm_setzero_ps());
5927 }
5928
5929 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5930 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5931 {
5932   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5933                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
5934                                            (__v8sf)__W);
5935 }
5936
5937 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5938 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5939 {
5940   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5941                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
5942                                            (__v8sf)_mm256_setzero_ps());
5943 }
5944
5945 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5946 _mm_rcp14_pd (__m128d __A)
5947 {
5948   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5949                 (__v2df)
5950                 _mm_setzero_pd (),
5951                 (__mmask8) -1);
5952 }
5953
5954 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5955 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5956 {
5957   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5958                 (__v2df) __W,
5959                 (__mmask8) __U);
5960 }
5961
5962 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5963 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5964 {
5965   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5966                 (__v2df)
5967                 _mm_setzero_pd (),
5968                 (__mmask8) __U);
5969 }
5970
5971 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5972 _mm256_rcp14_pd (__m256d __A)
5973 {
5974   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5975                 (__v4df)
5976                 _mm256_setzero_pd (),
5977                 (__mmask8) -1);
5978 }
5979
5980 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5981 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5982 {
5983   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5984                 (__v4df) __W,
5985                 (__mmask8) __U);
5986 }
5987
5988 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5989 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5990 {
5991   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5992                 (__v4df)
5993                 _mm256_setzero_pd (),
5994                 (__mmask8) __U);
5995 }
5996
5997 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5998 _mm_rcp14_ps (__m128 __A)
5999 {
6000   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6001                (__v4sf)
6002                _mm_setzero_ps (),
6003                (__mmask8) -1);
6004 }
6005
6006 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6007 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6008 {
6009   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6010                (__v4sf) __W,
6011                (__mmask8) __U);
6012 }
6013
6014 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6015 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6016 {
6017   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6018                (__v4sf)
6019                _mm_setzero_ps (),
6020                (__mmask8) __U);
6021 }
6022
6023 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6024 _mm256_rcp14_ps (__m256 __A)
6025 {
6026   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6027                (__v8sf)
6028                _mm256_setzero_ps (),
6029                (__mmask8) -1);
6030 }
6031
6032 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6033 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6034 {
6035   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6036                (__v8sf) __W,
6037                (__mmask8) __U);
6038 }
6039
6040 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6041 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6042 {
6043   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6044                (__v8sf)
6045                _mm256_setzero_ps (),
6046                (__mmask8) __U);
6047 }
6048
/* Write-masked form of _mm_permute_pd: __builtin_ia32_selectpd_128 selects,
 * under mask U, between _mm_permute_pd(X, C) and W.  The whole expansion is
 * parenthesized so that the leading cast cannot be separated from the call by
 * a postfix operator written after the macro invocation. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
6053
/* Zero-masked form of _mm_permute_pd: __builtin_ia32_selectpd_128 selects,
 * under mask U, between _mm_permute_pd(X, C) and an all-zero vector.  The
 * whole expansion is parenthesized so that the leading cast cannot be
 * separated from the call by a postfix operator written after the macro
 * invocation. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
6058
/* Write-masked form of _mm256_permute_pd: __builtin_ia32_selectpd_256
 * selects, under mask U, between _mm256_permute_pd(X, C) and W.  The whole
 * expansion is parenthesized so that the leading cast cannot be separated
 * from the call by a postfix operator written after the macro invocation. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
6063
/* Zero-masked form of _mm256_permute_pd: __builtin_ia32_selectpd_256 selects,
 * under mask U, between _mm256_permute_pd(X, C) and an all-zero vector.  The
 * whole expansion is parenthesized so that the leading cast cannot be
 * separated from the call by a postfix operator written after the macro
 * invocation. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
6068
/* Write-masked form of _mm_permute_ps: __builtin_ia32_selectps_128 selects,
 * under mask U, between _mm_permute_ps(X, C) and W.  The whole expansion is
 * parenthesized so that the leading cast cannot be separated from the call by
 * a postfix operator written after the macro invocation. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
6073
/* Zero-masked form of _mm_permute_ps: __builtin_ia32_selectps_128 selects,
 * under mask U, between _mm_permute_ps(X, C) and an all-zero vector.  The
 * whole expansion is parenthesized so that the leading cast cannot be
 * separated from the call by a postfix operator written after the macro
 * invocation. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
6078
/* Write-masked form of _mm256_permute_ps: __builtin_ia32_selectps_256
 * selects, under mask U, between _mm256_permute_ps(X, C) and W.  The whole
 * expansion is parenthesized so that the leading cast cannot be separated
 * from the call by a postfix operator written after the macro invocation. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
6083
/* Zero-masked form of _mm256_permute_ps: __builtin_ia32_selectps_256 selects,
 * under mask U, between _mm256_permute_ps(X, C) and an all-zero vector.  The
 * whole expansion is parenthesized so that the leading cast cannot be
 * separated from the call by a postfix operator written after the macro
 * invocation. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6088
6089 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6090 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6091 {
6092   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6093                                             (__v2df)_mm_permutevar_pd(__A, __C),
6094                                             (__v2df)__W);
6095 }
6096
6097 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6098 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6099 {
6100   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6101                                             (__v2df)_mm_permutevar_pd(__A, __C),
6102                                             (__v2df)_mm_setzero_pd());
6103 }
6104
6105 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6106 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6107 {
6108   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6109                                          (__v4df)_mm256_permutevar_pd(__A, __C),
6110                                          (__v4df)__W);
6111 }
6112
6113 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6114 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6115 {
6116   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6117                                          (__v4df)_mm256_permutevar_pd(__A, __C),
6118                                          (__v4df)_mm256_setzero_pd());
6119 }
6120
6121 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6122 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6123 {
6124   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6125                                             (__v4sf)_mm_permutevar_ps(__A, __C),
6126                                             (__v4sf)__W);
6127 }
6128
6129 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6130 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6131 {
6132   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6133                                             (__v4sf)_mm_permutevar_ps(__A, __C),
6134                                             (__v4sf)_mm_setzero_ps());
6135 }
6136
6137 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6138 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6139 {
6140   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6141                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
6142                                           (__v8sf)__W);
6143 }
6144
6145 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6146 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6147 {
6148   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6149                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
6150                                           (__v8sf)_mm256_setzero_ps());
6151 }
6152
6153 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6154 _mm_test_epi32_mask (__m128i __A, __m128i __B)
6155 {
6156   return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6157 }
6158
6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6160 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6161 {
6162   return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6163                                      _mm_setzero_si128());
6164 }
6165
6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6167 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
6168 {
6169   return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6170                                    _mm256_setzero_si256());
6171 }
6172
6173 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6174 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6175 {
6176   return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6177                                         _mm256_setzero_si256());
6178 }
6179
6180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6181 _mm_test_epi64_mask (__m128i __A, __m128i __B)
6182 {
6183   return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6184 }
6185
6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6187 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6188 {
6189   return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6190                                      _mm_setzero_si128());
6191 }
6192
6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6194 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
6195 {
6196   return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6197                                    _mm256_setzero_si256());
6198 }
6199
6200 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6201 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6202 {
6203   return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6204                                         _mm256_setzero_si256());
6205 }
6206
6207 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6208 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
6209 {
6210   return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6211 }
6212
6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6214 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6215 {
6216   return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6217                                     _mm_setzero_si128());
6218 }
6219
6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6221 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6222 {
6223   return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6224                                   _mm256_setzero_si256());
6225 }
6226
6227 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6228 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6229 {
6230   return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6231                                        _mm256_setzero_si256());
6232 }
6233
6234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6235 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6236 {
6237   return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6238 }
6239
6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6241 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6242 {
6243   return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6244                                     _mm_setzero_si128());
6245 }
6246
6247 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6248 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6249 {
6250   return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6251                                   _mm256_setzero_si256());
6252 }
6253
6254 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6255 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6256 {
6257   return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6258                                        _mm256_setzero_si256());
6259 }
6260
6261 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6262 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6263 {
6264   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6265                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
6266                                            (__v4si)__W);
6267 }
6268
6269 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6270 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6271 {
6272   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6273                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
6274                                            (__v4si)_mm_setzero_si128());
6275 }
6276
6277 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6278 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6279 {
6280   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6281                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
6282                                         (__v8si)__W);
6283 }
6284
6285 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6286 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6287 {
6288   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6289                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
6290                                         (__v8si)_mm256_setzero_si256());
6291 }
6292
6293 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6294 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6295 {
6296   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6297                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
6298                                            (__v2di)__W);
6299 }
6300
6301 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6302 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6303 {
6304   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6305                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
6306                                            (__v2di)_mm_setzero_si128());
6307 }
6308
6309 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6310 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6311 {
6312   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6313                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
6314                                         (__v4di)__W);
6315 }
6316
6317 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6318 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6319 {
6320   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6321                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
6322                                         (__v4di)_mm256_setzero_si256());
6323 }
6324
6325 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6326 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6327 {
6328   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6329                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
6330                                            (__v4si)__W);
6331 }
6332
6333 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6334 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6335 {
6336   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6337                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
6338                                            (__v4si)_mm_setzero_si128());
6339 }
6340
6341 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6342 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6343 {
6344   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6345                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
6346                                         (__v8si)__W);
6347 }
6348
6349 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6350 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6351 {
6352   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6353                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
6354                                         (__v8si)_mm256_setzero_si256());
6355 }
6356
6357 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6358 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6359 {
6360   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6361                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
6362                                            (__v2di)__W);
6363 }
6364
6365 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6366 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6367 {
6368   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6369                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
6370                                            (__v2di)_mm_setzero_si128());
6371 }
6372
6373 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6374 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6375 {
6376   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6377                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
6378                                         (__v4di)__W);
6379 }
6380
6381 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6382 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6383 {
6384   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6385                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
6386                                         (__v4di)_mm256_setzero_si256());
6387 }
6388
6389 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6390 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6391 {
6392   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6393                                              (__v4si)_mm_sra_epi32(__A, __B),
6394                                              (__v4si)__W);
6395 }
6396
6397 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6398 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6399 {
6400   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6401                                              (__v4si)_mm_sra_epi32(__A, __B),
6402                                              (__v4si)_mm_setzero_si128());
6403 }
6404
6405 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6406 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6407 {
6408   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6409                                              (__v8si)_mm256_sra_epi32(__A, __B),
6410                                              (__v8si)__W);
6411 }
6412
6413 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6414 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6415 {
6416   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6417                                              (__v8si)_mm256_sra_epi32(__A, __B),
6418                                              (__v8si)_mm256_setzero_si256());
6419 }
6420
6421 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6422 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6423 {
6424   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6425                                              (__v4si)_mm_srai_epi32(__A, __B),
6426                                              (__v4si)__W);
6427 }
6428
6429 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6430 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6431 {
6432   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6433                                              (__v4si)_mm_srai_epi32(__A, __B),
6434                                              (__v4si)_mm_setzero_si128());
6435 }
6436
6437 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6438 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6439 {
6440   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6441                                              (__v8si)_mm256_srai_epi32(__A, __B),
6442                                              (__v8si)__W);
6443 }
6444
6445 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6446 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6447 {
6448   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6449                                              (__v8si)_mm256_srai_epi32(__A, __B),
6450                                              (__v8si)_mm256_setzero_si256());
6451 }
6452
6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454 _mm_sra_epi64(__m128i __A, __m128i __B)
6455 {
6456   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6457 }
6458
6459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6460 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6461 {
6462   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6463                                              (__v2di)_mm_sra_epi64(__A, __B), \
6464                                              (__v2di)__W);
6465 }
6466
6467 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6468 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6469 {
6470   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6471                                              (__v2di)_mm_sra_epi64(__A, __B), \
6472                                              (__v2di)_mm_setzero_si128());
6473 }
6474
6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476 _mm256_sra_epi64(__m256i __A, __m128i __B)
6477 {
6478   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6479 }
6480
6481 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6482 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6483 {
6484   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6485                                            (__v4di)_mm256_sra_epi64(__A, __B), \
6486                                            (__v4di)__W);
6487 }
6488
6489 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6490 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6491 {
6492   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6493                                            (__v4di)_mm256_sra_epi64(__A, __B), \
6494                                            (__v4di)_mm256_setzero_si256());
6495 }
6496
6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498 _mm_srai_epi64(__m128i __A, int __imm)
6499 {
6500   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6501 }
6502
6503 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6504 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6505 {
6506   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6507                                            (__v2di)_mm_srai_epi64(__A, __imm), \
6508                                            (__v2di)__W);
6509 }
6510
6511 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6512 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6513 {
6514   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6515                                            (__v2di)_mm_srai_epi64(__A, __imm), \
6516                                            (__v2di)_mm_setzero_si128());
6517 }
6518
6519 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6520 _mm256_srai_epi64(__m256i __A, int __imm)
6521 {
6522   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6523 }
6524
6525 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6526 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6527 {
6528   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6529                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
6530                                         (__v4di)__W);
6531 }
6532
6533 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6534 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6535 {
6536   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6537                                         (__v4di)_mm256_srai_epi64(__A, __imm), \
6538                                         (__v4di)_mm256_setzero_si256());
6539 }
6540
/* Calls __builtin_ia32_pternlogd128_mask on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6546
/* Calls __builtin_ia32_pternlogd128_mask on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6552
/* Calls __builtin_ia32_pternlogd128_maskz on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6558
/* Calls __builtin_ia32_pternlogd256_mask on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6564
/* Calls __builtin_ia32_pternlogd256_mask on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6570
/* Calls __builtin_ia32_pternlogd256_maskz on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6576
/* Calls __builtin_ia32_pternlogq128_mask on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6582
/* Calls __builtin_ia32_pternlogq128_mask on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6588
/* Calls __builtin_ia32_pternlogq128_maskz on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6594
/* Calls __builtin_ia32_pternlogq256_mask on A, B, C with immediate imm and
 * mask (__mmask8)-1.  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6600
/* Calls __builtin_ia32_pternlogq256_mask on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6606
/* Calls __builtin_ia32_pternlogq256_maskz on A, B, C with immediate imm and
 * mask U.  The expansion is wrapped in parentheses so postfix operators at
 * the use site apply to the whole result. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6612
6613
6614
/* Calls __builtin_ia32_shuf_f32x4_256 on A and B with immediate imm.  The
 * expansion is wrapped in parentheses so postfix operators at the use site
 * apply to the whole result. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
6618
/* __builtin_ia32_selectps_256 under mask U between _mm256_shuffle_f32x4(A, B,
 * imm) and W.  The expansion is wrapped in parentheses so postfix operators
 * at the use site apply to the whole result. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))
6623
/* __builtin_ia32_selectps_256 under mask U between _mm256_shuffle_f32x4(A, B,
 * imm) and _mm256_setzero_ps().  The expansion is wrapped in parentheses so
 * postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
6628
/* Calls __builtin_ia32_shuf_f64x2_256 on A and B with immediate imm.  The
 * expansion is wrapped in parentheses so postfix operators at the use site
 * apply to the whole result. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
6632
/* __builtin_ia32_selectpd_256 under mask U between _mm256_shuffle_f64x2(A, B,
 * imm) and W.  The expansion is wrapped in parentheses so postfix operators
 * at the use site apply to the whole result. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))
6637
/* __builtin_ia32_selectpd_256 under mask U between _mm256_shuffle_f64x2(A, B,
 * imm) and _mm256_setzero_pd().  The expansion is wrapped in parentheses so
 * postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
6642
/* Calls __builtin_ia32_shuf_i32x4_256 on A and B with immediate imm.  The
 * expansion is wrapped in parentheses so postfix operators at the use site
 * apply to the whole result. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
6646
/* __builtin_ia32_selectd_256 under mask U between _mm256_shuffle_i32x4(A, B,
 * imm) and W.  The expansion is wrapped in parentheses so postfix operators
 * at the use site apply to the whole result. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))
6651
/* __builtin_ia32_selectd_256 under mask U between _mm256_shuffle_i32x4(A, B,
 * imm) and _mm256_setzero_si256().  The expansion is wrapped in parentheses
 * so postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
6656
/* Calls __builtin_ia32_shuf_i64x2_256 on A and B with immediate imm.  The
 * expansion is wrapped in parentheses so postfix operators at the use site
 * apply to the whole result. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
6660
/* __builtin_ia32_selectq_256 under mask U between _mm256_shuffle_i64x2(A, B,
 * imm) and W.  The expansion is wrapped in parentheses so postfix operators
 * at the use site apply to the whole result. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))
6665
6666
/* __builtin_ia32_selectq_256 under mask U between _mm256_shuffle_i64x2(A, B,
 * imm) and _mm256_setzero_si256().  The expansion is wrapped in parentheses
 * so postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
6671
/* __builtin_ia32_selectpd_128 under mask U between _mm_shuffle_pd(A, B, M)
 * and W.  The expansion is wrapped in parentheses so postfix operators at the
 * use site apply to the whole result. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
6676
/* __builtin_ia32_selectpd_128 under mask U between _mm_shuffle_pd(A, B, M)
 * and _mm_setzero_pd().  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
6681
/* __builtin_ia32_selectpd_256 under mask U between _mm256_shuffle_pd(A, B, M)
 * and W.  The expansion is wrapped in parentheses so postfix operators at the
 * use site apply to the whole result. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
6686
/* __builtin_ia32_selectpd_256 under mask U between _mm256_shuffle_pd(A, B, M)
 * and _mm256_setzero_pd().  The expansion is wrapped in parentheses so
 * postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
6691
/* __builtin_ia32_selectps_128 under mask U between _mm_shuffle_ps(A, B, M)
 * and W.  The expansion is wrapped in parentheses so postfix operators at the
 * use site apply to the whole result. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
6696
/* __builtin_ia32_selectps_128 under mask U between _mm_shuffle_ps(A, B, M)
 * and _mm_setzero_ps().  The expansion is wrapped in parentheses so postfix
 * operators at the use site apply to the whole result. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
6701
/* __builtin_ia32_selectps_256 under mask U between _mm256_shuffle_ps(A, B, M)
 * and W.  The expansion is wrapped in parentheses so postfix operators at the
 * use site apply to the whole result. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
6706
/* __builtin_ia32_selectps_256 under mask U between _mm256_shuffle_ps(A, B, M)
 * and _mm256_setzero_ps().  The expansion is wrapped in parentheses so
 * postfix operators at the use site apply to the whole result. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6711
6712 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6713 _mm_rsqrt14_pd (__m128d __A)
6714 {
6715   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6716                  (__v2df)
6717                  _mm_setzero_pd (),
6718                  (__mmask8) -1);
6719 }
6720
6721 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6722 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6723 {
6724   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6725                  (__v2df) __W,
6726                  (__mmask8) __U);
6727 }
6728
6729 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6730 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6731 {
6732   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6733                  (__v2df)
6734                  _mm_setzero_pd (),
6735                  (__mmask8) __U);
6736 }
6737
6738 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6739 _mm256_rsqrt14_pd (__m256d __A)
6740 {
6741   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6742                  (__v4df)
6743                  _mm256_setzero_pd (),
6744                  (__mmask8) -1);
6745 }
6746
6747 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6748 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6749 {
6750   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6751                  (__v4df) __W,
6752                  (__mmask8) __U);
6753 }
6754
6755 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6756 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6757 {
6758   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6759                  (__v4df)
6760                  _mm256_setzero_pd (),
6761                  (__mmask8) __U);
6762 }
6763
6764 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6765 _mm_rsqrt14_ps (__m128 __A)
6766 {
6767   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6768                 (__v4sf)
6769                 _mm_setzero_ps (),
6770                 (__mmask8) -1);
6771 }
6772
6773 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6774 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6775 {
6776   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6777                 (__v4sf) __W,
6778                 (__mmask8) __U);
6779 }
6780
6781 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6782 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6783 {
6784   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6785                 (__v4sf)
6786                 _mm_setzero_ps (),
6787                 (__mmask8) __U);
6788 }
6789
6790 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6791 _mm256_rsqrt14_ps (__m256 __A)
6792 {
6793   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6794                 (__v8sf)
6795                 _mm256_setzero_ps (),
6796                 (__mmask8) -1);
6797 }
6798
6799 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6800 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6801 {
6802   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6803                 (__v8sf) __W,
6804                 (__mmask8) __U);
6805 }
6806
6807 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6808 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6809 {
6810   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6811                 (__v8sf)
6812                 _mm256_setzero_ps (),
6813                 (__mmask8) __U);
6814 }
6815
6816 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6817 _mm256_broadcast_f32x4(__m128 __A)
6818 {
6819   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6820                                          0, 1, 2, 3, 0, 1, 2, 3);
6821 }
6822
6823 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6824 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6825 {
6826   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6827                                             (__v8sf)_mm256_broadcast_f32x4(__A),
6828                                             (__v8sf)__O);
6829 }
6830
6831 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6832 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6833 {
6834   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6835                                             (__v8sf)_mm256_broadcast_f32x4(__A),
6836                                             (__v8sf)_mm256_setzero_ps());
6837 }
6838
6839 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6840 _mm256_broadcast_i32x4(__m128i __A)
6841 {
6842   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6843                                           0, 1, 2, 3, 0, 1, 2, 3);
6844 }
6845
6846 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6847 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6848 {
6849   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6850                                             (__v8si)_mm256_broadcast_i32x4(__A),
6851                                             (__v8si)__O);
6852 }
6853
6854 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6855 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6856 {
6857   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6858                                             (__v8si)_mm256_broadcast_i32x4(__A),
6859                                             (__v8si)_mm256_setzero_si256());
6860 }
6861
6862 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6863 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6864 {
6865   return (__m256d)__builtin_ia32_selectpd_256(__M,
6866                                               (__v4df) _mm256_broadcastsd_pd(__A),
6867                                               (__v4df) __O);
6868 }
6869
6870 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6871 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6872 {
6873   return (__m256d)__builtin_ia32_selectpd_256(__M,
6874                                               (__v4df) _mm256_broadcastsd_pd(__A),
6875                                               (__v4df) _mm256_setzero_pd());
6876 }
6877
6878 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6879 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6880 {
6881   return (__m128)__builtin_ia32_selectps_128(__M,
6882                                              (__v4sf) _mm_broadcastss_ps(__A),
6883                                              (__v4sf) __O);
6884 }
6885
6886 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6887 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6888 {
6889   return (__m128)__builtin_ia32_selectps_128(__M,
6890                                              (__v4sf) _mm_broadcastss_ps(__A),
6891                                              (__v4sf) _mm_setzero_ps());
6892 }
6893
6894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6895 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6896 {
6897   return (__m256)__builtin_ia32_selectps_256(__M,
6898                                              (__v8sf) _mm256_broadcastss_ps(__A),
6899                                              (__v8sf) __O);
6900 }
6901
6902 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6903 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6904 {
6905   return (__m256)__builtin_ia32_selectps_256(__M,
6906                                              (__v8sf) _mm256_broadcastss_ps(__A),
6907                                              (__v8sf) _mm256_setzero_ps());
6908 }
6909
6910 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6911 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6912 {
6913   return (__m128i)__builtin_ia32_selectd_128(__M,
6914                                              (__v4si) _mm_broadcastd_epi32(__A),
6915                                              (__v4si) __O);
6916 }
6917
6918 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6919 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6920 {
6921   return (__m128i)__builtin_ia32_selectd_128(__M,
6922                                              (__v4si) _mm_broadcastd_epi32(__A),
6923                                              (__v4si) _mm_setzero_si128());
6924 }
6925
6926 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6927 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6928 {
6929   return (__m256i)__builtin_ia32_selectd_256(__M,
6930                                              (__v8si) _mm256_broadcastd_epi32(__A),
6931                                              (__v8si) __O);
6932 }
6933
6934 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6935 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6936 {
6937   return (__m256i)__builtin_ia32_selectd_256(__M,
6938                                              (__v8si) _mm256_broadcastd_epi32(__A),
6939                                              (__v8si) _mm256_setzero_si256());
6940 }
6941
6942 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6943 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6944 {
6945   return (__m128i)__builtin_ia32_selectq_128(__M,
6946                                              (__v2di) _mm_broadcastq_epi64(__A),
6947                                              (__v2di) __O);
6948 }
6949
6950 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6951 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6952 {
6953   return (__m128i)__builtin_ia32_selectq_128(__M,
6954                                              (__v2di) _mm_broadcastq_epi64(__A),
6955                                              (__v2di) _mm_setzero_si128());
6956 }
6957
6958 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6959 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6960 {
6961   return (__m256i)__builtin_ia32_selectq_256(__M,
6962                                              (__v4di) _mm256_broadcastq_epi64(__A),
6963                                              (__v4di) __O);
6964 }
6965
6966 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6967 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6968 {
6969   return (__m256i)__builtin_ia32_selectq_256(__M,
6970                                              (__v4di) _mm256_broadcastq_epi64(__A),
6971                                              (__v4di) _mm256_setzero_si256());
6972 }
6973
6974 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6975 _mm_cvtsepi32_epi8 (__m128i __A)
6976 {
6977   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6978                (__v16qi)_mm_undefined_si128(),
6979                (__mmask8) -1);
6980 }
6981
6982 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6983 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6984 {
6985   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6986                (__v16qi) __O, __M);
6987 }
6988
6989 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6990 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6991 {
6992   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6993                (__v16qi) _mm_setzero_si128 (),
6994                __M);
6995 }
6996
6997 static __inline__ void __DEFAULT_FN_ATTRS128
6998 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6999 {
7000   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7001 }
7002
7003 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7004 _mm256_cvtsepi32_epi8 (__m256i __A)
7005 {
7006   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7007                (__v16qi)_mm_undefined_si128(),
7008                (__mmask8) -1);
7009 }
7010
7011 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7012 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7013 {
7014   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7015                (__v16qi) __O, __M);
7016 }
7017
7018 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7019 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7020 {
7021   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7022                (__v16qi) _mm_setzero_si128 (),
7023                __M);
7024 }
7025
7026 static __inline__ void __DEFAULT_FN_ATTRS128
7027 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7028 {
7029   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7030 }
7031
7032 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7033 _mm_cvtsepi32_epi16 (__m128i __A)
7034 {
7035   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7036                (__v8hi)_mm_setzero_si128 (),
7037                (__mmask8) -1);
7038 }
7039
7040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7041 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7042 {
7043   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7044                (__v8hi)__O,
7045                __M);
7046 }
7047
7048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7049 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7050 {
7051   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7052                (__v8hi) _mm_setzero_si128 (),
7053                __M);
7054 }
7055
7056 static __inline__ void __DEFAULT_FN_ATTRS128
7057 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7058 {
7059   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7060 }
7061
7062 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7063 _mm256_cvtsepi32_epi16 (__m256i __A)
7064 {
7065   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7066                (__v8hi)_mm_undefined_si128(),
7067                (__mmask8) -1);
7068 }
7069
7070 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7071 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7072 {
7073   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7074                (__v8hi) __O, __M);
7075 }
7076
7077 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7078 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7079 {
7080   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7081                (__v8hi) _mm_setzero_si128 (),
7082                __M);
7083 }
7084
7085 static __inline__ void __DEFAULT_FN_ATTRS256
7086 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7087 {
7088   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7089 }
7090
7091 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7092 _mm_cvtsepi64_epi8 (__m128i __A)
7093 {
7094   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7095                (__v16qi)_mm_undefined_si128(),
7096                (__mmask8) -1);
7097 }
7098
7099 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7100 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7101 {
7102   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7103                (__v16qi) __O, __M);
7104 }
7105
7106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7107 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7108 {
7109   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7110                (__v16qi) _mm_setzero_si128 (),
7111                __M);
7112 }
7113
7114 static __inline__ void __DEFAULT_FN_ATTRS128
7115 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7116 {
7117   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7118 }
7119
7120 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7121 _mm256_cvtsepi64_epi8 (__m256i __A)
7122 {
7123   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7124                (__v16qi)_mm_undefined_si128(),
7125                (__mmask8) -1);
7126 }
7127
7128 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7129 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7130 {
7131   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7132                (__v16qi) __O, __M);
7133 }
7134
7135 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7136 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7137 {
7138   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7139                (__v16qi) _mm_setzero_si128 (),
7140                __M);
7141 }
7142
7143 static __inline__ void __DEFAULT_FN_ATTRS256
7144 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7145 {
7146   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7147 }
7148
7149 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7150 _mm_cvtsepi64_epi32 (__m128i __A)
7151 {
7152   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7153                (__v4si)_mm_undefined_si128(),
7154                (__mmask8) -1);
7155 }
7156
7157 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7158 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7159 {
7160   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7161                (__v4si) __O, __M);
7162 }
7163
7164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7165 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7166 {
7167   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7168                (__v4si) _mm_setzero_si128 (),
7169                __M);
7170 }
7171
7172 static __inline__ void __DEFAULT_FN_ATTRS128
7173 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7174 {
7175   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7176 }
7177
7178 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7179 _mm256_cvtsepi64_epi32 (__m256i __A)
7180 {
7181   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7182                (__v4si)_mm_undefined_si128(),
7183                (__mmask8) -1);
7184 }
7185
7186 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7187 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7188 {
7189   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7190                (__v4si)__O,
7191                __M);
7192 }
7193
7194 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7195 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7196 {
7197   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7198                (__v4si) _mm_setzero_si128 (),
7199                __M);
7200 }
7201
7202 static __inline__ void __DEFAULT_FN_ATTRS256
7203 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7204 {
7205   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7206 }
7207
7208 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7209 _mm_cvtsepi64_epi16 (__m128i __A)
7210 {
7211   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7212                (__v8hi)_mm_undefined_si128(),
7213                (__mmask8) -1);
7214 }
7215
7216 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7217 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7218 {
7219   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7220                (__v8hi) __O, __M);
7221 }
7222
7223 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7224 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7225 {
7226   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7227                (__v8hi) _mm_setzero_si128 (),
7228                __M);
7229 }
7230
7231 static __inline__ void __DEFAULT_FN_ATTRS128
7232 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7233 {
7234   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7235 }
7236
7237 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7238 _mm256_cvtsepi64_epi16 (__m256i __A)
7239 {
7240   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7241                (__v8hi)_mm_undefined_si128(),
7242                (__mmask8) -1);
7243 }
7244
7245 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7246 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7247 {
7248   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7249                (__v8hi) __O, __M);
7250 }
7251
7252 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7253 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7254 {
7255   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7256                (__v8hi) _mm_setzero_si128 (),
7257                __M);
7258 }
7259
7260 static __inline__ void __DEFAULT_FN_ATTRS256
7261 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7262 {
7263   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7264 }
7265
7266 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7267 _mm_cvtusepi32_epi8 (__m128i __A)
7268 {
7269   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7270                 (__v16qi)_mm_undefined_si128(),
7271                 (__mmask8) -1);
7272 }
7273
7274 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7275 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7276 {
7277   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7278                 (__v16qi) __O,
7279                 __M);
7280 }
7281
7282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7283 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7284 {
7285   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7286                 (__v16qi) _mm_setzero_si128 (),
7287                 __M);
7288 }
7289
7290 static __inline__ void __DEFAULT_FN_ATTRS128
7291 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7292 {
7293   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7294 }
7295
7296 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7297 _mm256_cvtusepi32_epi8 (__m256i __A)
7298 {
7299   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7300                 (__v16qi)_mm_undefined_si128(),
7301                 (__mmask8) -1);
7302 }
7303
7304 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7305 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7306 {
7307   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7308                 (__v16qi) __O,
7309                 __M);
7310 }
7311
7312 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7313 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7314 {
7315   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7316                 (__v16qi) _mm_setzero_si128 (),
7317                 __M);
7318 }
7319
7320 static __inline__ void __DEFAULT_FN_ATTRS256
7321 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7322 {
7323   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7324 }
7325
7326 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7327 _mm_cvtusepi32_epi16 (__m128i __A)
7328 {
7329   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7330                 (__v8hi)_mm_undefined_si128(),
7331                 (__mmask8) -1);
7332 }
7333
7334 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7335 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7336 {
7337   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7338                 (__v8hi) __O, __M);
7339 }
7340
7341 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7342 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7343 {
7344   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7345                 (__v8hi) _mm_setzero_si128 (),
7346                 __M);
7347 }
7348
7349 static __inline__ void __DEFAULT_FN_ATTRS128
7350 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7351 {
7352   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7353 }
7354
7355 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7356 _mm256_cvtusepi32_epi16 (__m256i __A)
7357 {
7358   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7359                 (__v8hi) _mm_undefined_si128(),
7360                 (__mmask8) -1);
7361 }
7362
7363 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7364 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7365 {
7366   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7367                 (__v8hi) __O, __M);
7368 }
7369
7370 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7371 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7372 {
7373   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7374                 (__v8hi) _mm_setzero_si128 (),
7375                 __M);
7376 }
7377
7378 static __inline__ void __DEFAULT_FN_ATTRS256
7379 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7380 {
7381   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7382 }
7383
7384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7385 _mm_cvtusepi64_epi8 (__m128i __A)
7386 {
7387   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7388                 (__v16qi)_mm_undefined_si128(),
7389                 (__mmask8) -1);
7390 }
7391
7392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7393 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7394 {
7395   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7396                 (__v16qi) __O,
7397                 __M);
7398 }
7399
7400 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7401 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7402 {
7403   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7404                 (__v16qi) _mm_setzero_si128 (),
7405                 __M);
7406 }
7407
7408 static __inline__ void __DEFAULT_FN_ATTRS128
7409 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7410 {
7411   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7412 }
7413
7414 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7415 _mm256_cvtusepi64_epi8 (__m256i __A)
7416 {
7417   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7418                 (__v16qi)_mm_undefined_si128(),
7419                 (__mmask8) -1);
7420 }
7421
7422 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7423 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7424 {
7425   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7426                 (__v16qi) __O,
7427                 __M);
7428 }
7429
7430 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7431 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7432 {
7433   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7434                 (__v16qi) _mm_setzero_si128 (),
7435                 __M);
7436 }
7437
7438 static __inline__ void __DEFAULT_FN_ATTRS256
7439 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7440 {
7441   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7442 }
7443
7444 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7445 _mm_cvtusepi64_epi32 (__m128i __A)
7446 {
7447   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7448                 (__v4si)_mm_undefined_si128(),
7449                 (__mmask8) -1);
7450 }
7451
7452 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7453 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7454 {
7455   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7456                 (__v4si) __O, __M);
7457 }
7458
7459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7460 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7461 {
7462   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7463                 (__v4si) _mm_setzero_si128 (),
7464                 __M);
7465 }
7466
7467 static __inline__ void __DEFAULT_FN_ATTRS128
7468 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7469 {
7470   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7471 }
7472
7473 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7474 _mm256_cvtusepi64_epi32 (__m256i __A)
7475 {
7476   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7477                 (__v4si)_mm_undefined_si128(),
7478                 (__mmask8) -1);
7479 }
7480
7481 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7482 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7483 {
7484   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7485                 (__v4si) __O, __M);
7486 }
7487
7488 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7489 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7490 {
7491   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7492                 (__v4si) _mm_setzero_si128 (),
7493                 __M);
7494 }
7495
7496 static __inline__ void __DEFAULT_FN_ATTRS256
7497 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7498 {
7499   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7500 }
7501
7502 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7503 _mm_cvtusepi64_epi16 (__m128i __A)
7504 {
7505   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7506                 (__v8hi)_mm_undefined_si128(),
7507                 (__mmask8) -1);
7508 }
7509
7510 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7511 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7512 {
7513   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7514                 (__v8hi) __O, __M);
7515 }
7516
7517 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7518 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7519 {
7520   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7521                 (__v8hi) _mm_setzero_si128 (),
7522                 __M);
7523 }
7524
7525 static __inline__ void __DEFAULT_FN_ATTRS128
7526 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7527 {
7528   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7529 }
7530
7531 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7532 _mm256_cvtusepi64_epi16 (__m256i __A)
7533 {
7534   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7535                 (__v8hi)_mm_undefined_si128(),
7536                 (__mmask8) -1);
7537 }
7538
7539 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7540 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7541 {
7542   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7543                 (__v8hi) __O, __M);
7544 }
7545
7546 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7547 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7548 {
7549   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7550                 (__v8hi) _mm_setzero_si128 (),
7551                 __M);
7552 }
7553
7554 static __inline__ void __DEFAULT_FN_ATTRS256
7555 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7556 {
7557   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7558 }
7559
7560 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7561 _mm_cvtepi32_epi8 (__m128i __A)
7562 {
7563   return (__m128i)__builtin_shufflevector(
7564       __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7565       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7566 }
7567
7568 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7569 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7570 {
7571   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7572               (__v16qi) __O, __M);
7573 }
7574
7575 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7576 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7577 {
7578   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7579               (__v16qi)
7580               _mm_setzero_si128 (),
7581               __M);
7582 }
7583
7584 static __inline__ void __DEFAULT_FN_ATTRS256
7585 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7586 {
7587   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7588 }
7589
7590 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7591 _mm256_cvtepi32_epi8 (__m256i __A)
7592 {
7593   return (__m128i)__builtin_shufflevector(
7594       __builtin_convertvector((__v8si)__A, __v8qi),
7595       (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7596       12, 13, 14, 15);
7597 }
7598
7599 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7600 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7601 {
7602   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7603               (__v16qi) __O, __M);
7604 }
7605
7606 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7607 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7608 {
7609   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7610               (__v16qi) _mm_setzero_si128 (),
7611               __M);
7612 }
7613
7614 static __inline__ void __DEFAULT_FN_ATTRS256
7615 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7616 {
7617   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7618 }
7619
7620 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7621 _mm_cvtepi32_epi16 (__m128i __A)
7622 {
7623   return (__m128i)__builtin_shufflevector(
7624       __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7625       2, 3, 4, 5, 6, 7);
7626 }
7627
7628 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7629 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7630 {
7631   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7632               (__v8hi) __O, __M);
7633 }
7634
7635 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7636 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7637 {
7638   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7639               (__v8hi) _mm_setzero_si128 (),
7640               __M);
7641 }
7642
7643 static __inline__ void __DEFAULT_FN_ATTRS128
7644 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7645 {
7646   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7647 }
7648
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7650 _mm256_cvtepi32_epi16 (__m256i __A)
7651 {
7652   return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7653 }
7654
7655 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7656 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7657 {
7658   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7659               (__v8hi) __O, __M);
7660 }
7661
7662 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7663 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7664 {
7665   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7666               (__v8hi) _mm_setzero_si128 (),
7667               __M);
7668 }
7669
7670 static __inline__ void __DEFAULT_FN_ATTRS256
7671 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
7672 {
7673   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7674 }
7675
7676 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7677 _mm_cvtepi64_epi8 (__m128i __A)
7678 {
7679   return (__m128i)__builtin_shufflevector(
7680       __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7681       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7682 }
7683
7684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7685 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7686 {
7687   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7688               (__v16qi) __O, __M);
7689 }
7690
7691 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7692 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7693 {
7694   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7695               (__v16qi) _mm_setzero_si128 (),
7696               __M);
7697 }
7698
7699 static __inline__ void __DEFAULT_FN_ATTRS128
7700 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7701 {
7702   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7703 }
7704
7705 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7706 _mm256_cvtepi64_epi8 (__m256i __A)
7707 {
7708   return (__m128i)__builtin_shufflevector(
7709       __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7710       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7711 }
7712
7713 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7714 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7715 {
7716   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7717               (__v16qi) __O, __M);
7718 }
7719
7720 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7721 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7722 {
7723   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7724               (__v16qi) _mm_setzero_si128 (),
7725               __M);
7726 }
7727
7728 static __inline__ void __DEFAULT_FN_ATTRS256
7729 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7730 {
7731   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7732 }
7733
7734 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7735 _mm_cvtepi64_epi32 (__m128i __A)
7736 {
7737   return (__m128i)__builtin_shufflevector(
7738       __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7739 }
7740
7741 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7742 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7743 {
7744   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7745               (__v4si) __O, __M);
7746 }
7747
7748 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7749 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7750 {
7751   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7752               (__v4si) _mm_setzero_si128 (),
7753               __M);
7754 }
7755
7756 static __inline__ void __DEFAULT_FN_ATTRS128
7757 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7758 {
7759   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7760 }
7761
7762 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7763 _mm256_cvtepi64_epi32 (__m256i __A)
7764 {
7765   return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7766 }
7767
7768 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7769 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7770 {
7771   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7772                                              (__v4si)_mm256_cvtepi64_epi32(__A),
7773                                              (__v4si)__O);
7774 }
7775
7776 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7777 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7778 {
7779   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7780                                              (__v4si)_mm256_cvtepi64_epi32(__A),
7781                                              (__v4si)_mm_setzero_si128());
7782 }
7783
7784 static __inline__ void __DEFAULT_FN_ATTRS256
7785 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7786 {
7787   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7788 }
7789
7790 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7791 _mm_cvtepi64_epi16 (__m128i __A)
7792 {
7793   return (__m128i)__builtin_shufflevector(
7794       __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7795       3, 3, 3, 3);
7796 }
7797
7798 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7799 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7800 {
7801   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7802               (__v8hi)__O,
7803               __M);
7804 }
7805
7806 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7807 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7808 {
7809   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7810               (__v8hi) _mm_setzero_si128 (),
7811               __M);
7812 }
7813
7814 static __inline__ void __DEFAULT_FN_ATTRS128
7815 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7816 {
7817   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7818 }
7819
7820 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7821 _mm256_cvtepi64_epi16 (__m256i __A)
7822 {
7823   return (__m128i)__builtin_shufflevector(
7824       __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7825       2, 3, 4, 5, 6, 7);
7826 }
7827
7828 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7829 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7830 {
7831   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7832               (__v8hi) __O, __M);
7833 }
7834
7835 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7836 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7837 {
7838   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7839               (__v8hi) _mm_setzero_si128 (),
7840               __M);
7841 }
7842
7843 static __inline__ void __DEFAULT_FN_ATTRS256
7844 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7845 {
7846   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7847 }
7848
/* Unmasked extract: calls the extractf32x4_256 builtin on A (as 8 x float)
 * with immediate imm, an undefined pass-through vector and an all-ones mask.
 * The expansion is fully parenthesized so that postfix operators at the use
 * site apply to the __m128 result rather than to the builtin call. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
7854
/* Masked extract: calls the extractf32x4_256 builtin on A (as 8 x float) with
 * immediate imm, W as the pass-through vector and U as the mask.  The
 * expansion is fully parenthesized so that postfix operators at the use site
 * apply to the __m128 result rather than to the builtin call. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
7860
/* Zero-masked extract: calls the extractf32x4_256 builtin on A (as 8 x float)
 * with immediate imm, an all-zero pass-through vector and U as the mask.  The
 * expansion is fully parenthesized so that postfix operators at the use site
 * apply to the __m128 result rather than to the builtin call. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
7866
/* Unmasked extract: calls the extracti32x4_256 builtin on A (as 8 x i32) with
 * immediate imm, an undefined pass-through vector and an all-ones mask.  The
 * expansion is fully parenthesized so that postfix operators at the use site
 * apply to the __m128i result rather than to the builtin call. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
7872
/* Masked extract: calls the extracti32x4_256 builtin on A (as 8 x i32) with
 * immediate imm, W as the pass-through vector and U as the mask.  The
 * expansion is fully parenthesized so that postfix operators at the use site
 * apply to the __m128i result rather than to the builtin call. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
7878
/* Zero-masked extract: calls the extracti32x4_256 builtin on A (as 8 x i32)
 * with immediate imm, an all-zero pass-through vector and U as the mask.  The
 * expansion is fully parenthesized so that postfix operators at the use site
 * apply to the __m128i result rather than to the builtin call. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
7884
/* Calls the insertf32x4_256 builtin with A (as 8 x float), B (as 4 x float)
 * and immediate imm.  The expansion is fully parenthesized so that postfix
 * operators at the use site apply to the __m256 result rather than to the
 * builtin call. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))
7888
/* Masked insert: passes mask U, the result of _mm256_insertf32x4(A, B, imm)
 * and W (in that order) to the selectps_256 builtin.  The expansion is fully
 * parenthesized so that postfix operators at the use site apply to the __m256
 * result rather than to the builtin call. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))
7893
/* Zero-masked insert: passes mask U, the result of
 * _mm256_insertf32x4(A, B, imm) and an all-zero vector (in that order) to the
 * selectps_256 builtin.  The expansion is fully parenthesized so that postfix
 * operators at the use site apply to the __m256 result rather than to the
 * builtin call. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
7898
/* Calls the inserti32x4_256 builtin with A (as 8 x i32), B (as 4 x i32) and
 * immediate imm.  The expansion is fully parenthesized so that postfix
 * operators at the use site apply to the __m256i result rather than to the
 * builtin call. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))
7902
/* Masked insert: passes mask U, the result of _mm256_inserti32x4(A, B, imm)
 * and W (in that order) to the selectd_256 builtin.  The expansion is fully
 * parenthesized so that postfix operators at the use site apply to the
 * __m256i result rather than to the builtin call. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))
7907
/* Zero-masked insert: passes mask U, the result of
 * _mm256_inserti32x4(A, B, imm) and an all-zero vector (in that order) to the
 * selectd_256 builtin.  The expansion is fully parenthesized so that postfix
 * operators at the use site apply to the __m256i result rather than to the
 * builtin call. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7912
/* Unmasked getmant: calls the getmantpd128 builtin on A (as 2 x double) with
 * the immediate (C << 2) | B, an all-zero pass-through vector and an all-ones
 * mask.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m128d result rather than to the builtin call. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
7918
/* Masked getmant: calls the getmantpd128 builtin on A (as 2 x double) with
 * the immediate (C << 2) | B, W as the pass-through vector and U as the mask.
 * The expansion is fully parenthesized so that postfix operators at the use
 * site apply to the __m128d result rather than to the builtin call. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
7924
/* Zero-masked getmant: calls the getmantpd128 builtin on A (as 2 x double)
 * with the immediate (C << 2) | B, an all-zero pass-through vector and U as
 * the mask.  The expansion is fully parenthesized so that postfix operators
 * at the use site apply to the __m128d result rather than to the builtin
 * call. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
7930
/* Unmasked getmant: calls the getmantpd256 builtin on A (as 4 x double) with
 * the immediate (C << 2) | B, an all-zero pass-through vector and an all-ones
 * mask.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m256d result rather than to the builtin call. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
7936
/* Masked getmant: calls the getmantpd256 builtin on A (as 4 x double) with
 * the immediate (C << 2) | B, W as the pass-through vector and U as the mask.
 * The expansion is fully parenthesized so that postfix operators at the use
 * site apply to the __m256d result rather than to the builtin call. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
7942
/* Zero-masked getmant: calls the getmantpd256 builtin on A (as 4 x double)
 * with the immediate (C << 2) | B, an all-zero pass-through vector and U as
 * the mask.  The expansion is fully parenthesized so that postfix operators
 * at the use site apply to the __m256d result rather than to the builtin
 * call. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
7948
/* Unmasked getmant: calls the getmantps128 builtin on A (as 4 x float) with
 * the immediate (C << 2) | B, an all-zero pass-through vector and an all-ones
 * mask.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m128 result rather than to the builtin call. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))
7954
/* Masked getmant: calls the getmantps128 builtin on A (as 4 x float) with the
 * immediate (C << 2) | B, W as the pass-through vector and U as the mask.
 * The expansion is fully parenthesized so that postfix operators at the use
 * site apply to the __m128 result rather than to the builtin call. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))
7960
/* Zero-masked getmant: calls the getmantps128 builtin on A (as 4 x float)
 * with the immediate (C << 2) | B, an all-zero pass-through vector and U as
 * the mask.  The expansion is fully parenthesized so that postfix operators
 * at the use site apply to the __m128 result rather than to the builtin
 * call. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
7966
/* Unmasked getmant: calls the getmantps256 builtin on A (as 8 x float) with
 * the immediate (C << 2) | B, an all-zero pass-through vector and an all-ones
 * mask.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m256 result rather than to the builtin call. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))
7972
/* Masked getmant: calls the getmantps256 builtin on A (as 8 x float) with the
 * immediate (C << 2) | B, W as the pass-through vector and U as the mask.
 * The expansion is fully parenthesized so that postfix operators at the use
 * site apply to the __m256 result rather than to the builtin call. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
7978
/* Zero-masked getmant: calls the getmantps256 builtin on A (as 8 x float)
 * with the immediate (C << 2) | B, an all-zero pass-through vector and U as
 * the mask.  The expansion is fully parenthesized so that postfix operators
 * at the use site apply to the __m256 result rather than to the builtin
 * call. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
7984
/* Masked gather: forwards v1_old (as 2 x double), base pointer addr, index
 * (as 2 x i64), mask and scale to the gather3div2df builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128d result rather than to the builtin call. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7990
/* Masked gather: forwards v1_old (as 2 x i64), base pointer addr, index
 * (as 2 x i64), mask and scale to the gather3div2di builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128i result rather than to the builtin call. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
7996
/* Masked gather: forwards v1_old (as 4 x double), base pointer addr, index
 * (as 4 x i64), mask and scale to the gather3div4df builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m256d result rather than to the builtin call. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8002
/* Masked gather: forwards v1_old (as 4 x i64), base pointer addr, index
 * (as 4 x i64), mask and scale to the gather3div4di builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m256i result rather than to the builtin call. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8008
/* Masked gather: forwards v1_old (as 4 x float), base pointer addr, index
 * (as 2 x i64), mask and scale to the gather3div4sf builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128 result rather than to the builtin call. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8014
/* Masked gather: forwards v1_old (as 4 x i32), base pointer addr, index
 * (as 2 x i64), mask and scale to the gather3div4si builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128i result rather than to the builtin call. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8020
/* Masked gather: forwards v1_old (as 4 x float), base pointer addr, index
 * (as 4 x i64), mask and scale to the gather3div8sf builtin.  Despite the
 * _mm256 prefix, the pass-through and result are 128-bit (__m128); only the
 * index vector is 256-bit.  The expansion is fully parenthesized so that
 * postfix operators at the use site apply to the __m128 result rather than to
 * the builtin call. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8026
/* Masked gather: forwards v1_old (as 4 x i32), base pointer addr, index
 * (as 4 x i64), mask and scale to the gather3div8si builtin.  Despite the
 * _mm256 prefix, the pass-through and result are 128-bit (__m128i); only the
 * index vector is 256-bit.  The expansion is fully parenthesized so that
 * postfix operators at the use site apply to the __m128i result rather than
 * to the builtin call. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8032
/* Masked gather: forwards v1_old (as 2 x double), base pointer addr, index
 * (as 4 x i32), mask and scale to the gather3siv2df builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128d result rather than to the builtin call. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8038
/* Masked gather: forwards v1_old (as 2 x i64), base pointer addr, index
 * (as 4 x i32), mask and scale to the gather3siv2di builtin.  The expansion
 * is fully parenthesized so that postfix operators at the use site apply to
 * the __m128i result rather than to the builtin call. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8044
/* Masked gather: forwards v1_old (as 4 x double), base pointer addr, index
 * (as 4 x i32, a 128-bit vector), mask and scale to the gather3siv4df
 * builtin.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m256d result rather than to the builtin call. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8050
/* Masked gather: forwards v1_old (as 4 x i64), base pointer addr, index
 * (as 4 x i32, a 128-bit vector), mask and scale to the gather3siv4di
 * builtin.  The expansion is fully parenthesized so that postfix operators at
 * the use site apply to the __m256i result rather than to the builtin call. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8056
/* Wraps __builtin_ia32_gather3siv4sf: v1_old is passed as __v4sf, addr as a
 * const void pointer, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128.  The expansion is fully parenthesized so that a
 * trailing postfix operator binds to the cast result.
 * NOTE(review): scale is presumably required to be an immediate -- confirm. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8062
/* Wraps __builtin_ia32_gather3siv4si: v1_old is passed as __v4si, addr as a
 * const void pointer, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128i.  The expansion is fully parenthesized so that a
 * trailing postfix operator binds to the cast result.
 * NOTE(review): scale is presumably required to be an immediate -- confirm. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8068
/* Wraps __builtin_ia32_gather3siv8sf: v1_old is passed as __v8sf, addr as a
 * const void pointer, index as __v8si, mask as __mmask8 and scale as int; the
 * result is cast to __m256.  The expansion is fully parenthesized so that a
 * trailing postfix operator binds to the cast result.
 * NOTE(review): scale is presumably required to be an immediate -- confirm. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
8074
/* Wraps __builtin_ia32_gather3siv8si: v1_old is passed as __v8si, addr as a
 * const void pointer, index as __v8si, mask as __mmask8 and scale as int; the
 * result is cast to __m256i.  The expansion is fully parenthesized so that a
 * trailing postfix operator binds to the cast result.
 * NOTE(review): scale is presumably required to be an immediate -- confirm. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
8080
/* Wraps __builtin_ia32_permdf256 with X passed as __v4df and C as int; the
 * result is cast to __m256d.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): C is presumably an immediate permutation control -- confirm
 * against the Intel intrinsics documentation. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
8083
/* Blends _mm256_permutex_pd(X, C) with W through
 * __builtin_ia32_selectpd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256d.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
8088
/* Blends _mm256_permutex_pd(X, C) with _mm256_setzero_pd() through
 * __builtin_ia32_selectpd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256d.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
8093
/* Wraps __builtin_ia32_permdi256 with X passed as __v4di and C as int; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): C is presumably an immediate permutation control -- confirm
 * against the Intel intrinsics documentation. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
8096
/* Blends _mm256_permutex_epi64(X, C) with W through
 * __builtin_ia32_selectq_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)(__m256i)(W)))
8101
/* Blends _mm256_permutex_epi64(X, C) with _mm256_setzero_si256() through
 * __builtin_ia32_selectq_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256()))
8106
8107 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8108 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8109 {
8110   return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8111 }
8112
8113 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8114 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8115           __m256d __Y)
8116 {
8117   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8118                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
8119                                         (__v4df)__W);
8120 }
8121
8122 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8123 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8124 {
8125   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8126                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
8127                                         (__v4df)_mm256_setzero_pd());
8128 }
8129
8130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8131 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8132 {
8133   return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8134 }
8135
8136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8137 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8138 {
8139   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8140                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8141                                      (__v4di)_mm256_setzero_si256());
8142 }
8143
8144 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8145 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8146              __m256i __Y)
8147 {
8148   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8149                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8150                                      (__v4di)__W);
8151 }
8152
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped: the
 * first macro argument becomes the second call argument and vice versa. */
#define _mm256_permutexvar_ps(FIRST, SECOND) \
  _mm256_permutevar8x32_ps((SECOND), (FIRST))
8154
8155 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8156 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8157 {
8158   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8159                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8160                                         (__v8sf)__W);
8161 }
8162
8163 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8164 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8165 {
8166   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8167                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8168                                         (__v8sf)_mm256_setzero_ps());
8169 }
8170
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped: the
 * first macro argument becomes the second call argument and vice versa. */
#define _mm256_permutexvar_epi32(FIRST, SECOND) \
  _mm256_permutevar8x32_epi32((SECOND), (FIRST))
8172
8173 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8174 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8175                               __m256i __Y)
8176 {
8177   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8178                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8179                                      (__v8si)__W);
8180 }
8181
8182 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8183 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8184 {
8185   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8186                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8187                                      (__v8si)_mm256_setzero_si256());
8188 }
8189
/* Wraps __builtin_ia32_alignd128 with A and B passed as __v4si and imm as
 * int; the result is cast to __m128i.  The expansion is parenthesized as a
 * whole so that an operator following the macro call applies to the cast
 * result.
 * NOTE(review): imm is presumably an immediate element shift count --
 * confirm against the Intel intrinsics documentation. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))
8193
/* Blends _mm_alignr_epi32(A, B, imm) with W through
 * __builtin_ia32_selectd_128, using U (as __mmask8) as the selector; the
 * result is cast to __m128i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the aligned element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))
8198
/* Blends _mm_alignr_epi32(A, B, imm) with _mm_setzero_si128() through
 * __builtin_ia32_selectd_128, using U (as __mmask8) as the selector; the
 * result is cast to __m128i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))
8203
/* Wraps __builtin_ia32_alignd256 with A and B passed as __v8si and imm as
 * int; the result is cast to __m256i.  The expansion is parenthesized as a
 * whole so that an operator following the macro call applies to the cast
 * result.
 * NOTE(review): imm is presumably an immediate element shift count --
 * confirm against the Intel intrinsics documentation. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))
8207
/* Blends _mm256_alignr_epi32(A, B, imm) with W through
 * __builtin_ia32_selectd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the aligned element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))
8212
/* Blends _mm256_alignr_epi32(A, B, imm) with _mm256_setzero_si256() through
 * __builtin_ia32_selectd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
8217
/* Wraps __builtin_ia32_alignq128 with A and B passed as __v2di and imm as
 * int; the result is cast to __m128i.  The expansion is parenthesized as a
 * whole so that an operator following the macro call applies to the cast
 * result.
 * NOTE(review): imm is presumably an immediate element shift count --
 * confirm against the Intel intrinsics documentation. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))
8221
/* Blends _mm_alignr_epi64(A, B, imm) with W through
 * __builtin_ia32_selectq_128, using U (as __mmask8) as the selector; the
 * result is cast to __m128i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the aligned element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))
8226
/* Blends _mm_alignr_epi64(A, B, imm) with _mm_setzero_si128() through
 * __builtin_ia32_selectq_128, using U (as __mmask8) as the selector; the
 * result is cast to __m128i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
8231
/* Wraps __builtin_ia32_alignq256 with A and B passed as __v4di and imm as
 * int; the result is cast to __m256i.  The expansion is parenthesized as a
 * whole so that an operator following the macro call applies to the cast
 * result.
 * NOTE(review): imm is presumably an immediate element shift count --
 * confirm against the Intel intrinsics documentation. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))
8235
/* Blends _mm256_alignr_epi64(A, B, imm) with W through
 * __builtin_ia32_selectq_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the aligned element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))
8240
/* Blends _mm256_alignr_epi64(A, B, imm) with _mm256_setzero_si256() through
 * __builtin_ia32_selectq_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8245
8246 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8247 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8248 {
8249   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8250                                              (__v4sf)_mm_movehdup_ps(__A),
8251                                              (__v4sf)__W);
8252 }
8253
8254 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8255 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8256 {
8257   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8258                                              (__v4sf)_mm_movehdup_ps(__A),
8259                                              (__v4sf)_mm_setzero_ps());
8260 }
8261
8262 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8263 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8264 {
8265   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8266                                              (__v8sf)_mm256_movehdup_ps(__A),
8267                                              (__v8sf)__W);
8268 }
8269
8270 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8271 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8272 {
8273   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8274                                              (__v8sf)_mm256_movehdup_ps(__A),
8275                                              (__v8sf)_mm256_setzero_ps());
8276 }
8277
8278 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8279 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8280 {
8281   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8282                                              (__v4sf)_mm_moveldup_ps(__A),
8283                                              (__v4sf)__W);
8284 }
8285
8286 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8287 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8288 {
8289   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8290                                              (__v4sf)_mm_moveldup_ps(__A),
8291                                              (__v4sf)_mm_setzero_ps());
8292 }
8293
8294 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8295 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8296 {
8297   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8298                                              (__v8sf)_mm256_moveldup_ps(__A),
8299                                              (__v8sf)__W);
8300 }
8301
8302 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8303 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8304 {
8305   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8306                                              (__v8sf)_mm256_moveldup_ps(__A),
8307                                              (__v8sf)_mm256_setzero_ps());
8308 }
8309
/* Blends _mm256_shuffle_epi32(A, I) with W through
 * __builtin_ia32_selectd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))
8314
/* Blends _mm256_shuffle_epi32(A, I) with _mm256_setzero_si256() through
 * __builtin_ia32_selectd_256, using U (as __mmask8) as the selector; the
 * result is cast to __m256i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
8319
/* Blends _mm_shuffle_epi32(A, I) with W through __builtin_ia32_selectd_128,
 * using U (as __mmask8) as the selector; the result is cast to __m128i.  The
 * expansion is parenthesized as a whole so that an operator following the
 * macro call applies to the cast result.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps the element of W -- confirm against the builtin. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))
8324
/* Blends _mm_shuffle_epi32(A, I) with _mm_setzero_si128() through
 * __builtin_ia32_selectd_128, using U (as __mmask8) as the selector; the
 * result is cast to __m128i.  The expansion is parenthesized as a whole so
 * that an operator following the macro call applies to the cast result.
 * NOTE(review): presumably a clear bit in U yields a zero element -- confirm
 * against the builtin. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8329
8330 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8331 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8332 {
8333   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8334               (__v2df) __A,
8335               (__v2df) __W);
8336 }
8337
8338 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8339 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8340 {
8341   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8342               (__v2df) __A,
8343               (__v2df) _mm_setzero_pd ());
8344 }
8345
8346 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8347 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8348 {
8349   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8350               (__v4df) __A,
8351               (__v4df) __W);
8352 }
8353
8354 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8355 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8356 {
8357   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8358               (__v4df) __A,
8359               (__v4df) _mm256_setzero_pd ());
8360 }
8361
8362 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8363 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8364 {
8365   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8366              (__v4sf) __A,
8367              (__v4sf) __W);
8368 }
8369
8370 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8371 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8372 {
8373   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8374              (__v4sf) __A,
8375              (__v4sf) _mm_setzero_ps ());
8376 }
8377
8378 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8379 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8380 {
8381   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8382              (__v8sf) __A,
8383              (__v8sf) __W);
8384 }
8385
8386 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8387 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8388 {
8389   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8390              (__v8sf) __A,
8391              (__v8sf) _mm256_setzero_ps ());
8392 }
8393
8394 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8395 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8396 {
8397   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8398              (__v4sf) __W,
8399              (__mmask8) __U);
8400 }
8401
8402 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8403 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8404 {
8405   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8406              (__v4sf)
8407              _mm_setzero_ps (),
8408              (__mmask8) __U);
8409 }
8410
8411 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8412 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8413 {
8414   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8415                 (__v8sf) __W,
8416                 (__mmask8) __U);
8417 }
8418
8419 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8420 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8421 {
8422   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8423                 (__v8sf)
8424                 _mm256_setzero_ps (),
8425                 (__mmask8) __U);
8426 }
8427
8428 static __inline __m128i __DEFAULT_FN_ATTRS128
8429 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8430 {
8431   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8432                                                   (__v8hi) __W,
8433                                                   (__mmask8) __U);
8434 }
8435
8436 static __inline __m128i __DEFAULT_FN_ATTRS128
8437 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8438 {
8439   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8440                                                   (__v8hi) _mm_setzero_si128 (),
8441                                                   (__mmask8) __U);
8442 }
8443
/* Wraps __builtin_ia32_vcvtps2ph_mask with A passed as __v4sf, I as int, W as
 * __v8hi and U as __mmask8; the result is cast to __m128i.  The expansion is
 * parenthesized as a whole so that an operator following the macro call
 * applies to the cast result.
 * NOTE(review): I is presumably an immediate rounding-control value --
 * confirm against the Intel intrinsics documentation. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
8448
/* Wraps __builtin_ia32_vcvtps2ph_mask with A passed as __v4sf, I as int,
 * _mm_setzero_si128() as __v8hi and U as __mmask8; the result is cast to
 * __m128i.  The expansion is parenthesized as a whole so that an operator
 * following the macro call applies to the cast result.
 * NOTE(review): I is presumably an immediate rounding-control value --
 * confirm against the Intel intrinsics documentation. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
8453
8454 static __inline __m128i __DEFAULT_FN_ATTRS256
8455 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8456 {
8457   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8458                                                       (__v8hi) __W,
8459                                                       (__mmask8) __U);
8460 }
8461
8462 static __inline __m128i __DEFAULT_FN_ATTRS256
8463 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8464 {
8465   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8466                                                       (__v8hi) _mm_setzero_si128(),
8467                                                       (__mmask8) __U);
8468 }
/* Wraps __builtin_ia32_vcvtps2ph256_mask with A passed as __v8sf, I as int,
 * W as __v8hi and U as __mmask8; the result is cast to __m128i.  The
 * expansion is parenthesized as a whole so that an operator following the
 * macro call applies to the cast result.
 * NOTE(review): I is presumably an immediate rounding-control value --
 * confirm against the Intel intrinsics documentation. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
8473
/* Wraps __builtin_ia32_vcvtps2ph256_mask with A passed as __v8sf, I as int,
 * _mm_setzero_si128() as __v8hi and U as __mmask8; the result is cast to
 * __m128i.  The expansion is parenthesized as a whole so that an operator
 * following the macro call applies to the cast result.
 * NOTE(review): I is presumably an immediate rounding-control value --
 * confirm against the Intel intrinsics documentation. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
8478
8479
8480 #undef __DEFAULT_FN_ATTRS128
8481 #undef __DEFAULT_FN_ATTRS256
8482
8483 #endif /* __AVX512VLINTRIN_H */