/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
31 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
33 /* Doesn't require avx512vl, used in avx512dqintrin.h */
34 static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
35 _mm_setzero_di(void) {
36 return (__m128i)(__v2di){ 0LL, 0LL};
/* Integer compare: 128-bit vectors, signed 32-bit elements.
 *
 * Each macro forwards to _mm_cmp_epi32_mask (or, for the "_mask_" forms, to
 * _mm_mask_cmp_epi32_mask with k passed through unchanged) and supplies the
 * _MM_CMPINT_* predicate that matches its name. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 256-bit vectors, signed 32-bit elements.
 *
 * Each macro forwards to _mm256_cmp_epi32_mask (or, for the "_mask_" forms,
 * to _mm256_mask_cmp_epi32_mask with k passed through unchanged) and supplies
 * the _MM_CMPINT_* predicate that matches its name. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 128-bit vectors, unsigned 32-bit elements.
 *
 * Each macro forwards to _mm_cmp_epu32_mask (or, for the "_mask_" forms, to
 * _mm_mask_cmp_epu32_mask with k passed through unchanged) and supplies the
 * _MM_CMPINT_* predicate that matches its name. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 256-bit vectors, unsigned 32-bit elements.
 *
 * Each macro forwards to _mm256_cmp_epu32_mask (or, for the "_mask_" forms,
 * to _mm256_mask_cmp_epu32_mask with k passed through unchanged) and supplies
 * the _MM_CMPINT_* predicate that matches its name. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 128-bit vectors, signed 64-bit elements.
 *
 * Each macro forwards to _mm_cmp_epi64_mask (or, for the "_mask_" forms, to
 * _mm_mask_cmp_epi64_mask with k passed through unchanged) and supplies the
 * _MM_CMPINT_* predicate that matches its name. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 256-bit vectors, signed 64-bit elements.
 *
 * Each macro forwards to _mm256_cmp_epi64_mask (or, for the "_mask_" forms,
 * to _mm256_mask_cmp_epi64_mask with k passed through unchanged) and supplies
 * the _MM_CMPINT_* predicate that matches its name. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 128-bit vectors, unsigned 64-bit elements.
 *
 * Each macro forwards to _mm_cmp_epu64_mask (or, for the "_mask_" forms, to
 * _mm_mask_cmp_epu64_mask with k passed through unchanged) and supplies the
 * _MM_CMPINT_* predicate that matches its name. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Integer compare: 256-bit vectors, unsigned 64-bit elements.
 *
 * Each macro forwards to _mm256_cmp_epu64_mask (or, for the "_mask_" forms,
 * to _mm256_mask_cmp_epu64_mask with k passed through unchanged) and supplies
 * the _MM_CMPINT_* predicate that matches its name. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
241 static __inline__ __m256i __DEFAULT_FN_ATTRS
242 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
244 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
245 (__v8si)_mm256_add_epi32(__A, __B),
249 static __inline__ __m256i __DEFAULT_FN_ATTRS
250 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
252 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
253 (__v8si)_mm256_add_epi32(__A, __B),
254 (__v8si)_mm256_setzero_si256());
257 static __inline__ __m256i __DEFAULT_FN_ATTRS
258 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
260 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
261 (__v4di)_mm256_add_epi64(__A, __B),
265 static __inline__ __m256i __DEFAULT_FN_ATTRS
266 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
268 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
269 (__v4di)_mm256_add_epi64(__A, __B),
270 (__v4di)_mm256_setzero_si256());
273 static __inline__ __m256i __DEFAULT_FN_ATTRS
274 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
276 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
277 (__v8si)_mm256_sub_epi32(__A, __B),
281 static __inline__ __m256i __DEFAULT_FN_ATTRS
282 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
284 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
285 (__v8si)_mm256_sub_epi32(__A, __B),
286 (__v8si)_mm256_setzero_si256());
289 static __inline__ __m256i __DEFAULT_FN_ATTRS
290 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
293 (__v4di)_mm256_sub_epi64(__A, __B),
297 static __inline__ __m256i __DEFAULT_FN_ATTRS
298 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
301 (__v4di)_mm256_sub_epi64(__A, __B),
302 (__v4di)_mm256_setzero_si256());
305 static __inline__ __m128i __DEFAULT_FN_ATTRS
306 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
308 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
309 (__v4si)_mm_add_epi32(__A, __B),
313 static __inline__ __m128i __DEFAULT_FN_ATTRS
314 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
316 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
317 (__v4si)_mm_add_epi32(__A, __B),
318 (__v4si)_mm_setzero_si128());
321 static __inline__ __m128i __DEFAULT_FN_ATTRS
322 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
324 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
325 (__v2di)_mm_add_epi64(__A, __B),
329 static __inline__ __m128i __DEFAULT_FN_ATTRS
330 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
332 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
333 (__v2di)_mm_add_epi64(__A, __B),
334 (__v2di)_mm_setzero_si128());
337 static __inline__ __m128i __DEFAULT_FN_ATTRS
338 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
340 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
341 (__v4si)_mm_sub_epi32(__A, __B),
345 static __inline__ __m128i __DEFAULT_FN_ATTRS
346 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
348 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
349 (__v4si)_mm_sub_epi32(__A, __B),
350 (__v4si)_mm_setzero_si128());
353 static __inline__ __m128i __DEFAULT_FN_ATTRS
354 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
356 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
357 (__v2di)_mm_sub_epi64(__A, __B),
361 static __inline__ __m128i __DEFAULT_FN_ATTRS
362 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
364 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
365 (__v2di)_mm_sub_epi64(__A, __B),
366 (__v2di)_mm_setzero_si128());
369 static __inline__ __m256i __DEFAULT_FN_ATTRS
370 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
372 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
373 (__v4di)_mm256_mul_epi32(__X, __Y),
377 static __inline__ __m256i __DEFAULT_FN_ATTRS
378 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
380 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
381 (__v4di)_mm256_mul_epi32(__X, __Y),
382 (__v4di)_mm256_setzero_si256());
385 static __inline__ __m128i __DEFAULT_FN_ATTRS
386 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
388 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
389 (__v2di)_mm_mul_epi32(__X, __Y),
393 static __inline__ __m128i __DEFAULT_FN_ATTRS
394 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
396 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
397 (__v2di)_mm_mul_epi32(__X, __Y),
398 (__v2di)_mm_setzero_si128());
401 static __inline__ __m256i __DEFAULT_FN_ATTRS
402 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
404 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
405 (__v4di)_mm256_mul_epu32(__X, __Y),
409 static __inline__ __m256i __DEFAULT_FN_ATTRS
410 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
412 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
413 (__v4di)_mm256_mul_epu32(__X, __Y),
414 (__v4di)_mm256_setzero_si256());
417 static __inline__ __m128i __DEFAULT_FN_ATTRS
418 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
420 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
421 (__v2di)_mm_mul_epu32(__X, __Y),
425 static __inline__ __m128i __DEFAULT_FN_ATTRS
426 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
428 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
429 (__v2di)_mm_mul_epu32(__X, __Y),
430 (__v2di)_mm_setzero_si128());
433 static __inline__ __m256i __DEFAULT_FN_ATTRS
434 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
436 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
437 (__v8si)_mm256_mullo_epi32(__A, __B),
438 (__v8si)_mm256_setzero_si256());
441 static __inline__ __m256i __DEFAULT_FN_ATTRS
442 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
444 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
445 (__v8si)_mm256_mullo_epi32(__A, __B),
449 static __inline__ __m128i __DEFAULT_FN_ATTRS
450 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
452 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
453 (__v4si)_mm_mullo_epi32(__A, __B),
454 (__v4si)_mm_setzero_si128());
457 static __inline__ __m128i __DEFAULT_FN_ATTRS
458 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
460 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
461 (__v4si)_mm_mullo_epi32(__A, __B),
465 static __inline__ __m256i __DEFAULT_FN_ATTRS
466 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
468 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
469 (__v8si)_mm256_and_si256(__A, __B),
473 static __inline__ __m256i __DEFAULT_FN_ATTRS
474 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
476 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
479 static __inline__ __m128i __DEFAULT_FN_ATTRS
480 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
482 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
483 (__v4si)_mm_and_si128(__A, __B),
487 static __inline__ __m128i __DEFAULT_FN_ATTRS
488 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
490 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
493 static __inline__ __m256i __DEFAULT_FN_ATTRS
494 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
496 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
497 (__v8si)_mm256_andnot_si256(__A, __B),
501 static __inline__ __m256i __DEFAULT_FN_ATTRS
502 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
504 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
508 static __inline__ __m128i __DEFAULT_FN_ATTRS
509 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
511 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
512 (__v4si)_mm_andnot_si128(__A, __B),
516 static __inline__ __m128i __DEFAULT_FN_ATTRS
517 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
519 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
522 static __inline__ __m256i __DEFAULT_FN_ATTRS
523 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
525 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
526 (__v8si)_mm256_or_si256(__A, __B),
530 static __inline__ __m256i __DEFAULT_FN_ATTRS
531 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
533 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
536 static __inline__ __m128i __DEFAULT_FN_ATTRS
537 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
539 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
540 (__v4si)_mm_or_si128(__A, __B),
544 static __inline__ __m128i __DEFAULT_FN_ATTRS
545 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
547 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
550 static __inline__ __m256i __DEFAULT_FN_ATTRS
551 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
553 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
554 (__v8si)_mm256_xor_si256(__A, __B),
558 static __inline__ __m256i __DEFAULT_FN_ATTRS
559 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
561 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
564 static __inline__ __m128i __DEFAULT_FN_ATTRS
565 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
568 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
569 (__v4si)_mm_xor_si128(__A, __B),
573 static __inline__ __m128i __DEFAULT_FN_ATTRS
574 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
576 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
579 static __inline__ __m256i __DEFAULT_FN_ATTRS
580 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
582 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
583 (__v4di)_mm256_and_si256(__A, __B),
587 static __inline__ __m256i __DEFAULT_FN_ATTRS
588 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
590 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
593 static __inline__ __m128i __DEFAULT_FN_ATTRS
594 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
596 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
597 (__v2di)_mm_and_si128(__A, __B),
601 static __inline__ __m128i __DEFAULT_FN_ATTRS
602 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
604 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
607 static __inline__ __m256i __DEFAULT_FN_ATTRS
608 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
610 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
611 (__v4di)_mm256_andnot_si256(__A, __B),
615 static __inline__ __m256i __DEFAULT_FN_ATTRS
616 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
618 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
622 static __inline__ __m128i __DEFAULT_FN_ATTRS
623 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
625 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
626 (__v2di)_mm_andnot_si128(__A, __B),
630 static __inline__ __m128i __DEFAULT_FN_ATTRS
631 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
633 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
636 static __inline__ __m256i __DEFAULT_FN_ATTRS
637 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
639 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
640 (__v4di)_mm256_or_si256(__A, __B),
644 static __inline__ __m256i __DEFAULT_FN_ATTRS
645 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
647 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
650 static __inline__ __m128i __DEFAULT_FN_ATTRS
651 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
653 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
654 (__v2di)_mm_or_si128(__A, __B),
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
659 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
661 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
664 static __inline__ __m256i __DEFAULT_FN_ATTRS
665 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
667 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
668 (__v4di)_mm256_xor_si256(__A, __B),
672 static __inline__ __m256i __DEFAULT_FN_ATTRS
673 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
675 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
678 static __inline__ __m128i __DEFAULT_FN_ATTRS
679 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
682 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
683 (__v2di)_mm_xor_si128(__A, __B),
687 static __inline__ __m128i __DEFAULT_FN_ATTRS
688 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
690 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Generic 32-bit integer compares producing an __mmask8.
 *
 * a, b : vectors to compare (cast to the 32-bit element vector type).
 * p    : comparison predicate, passed to the builtin as an int.
 * m    : ("_mask_" forms only) mask operand handed to the builtin.
 *
 * The unmasked forms hand the builtin an all-ones mask.
 * NOTE(review): the final (mask) operand of every macro was restored from the
 * macro's parameter list and naming; verify against the compiler's builtin
 * signature. */
#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })
/* Generic 64-bit integer compares producing an __mmask8.
 *
 * a, b : vectors to compare (cast to the 64-bit element vector type).
 * p    : comparison predicate, passed to the builtin as an int.
 * m    : ("_mask_" forms only) mask operand handed to the builtin.
 *
 * The unmasked forms hand the builtin an all-ones mask.
 * NOTE(review): the final (mask) operand of every macro was restored from the
 * macro's parameter list and naming; verify against the compiler's builtin
 * signature. */
#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })
/* Generic floating-point compares producing an __mmask8.
 *
 * a, b : vectors to compare (cast to the float/double element vector type).
 * p    : comparison predicate, passed to the builtin as an int.
 * m    : ("_mask_" forms only) mask operand handed to the builtin.
 *
 * The unmasked forms hand the builtin an all-ones mask.
 * NOTE(review): the final (mask) operand of every macro was restored from the
 * macro's parameter list and naming; verify against the compiler's builtin
 * signature. */
#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m)); })
813 static __inline__ __m128d __DEFAULT_FN_ATTRS
814 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
816 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
822 static __inline__ __m128d __DEFAULT_FN_ATTRS
823 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
825 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
831 static __inline__ __m128d __DEFAULT_FN_ATTRS
832 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
834 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
840 static __inline__ __m128d __DEFAULT_FN_ATTRS
841 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
843 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
849 static __inline__ __m128d __DEFAULT_FN_ATTRS
850 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
852 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
858 static __inline__ __m128d __DEFAULT_FN_ATTRS
859 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
861 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
867 static __inline__ __m128d __DEFAULT_FN_ATTRS
868 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
870 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
876 static __inline__ __m128d __DEFAULT_FN_ATTRS
877 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
879 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
885 static __inline__ __m256d __DEFAULT_FN_ATTRS
886 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
888 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
894 static __inline__ __m256d __DEFAULT_FN_ATTRS
895 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
897 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
903 static __inline__ __m256d __DEFAULT_FN_ATTRS
904 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
906 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
912 static __inline__ __m256d __DEFAULT_FN_ATTRS
913 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
915 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
921 static __inline__ __m256d __DEFAULT_FN_ATTRS
922 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
924 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
930 static __inline__ __m256d __DEFAULT_FN_ATTRS
931 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
933 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
939 static __inline__ __m256d __DEFAULT_FN_ATTRS
940 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
942 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
948 static __inline__ __m256d __DEFAULT_FN_ATTRS
949 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
951 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
/* Masked FMA wrappers for __m128 operands.
 *
 * Each function forwards to one of the __builtin_ia32_vfmaddps128_mask,
 * _mask3 or _maskz builtins, passing __A cast to __v4sf as the first
 * argument. The fnmadd and fnmsub variants negate __A before the call.
 * The remaining builtin arguments are not shown in this listing; presumably
 * they are __B, __C (negated for the fmsub/fnmsub forms) and the mask __U --
 * TODO confirm against the full header.
 */
957 static __inline__ __m128 __DEFAULT_FN_ATTRS
958 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
960 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
966 static __inline__ __m128 __DEFAULT_FN_ATTRS
967 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
969 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
975 static __inline__ __m128 __DEFAULT_FN_ATTRS
976 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
978 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
984 static __inline__ __m128 __DEFAULT_FN_ATTRS
985 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
987 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
993 static __inline__ __m128 __DEFAULT_FN_ATTRS
994 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
996 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
/* The negated-product variants below pass -__A to the same vfmadd builtins. */
1002 static __inline__ __m128 __DEFAULT_FN_ATTRS
1003 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1005 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
1011 static __inline__ __m128 __DEFAULT_FN_ATTRS
1012 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1014 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1020 static __inline__ __m128 __DEFAULT_FN_ATTRS
1021 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1023 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
/* Masked FMA wrappers for __m256 operands.
 *
 * Each function forwards to one of the __builtin_ia32_vfmaddps256_mask,
 * _mask3 or _maskz builtins, passing __A cast to __v8sf as the first
 * argument. The fnmadd and fnmsub variants negate __A before the call.
 * The remaining builtin arguments are not shown in this listing; presumably
 * they are __B, __C (negated for the fmsub/fnmsub forms) and the mask __U --
 * TODO confirm against the full header.
 */
1029 static __inline__ __m256 __DEFAULT_FN_ATTRS
1030 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1032 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1038 static __inline__ __m256 __DEFAULT_FN_ATTRS
1039 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1041 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
1047 static __inline__ __m256 __DEFAULT_FN_ATTRS
1048 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1050 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1056 static __inline__ __m256 __DEFAULT_FN_ATTRS
1057 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1059 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1065 static __inline__ __m256 __DEFAULT_FN_ATTRS
1066 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1068 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
/* The negated-product variants below pass -__A to the same vfmadd builtins. */
1074 static __inline__ __m256 __DEFAULT_FN_ATTRS
1075 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1077 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
1083 static __inline__ __m256 __DEFAULT_FN_ATTRS
1084 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1086 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1092 static __inline__ __m256 __DEFAULT_FN_ATTRS
1093 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1095 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
/* Masked fmaddsub / fmsubadd wrappers for __m128d operands.
 *
 * All five functions, including the fmsubadd ones, forward to the
 * __builtin_ia32_vfmaddsubpd128_mask, _mask3 or _maskz builtins with __A
 * cast to __v2df as the first argument. The remaining arguments are not
 * shown in this listing; presumably the fmsubadd forms pass a negated __C
 * to obtain the opposite add/subtract pattern -- TODO confirm.
 */
1101 static __inline__ __m128d __DEFAULT_FN_ATTRS
1102 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1104 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1110 static __inline__ __m128d __DEFAULT_FN_ATTRS
1111 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1113 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
1120 static __inline__ __m128d __DEFAULT_FN_ATTRS
1121 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1123 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1130 static __inline__ __m128d __DEFAULT_FN_ATTRS
1131 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1133 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1139 static __inline__ __m128d __DEFAULT_FN_ATTRS
1140 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1142 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
/* Masked fmaddsub / fmsubadd wrappers for __m256d operands.
 *
 * All five functions, including the fmsubadd ones, forward to the
 * __builtin_ia32_vfmaddsubpd256_mask, _mask3 or _maskz builtins with __A
 * cast to __v4df as the first argument. The remaining arguments are not
 * shown in this listing; presumably the fmsubadd forms pass a negated __C
 * to obtain the opposite add/subtract pattern -- TODO confirm.
 */
1149 static __inline__ __m256d __DEFAULT_FN_ATTRS
1150 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1152 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1158 static __inline__ __m256d __DEFAULT_FN_ATTRS
1159 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1161 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
1168 static __inline__ __m256d __DEFAULT_FN_ATTRS
1169 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1171 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1178 static __inline__ __m256d __DEFAULT_FN_ATTRS
1179 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1181 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1187 static __inline__ __m256d __DEFAULT_FN_ATTRS
1188 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1190 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
/* Masked fmaddsub / fmsubadd wrappers for __m128 operands.
 *
 * All five functions, including the fmsubadd ones, forward to the
 * __builtin_ia32_vfmaddsubps128_mask, _mask3 or _maskz builtins with __A
 * cast to __v4sf as the first argument. The remaining arguments are not
 * shown in this listing; presumably the fmsubadd forms pass a negated __C
 * to obtain the opposite add/subtract pattern -- TODO confirm.
 */
1197 static __inline__ __m128 __DEFAULT_FN_ATTRS
1198 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1200 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1206 static __inline__ __m128 __DEFAULT_FN_ATTRS
1207 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1209 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
1215 static __inline__ __m128 __DEFAULT_FN_ATTRS
1216 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1218 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1224 static __inline__ __m128 __DEFAULT_FN_ATTRS
1225 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1227 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1233 static __inline__ __m128 __DEFAULT_FN_ATTRS
1234 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1236 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
/* Masked fmaddsub / fmsubadd wrappers for __m256 operands.
 *
 * All five functions, including the fmsubadd ones, forward to the
 * __builtin_ia32_vfmaddsubps256_mask, _mask3 or _maskz builtins with __A
 * cast to __v8sf as the first argument. The remaining arguments are not
 * shown in this listing; presumably the fmsubadd forms pass a negated __C
 * to obtain the opposite add/subtract pattern -- TODO confirm.
 */
1242 static __inline__ __m256 __DEFAULT_FN_ATTRS
/* NOTE(review): this parameter list continues on a line that is not shown
 * here; by analogy with the siblings it presumably ends with `__m256 __C)`. */
1243 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1246 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1252 static __inline__ __m256 __DEFAULT_FN_ATTRS
1253 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1255 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
1261 static __inline__ __m256 __DEFAULT_FN_ATTRS
1262 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1264 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1270 static __inline__ __m256 __DEFAULT_FN_ATTRS
1271 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1273 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1279 static __inline__ __m256 __DEFAULT_FN_ATTRS
1280 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1282 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
/* mask3 forms of fmsub for all four vector types.
 *
 * Unlike the mask/maskz fmsub wrappers, these call dedicated
 * __builtin_ia32_vfmsub{pd,ps}{128,256}_mask3 builtins, with __A passed
 * un-negated as the first argument. The remaining arguments are not shown
 * in this listing (presumably __B, __C and the mask __U -- TODO confirm).
 */
1288 static __inline__ __m128d __DEFAULT_FN_ATTRS
1289 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1291 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
1297 static __inline__ __m256d __DEFAULT_FN_ATTRS
1298 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1300 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
1306 static __inline__ __m128 __DEFAULT_FN_ATTRS
1307 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1309 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
1315 static __inline__ __m256 __DEFAULT_FN_ATTRS
1316 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1318 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
/* mask3 forms of fmsubadd for all four vector types.
 *
 * These call dedicated __builtin_ia32_vfmsubadd{pd,ps}{128,256}_mask3
 * builtins with __A as the first argument. The remaining arguments are not
 * shown in this listing (presumably __B, __C and the mask __U -- TODO
 * confirm).
 */
1324 static __inline__ __m128d __DEFAULT_FN_ATTRS
1325 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1327 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
1334 static __inline__ __m256d __DEFAULT_FN_ATTRS
1335 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1337 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
1344 static __inline__ __m128 __DEFAULT_FN_ATTRS
1345 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1347 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
1353 static __inline__ __m256 __DEFAULT_FN_ATTRS
1354 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1356 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
/* mask forms of fnmadd for all four vector types.
 *
 * These call dedicated __builtin_ia32_vfnmadd{pd,ps}{128,256}_mask builtins
 * with __A passed un-negated as the first argument (the negation is
 * presumably performed by the builtin itself). The remaining arguments are
 * not shown in this listing -- TODO confirm against the full header.
 */
1362 static __inline__ __m128d __DEFAULT_FN_ATTRS
1363 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1365 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
1371 static __inline__ __m256d __DEFAULT_FN_ATTRS
1372 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1374 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
1380 static __inline__ __m128 __DEFAULT_FN_ATTRS
1381 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1383 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
1389 static __inline__ __m256 __DEFAULT_FN_ATTRS
1390 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1392 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
/* mask and mask3 forms of fnmsub for all four vector types.
 *
 * These call dedicated __builtin_ia32_vfnmsub{pd,ps}{128,256}_mask and
 * _mask3 builtins with __A passed un-negated as the first argument (the
 * negation is presumably performed by the builtin itself). The remaining
 * arguments are not shown in this listing -- TODO confirm against the full
 * header.
 */
1398 static __inline__ __m128d __DEFAULT_FN_ATTRS
1399 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1401 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
1407 static __inline__ __m128d __DEFAULT_FN_ATTRS
1408 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1410 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
1416 static __inline__ __m256d __DEFAULT_FN_ATTRS
1417 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1419 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
1425 static __inline__ __m256d __DEFAULT_FN_ATTRS
1426 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1428 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
1434 static __inline__ __m128 __DEFAULT_FN_ATTRS
1435 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1437 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
1443 static __inline__ __m128 __DEFAULT_FN_ATTRS
1444 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1446 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
1452 static __inline__ __m256 __DEFAULT_FN_ATTRS
1453 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1455 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
1461 static __inline__ __m256 __DEFAULT_FN_ATTRS
1462 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1464 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
/* Masked addition for __m128d, __m256d, __m128 and __m256.
 *
 * Each wrapper computes the full-width sum with the unmasked intrinsic
 * (_mm_add_pd, _mm256_add_pd, _mm_add_ps, _mm256_add_ps) and hands the mask
 * __U plus that sum to the matching __builtin_ia32_select{pd,ps}_{128,256}
 * builtin. The maskz forms pass a zero vector as the third select operand.
 * For the mask forms the third operand is not shown in this listing;
 * presumably it is the pass-through vector __W -- TODO confirm.
 */
1470 static __inline__ __m128d __DEFAULT_FN_ATTRS
1471 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1472 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1473 (__v2df)_mm_add_pd(__A, __B),
1477 static __inline__ __m128d __DEFAULT_FN_ATTRS
1478 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1479 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1480 (__v2df)_mm_add_pd(__A, __B),
1481 (__v2df)_mm_setzero_pd());
1484 static __inline__ __m256d __DEFAULT_FN_ATTRS
1485 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1486 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1487 (__v4df)_mm256_add_pd(__A, __B),
1491 static __inline__ __m256d __DEFAULT_FN_ATTRS
1492 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1493 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1494 (__v4df)_mm256_add_pd(__A, __B),
1495 (__v4df)_mm256_setzero_pd());
1498 static __inline__ __m128 __DEFAULT_FN_ATTRS
1499 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1500 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1501 (__v4sf)_mm_add_ps(__A, __B),
1505 static __inline__ __m128 __DEFAULT_FN_ATTRS
1506 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1507 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1508 (__v4sf)_mm_add_ps(__A, __B),
1509 (__v4sf)_mm_setzero_ps());
1512 static __inline__ __m256 __DEFAULT_FN_ATTRS
1513 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1514 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1515 (__v8sf)_mm256_add_ps(__A, __B),
1519 static __inline__ __m256 __DEFAULT_FN_ATTRS
1520 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1521 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1522 (__v8sf)_mm256_add_ps(__A, __B),
1523 (__v8sf)_mm256_setzero_ps());
/* Mask-controlled blends of two vectors (__A and __W).
 *
 * Each wrapper calls the __builtin_ia32_select{d,q,pd,ps}_{128,256} builtin
 * that matches its element type and width, with the mask __U as the first
 * argument. The two vector operands are passed on lines that are not shown
 * in this listing, so their order cannot be verified here -- TODO confirm
 * which of __A / __W is chosen for set mask bits.
 */
1526 static __inline__ __m128i __DEFAULT_FN_ATTRS
1527 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1528 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1533 static __inline__ __m256i __DEFAULT_FN_ATTRS
1534 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1535 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1540 static __inline__ __m128d __DEFAULT_FN_ATTRS
1541 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1542 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1547 static __inline__ __m256d __DEFAULT_FN_ATTRS
1548 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1549 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1554 static __inline__ __m128 __DEFAULT_FN_ATTRS
1555 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1556 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1561 static __inline__ __m256 __DEFAULT_FN_ATTRS
1562 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1563 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1568 static __inline__ __m128i __DEFAULT_FN_ATTRS
1569 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1570 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1575 static __inline__ __m256i __DEFAULT_FN_ATTRS
1576 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1577 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* Register-to-register compress wrappers (pd, epi64, ps, epi32; 128 and
 * 256 bit).
 *
 * Both the mask and the maskz form of each intrinsic call the same
 * __builtin_ia32_compress{df,di,sf,si}{128,256}_mask builtin with __A as
 * the first argument. Where the line is visible, the maskz forms pass a
 * zero vector (_mm*_setzero_*) as the next operand; the mask forms
 * presumably pass __W there instead, followed by the mask __U -- those
 * lines are not shown in this listing, TODO confirm.
 */
1582 static __inline__ __m128d __DEFAULT_FN_ATTRS
1583 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1584 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1589 static __inline__ __m128d __DEFAULT_FN_ATTRS
1590 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1591 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1597 static __inline__ __m256d __DEFAULT_FN_ATTRS
1598 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1599 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1604 static __inline__ __m256d __DEFAULT_FN_ATTRS
1605 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1606 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1608 _mm256_setzero_pd (),
/* 64-bit integer element variants. */
1612 static __inline__ __m128i __DEFAULT_FN_ATTRS
1613 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1614 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1619 static __inline__ __m128i __DEFAULT_FN_ATTRS
1620 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1621 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1623 _mm_setzero_si128 (),
1627 static __inline__ __m256i __DEFAULT_FN_ATTRS
1628 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1629 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1634 static __inline__ __m256i __DEFAULT_FN_ATTRS
1635 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1636 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1638 _mm256_setzero_si256 (),
/* Single-precision variants. */
1642 static __inline__ __m128 __DEFAULT_FN_ATTRS
1643 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1644 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1649 static __inline__ __m128 __DEFAULT_FN_ATTRS
1650 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1651 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1657 static __inline__ __m256 __DEFAULT_FN_ATTRS
1658 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1659 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1664 static __inline__ __m256 __DEFAULT_FN_ATTRS
1665 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1666 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1668 _mm256_setzero_ps (),
/* 32-bit integer element variants. */
1672 static __inline__ __m128i __DEFAULT_FN_ATTRS
1673 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1674 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1679 static __inline__ __m128i __DEFAULT_FN_ATTRS
1680 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1681 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1683 _mm_setzero_si128 (),
1687 static __inline__ __m256i __DEFAULT_FN_ATTRS
1688 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1689 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1694 static __inline__ __m256i __DEFAULT_FN_ATTRS
1695 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1696 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1698 _mm256_setzero_si256 (),
/* Compress-and-store wrappers (pd, epi64, ps, epi32; 128 and 256 bit).
 *
 * Each function returns void and calls the matching
 * __builtin_ia32_compressstore{df,di,sf,si}{128,256}_mask builtin, passing
 * the destination pointer __P cast to a pointer to the corresponding vector
 * type as the first argument. The remaining arguments are not shown in this
 * listing (presumably the source vector __A and the mask __U -- TODO
 * confirm).
 */
1702 static __inline__ void __DEFAULT_FN_ATTRS
1703 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1704 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1709 static __inline__ void __DEFAULT_FN_ATTRS
1710 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1711 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1716 static __inline__ void __DEFAULT_FN_ATTRS
1717 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1718 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1723 static __inline__ void __DEFAULT_FN_ATTRS
1724 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1725 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1730 static __inline__ void __DEFAULT_FN_ATTRS
1731 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1732 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1737 static __inline__ void __DEFAULT_FN_ATTRS
1738 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1739 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1744 static __inline__ void __DEFAULT_FN_ATTRS
1745 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1746 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1751 static __inline__ void __DEFAULT_FN_ATTRS
1752 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1753 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
/* Masked conversions from an __m128i source to __m128d / __m256d.
 *
 * Each wrapper performs the full conversion with the unmasked intrinsic
 * (_mm_cvtepi32_pd or _mm256_cvtepi32_pd) and passes the mask __U plus that
 * result to __builtin_ia32_selectpd_128 / _256. The maskz forms pass a zero
 * vector as the third select operand. For the mask forms the third operand
 * is not shown in this listing; presumably it is __W -- TODO confirm.
 */
1758 static __inline__ __m128d __DEFAULT_FN_ATTRS
1759 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1760 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1761 (__v2df)_mm_cvtepi32_pd(__A),
1765 static __inline__ __m128d __DEFAULT_FN_ATTRS
1766 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1767 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1768 (__v2df)_mm_cvtepi32_pd(__A),
1769 (__v2df)_mm_setzero_pd());
1772 static __inline__ __m256d __DEFAULT_FN_ATTRS
1773 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1774 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1775 (__v4df)_mm256_cvtepi32_pd(__A),
1779 static __inline__ __m256d __DEFAULT_FN_ATTRS
1780 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1781 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1782 (__v4df)_mm256_cvtepi32_pd(__A),
1783 (__v4df)_mm256_setzero_pd());
/* mask form of the __m128i -> __m128 conversion.
 * Calls __builtin_ia32_cvtdq2ps128_mask with __A cast to __v4si as the first
 * argument. The remaining arguments are not shown in this listing
 * (presumably the pass-through vector __W and the mask __U -- TODO confirm).
 */
1786 static __inline__ __m128 __DEFAULT_FN_ATTRS
1787 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1788 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
/* maskz form of the __m128i -> __m128 conversion.
 * Calls __builtin_ia32_cvtdq2ps128_mask with __A cast to __v4si as the first
 * argument; the remaining arguments are not shown in this listing
 * (presumably a zero vector and the mask __U -- TODO confirm).
 *
 * __U is an __mmask8, like the mask of every other 128/256-bit intrinsic in
 * this header; it was previously mis-declared as __mmask16.
 */
1793 static __inline__ __m128 __DEFAULT_FN_ATTRS
1794 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1795 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
/* mask form of the __m256i -> __m256 conversion.
 * Calls __builtin_ia32_cvtdq2ps256_mask with __A cast to __v8si as the first
 * argument. The remaining arguments are not shown in this listing
 * (presumably the pass-through vector __W and the mask __U -- TODO confirm).
 */
1801 static __inline__ __m256 __DEFAULT_FN_ATTRS
1802 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1803 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
/* maskz form of the __m256i -> __m256 conversion.
 * Calls __builtin_ia32_cvtdq2ps256_mask with __A cast to __v8si as the first
 * argument and a zero vector as a later operand; the final argument is not
 * shown in this listing (presumably the mask __U -- TODO confirm).
 *
 * __U is an __mmask8, like the mask of every other 128/256-bit intrinsic in
 * this header; it was previously mis-declared as __mmask16.
 */
1808 static __inline__ __m256 __DEFAULT_FN_ATTRS
1809 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1810 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1812 _mm256_setzero_ps (),
/* Masked conversions from __m128d / __m256d to an __m128i result.
 *
 * The mask and maskz forms share one builtin per width:
 * __builtin_ia32_cvtpd2dq128_mask for __m128d and
 * __builtin_ia32_cvtpd2dq256_mask for __m256d, with __A as the first
 * argument. The maskz forms pass _mm_setzero_si128() as a later operand;
 * the mask forms presumably pass __W there. The mask argument line is not
 * shown in this listing -- TODO confirm.
 */
1816 static __inline__ __m128i __DEFAULT_FN_ATTRS
1817 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1818 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1823 static __inline__ __m128i __DEFAULT_FN_ATTRS
1824 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1825 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1827 _mm_setzero_si128 (),
1831 static __inline__ __m128i __DEFAULT_FN_ATTRS
1832 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1833 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1838 static __inline__ __m128i __DEFAULT_FN_ATTRS
1839 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1840 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1842 _mm_setzero_si128 (),
/* Masked conversions from __m128d / __m256d to an __m128 result.
 *
 * The 128-bit forms call __builtin_ia32_cvtpd2ps_mask (note: no "128" in
 * the builtin name) and the 256-bit forms call
 * __builtin_ia32_cvtpd2ps256_mask, both with __A as the first argument.
 * The remaining arguments are not shown in this listing; presumably __W
 * (mask forms) or a zero vector (maskz forms) followed by the mask __U --
 * TODO confirm.
 */
1846 static __inline__ __m128 __DEFAULT_FN_ATTRS
1847 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1848 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1853 static __inline__ __m128 __DEFAULT_FN_ATTRS
1854 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1855 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1861 static __inline__ __m128 __DEFAULT_FN_ATTRS
1862 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
1863 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
1868 static __inline__ __m128 __DEFAULT_FN_ATTRS
1869 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
1870 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
/* Conversions from __m128d / __m256d to an __m128i result via the
 * cvtpd2udq builtins (unmasked, mask and maskz forms).
 *
 * All three forms of a given width call the same builtin,
 * __builtin_ia32_cvtpd2udq128_mask or __builtin_ia32_cvtpd2udq256_mask,
 * with __A as the first argument. The unmasked and maskz forms pass
 * _mm_setzero_si128() as a later operand; the mask forms presumably pass
 * __W. The mask argument line is not shown in this listing -- presumably
 * the unmasked forms supply an all-ones mask, TODO confirm.
 */
1876 static __inline__ __m128i __DEFAULT_FN_ATTRS
1877 _mm_cvtpd_epu32 (__m128d __A) {
1878 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1880 _mm_setzero_si128 (),
1884 static __inline__ __m128i __DEFAULT_FN_ATTRS
1885 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1886 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1891 static __inline__ __m128i __DEFAULT_FN_ATTRS
1892 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
1893 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1895 _mm_setzero_si128 (),
1899 static __inline__ __m128i __DEFAULT_FN_ATTRS
1900 _mm256_cvtpd_epu32 (__m256d __A) {
1901 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1903 _mm_setzero_si128 (),
1907 static __inline__ __m128i __DEFAULT_FN_ATTRS
1908 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
1909 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1914 static __inline__ __m128i __DEFAULT_FN_ATTRS
1915 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
1916 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1918 _mm_setzero_si128 (),
/* Masked conversions from __m128 / __m256 to __m128i / __m256i.
 *
 * The mask and maskz forms share one builtin per width:
 * __builtin_ia32_cvtps2dq128_mask and __builtin_ia32_cvtps2dq256_mask, with
 * __A as the first argument. The maskz forms pass a zero integer vector as
 * a later operand; the mask forms presumably pass __W there. The mask
 * argument line is not shown in this listing -- TODO confirm.
 */
1922 static __inline__ __m128i __DEFAULT_FN_ATTRS
1923 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
1924 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
1929 static __inline__ __m128i __DEFAULT_FN_ATTRS
1930 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
1931 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
1933 _mm_setzero_si128 (),
1937 static __inline__ __m256i __DEFAULT_FN_ATTRS
1938 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
1939 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
1944 static __inline__ __m256i __DEFAULT_FN_ATTRS
1945 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
1946 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
1948 _mm256_setzero_si256 (),
/* Masked conversions from an __m128 source to __m128d / __m256d.
 *
 * The mask and maskz forms share one builtin per result width:
 * __builtin_ia32_cvtps2pd128_mask and __builtin_ia32_cvtps2pd256_mask, with
 * __A cast to __v4sf as the first argument. _mm256_maskz_cvtps_pd visibly
 * passes _mm256_setzero_pd() as a later operand; the other trailing
 * arguments are not shown in this listing (presumably __W or a zero
 * vector, then the mask __U -- TODO confirm).
 */
1952 static __inline__ __m128d __DEFAULT_FN_ATTRS
1953 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
1954 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1959 static __inline__ __m128d __DEFAULT_FN_ATTRS
1960 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
1961 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1967 static __inline__ __m256d __DEFAULT_FN_ATTRS
1968 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
1969 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1974 static __inline__ __m256d __DEFAULT_FN_ATTRS
1975 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
1976 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1978 _mm256_setzero_pd (),
/* Conversions from __m128 / __m256 to __m128i / __m256i via the cvtps2udq
 * builtins (unmasked, mask and maskz forms).
 *
 * All three forms of a given width call the same builtin,
 * __builtin_ia32_cvtps2udq128_mask or __builtin_ia32_cvtps2udq256_mask,
 * with __A as the first argument. The unmasked and maskz forms pass a zero
 * integer vector as a later operand; the mask forms presumably pass __W.
 * The mask argument line is not shown in this listing -- presumably the
 * unmasked forms supply an all-ones mask, TODO confirm.
 */
1982 static __inline__ __m128i __DEFAULT_FN_ATTRS
1983 _mm_cvtps_epu32 (__m128 __A) {
1984 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1986 _mm_setzero_si128 (),
1990 static __inline__ __m128i __DEFAULT_FN_ATTRS
1991 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
1992 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
1997 static __inline__ __m128i __DEFAULT_FN_ATTRS
1998 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
1999 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2001 _mm_setzero_si128 (),
2005 static __inline__ __m256i __DEFAULT_FN_ATTRS
2006 _mm256_cvtps_epu32 (__m256 __A) {
2007 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2009 _mm256_setzero_si256 (),
2013 static __inline__ __m256i __DEFAULT_FN_ATTRS
2014 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2015 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2020 static __inline__ __m256i __DEFAULT_FN_ATTRS
2021 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2022 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2024 _mm256_setzero_si256 (),
/* Masked cvttpd conversions from __m128d / __m256d to an __m128i result.
 *
 * The mask and maskz forms share one builtin per width:
 * __builtin_ia32_cvttpd2dq128_mask and __builtin_ia32_cvttpd2dq256_mask,
 * with __A as the first argument. The maskz forms pass _mm_setzero_si128()
 * as a later operand; the mask forms presumably pass __W there. The mask
 * argument line is not shown in this listing -- TODO confirm.
 */
2028 static __inline__ __m128i __DEFAULT_FN_ATTRS
2029 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2030 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2035 static __inline__ __m128i __DEFAULT_FN_ATTRS
2036 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2037 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2039 _mm_setzero_si128 (),
2043 static __inline__ __m128i __DEFAULT_FN_ATTRS
2044 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2045 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2050 static __inline__ __m128i __DEFAULT_FN_ATTRS
2051 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2052 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2054 _mm_setzero_si128 (),
/* cvttpd conversions from __m128d / __m256d to an __m128i result via the
 * cvttpd2udq builtins (unmasked, mask and maskz forms).
 *
 * All three forms of a given width call the same builtin,
 * __builtin_ia32_cvttpd2udq128_mask or __builtin_ia32_cvttpd2udq256_mask,
 * with __A as the first argument. The unmasked and maskz forms pass
 * _mm_setzero_si128() as a later operand; the mask forms presumably pass
 * __W. The mask argument line is not shown in this listing -- presumably
 * the unmasked forms supply an all-ones mask, TODO confirm.
 */
2058 static __inline__ __m128i __DEFAULT_FN_ATTRS
2059 _mm_cvttpd_epu32 (__m128d __A) {
2060 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2062 _mm_setzero_si128 (),
2066 static __inline__ __m128i __DEFAULT_FN_ATTRS
2067 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2068 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2073 static __inline__ __m128i __DEFAULT_FN_ATTRS
2074 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2075 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2077 _mm_setzero_si128 (),
2081 static __inline__ __m128i __DEFAULT_FN_ATTRS
2082 _mm256_cvttpd_epu32 (__m256d __A) {
2083 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2085 _mm_setzero_si128 (),
2089 static __inline__ __m128i __DEFAULT_FN_ATTRS
2090 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2091 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2096 static __inline__ __m128i __DEFAULT_FN_ATTRS
2097 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2098 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2100 _mm_setzero_si128 (),
/* Masked cvttps conversions from __m128 / __m256 to __m128i / __m256i.
 *
 * The mask and maskz forms share one builtin per width:
 * __builtin_ia32_cvttps2dq128_mask and __builtin_ia32_cvttps2dq256_mask,
 * with __A as the first argument. The maskz forms pass a zero integer
 * vector as a later operand; the mask forms presumably pass __W there. The
 * mask argument line is not shown in this listing -- TODO confirm.
 */
2104 static __inline__ __m128i __DEFAULT_FN_ATTRS
2105 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2106 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2111 static __inline__ __m128i __DEFAULT_FN_ATTRS
2112 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2113 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2115 _mm_setzero_si128 (),
2119 static __inline__ __m256i __DEFAULT_FN_ATTRS
2120 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2121 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2126 static __inline__ __m256i __DEFAULT_FN_ATTRS
2127 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2128 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2130 _mm256_setzero_si256 (),
/* cvttps conversions from __m128 / __m256 to __m128i / __m256i via the
 * cvttps2udq builtins (unmasked, mask and maskz forms).
 *
 * All three forms of a given width call the same builtin,
 * __builtin_ia32_cvttps2udq128_mask or __builtin_ia32_cvttps2udq256_mask,
 * with __A as the first argument. The unmasked and maskz forms pass a zero
 * integer vector as a later operand; the mask forms presumably pass __W.
 * The mask argument line is not shown in this listing -- presumably the
 * unmasked forms supply an all-ones mask, TODO confirm.
 */
2134 static __inline__ __m128i __DEFAULT_FN_ATTRS
2135 _mm_cvttps_epu32 (__m128 __A) {
2136 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2138 _mm_setzero_si128 (),
2142 static __inline__ __m128i __DEFAULT_FN_ATTRS
2143 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2144 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2149 static __inline__ __m128i __DEFAULT_FN_ATTRS
2150 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2151 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2153 _mm_setzero_si128 (),
2157 static __inline__ __m256i __DEFAULT_FN_ATTRS
2158 _mm256_cvttps_epu32 (__m256 __A) {
2159 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2161 _mm256_setzero_si256 (),
2165 static __inline__ __m256i __DEFAULT_FN_ATTRS
2166 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2167 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2172 static __inline__ __m256i __DEFAULT_FN_ATTRS
2173 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2174 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2176 _mm256_setzero_si256 (),
/* Conversions from an __m128i source, viewed as __v4su, to __m128d /
 * __m256d (unmasked, mask and maskz forms).
 *
 * The unmasked forms use generic vector builtins rather than a target
 * builtin. The mask/maskz forms wrap the unmasked result in
 * __builtin_ia32_selectpd_128 / _256 together with the mask __U; the maskz
 * forms pass a zero vector as the third select operand. For the mask forms
 * the third operand is not shown in this listing (presumably __W -- TODO
 * confirm).
 */
2180 static __inline__ __m128d __DEFAULT_FN_ATTRS
2181 _mm_cvtepu32_pd (__m128i __A) {
/* Only elements 0 and 1 of __A are kept by the shuffle before the
 * element-wise conversion to __v2df. */
2182 return (__m128d) __builtin_convertvector(
2183 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2186 static __inline__ __m128d __DEFAULT_FN_ATTRS
2187 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2188 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2189 (__v2df)_mm_cvtepu32_pd(__A),
2193 static __inline__ __m128d __DEFAULT_FN_ATTRS
2194 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2195 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2196 (__v2df)_mm_cvtepu32_pd(__A),
2197 (__v2df)_mm_setzero_pd());
2200 static __inline__ __m256d __DEFAULT_FN_ATTRS
2201 _mm256_cvtepu32_pd (__m128i __A) {
/* All four __v4su elements are converted to __v4df. */
2202 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2205 static __inline__ __m256d __DEFAULT_FN_ATTRS
2206 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2207 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2208 (__v4df)_mm256_cvtepu32_pd(__A),
2212 static __inline__ __m256d __DEFAULT_FN_ATTRS
2213 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2214 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2215 (__v4df)_mm256_cvtepu32_pd(__A),
2216 (__v4df)_mm256_setzero_pd());
2219 static __inline__ __m128 __DEFAULT_FN_ATTRS
2220 _mm_cvtepu32_ps (__m128i __A) {
2221 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2227 static __inline__ __m128 __DEFAULT_FN_ATTRS
2228 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2229 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2234 static __inline__ __m128 __DEFAULT_FN_ATTRS
2235 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2236 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2242 static __inline__ __m256 __DEFAULT_FN_ATTRS
2243 _mm256_cvtepu32_ps (__m256i __A) {
2244 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2246 _mm256_setzero_ps (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v8si) to
 * __builtin_ia32_cvtudq2ps256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2250 static __inline__ __m256 __DEFAULT_FN_ATTRS
2251 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2252 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
/* Form taking mask __U only: forwards __A (as __v8si) to
 * __builtin_ia32_cvtudq2ps256_mask with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2257 static __inline__ __m256 __DEFAULT_FN_ATTRS
2258 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2259 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2261 _mm256_setzero_ps (),
2265 static __inline__ __m128d __DEFAULT_FN_ATTRS
2266 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2267 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2268 (__v2df)_mm_div_pd(__A, __B),
2272 static __inline__ __m128d __DEFAULT_FN_ATTRS
2273 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2274 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2275 (__v2df)_mm_div_pd(__A, __B),
2276 (__v2df)_mm_setzero_pd());
2279 static __inline__ __m256d __DEFAULT_FN_ATTRS
2280 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2281 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2282 (__v4df)_mm256_div_pd(__A, __B),
2286 static __inline__ __m256d __DEFAULT_FN_ATTRS
2287 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2289 (__v4df)_mm256_div_pd(__A, __B),
2290 (__v4df)_mm256_setzero_pd());
2293 static __inline__ __m128 __DEFAULT_FN_ATTRS
2294 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2295 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2296 (__v4sf)_mm_div_ps(__A, __B),
2300 static __inline__ __m128 __DEFAULT_FN_ATTRS
2301 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2302 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2303 (__v4sf)_mm_div_ps(__A, __B),
2304 (__v4sf)_mm_setzero_ps());
2307 static __inline__ __m256 __DEFAULT_FN_ATTRS
2308 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2309 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2310 (__v8sf)_mm256_div_ps(__A, __B),
2314 static __inline__ __m256 __DEFAULT_FN_ATTRS
2315 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2316 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2317 (__v8sf)_mm256_div_ps(__A, __B),
2318 (__v8sf)_mm256_setzero_ps());
/* Form taking pass-through __W and mask __U: forwards __A (as __v2df) to
 * __builtin_ia32_expanddf128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2321 static __inline__ __m128d __DEFAULT_FN_ATTRS
2322 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2323 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Form taking mask __U only: forwards __A (as __v2df) to
 * __builtin_ia32_expanddf128_mask.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2328 static __inline__ __m128d __DEFAULT_FN_ATTRS
2329 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2330 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v4df) to
 * __builtin_ia32_expanddf256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2336 static __inline__ __m256d __DEFAULT_FN_ATTRS
2337 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2338 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
/* Form taking mask __U only: forwards __A (as __v4df) to
 * __builtin_ia32_expanddf256_mask with _mm256_setzero_pd() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2343 static __inline__ __m256d __DEFAULT_FN_ATTRS
2344 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2345 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2347 _mm256_setzero_pd (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v2di) to
 * __builtin_ia32_expanddi128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2351 static __inline__ __m128i __DEFAULT_FN_ATTRS
2352 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2353 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
/* Form taking mask __U only: forwards __A (as __v2di) to
 * __builtin_ia32_expanddi128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2358 static __inline__ __m128i __DEFAULT_FN_ATTRS
2359 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2360 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2362 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4di) to
 * __builtin_ia32_expanddi256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2366 static __inline__ __m256i __DEFAULT_FN_ATTRS
2367 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2368 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
/* Form taking mask __U only: forwards __A (as __v4di) to
 * __builtin_ia32_expanddi256_mask with _mm256_setzero_si256() as a later
 * operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2373 static __inline__ __m256i __DEFAULT_FN_ATTRS
2374 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2375 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2377 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloaddf128_mask. The (__v2df *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2381 static __inline__ __m128d __DEFAULT_FN_ATTRS
2382 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2383 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloaddf128_mask. The (__v2df *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2389 static __inline__ __m128d __DEFAULT_FN_ATTRS
2390 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2391 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloaddf256_mask. The (__v4df *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2398 static __inline__ __m256d __DEFAULT_FN_ATTRS
2399 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2400 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloaddf256_mask with _mm256_setzero_pd() as a later
 * operand. The (__v4df *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2406 static __inline__ __m256d __DEFAULT_FN_ATTRS
2407 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2408 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2410 _mm256_setzero_pd (),
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloaddi128_mask. The (__v2di *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2415 static __inline__ __m128i __DEFAULT_FN_ATTRS
2416 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2417 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloaddi128_mask with _mm_setzero_si128() as a later
 * operand. The (__v2di *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2423 static __inline__ __m128i __DEFAULT_FN_ATTRS
2424 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2425 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2427 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __U: hands the pointer __P to
 * __builtin_ia32_expandloaddi256_mask via a (__v4di *) cast.
 * NOTE(review): the end of the parameter list (where __P is declared), the
 * remaining operands and the closing brace are missing from this listing. */
2432 static __inline__ __m256i __DEFAULT_FN_ATTRS
2433 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2435 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloaddi256_mask with _mm256_setzero_si256() as a later
 * operand. The (__v4di *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2441 static __inline__ __m256i __DEFAULT_FN_ATTRS
2442 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2443 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2445 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloadsf128_mask. The (__v4sf *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2450 static __inline__ __m128 __DEFAULT_FN_ATTRS
2451 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2452 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloadsf128_mask. The (__v4sf *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2457 static __inline__ __m128 __DEFAULT_FN_ATTRS
2458 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2459 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloadsf256_mask. The (__v8sf *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2466 static __inline__ __m256 __DEFAULT_FN_ATTRS
2467 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2468 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloadsf256_mask with _mm256_setzero_ps() as a later
 * operand. The (__v8sf *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2473 static __inline__ __m256 __DEFAULT_FN_ATTRS
2474 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2475 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2477 _mm256_setzero_ps (),
/* Form taking pass-through __W and mask __U: hands the memory pointer __P to
 * __builtin_ia32_expandloadsi128_mask. The (__v4si *) cast drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2482 static __inline__ __m128i __DEFAULT_FN_ATTRS
2483 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2484 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloadsi128_mask with _mm_setzero_si128() as a later
 * operand. The (__v4si *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2490 static __inline__ __m128i __DEFAULT_FN_ATTRS
2491 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2492 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2494 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __U: hands the pointer __P to
 * __builtin_ia32_expandloadsi256_mask via a (__v8si *) cast.
 * NOTE(review): the end of the parameter list (where __P is declared), the
 * remaining operands and the closing brace are missing from this listing. */
2498 static __inline__ __m256i __DEFAULT_FN_ATTRS
2499 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2501 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
/* Form taking mask __U only: hands the memory pointer __P to
 * __builtin_ia32_expandloadsi256_mask with _mm256_setzero_si256() as a later
 * operand. The (__v8si *) cast drops the const qualifier of __P.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2507 static __inline__ __m256i __DEFAULT_FN_ATTRS
2508 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2509 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2511 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4sf) to
 * __builtin_ia32_expandsf128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2516 static __inline__ __m128 __DEFAULT_FN_ATTRS
2517 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2518 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Form taking mask __U only: forwards __A (as __v4sf) to
 * __builtin_ia32_expandsf128_mask.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2523 static __inline__ __m128 __DEFAULT_FN_ATTRS
2524 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2525 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v8sf) to
 * __builtin_ia32_expandsf256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2531 static __inline__ __m256 __DEFAULT_FN_ATTRS
2532 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2533 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
/* Form taking mask __U only: forwards __A (as __v8sf) to
 * __builtin_ia32_expandsf256_mask with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2538 static __inline__ __m256 __DEFAULT_FN_ATTRS
2539 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2540 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2542 _mm256_setzero_ps (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4si) to
 * __builtin_ia32_expandsi128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2546 static __inline__ __m128i __DEFAULT_FN_ATTRS
2547 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2548 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
/* Form taking mask __U only: forwards __A (as __v4si) to
 * __builtin_ia32_expandsi128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2553 static __inline__ __m128i __DEFAULT_FN_ATTRS
2554 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2555 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2557 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v8si) to
 * __builtin_ia32_expandsi256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2561 static __inline__ __m256i __DEFAULT_FN_ATTRS
2562 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2563 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
/* Form taking mask __U only: forwards __A (as __v8si) to
 * __builtin_ia32_expandsi256_mask with _mm256_setzero_si256() as a later
 * operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2568 static __inline__ __m256i __DEFAULT_FN_ATTRS
2569 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2570 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2572 _mm256_setzero_si256 (),
/* Unmasked form: forwards __A (as __v2df) to __builtin_ia32_getexppd128_mask.
 * NOTE(review): the remaining operands and the closing brace are missing
 * from this listing. */
2576 static __inline__ __m128d __DEFAULT_FN_ATTRS
2577 _mm_getexp_pd (__m128d __A) {
2578 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v2df) to
 * __builtin_ia32_getexppd128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2584 static __inline__ __m128d __DEFAULT_FN_ATTRS
2585 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2586 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Form taking mask __U only: forwards __A (as __v2df) to
 * __builtin_ia32_getexppd128_mask.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2591 static __inline__ __m128d __DEFAULT_FN_ATTRS
2592 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2593 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Unmasked form: forwards __A (as __v4df) to __builtin_ia32_getexppd256_mask
 * with _mm256_setzero_pd() as a later operand.
 * NOTE(review): the mask operand and the closing brace are missing from this
 * listing. */
2599 static __inline__ __m256d __DEFAULT_FN_ATTRS
2600 _mm256_getexp_pd (__m256d __A) {
2601 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2603 _mm256_setzero_pd (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4df) to
 * __builtin_ia32_getexppd256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2607 static __inline__ __m256d __DEFAULT_FN_ATTRS
2608 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2609 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
/* Form taking mask __U only: forwards __A (as __v4df) to
 * __builtin_ia32_getexppd256_mask with _mm256_setzero_pd() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2614 static __inline__ __m256d __DEFAULT_FN_ATTRS
2615 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2616 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2618 _mm256_setzero_pd (),
/* Unmasked form: forwards __A (as __v4sf) to __builtin_ia32_getexpps128_mask.
 * NOTE(review): the remaining operands and the closing brace are missing
 * from this listing. */
2622 static __inline__ __m128 __DEFAULT_FN_ATTRS
2623 _mm_getexp_ps (__m128 __A) {
2624 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v4sf) to
 * __builtin_ia32_getexpps128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2630 static __inline__ __m128 __DEFAULT_FN_ATTRS
2631 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2632 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Form taking mask __U only: forwards __A (as __v4sf) to
 * __builtin_ia32_getexpps128_mask.
 * NOTE(review): the remaining operands (presumably a zero vector and __U)
 * and the closing brace are missing from this listing -- confirm upstream. */
2637 static __inline__ __m128 __DEFAULT_FN_ATTRS
2638 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2639 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Unmasked form: forwards __A (as __v8sf) to __builtin_ia32_getexpps256_mask
 * with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the mask operand and the closing brace are missing from this
 * listing. */
2645 static __inline__ __m256 __DEFAULT_FN_ATTRS
2646 _mm256_getexp_ps (__m256 __A) {
2647 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2649 _mm256_setzero_ps (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v8sf) to
 * __builtin_ia32_getexpps256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2653 static __inline__ __m256 __DEFAULT_FN_ATTRS
2654 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2655 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
/* Form taking mask __U only: forwards __A (as __v8sf) to
 * __builtin_ia32_getexpps256_mask with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2660 static __inline__ __m256 __DEFAULT_FN_ATTRS
2661 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2662 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2664 _mm256_setzero_ps (),
2668 static __inline__ __m128d __DEFAULT_FN_ATTRS
2669 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2670 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2671 (__v2df)_mm_max_pd(__A, __B),
2675 static __inline__ __m128d __DEFAULT_FN_ATTRS
2676 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2677 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2678 (__v2df)_mm_max_pd(__A, __B),
2679 (__v2df)_mm_setzero_pd());
2682 static __inline__ __m256d __DEFAULT_FN_ATTRS
2683 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2684 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2685 (__v4df)_mm256_max_pd(__A, __B),
2689 static __inline__ __m256d __DEFAULT_FN_ATTRS
2690 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2691 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2692 (__v4df)_mm256_max_pd(__A, __B),
2693 (__v4df)_mm256_setzero_pd());
2696 static __inline__ __m128 __DEFAULT_FN_ATTRS
2697 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2698 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2699 (__v4sf)_mm_max_ps(__A, __B),
2703 static __inline__ __m128 __DEFAULT_FN_ATTRS
2704 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2705 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2706 (__v4sf)_mm_max_ps(__A, __B),
2707 (__v4sf)_mm_setzero_ps());
2710 static __inline__ __m256 __DEFAULT_FN_ATTRS
2711 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2712 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2713 (__v8sf)_mm256_max_ps(__A, __B),
2717 static __inline__ __m256 __DEFAULT_FN_ATTRS
2718 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2719 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2720 (__v8sf)_mm256_max_ps(__A, __B),
2721 (__v8sf)_mm256_setzero_ps());
2724 static __inline__ __m128d __DEFAULT_FN_ATTRS
2725 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2726 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2727 (__v2df)_mm_min_pd(__A, __B),
2731 static __inline__ __m128d __DEFAULT_FN_ATTRS
2732 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2733 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2734 (__v2df)_mm_min_pd(__A, __B),
2735 (__v2df)_mm_setzero_pd());
2738 static __inline__ __m256d __DEFAULT_FN_ATTRS
2739 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2740 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2741 (__v4df)_mm256_min_pd(__A, __B),
2745 static __inline__ __m256d __DEFAULT_FN_ATTRS
2746 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2747 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2748 (__v4df)_mm256_min_pd(__A, __B),
2749 (__v4df)_mm256_setzero_pd());
2752 static __inline__ __m128 __DEFAULT_FN_ATTRS
2753 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2754 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2755 (__v4sf)_mm_min_ps(__A, __B),
2759 static __inline__ __m128 __DEFAULT_FN_ATTRS
2760 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2761 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2762 (__v4sf)_mm_min_ps(__A, __B),
2763 (__v4sf)_mm_setzero_ps());
2766 static __inline__ __m256 __DEFAULT_FN_ATTRS
2767 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2768 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2769 (__v8sf)_mm256_min_ps(__A, __B),
2773 static __inline__ __m256 __DEFAULT_FN_ATTRS
2774 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2775 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2776 (__v8sf)_mm256_min_ps(__A, __B),
2777 (__v8sf)_mm256_setzero_ps());
2780 static __inline__ __m128d __DEFAULT_FN_ATTRS
2781 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2782 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2783 (__v2df)_mm_mul_pd(__A, __B),
2787 static __inline__ __m128d __DEFAULT_FN_ATTRS
2788 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2789 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2790 (__v2df)_mm_mul_pd(__A, __B),
2791 (__v2df)_mm_setzero_pd());
2794 static __inline__ __m256d __DEFAULT_FN_ATTRS
2795 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2796 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2797 (__v4df)_mm256_mul_pd(__A, __B),
2801 static __inline__ __m256d __DEFAULT_FN_ATTRS
2802 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2803 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2804 (__v4df)_mm256_mul_pd(__A, __B),
2805 (__v4df)_mm256_setzero_pd());
2808 static __inline__ __m128 __DEFAULT_FN_ATTRS
2809 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2810 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2811 (__v4sf)_mm_mul_ps(__A, __B),
2815 static __inline__ __m128 __DEFAULT_FN_ATTRS
2816 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2817 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2818 (__v4sf)_mm_mul_ps(__A, __B),
2819 (__v4sf)_mm_setzero_ps());
2822 static __inline__ __m256 __DEFAULT_FN_ATTRS
2823 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2824 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2825 (__v8sf)_mm256_mul_ps(__A, __B),
2829 static __inline__ __m256 __DEFAULT_FN_ATTRS
2830 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2831 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2832 (__v8sf)_mm256_mul_ps(__A, __B),
2833 (__v8sf)_mm256_setzero_ps());
2836 static __inline__ __m128i __DEFAULT_FN_ATTRS
2837 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2838 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2839 (__v4si)_mm_abs_epi32(__A),
2843 static __inline__ __m128i __DEFAULT_FN_ATTRS
2844 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2845 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2846 (__v4si)_mm_abs_epi32(__A),
2847 (__v4si)_mm_setzero_si128());
2850 static __inline__ __m256i __DEFAULT_FN_ATTRS
2851 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2852 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
2853 (__v8si)_mm256_abs_epi32(__A),
2857 static __inline__ __m256i __DEFAULT_FN_ATTRS
2858 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2859 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
2860 (__v8si)_mm256_abs_epi32(__A),
2861 (__v8si)_mm256_setzero_si256());
/* Unmasked form: forwards __A (as __v2di) to __builtin_ia32_pabsq128_mask
 * with _mm_setzero_si128() as a later operand.
 * NOTE(review): the mask operand and the closing brace are missing from this
 * listing. */
2864 static __inline__ __m128i __DEFAULT_FN_ATTRS
2865 _mm_abs_epi64 (__m128i __A) {
2866 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
2868 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v2di) to
 * __builtin_ia32_pabsq128_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2872 static __inline__ __m128i __DEFAULT_FN_ATTRS
2873 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2874 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
/* Form taking mask __U only: forwards __A (as __v2di) to
 * __builtin_ia32_pabsq128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2879 static __inline__ __m128i __DEFAULT_FN_ATTRS
2880 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
2881 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
2883 _mm_setzero_si128 (),
/* Unmasked form: forwards __A (as __v4di) to __builtin_ia32_pabsq256_mask
 * with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the mask operand and the closing brace are missing from this
 * listing. */
2887 static __inline__ __m256i __DEFAULT_FN_ATTRS
2888 _mm256_abs_epi64 (__m256i __A) {
2889 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
2891 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4di) to
 * __builtin_ia32_pabsq256_mask.
 * NOTE(review): the remaining operands (presumably __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
2895 static __inline__ __m256i __DEFAULT_FN_ATTRS
2896 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2897 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
/* Form taking mask __U only: forwards __A (as __v4di) to
 * __builtin_ia32_pabsq256_mask with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the mask operand (presumably __U) and the closing brace are
 * missing from this listing -- confirm upstream. */
2902 static __inline__ __m256i __DEFAULT_FN_ATTRS
2903 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
2904 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
2906 _mm256_setzero_si256 (),
2910 static __inline__ __m128i __DEFAULT_FN_ATTRS
2911 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2912 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2913 (__v4si)_mm_max_epi32(__A, __B),
2914 (__v4si)_mm_setzero_si128());
2917 static __inline__ __m128i __DEFAULT_FN_ATTRS
2918 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2919 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2920 (__v4si)_mm_max_epi32(__A, __B),
2924 static __inline__ __m256i __DEFAULT_FN_ATTRS
2925 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2926 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2927 (__v8si)_mm256_max_epi32(__A, __B),
2928 (__v8si)_mm256_setzero_si256());
2931 static __inline__ __m256i __DEFAULT_FN_ATTRS
2932 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2933 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2934 (__v8si)_mm256_max_epi32(__A, __B),
/* Form taking mask __M only: forwards __A (as __v2di) to
 * __builtin_ia32_pmaxsq128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
2938 static __inline__ __m128i __DEFAULT_FN_ATTRS
2939 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
2940 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
2943 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v2di) to
 * __builtin_ia32_pmaxsq128_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
2947 static __inline__ __m128i __DEFAULT_FN_ATTRS
2948 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
2950 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
/* Unmasked form: forwards __A (as __v2di) to __builtin_ia32_pmaxsq128_mask
 * with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
2955 static __inline__ __m128i __DEFAULT_FN_ATTRS
2956 _mm_max_epi64 (__m128i __A, __m128i __B) {
2957 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
2960 _mm_setzero_si128 (),
/* Form taking mask __M only: forwards __A (as __v4di) to
 * __builtin_ia32_pmaxsq256_mask with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
2964 static __inline__ __m256i __DEFAULT_FN_ATTRS
2965 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
2966 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
2969 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v4di) to
 * __builtin_ia32_pmaxsq256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
2973 static __inline__ __m256i __DEFAULT_FN_ATTRS
2974 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
2976 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
/* Unmasked form: forwards __A (as __v4di) to __builtin_ia32_pmaxsq256_mask
 * with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
2981 static __inline__ __m256i __DEFAULT_FN_ATTRS
2982 _mm256_max_epi64 (__m256i __A, __m256i __B) {
2983 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
2986 _mm256_setzero_si256 (),
2990 static __inline__ __m128i __DEFAULT_FN_ATTRS
2991 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
2992 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2993 (__v4si)_mm_max_epu32(__A, __B),
2994 (__v4si)_mm_setzero_si128());
2997 static __inline__ __m128i __DEFAULT_FN_ATTRS
2998 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2999 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3000 (__v4si)_mm_max_epu32(__A, __B),
3004 static __inline__ __m256i __DEFAULT_FN_ATTRS
3005 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3006 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3007 (__v8si)_mm256_max_epu32(__A, __B),
3008 (__v8si)_mm256_setzero_si256());
3011 static __inline__ __m256i __DEFAULT_FN_ATTRS
3012 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3013 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3014 (__v8si)_mm256_max_epu32(__A, __B),
/* Form taking mask __M only: forwards __A (as __v2di) to
 * __builtin_ia32_pmaxuq128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3018 static __inline__ __m128i __DEFAULT_FN_ATTRS
3019 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3020 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3023 _mm_setzero_si128 (),
/* Unmasked form: forwards __A (as __v2di) to __builtin_ia32_pmaxuq128_mask
 * with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3027 static __inline__ __m128i __DEFAULT_FN_ATTRS
3028 _mm_max_epu64 (__m128i __A, __m128i __B) {
3029 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3032 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v2di) to
 * __builtin_ia32_pmaxuq128_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3036 static __inline__ __m128i __DEFAULT_FN_ATTRS
3037 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3039 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
/* Form taking mask __M only: forwards __A (as __v4di) to
 * __builtin_ia32_pmaxuq256_mask with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3044 static __inline__ __m256i __DEFAULT_FN_ATTRS
3045 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3046 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3049 _mm256_setzero_si256 (),
/* Unmasked form: forwards __A (as __v4di) to __builtin_ia32_pmaxuq256_mask
 * with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3053 static __inline__ __m256i __DEFAULT_FN_ATTRS
3054 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3055 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3058 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v4di) to
 * __builtin_ia32_pmaxuq256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3062 static __inline__ __m256i __DEFAULT_FN_ATTRS
3063 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3065 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3070 static __inline__ __m128i __DEFAULT_FN_ATTRS
3071 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3072 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3073 (__v4si)_mm_min_epi32(__A, __B),
3074 (__v4si)_mm_setzero_si128());
3077 static __inline__ __m128i __DEFAULT_FN_ATTRS
3078 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3079 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3080 (__v4si)_mm_min_epi32(__A, __B),
3084 static __inline__ __m256i __DEFAULT_FN_ATTRS
3085 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3086 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3087 (__v8si)_mm256_min_epi32(__A, __B),
3088 (__v8si)_mm256_setzero_si256());
3091 static __inline__ __m256i __DEFAULT_FN_ATTRS
3092 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3094 (__v8si)_mm256_min_epi32(__A, __B),
/* Unmasked form: forwards __A (as __v2di) to __builtin_ia32_pminsq128_mask
 * with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3098 static __inline__ __m128i __DEFAULT_FN_ATTRS
3099 _mm_min_epi64 (__m128i __A, __m128i __B) {
3100 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3103 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v2di) to
 * __builtin_ia32_pminsq128_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3107 static __inline__ __m128i __DEFAULT_FN_ATTRS
3108 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3110 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
/* Form taking mask __M only: forwards __A (as __v2di) to
 * __builtin_ia32_pminsq128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3115 static __inline__ __m128i __DEFAULT_FN_ATTRS
3116 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3117 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3120 _mm_setzero_si128 (),
/* Unmasked form: forwards __A (as __v4di) to __builtin_ia32_pminsq256_mask
 * with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3124 static __inline__ __m256i __DEFAULT_FN_ATTRS
3125 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3126 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3129 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v4di) to
 * __builtin_ia32_pminsq256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3133 static __inline__ __m256i __DEFAULT_FN_ATTRS
3134 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3136 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
/* Form taking mask __M only: forwards __A (as __v4di) to
 * __builtin_ia32_pminsq256_mask with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3141 static __inline__ __m256i __DEFAULT_FN_ATTRS
3142 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3143 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3146 _mm256_setzero_si256 (),
3150 static __inline__ __m128i __DEFAULT_FN_ATTRS
3151 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3152 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3153 (__v4si)_mm_min_epu32(__A, __B),
3154 (__v4si)_mm_setzero_si128());
3157 static __inline__ __m128i __DEFAULT_FN_ATTRS
3158 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3159 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3160 (__v4si)_mm_min_epu32(__A, __B),
3164 static __inline__ __m256i __DEFAULT_FN_ATTRS
3165 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3167 (__v8si)_mm256_min_epu32(__A, __B),
3168 (__v8si)_mm256_setzero_si256());
3171 static __inline__ __m256i __DEFAULT_FN_ATTRS
3172 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3173 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3174 (__v8si)_mm256_min_epu32(__A, __B),
/* Unmasked form: forwards __A (as __v2di) to __builtin_ia32_pminuq128_mask
 * with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3178 static __inline__ __m128i __DEFAULT_FN_ATTRS
3179 _mm_min_epu64 (__m128i __A, __m128i __B) {
3180 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3183 _mm_setzero_si128 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v2di) to
 * __builtin_ia32_pminuq128_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3187 static __inline__ __m128i __DEFAULT_FN_ATTRS
3188 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3190 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
/* Form taking mask __M only: forwards __A (as __v2di) to
 * __builtin_ia32_pminuq128_mask with _mm_setzero_si128() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3195 static __inline__ __m128i __DEFAULT_FN_ATTRS
3196 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3197 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3200 _mm_setzero_si128 (),
/* Unmasked form: forwards __A (as __v4di) to __builtin_ia32_pminuq256_mask
 * with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3204 static __inline__ __m256i __DEFAULT_FN_ATTRS
3205 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3206 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3209 _mm256_setzero_si256 (),
/* Form taking pass-through __W and mask __M: forwards __A (as __v4di) to
 * __builtin_ia32_pminuq256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3213 static __inline__ __m256i __DEFAULT_FN_ATTRS
3214 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3216 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
/* Form taking mask __M only: forwards __A (as __v4di) to
 * __builtin_ia32_pminuq256_mask with _mm256_setzero_si256() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __M) and
 * the closing brace are missing from this listing -- confirm upstream. */
3221 static __inline__ __m256i __DEFAULT_FN_ATTRS
3222 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3223 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3226 _mm256_setzero_si256 (),
/* Macro over __builtin_ia32_rndscalepd_128_mask: A (as __v2df) comes first and
 * _mm_setzero_pd() is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand and the closing of
 * the statement expression are missing from this listing. */
3230 #define _mm_roundscale_pd(A, imm) __extension__ ({ \
3231 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3233 (__v2df)_mm_setzero_pd(), \
/* Macro over __builtin_ia32_rndscalepd_128_mask: A (as __v2df) comes first and
 * W (as __v2df) is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand (presumably U) and
 * the closing of the statement expression are missing from this listing. */
3237 #define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3238 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3240 (__v2df)(__m128d)(W), \
/* Macro over __builtin_ia32_rndscalepd_128_mask: A (as __v2df) comes first and
 * _mm_setzero_pd() is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand (presumably U) and
 * the closing of the statement expression are missing from this listing. */
3244 #define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3245 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3247 (__v2df)_mm_setzero_pd(), \
/* Macro over __builtin_ia32_rndscalepd_256_mask: A (as __v4df) comes first and
 * _mm256_setzero_pd() is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand and the closing of
 * the statement expression are missing from this listing. */
3251 #define _mm256_roundscale_pd(A, imm) __extension__ ({ \
3252 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3254 (__v4df)_mm256_setzero_pd(), \
/* Macro over __builtin_ia32_rndscalepd_256_mask: A (as __v4df) comes first and
 * W (as __v4df) is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand (presumably U) and
 * the closing of the statement expression are missing from this listing. */
3258 #define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
3259 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3261 (__v4df)(__m256d)(W), \
/* Macro over __builtin_ia32_rndscalepd_256_mask: A (as __v4df) comes first and
 * _mm256_setzero_pd() is a later operand.
 * NOTE(review): the lines carrying imm, the mask operand (presumably U) and
 * the closing of the statement expression are missing from this listing. */
3265 #define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
3266 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3268 (__v4df)_mm256_setzero_pd(), \
/* Macro over __builtin_ia32_rndscaleps_128_mask: operands are A (as __v4sf),
 * imm (as int) and _mm_setzero_ps(), in that order.
 * NOTE(review): the mask operand and the closing of the statement expression
 * are missing from this listing. */
3271 #define _mm_roundscale_ps(A, imm) __extension__ ({ \
3272 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3273 (__v4sf)_mm_setzero_ps(), \
/* Macro over __builtin_ia32_rndscaleps_128_mask: operands are A (as __v4sf),
 * imm (as int) and W (as __v4sf), in that order.
 * NOTE(review): the mask operand (presumably U) and the closing of the
 * statement expression are missing from this listing. */
3277 #define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3278 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3279 (__v4sf)(__m128)(W), \
/* Macro over __builtin_ia32_rndscaleps_128_mask: operands are A (as __v4sf),
 * imm (as int) and _mm_setzero_ps(), in that order.
 * NOTE(review): the mask operand (presumably U) and the closing of the
 * statement expression are missing from this listing. */
3283 #define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3284 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3285 (__v4sf)_mm_setzero_ps(), \
/* Macro over __builtin_ia32_rndscaleps_256_mask: operands are A (as __v8sf),
 * imm (as int) and _mm256_setzero_ps(), in that order.
 * NOTE(review): the mask operand and the closing of the statement expression
 * are missing from this listing. */
3288 #define _mm256_roundscale_ps(A, imm) __extension__ ({ \
3289 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3290 (__v8sf)_mm256_setzero_ps(), \
/* Macro over __builtin_ia32_rndscaleps_256_mask: operands are A (as __v8sf),
 * imm (as int) and W (as __v8sf), in that order.
 * NOTE(review): the mask operand (presumably U) and the closing of the
 * statement expression are missing from this listing. */
3293 #define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
3294 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3295 (__v8sf)(__m256)(W), \
/* Macro over __builtin_ia32_rndscaleps_256_mask: operands are A (as __v8sf),
 * imm (as int) and _mm256_setzero_ps(), in that order.
 * NOTE(review): the mask operand (presumably U) and the closing of the
 * statement expression are missing from this listing. */
3299 #define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
3300 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3301 (__v8sf)_mm256_setzero_ps(), \
/* Unmasked form: forwards __A (as __v2df) to __builtin_ia32_scalefpd128_mask.
 * NOTE(review): the remaining operands (including whatever carries __B) and
 * the closing brace are missing from this listing. */
3304 static __inline__ __m128d __DEFAULT_FN_ATTRS
3305 _mm_scalef_pd (__m128d __A, __m128d __B) {
3306 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v2df) to
 * __builtin_ia32_scalefpd128_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3313 static __inline__ __m128d __DEFAULT_FN_ATTRS
3314 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3316 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Form taking mask __U only: forwards __A (as __v2df) to
 * __builtin_ia32_scalefpd128_mask.
 * NOTE(review): the remaining operands (including whatever carries __B and
 * __U) and the closing brace are missing from this listing. */
3322 static __inline__ __m128d __DEFAULT_FN_ATTRS
3323 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3324 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Unmasked form: forwards __A (as __v4df) to __builtin_ia32_scalefpd256_mask
 * with _mm256_setzero_pd() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3331 static __inline__ __m256d __DEFAULT_FN_ATTRS
3332 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3333 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3336 _mm256_setzero_pd (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v4df) to
 * __builtin_ia32_scalefpd256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3340 static __inline__ __m256d __DEFAULT_FN_ATTRS
3341 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3343 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
/* Form taking mask __U only: forwards __A (as __v4df) to
 * __builtin_ia32_scalefpd256_mask with _mm256_setzero_pd() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __U) and
 * the closing brace are missing from this listing -- confirm upstream. */
3349 static __inline__ __m256d __DEFAULT_FN_ATTRS
3350 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3351 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3354 _mm256_setzero_pd (),
/* Unmasked form: forwards __A (as __v4sf) to __builtin_ia32_scalefps128_mask.
 * NOTE(review): the remaining operands (including whatever carries __B) and
 * the closing brace are missing from this listing. */
3358 static __inline__ __m128 __DEFAULT_FN_ATTRS
3359 _mm_scalef_ps (__m128 __A, __m128 __B) {
3360 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Form taking pass-through __W and mask __U: forwards __A (as __v4sf) to
 * __builtin_ia32_scalefps128_mask.
 * NOTE(review): the remaining operands (presumably __B, __W and __U) and the
 * closing brace are missing from this listing -- confirm upstream. */
3367 static __inline__ __m128 __DEFAULT_FN_ATTRS
3368 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3369 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Form taking mask __U only: forwards __A (as __v4sf) to
 * __builtin_ia32_scalefps128_mask.
 * NOTE(review): the remaining operands (including whatever carries __B and
 * __U) and the closing brace are missing from this listing. */
3375 static __inline__ __m128 __DEFAULT_FN_ATTRS
3376 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3377 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Unmasked form: forwards __A (as __v8sf) to __builtin_ia32_scalefps256_mask
 * with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the __B operand line, the mask operand and the closing brace
 * are missing from this listing. */
3384 static __inline__ __m256 __DEFAULT_FN_ATTRS
3385 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3386 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3389 _mm256_setzero_ps (),
/* Form taking pass-through __W and mask __U: forwards __A (as __v8sf) to
 * __builtin_ia32_scalefps256_mask.
 * NOTE(review): the end of the parameter list, the remaining operands and
 * the closing brace are missing from this listing -- confirm upstream. */
3393 static __inline__ __m256 __DEFAULT_FN_ATTRS
3394 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3396 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* Form taking mask __U only: forwards __A (as __v8sf) to
 * __builtin_ia32_scalefps256_mask with _mm256_setzero_ps() as a later operand.
 * NOTE(review): the __B operand line, the mask operand (presumably __U) and
 * the closing brace are missing from this listing -- confirm upstream. */
3402 static __inline__ __m256 __DEFAULT_FN_ATTRS
3403 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3404 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3407 _mm256_setzero_ps (),
/* Unmasked scatter macro: hands addr (as double *), an all-ones __mmask8,
 * index (as __v2di), v1 (as __v2df) and scale (as int) to
 * __builtin_ia32_scatterdiv2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Masked scatter macro: hands addr (as double *), mask (as __mmask8),
 * index (as __v2di), v1 (as __v2df) and scale (as int) to
 * __builtin_ia32_scatterdiv2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
/* Unmasked scatter macro: hands addr (as long long *), an all-ones __mmask8,
 * index (as __v2di), v1 (as __v2di) and scale (as int) to
 * __builtin_ia32_scatterdiv2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
/* Masked form: forwards to __builtin_ia32_scatterdiv2di with addr cast to
 * long long *, mask cast to __mmask8, index and v1 both reinterpreted as
 * __v2di, and scale cast to int.  Presumably only elements whose mask bit is
 * set are stored -- confirm against the Intel intrinsics guide. */
3426 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
3427 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
3428 (__v2di)(__m128i)(index), \
3429 (__v2di)(__m128i)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4df with addr cast to
 * double *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index reinterpreted as __v4di, v1 as __v4df and scale cast to int. */
3431 #define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
3432 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
3433 (__v4di)(__m256i)(index), \
3434 (__v4df)(__m256d)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scatterdiv4df with addr cast to
 * double *, mask cast to __mmask8, index reinterpreted as __v4di, v1 as
 * __v4df and scale cast to int.  Presumably only elements whose mask bit is
 * set are stored -- confirm against the Intel intrinsics guide. */
3436 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
3437 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
3438 (__v4di)(__m256i)(index), \
3439 (__v4df)(__m256d)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4di with addr cast to
 * long long *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index and v1 both reinterpreted as __v4di, and scale cast to int. */
3441 #define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
3442 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
3443 (__v4di)(__m256i)(index), \
3444 (__v4di)(__m256i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scatterdiv4di with addr cast to
 * long long *, mask cast to __mmask8, index and v1 both reinterpreted as
 * __v4di, and scale cast to int.  Presumably only elements whose mask bit is
 * set are stored -- confirm against the Intel intrinsics guide. */
3446 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
3447 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
3448 (__v4di)(__m256i)(index), \
3449 (__v4di)(__m256i)(v1), (int)(scale)); })
3451 #define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3452 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
3453 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3456 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3457 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
3458 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4si with addr cast to
 * int *, the mask argument fixed to (__mmask8)-1 (every mask bit set), index
 * reinterpreted as __v2di, v1 as __v4si and scale cast to int. */
3461 #define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
3462 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
3463 (__v2di)(__m128i)(index), \
3464 (__v4si)(__m128i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scatterdiv4si with addr cast to
 * int *, mask cast to __mmask8, index reinterpreted as __v2di, v1 as __v4si
 * and scale cast to int.  Presumably only elements whose mask bit is set are
 * stored -- confirm against the Intel intrinsics guide. */
3466 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
3467 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
3468 (__v2di)(__m128i)(index), \
3469 (__v4si)(__m128i)(v1), (int)(scale)); })
3471 #define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
3472 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
3473 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
3476 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3477 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
3478 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scatterdiv8si with addr cast to
 * int *, the mask argument fixed to (__mmask8)-1 (every mask bit set), index
 * reinterpreted as __v4di (from __m256i), v1 as __v4si (from __m128i) and
 * scale cast to int. */
3481 #define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
3482 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
3483 (__v4di)(__m256i)(index), \
3484 (__v4si)(__m128i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scatterdiv8si with addr cast to
 * int *, mask cast to __mmask8, index reinterpreted as __v4di (from
 * __m256i), v1 as __v4si (from __m128i) and scale cast to int.  Presumably
 * only elements whose mask bit is set are stored -- confirm against the
 * Intel intrinsics guide. */
3486 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
3487 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
3488 (__v4di)(__m256i)(index), \
3489 (__v4si)(__m128i)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scattersiv2df with addr cast to
 * double *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index reinterpreted as __v4si, v1 as __v2df and scale cast to int. */
3491 #define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
3492 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
3493 (__v4si)(__m128i)(index), \
3494 (__v2df)(__m128d)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv2df with addr cast to
 * double *, mask cast to __mmask8, index reinterpreted as __v4si, v1 as
 * __v2df and scale cast to int.  Presumably only elements whose mask bit is
 * set are stored -- confirm against the Intel intrinsics guide. */
3496 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
3497 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
3498 (__v4si)(__m128i)(index), \
3499 (__v2df)(__m128d)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scattersiv2di with addr cast to
 * long long *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index reinterpreted as __v4si, v1 as __v2di and scale cast to int. */
3501 #define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
3502 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
3503 (__v4si)(__m128i)(index), \
3504 (__v2di)(__m128i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv2di with addr cast to
 * long long *, mask cast to __mmask8, index reinterpreted as __v4si, v1 as
 * __v2di and scale cast to int.  Presumably only elements whose mask bit is
 * set are stored -- confirm against the Intel intrinsics guide. */
3506 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
3507 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
3508 (__v4si)(__m128i)(index), \
3509 (__v2di)(__m128i)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scattersiv4df with addr cast to
 * double *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index reinterpreted as __v4si (from __m128i), v1 as __v4df (from __m256d)
 * and scale cast to int. */
3511 #define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
3512 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
3513 (__v4si)(__m128i)(index), \
3514 (__v4df)(__m256d)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv4df with addr cast to
 * double *, mask cast to __mmask8, index reinterpreted as __v4si (from
 * __m128i), v1 as __v4df (from __m256d) and scale cast to int.  Presumably
 * only elements whose mask bit is set are stored -- confirm against the
 * Intel intrinsics guide. */
3516 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
3517 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
3518 (__v4si)(__m128i)(index), \
3519 (__v4df)(__m256d)(v1), (int)(scale)); })
/* Unmasked form: forwards to __builtin_ia32_scattersiv4di with addr cast to
 * long long *, the mask argument fixed to (__mmask8)-1 (every mask bit set),
 * index reinterpreted as __v4si (from __m128i), v1 as __v4di (from __m256i)
 * and scale cast to int. */
3521 #define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
3522 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
3523 (__v4si)(__m128i)(index), \
3524 (__v4di)(__m256i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv4di with addr cast to
 * long long *, mask cast to __mmask8, index reinterpreted as __v4si (from
 * __m128i), v1 as __v4di (from __m256i) and scale cast to int.  Presumably
 * only elements whose mask bit is set are stored -- confirm against the
 * Intel intrinsics guide. */
3526 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
3527 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
3528 (__v4si)(__m128i)(index), \
3529 (__v4di)(__m256i)(v1), (int)(scale)); })
3531 #define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
3532 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
3533 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
3536 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3537 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
3538 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scattersiv4si with addr cast to
 * int *, the mask argument fixed to (__mmask8)-1 (every mask bit set), index
 * and v1 both reinterpreted as __v4si, and scale cast to int. */
3541 #define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
3542 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
3543 (__v4si)(__m128i)(index), \
3544 (__v4si)(__m128i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv4si with addr cast to
 * int *, mask cast to __mmask8, index and v1 both reinterpreted as __v4si,
 * and scale cast to int.  Presumably only elements whose mask bit is set are
 * stored -- confirm against the Intel intrinsics guide. */
3546 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
3547 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
3548 (__v4si)(__m128i)(index), \
3549 (__v4si)(__m128i)(v1), (int)(scale)); })
3551 #define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
3552 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
3553 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
3556 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
3557 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
3558 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Unmasked form: forwards to __builtin_ia32_scattersiv8si with addr cast to
 * int *, the mask argument fixed to (__mmask8)-1 (every mask bit set), index
 * and v1 both reinterpreted as __v8si, and scale cast to int. */
3561 #define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
3562 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
3563 (__v8si)(__m256i)(index), \
3564 (__v8si)(__m256i)(v1), (int)(scale)); })
/* Masked form: forwards to __builtin_ia32_scattersiv8si with addr cast to
 * int *, mask cast to __mmask8, index and v1 both reinterpreted as __v8si,
 * and scale cast to int.  Presumably only elements whose mask bit is set are
 * stored -- confirm against the Intel intrinsics guide. */
3566 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
3567 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
3568 (__v8si)(__m256i)(index), \
3569 (__v8si)(__m256i)(v1), (int)(scale)); })
3571 static __inline__ __m128d __DEFAULT_FN_ATTRS
3572 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3573 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3574 (__v2df)_mm_sqrt_pd(__A),
3578 static __inline__ __m128d __DEFAULT_FN_ATTRS
3579 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3580 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3581 (__v2df)_mm_sqrt_pd(__A),
3582 (__v2df)_mm_setzero_pd());
3585 static __inline__ __m256d __DEFAULT_FN_ATTRS
3586 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3587 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3588 (__v4df)_mm256_sqrt_pd(__A),
3592 static __inline__ __m256d __DEFAULT_FN_ATTRS
3593 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3594 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3595 (__v4df)_mm256_sqrt_pd(__A),
3596 (__v4df)_mm256_setzero_pd());
3599 static __inline__ __m128 __DEFAULT_FN_ATTRS
3600 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3601 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3602 (__v4sf)_mm_sqrt_ps(__A),
/* Zero-masking square root on a __m128 of floats: passes __U as the selector
 * to __builtin_ia32_selectps_128, choosing between _mm_sqrt_ps(__A) and a
 * zero vector.  The zero operand comes from the float-typed
 * _mm_setzero_ps(), matching the __v4sf cast and the other _ps zero-masking
 * helpers in this header (e.g. _mm_maskz_sub_ps); the bit pattern is all
 * zeros either way, so results are unchanged. */
3606 static __inline__ __m128 __DEFAULT_FN_ATTRS
3607 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3608 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3609 (__v4sf)_mm_sqrt_ps(__A),
3610 (__v4sf)_mm_setzero_ps());
3613 static __inline__ __m256 __DEFAULT_FN_ATTRS
3614 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3615 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3616 (__v8sf)_mm256_sqrt_ps(__A),
3620 static __inline__ __m256 __DEFAULT_FN_ATTRS
3621 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3622 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3623 (__v8sf)_mm256_sqrt_ps(__A),
3624 (__v8sf)_mm256_setzero_ps());
3627 static __inline__ __m128d __DEFAULT_FN_ATTRS
3628 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3629 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3630 (__v2df)_mm_sub_pd(__A, __B),
3634 static __inline__ __m128d __DEFAULT_FN_ATTRS
3635 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3636 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3637 (__v2df)_mm_sub_pd(__A, __B),
3638 (__v2df)_mm_setzero_pd());
3641 static __inline__ __m256d __DEFAULT_FN_ATTRS
3642 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3643 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3644 (__v4df)_mm256_sub_pd(__A, __B),
3648 static __inline__ __m256d __DEFAULT_FN_ATTRS
3649 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3650 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3651 (__v4df)_mm256_sub_pd(__A, __B),
3652 (__v4df)_mm256_setzero_pd());
3655 static __inline__ __m128 __DEFAULT_FN_ATTRS
3656 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3657 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3658 (__v4sf)_mm_sub_ps(__A, __B),
3662 static __inline__ __m128 __DEFAULT_FN_ATTRS
3663 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3664 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3665 (__v4sf)_mm_sub_ps(__A, __B),
3666 (__v4sf)_mm_setzero_ps());
3669 static __inline__ __m256 __DEFAULT_FN_ATTRS
3670 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3671 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3672 (__v8sf)_mm256_sub_ps(__A, __B),
3676 static __inline__ __m256 __DEFAULT_FN_ATTRS
3677 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3678 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3679 (__v8sf)_mm256_sub_ps(__A, __B),
3680 (__v8sf)_mm256_setzero_ps());
3683 static __inline__ __m128i __DEFAULT_FN_ATTRS
3684 _mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
3686 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
3693 static __inline__ __m256i __DEFAULT_FN_ATTRS
3694 _mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
3695 __mmask8 __U, __m256i __B) {
3696 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
3703 static __inline__ __m128d __DEFAULT_FN_ATTRS
3704 _mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
3706 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
3714 static __inline__ __m256d __DEFAULT_FN_ATTRS
3715 _mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
3717 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
3725 static __inline__ __m128 __DEFAULT_FN_ATTRS
3726 _mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
3728 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
3735 static __inline__ __m256 __DEFAULT_FN_ATTRS
3736 _mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
3738 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
3745 static __inline__ __m128i __DEFAULT_FN_ATTRS
3746 _mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
3748 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
3755 static __inline__ __m256i __DEFAULT_FN_ATTRS
3756 _mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
3757 __mmask8 __U, __m256i __B) {
3758 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
3765 static __inline__ __m128i __DEFAULT_FN_ATTRS
3766 _mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
3767 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
3774 static __inline__ __m128i __DEFAULT_FN_ATTRS
3775 _mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
3777 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
3784 static __inline__ __m128i __DEFAULT_FN_ATTRS
3785 _mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
3787 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
3795 static __inline__ __m256i __DEFAULT_FN_ATTRS
3796 _mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
3797 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
3804 static __inline__ __m256i __DEFAULT_FN_ATTRS
3805 _mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
3807 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
3814 static __inline__ __m256i __DEFAULT_FN_ATTRS
3815 _mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
3816 __m256i __I, __m256i __B) {
3817 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
3825 static __inline__ __m128d __DEFAULT_FN_ATTRS
3826 _mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
3827 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
3835 static __inline__ __m128d __DEFAULT_FN_ATTRS
3836 _mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
3838 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
3846 static __inline__ __m128d __DEFAULT_FN_ATTRS
3847 _mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
3849 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
3857 static __inline__ __m256d __DEFAULT_FN_ATTRS
3858 _mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
3859 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
3867 static __inline__ __m256d __DEFAULT_FN_ATTRS
3868 _mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
3870 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
3878 static __inline__ __m256d __DEFAULT_FN_ATTRS
3879 _mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
3881 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
3889 static __inline__ __m128 __DEFAULT_FN_ATTRS
3890 _mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
3891 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
3898 static __inline__ __m128 __DEFAULT_FN_ATTRS
3899 _mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
3901 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
3908 static __inline__ __m128 __DEFAULT_FN_ATTRS
3909 _mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
3911 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
3919 static __inline__ __m256 __DEFAULT_FN_ATTRS
3920 _mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
3921 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
3928 static __inline__ __m256 __DEFAULT_FN_ATTRS
3929 _mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
3931 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
3938 static __inline__ __m256 __DEFAULT_FN_ATTRS
3939 _mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
3941 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
3949 static __inline__ __m128i __DEFAULT_FN_ATTRS
3950 _mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
3951 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
3958 static __inline__ __m128i __DEFAULT_FN_ATTRS
3959 _mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
3961 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
3968 static __inline__ __m128i __DEFAULT_FN_ATTRS
3969 _mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
3971 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
3980 static __inline__ __m256i __DEFAULT_FN_ATTRS
3981 _mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
3982 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
3989 static __inline__ __m256i __DEFAULT_FN_ATTRS
3990 _mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
3992 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
3999 static __inline__ __m256i __DEFAULT_FN_ATTRS
4000 _mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
4001 __m256i __I, __m256i __B) {
4002 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
4010 static __inline__ __m128i __DEFAULT_FN_ATTRS
4011 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4013 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4014 (__v4si)_mm_cvtepi8_epi32(__A),
4018 static __inline__ __m128i __DEFAULT_FN_ATTRS
4019 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4021 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4022 (__v4si)_mm_cvtepi8_epi32(__A),
4023 (__v4si)_mm_setzero_si128());
4026 static __inline__ __m256i __DEFAULT_FN_ATTRS
4027 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4029 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4030 (__v8si)_mm256_cvtepi8_epi32(__A),
4034 static __inline__ __m256i __DEFAULT_FN_ATTRS
4035 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4037 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4038 (__v8si)_mm256_cvtepi8_epi32(__A),
4039 (__v8si)_mm256_setzero_si256());
4042 static __inline__ __m128i __DEFAULT_FN_ATTRS
4043 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4046 (__v2di)_mm_cvtepi8_epi64(__A),
4050 static __inline__ __m128i __DEFAULT_FN_ATTRS
4051 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4054 (__v2di)_mm_cvtepi8_epi64(__A),
4055 (__v2di)_mm_setzero_si128());
4058 static __inline__ __m256i __DEFAULT_FN_ATTRS
4059 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4062 (__v4di)_mm256_cvtepi8_epi64(__A),
4066 static __inline__ __m256i __DEFAULT_FN_ATTRS
4067 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4070 (__v4di)_mm256_cvtepi8_epi64(__A),
4071 (__v4di)_mm256_setzero_si256());
4074 static __inline__ __m128i __DEFAULT_FN_ATTRS
4075 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4077 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4078 (__v2di)_mm_cvtepi32_epi64(__X),
4082 static __inline__ __m128i __DEFAULT_FN_ATTRS
4083 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4085 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4086 (__v2di)_mm_cvtepi32_epi64(__X),
4087 (__v2di)_mm_setzero_si128());
4090 static __inline__ __m256i __DEFAULT_FN_ATTRS
4091 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4093 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4094 (__v4di)_mm256_cvtepi32_epi64(__X),
4098 static __inline__ __m256i __DEFAULT_FN_ATTRS
4099 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4101 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4102 (__v4di)_mm256_cvtepi32_epi64(__X),
4103 (__v4di)_mm256_setzero_si256());
4106 static __inline__ __m128i __DEFAULT_FN_ATTRS
4107 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4109 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4110 (__v4si)_mm_cvtepi16_epi32(__A),
4114 static __inline__ __m128i __DEFAULT_FN_ATTRS
4115 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4117 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4118 (__v4si)_mm_cvtepi16_epi32(__A),
4119 (__v4si)_mm_setzero_si128());
4122 static __inline__ __m256i __DEFAULT_FN_ATTRS
4123 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4125 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4126 (__v8si)_mm256_cvtepi16_epi32(__A),
4130 static __inline__ __m256i __DEFAULT_FN_ATTRS
4131 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4133 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4134 (__v8si)_mm256_cvtepi16_epi32(__A),
4135 (__v8si)_mm256_setzero_si256());
4138 static __inline__ __m128i __DEFAULT_FN_ATTRS
4139 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4141 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4142 (__v2di)_mm_cvtepi16_epi64(__A),
4146 static __inline__ __m128i __DEFAULT_FN_ATTRS
4147 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4149 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4150 (__v2di)_mm_cvtepi16_epi64(__A),
4151 (__v2di)_mm_setzero_si128());
4154 static __inline__ __m256i __DEFAULT_FN_ATTRS
4155 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4157 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4158 (__v4di)_mm256_cvtepi16_epi64(__A),
4162 static __inline__ __m256i __DEFAULT_FN_ATTRS
4163 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4165 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4166 (__v4di)_mm256_cvtepi16_epi64(__A),
4167 (__v4di)_mm256_setzero_si256());
4171 static __inline__ __m128i __DEFAULT_FN_ATTRS
4172 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4174 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4175 (__v4si)_mm_cvtepu8_epi32(__A),
4179 static __inline__ __m128i __DEFAULT_FN_ATTRS
4180 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4182 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4183 (__v4si)_mm_cvtepu8_epi32(__A),
4184 (__v4si)_mm_setzero_si128());
4187 static __inline__ __m256i __DEFAULT_FN_ATTRS
4188 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4190 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4191 (__v8si)_mm256_cvtepu8_epi32(__A),
4195 static __inline__ __m256i __DEFAULT_FN_ATTRS
4196 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4198 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4199 (__v8si)_mm256_cvtepu8_epi32(__A),
4200 (__v8si)_mm256_setzero_si256());
4203 static __inline__ __m128i __DEFAULT_FN_ATTRS
4204 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4207 (__v2di)_mm_cvtepu8_epi64(__A),
4211 static __inline__ __m128i __DEFAULT_FN_ATTRS
4212 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4215 (__v2di)_mm_cvtepu8_epi64(__A),
4216 (__v2di)_mm_setzero_si128());
4219 static __inline__ __m256i __DEFAULT_FN_ATTRS
4220 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4223 (__v4di)_mm256_cvtepu8_epi64(__A),
4227 static __inline__ __m256i __DEFAULT_FN_ATTRS
4228 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4231 (__v4di)_mm256_cvtepu8_epi64(__A),
4232 (__v4di)_mm256_setzero_si256());
4235 static __inline__ __m128i __DEFAULT_FN_ATTRS
4236 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4238 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4239 (__v2di)_mm_cvtepu32_epi64(__X),
4243 static __inline__ __m128i __DEFAULT_FN_ATTRS
4244 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4246 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4247 (__v2di)_mm_cvtepu32_epi64(__X),
4248 (__v2di)_mm_setzero_si128());
4251 static __inline__ __m256i __DEFAULT_FN_ATTRS
4252 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4254 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4255 (__v4di)_mm256_cvtepu32_epi64(__X),
4259 static __inline__ __m256i __DEFAULT_FN_ATTRS
4260 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4262 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4263 (__v4di)_mm256_cvtepu32_epi64(__X),
4264 (__v4di)_mm256_setzero_si256());
4267 static __inline__ __m128i __DEFAULT_FN_ATTRS
4268 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4270 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4271 (__v4si)_mm_cvtepu16_epi32(__A),
4275 static __inline__ __m128i __DEFAULT_FN_ATTRS
4276 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4278 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4279 (__v4si)_mm_cvtepu16_epi32(__A),
4280 (__v4si)_mm_setzero_si128());
4283 static __inline__ __m256i __DEFAULT_FN_ATTRS
4284 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4286 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4287 (__v8si)_mm256_cvtepu16_epi32(__A),
4291 static __inline__ __m256i __DEFAULT_FN_ATTRS
4292 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4294 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4295 (__v8si)_mm256_cvtepu16_epi32(__A),
4296 (__v8si)_mm256_setzero_si256());
4299 static __inline__ __m128i __DEFAULT_FN_ATTRS
4300 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4302 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4303 (__v2di)_mm_cvtepu16_epi64(__A),
4307 static __inline__ __m128i __DEFAULT_FN_ATTRS
4308 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4310 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4311 (__v2di)_mm_cvtepu16_epi64(__A),
4312 (__v2di)_mm_setzero_si128());
4315 static __inline__ __m256i __DEFAULT_FN_ATTRS
4316 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4318 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4319 (__v4di)_mm256_cvtepu16_epi64(__A),
4323 static __inline__ __m256i __DEFAULT_FN_ATTRS
4324 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4326 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4327 (__v4di)_mm256_cvtepu16_epi64(__A),
4328 (__v4di)_mm256_setzero_si256());
4332 #define _mm_rol_epi32(a, b) __extension__ ({\
4333 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4334 (__v4si)_mm_setzero_si128(), \
/* Masked form: forwards to __builtin_ia32_prold128_mask with a reinterpreted
 * as __v4si, b cast to int, w reinterpreted as __v4si and u cast to
 * __mmask8; the result is cast back to __m128i.  Presumably w supplies the
 * lanes whose bit in u is clear -- confirm against the Intel intrinsics
 * guide. */
4337 #define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
4338 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4339 (__v4si)(__m128i)(w), (__mmask8)(u)); })
4341 #define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
4342 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
4343 (__v4si)_mm_setzero_si128(), \
4346 #define _mm256_rol_epi32(a, b) __extension__ ({\
4347 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4348 (__v8si)_mm256_setzero_si256(), \
/* Masked form: forwards to __builtin_ia32_prold256_mask with a reinterpreted
 * as __v8si, b cast to int, w reinterpreted as __v8si and u cast to
 * __mmask8; the result is cast back to __m256i.  Presumably w supplies the
 * lanes whose bit in u is clear -- confirm against the Intel intrinsics
 * guide. */
4351 #define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
4352 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4353 (__v8si)(__m256i)(w), (__mmask8)(u)); })
4355 #define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
4356 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
4357 (__v8si)_mm256_setzero_si256(), \
4360 #define _mm_rol_epi64(a, b) __extension__ ({\
4361 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4362 (__v2di)_mm_setzero_di(), \
/* Masked form: forwards to __builtin_ia32_prolq128_mask with a reinterpreted
 * as __v2di, b cast to int, w reinterpreted as __v2di and u cast to
 * __mmask8; the result is cast back to __m128i.  Presumably w supplies the
 * lanes whose bit in u is clear -- confirm against the Intel intrinsics
 * guide. */
4365 #define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
4366 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4367 (__v2di)(__m128i)(w), (__mmask8)(u)); })
4369 #define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
4370 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
4371 (__v2di)_mm_setzero_di(), \
4374 #define _mm256_rol_epi64(a, b) __extension__ ({\
4375 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4376 (__v4di)_mm256_setzero_si256(), \
/* Masked form: forwards to __builtin_ia32_prolq256_mask with a reinterpreted
 * as __v4di, b cast to int, w reinterpreted as __v4di and u cast to
 * __mmask8; the result is cast back to __m256i.  Presumably w supplies the
 * lanes whose bit in u is clear -- confirm against the Intel intrinsics
 * guide. */
4379 #define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
4380 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4381 (__v4di)(__m256i)(w), (__mmask8)(u)); })
4383 #define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
4384 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
4385 (__v4di)_mm256_setzero_si256(), \
4388 static __inline__ __m128i __DEFAULT_FN_ATTRS
4389 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4391 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4394 _mm_setzero_si128 (),
4398 static __inline__ __m128i __DEFAULT_FN_ATTRS
4399 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4402 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4408 static __inline__ __m128i __DEFAULT_FN_ATTRS
4409 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4411 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4414 _mm_setzero_si128 (),
4418 static __inline__ __m256i __DEFAULT_FN_ATTRS
4419 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4421 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4424 _mm256_setzero_si256 (),
4428 static __inline__ __m256i __DEFAULT_FN_ATTRS
4429 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4432 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4438 static __inline__ __m256i __DEFAULT_FN_ATTRS
4439 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4441 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4444 _mm256_setzero_si256 (),
4448 static __inline__ __m128i __DEFAULT_FN_ATTRS
4449 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4451 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4458 static __inline__ __m128i __DEFAULT_FN_ATTRS
4459 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
4462 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4468 static __inline__ __m128i __DEFAULT_FN_ATTRS
4469 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4471 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4478 static __inline__ __m256i __DEFAULT_FN_ATTRS
4479 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4481 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4484 _mm256_setzero_si256 (),
4488 static __inline__ __m256i __DEFAULT_FN_ATTRS
4489 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
4492 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4498 static __inline__ __m256i __DEFAULT_FN_ATTRS
4499 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4501 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4504 _mm256_setzero_si256 (),
4508 #define _mm_ror_epi32(A, B) __extension__ ({ \
4509 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4510 (__v4si)_mm_setzero_si128(), \
/* Masked form: forwards to __builtin_ia32_prord128_mask with A reinterpreted
 * as __v4si, B cast to int, W reinterpreted as __v4si and U cast to
 * __mmask8; the result is cast back to __m128i.  Presumably W supplies the
 * lanes whose bit in U is clear -- confirm against the Intel intrinsics
 * guide. */
4513 #define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
4514 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4515 (__v4si)(__m128i)(W), (__mmask8)(U)); })
4517 #define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
4518 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
4519 (__v4si)_mm_setzero_si128(), \
4522 #define _mm256_ror_epi32(A, B) __extension__ ({ \
4523 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4524 (__v8si)_mm256_setzero_si256(), \
/* Masked form: forwards to __builtin_ia32_prord256_mask with A reinterpreted
 * as __v8si, B cast to int, W reinterpreted as __v8si and U cast to
 * __mmask8; the result is cast back to __m256i.  Presumably W supplies the
 * lanes whose bit in U is clear -- confirm against the Intel intrinsics
 * guide. */
4527 #define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
4528 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4529 (__v8si)(__m256i)(W), (__mmask8)(U)); })
4531 #define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
4532 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
4533 (__v8si)_mm256_setzero_si256(), \
4536 #define _mm_ror_epi64(A, B) __extension__ ({ \
4537 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4538 (__v2di)_mm_setzero_di(), \
/* Masked form: forwards to __builtin_ia32_prorq128_mask with A reinterpreted
 * as __v2di, B cast to int, W reinterpreted as __v2di and U cast to
 * __mmask8; the result is cast back to __m128i.  Presumably W supplies the
 * lanes whose bit in U is clear -- confirm against the Intel intrinsics
 * guide. */
4541 #define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
4542 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4543 (__v2di)(__m128i)(W), (__mmask8)(U)); })
4545 #define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
4546 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
4547 (__v2di)_mm_setzero_di(), \
4550 #define _mm256_ror_epi64(A, B) __extension__ ({ \
4551 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4552 (__v4di)_mm256_setzero_si256(), \
4555 #define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
4556 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4557 (__v4di)(__m256i)(W), (__mmask8)(U)); })
4559 #define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
4560 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
4561 (__v4di)_mm256_setzero_si256(), \
/* mask / maskz wrappers around _mm_sll_epi32 and _mm256_sll_epi32.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The count __B is
 * an __m128i for both widths. The maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4564 static __inline__ __m128i __DEFAULT_FN_ATTRS
4565 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4567 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4568 (__v4si)_mm_sll_epi32(__A, __B),
4572 static __inline__ __m128i __DEFAULT_FN_ATTRS
4573 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4575 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4576 (__v4si)_mm_sll_epi32(__A, __B),
4577 (__v4si)_mm_setzero_si128());
4580 static __inline__ __m256i __DEFAULT_FN_ATTRS
4581 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4583 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4584 (__v8si)_mm256_sll_epi32(__A, __B),
4588 static __inline__ __m256i __DEFAULT_FN_ATTRS
4589 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4591 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4592 (__v8si)_mm256_sll_epi32(__A, __B),
4593 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_slli_epi32 and _mm256_slli_epi32, which
 * take the shift count as a plain int __B.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The maskz forms
 * use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4596 static __inline__ __m128i __DEFAULT_FN_ATTRS
4597 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4599 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4600 (__v4si)_mm_slli_epi32(__A, __B),
4604 static __inline__ __m128i __DEFAULT_FN_ATTRS
4605 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4607 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4608 (__v4si)_mm_slli_epi32(__A, __B),
4609 (__v4si)_mm_setzero_si128());
4612 static __inline__ __m256i __DEFAULT_FN_ATTRS
4613 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4615 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4616 (__v8si)_mm256_slli_epi32(__A, __B),
4620 static __inline__ __m256i __DEFAULT_FN_ATTRS
4621 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4623 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4624 (__v8si)_mm256_slli_epi32(__A, __B),
4625 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_sll_epi64 and _mm256_sll_epi64.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The count __B is
 * an __m128i for both widths. The maskz forms use a zero fallback
 * (_mm_setzero_di() for 128-bit, _mm256_setzero_si256() for 256-bit).
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4628 static __inline__ __m128i __DEFAULT_FN_ATTRS
4629 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4631 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4632 (__v2di)_mm_sll_epi64(__A, __B),
4636 static __inline__ __m128i __DEFAULT_FN_ATTRS
4637 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4639 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4640 (__v2di)_mm_sll_epi64(__A, __B),
4641 (__v2di)_mm_setzero_di());
4644 static __inline__ __m256i __DEFAULT_FN_ATTRS
4645 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4647 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4648 (__v4di)_mm256_sll_epi64(__A, __B),
4652 static __inline__ __m256i __DEFAULT_FN_ATTRS
4653 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4655 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4656 (__v4di)_mm256_sll_epi64(__A, __B),
4657 (__v4di)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_slli_epi64 and _mm256_slli_epi64, which
 * take the shift count as a plain int __B.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The maskz forms
 * use a zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4660 static __inline__ __m128i __DEFAULT_FN_ATTRS
4661 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4663 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4664 (__v2di)_mm_slli_epi64(__A, __B),
4668 static __inline__ __m128i __DEFAULT_FN_ATTRS
4669 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4671 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4672 (__v2di)_mm_slli_epi64(__A, __B),
4673 (__v2di)_mm_setzero_di());
4676 static __inline__ __m256i __DEFAULT_FN_ATTRS
4677 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4679 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4680 (__v4di)_mm256_slli_epi64(__A, __B),
4684 static __inline__ __m256i __DEFAULT_FN_ATTRS
4685 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4687 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4688 (__v4di)_mm256_slli_epi64(__A, __B),
4689 (__v4di)_mm256_setzero_si256());
/* rorv_epi32 family (128- and 256-bit; plain, mask, maskz).
 * Every wrapper calls __builtin_ia32_prorvd128_mask or
 * __builtin_ia32_prorvd256_mask with __A as the first operand. The plain and
 * maskz forms also supply a zero vector.
 * NOTE(review): braces, the tail of the two-line mask signatures and the
 * remaining call arguments are missing from this listing (presumably __B,
 * a pass-through vector and a mask) -- confirm against a complete copy. */
4692 static __inline__ __m128i __DEFAULT_FN_ATTRS
4693 _mm_rorv_epi32 (__m128i __A, __m128i __B)
4695 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4698 _mm_setzero_si128 (),
4702 static __inline__ __m128i __DEFAULT_FN_ATTRS
4703 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4706 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4712 static __inline__ __m128i __DEFAULT_FN_ATTRS
4713 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4715 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
4718 _mm_setzero_si128 (),
4722 static __inline__ __m256i __DEFAULT_FN_ATTRS
4723 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
4725 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4728 _mm256_setzero_si256 (),
4732 static __inline__ __m256i __DEFAULT_FN_ATTRS
4733 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4736 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4742 static __inline__ __m256i __DEFAULT_FN_ATTRS
4743 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4745 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
4748 _mm256_setzero_si256 (),
/* rorv_epi64 family (128- and 256-bit; plain, mask, maskz).
 * Every wrapper calls __builtin_ia32_prorvq128_mask or
 * __builtin_ia32_prorvq256_mask with __A as the first operand. The 256-bit
 * plain and maskz forms visibly supply _mm256_setzero_si256().
 * NOTE(review): braces, the tail of the two-line mask signatures and the
 * remaining call arguments are missing from this listing (for the 128-bit
 * forms only the first operand survives) -- confirm against a complete copy. */
4752 static __inline__ __m128i __DEFAULT_FN_ATTRS
4753 _mm_rorv_epi64 (__m128i __A, __m128i __B)
4755 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4762 static __inline__ __m128i __DEFAULT_FN_ATTRS
4763 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
4766 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4772 static __inline__ __m128i __DEFAULT_FN_ATTRS
4773 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4775 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
4782 static __inline__ __m256i __DEFAULT_FN_ATTRS
4783 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
4785 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4788 _mm256_setzero_si256 (),
4792 static __inline__ __m256i __DEFAULT_FN_ATTRS
4793 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
4796 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4802 static __inline__ __m256i __DEFAULT_FN_ATTRS
4803 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4805 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
4808 _mm256_setzero_si256 (),
/* mask / maskz wrappers around _mm_sllv_epi64 and _mm256_sllv_epi64.
 * Each passes __U, the unmasked result of sllv(__X, __Y) and a fallback
 * vector to __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The
 * maskz forms use a zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4812 static __inline__ __m128i __DEFAULT_FN_ATTRS
4813 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4815 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4816 (__v2di)_mm_sllv_epi64(__X, __Y),
4820 static __inline__ __m128i __DEFAULT_FN_ATTRS
4821 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4823 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4824 (__v2di)_mm_sllv_epi64(__X, __Y),
4825 (__v2di)_mm_setzero_di());
4828 static __inline__ __m256i __DEFAULT_FN_ATTRS
4829 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4831 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4832 (__v4di)_mm256_sllv_epi64(__X, __Y),
4836 static __inline__ __m256i __DEFAULT_FN_ATTRS
4837 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4839 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4840 (__v4di)_mm256_sllv_epi64(__X, __Y),
4841 (__v4di)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_sllv_epi32 and _mm256_sllv_epi32.
 * Each passes __U, the unmasked result of sllv(__X, __Y) and a fallback
 * vector to __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The
 * maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4844 static __inline__ __m128i __DEFAULT_FN_ATTRS
4845 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4848 (__v4si)_mm_sllv_epi32(__X, __Y),
4852 static __inline__ __m128i __DEFAULT_FN_ATTRS
4853 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4855 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4856 (__v4si)_mm_sllv_epi32(__X, __Y),
4857 (__v4si)_mm_setzero_si128());
4860 static __inline__ __m256i __DEFAULT_FN_ATTRS
4861 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4864 (__v8si)_mm256_sllv_epi32(__X, __Y),
4868 static __inline__ __m256i __DEFAULT_FN_ATTRS
4869 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4871 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4872 (__v8si)_mm256_sllv_epi32(__X, __Y),
4873 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srlv_epi64 and _mm256_srlv_epi64.
 * Each passes __U, the unmasked result of srlv(__X, __Y) and a fallback
 * vector to __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The
 * maskz forms use a zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4876 static __inline__ __m128i __DEFAULT_FN_ATTRS
4877 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4879 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4880 (__v2di)_mm_srlv_epi64(__X, __Y),
4884 static __inline__ __m128i __DEFAULT_FN_ATTRS
4885 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4887 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4888 (__v2di)_mm_srlv_epi64(__X, __Y),
4889 (__v2di)_mm_setzero_di());
4892 static __inline__ __m256i __DEFAULT_FN_ATTRS
4893 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4895 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4896 (__v4di)_mm256_srlv_epi64(__X, __Y),
4900 static __inline__ __m256i __DEFAULT_FN_ATTRS
4901 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4903 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4904 (__v4di)_mm256_srlv_epi64(__X, __Y),
4905 (__v4di)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srlv_epi32 and _mm256_srlv_epi32.
 * Each passes __U, the unmasked result of srlv(__X, __Y) and a fallback
 * vector to __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The
 * maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4908 static __inline__ __m128i __DEFAULT_FN_ATTRS
4909 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4911 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4912 (__v4si)_mm_srlv_epi32(__X, __Y),
4916 static __inline__ __m128i __DEFAULT_FN_ATTRS
4917 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4919 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4920 (__v4si)_mm_srlv_epi32(__X, __Y),
4921 (__v4si)_mm_setzero_si128());
4924 static __inline__ __m256i __DEFAULT_FN_ATTRS
4925 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4927 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4928 (__v8si)_mm256_srlv_epi32(__X, __Y),
4932 static __inline__ __m256i __DEFAULT_FN_ATTRS
4933 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4935 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4936 (__v8si)_mm256_srlv_epi32(__X, __Y),
4937 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srl_epi32 and _mm256_srl_epi32.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The count __B is
 * an __m128i for both widths. The maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4940 static __inline__ __m128i __DEFAULT_FN_ATTRS
4941 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4943 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4944 (__v4si)_mm_srl_epi32(__A, __B),
4948 static __inline__ __m128i __DEFAULT_FN_ATTRS
4949 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4951 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4952 (__v4si)_mm_srl_epi32(__A, __B),
4953 (__v4si)_mm_setzero_si128());
4956 static __inline__ __m256i __DEFAULT_FN_ATTRS
4957 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4959 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4960 (__v8si)_mm256_srl_epi32(__A, __B),
4964 static __inline__ __m256i __DEFAULT_FN_ATTRS
4965 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4967 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4968 (__v8si)_mm256_srl_epi32(__A, __B),
4969 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srli_epi32 and _mm256_srli_epi32, which
 * take the shift count as a plain int __B.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The maskz forms
 * use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
4972 static __inline__ __m128i __DEFAULT_FN_ATTRS
4973 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4976 (__v4si)_mm_srli_epi32(__A, __B),
4980 static __inline__ __m128i __DEFAULT_FN_ATTRS
4981 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4983 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4984 (__v4si)_mm_srli_epi32(__A, __B),
4985 (__v4si)_mm_setzero_si128());
4988 static __inline__ __m256i __DEFAULT_FN_ATTRS
4989 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4992 (__v8si)_mm256_srli_epi32(__A, __B),
4996 static __inline__ __m256i __DEFAULT_FN_ATTRS
4997 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4999 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5000 (__v8si)_mm256_srli_epi32(__A, __B),
5001 (__v8si)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srl_epi64 and _mm256_srl_epi64.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The count __B is
 * an __m128i for both widths. The maskz forms use a zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5004 static __inline__ __m128i __DEFAULT_FN_ATTRS
5005 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5007 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5008 (__v2di)_mm_srl_epi64(__A, __B),
5012 static __inline__ __m128i __DEFAULT_FN_ATTRS
5013 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
5015 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5016 (__v2di)_mm_srl_epi64(__A, __B),
5017 (__v2di)_mm_setzero_di());
5020 static __inline__ __m256i __DEFAULT_FN_ATTRS
5021 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5023 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5024 (__v4di)_mm256_srl_epi64(__A, __B),
5028 static __inline__ __m256i __DEFAULT_FN_ATTRS
5029 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
5031 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5032 (__v4di)_mm256_srl_epi64(__A, __B),
5033 (__v4di)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srli_epi64 and _mm256_srli_epi64, which
 * take the shift count as a plain int __B.
 * Each passes __U, the unmasked shift result and a fallback vector to
 * __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256. The maskz forms
 * use a zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5036 static __inline__ __m128i __DEFAULT_FN_ATTRS
5037 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5039 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5040 (__v2di)_mm_srli_epi64(__A, __B),
5044 static __inline__ __m128i __DEFAULT_FN_ATTRS
5045 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
5047 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5048 (__v2di)_mm_srli_epi64(__A, __B),
5049 (__v2di)_mm_setzero_di());
5052 static __inline__ __m256i __DEFAULT_FN_ATTRS
5053 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5055 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5056 (__v4di)_mm256_srli_epi64(__A, __B),
5060 static __inline__ __m256i __DEFAULT_FN_ATTRS
5061 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
5063 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5064 (__v4di)_mm256_srli_epi64(__A, __B),
5065 (__v4di)_mm256_setzero_si256());
/* mask / maskz wrappers around _mm_srav_epi32 and _mm256_srav_epi32.
 * Each passes __U, the unmasked result of srav(__X, __Y) and a fallback
 * vector to __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256. The
 * maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5068 static __inline__ __m128i __DEFAULT_FN_ATTRS
5069 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5072 (__v4si)_mm_srav_epi32(__X, __Y),
5076 static __inline__ __m128i __DEFAULT_FN_ATTRS
5077 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5079 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5080 (__v4si)_mm_srav_epi32(__X, __Y),
5081 (__v4si)_mm_setzero_si128());
5084 static __inline__ __m256i __DEFAULT_FN_ATTRS
5085 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5087 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5088 (__v8si)_mm256_srav_epi32(__X, __Y),
5092 static __inline__ __m256i __DEFAULT_FN_ATTRS
5093 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5095 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5096 (__v8si)_mm256_srav_epi32(__X, __Y),
5097 (__v8si)_mm256_setzero_si256());
/* 128-bit srav_epi64 family.
 * _mm_srav_epi64 forwards __X and __Y (as __v2di) to
 * __builtin_ia32_psravq128. The mask and maskz forms call _mm_srav_epi64 and
 * pass __U, that result and a fallback vector to __builtin_ia32_selectq_128;
 * the maskz fallback is _mm_setzero_di().
 * NOTE(review): the fallback operand of the mask form and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5100 static __inline__ __m128i __DEFAULT_FN_ATTRS
5101 _mm_srav_epi64(__m128i __X, __m128i __Y)
5103 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5106 static __inline__ __m128i __DEFAULT_FN_ATTRS
5107 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5110 (__v2di)_mm_srav_epi64(__X, __Y),
5114 static __inline__ __m128i __DEFAULT_FN_ATTRS
5115 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5118 (__v2di)_mm_srav_epi64(__X, __Y),
5119 (__v2di)_mm_setzero_di());
/* 256-bit srav_epi64 family.
 * _mm256_srav_epi64 forwards __X and __Y (as __v4di) to
 * __builtin_ia32_psravq256. The mask and maskz forms call _mm256_srav_epi64
 * and pass __U, that result and a fallback vector to
 * __builtin_ia32_selectq_256; the maskz fallback is _mm256_setzero_si256().
 * NOTE(review): the fallback operand of the mask form and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5122 static __inline__ __m256i __DEFAULT_FN_ATTRS
5123 _mm256_srav_epi64(__m256i __X, __m256i __Y)
5125 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5128 static __inline__ __m256i __DEFAULT_FN_ATTRS
5129 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5131 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5132 (__v4di)_mm256_srav_epi64(__X, __Y),
5136 static __inline__ __m256i __DEFAULT_FN_ATTRS
5137 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5140 (__v4di)_mm256_srav_epi64(__X, __Y),
5141 (__v4di)_mm256_setzero_si256());
/* mask / maskz mov_epi32 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256 with __U
 * as the mask; the maskz forms end the call with an all-zero vector.
 * NOTE(review): the middle operand (presumably __A), the fallback of the
 * mask forms (presumably __W) and the braces are missing from this listing --
 * confirm against a complete copy. */
5144 static __inline__ __m128i __DEFAULT_FN_ATTRS
5145 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5147 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5152 static __inline__ __m128i __DEFAULT_FN_ATTRS
5153 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5155 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5157 (__v4si) _mm_setzero_si128 ());
5161 static __inline__ __m256i __DEFAULT_FN_ATTRS
5162 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5164 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5169 static __inline__ __m256i __DEFAULT_FN_ATTRS
5170 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5172 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5174 (__v8si) _mm256_setzero_si256 ());
/* mask / maskz load_epi32 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_movdqa32load128_mask / ..load256_mask with __P
 * cast to a non-const vector pointer (the cast drops the const of
 * `void const *`). The maskz forms also supply a zero vector.
 * NOTE(review): the remaining arguments (presumably __W and __U) and the
 * braces are missing from this listing. The builtin name suggests an aligned
 * load -- confirm the alignment requirement on __P. */
5177 static __inline__ __m128i __DEFAULT_FN_ATTRS
5178 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5180 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5186 static __inline__ __m128i __DEFAULT_FN_ATTRS
5187 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5189 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5191 _mm_setzero_si128 (),
5196 static __inline__ __m256i __DEFAULT_FN_ATTRS
5197 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5199 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5205 static __inline__ __m256i __DEFAULT_FN_ATTRS
5206 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5208 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5210 _mm256_setzero_si256 (),
/* mask store_epi32 wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_movdqa32store128_mask / ..store256_mask with __P
 * cast to the 32-bit-lane vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing. The builtin name suggests an aligned
 * store -- confirm the alignment requirement on __P. */
5215 static __inline__ void __DEFAULT_FN_ATTRS
5216 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5218 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5223 static __inline__ void __DEFAULT_FN_ATTRS
5224 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5226 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
/* mask / maskz mov_epi64 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256 with __U
 * as the mask; the maskz forms end the call with a zero vector
 * (_mm_setzero_di() for 128-bit, _mm256_setzero_si256() for 256-bit).
 * NOTE(review): the middle operand (presumably __A), the fallback of the
 * mask forms (presumably __W) and the braces are missing from this listing --
 * confirm against a complete copy. */
5231 static __inline__ __m128i __DEFAULT_FN_ATTRS
5232 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5234 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5239 static __inline__ __m128i __DEFAULT_FN_ATTRS
5240 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5242 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5244 (__v2di) _mm_setzero_di ());
5247 static __inline__ __m256i __DEFAULT_FN_ATTRS
5248 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5250 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5255 static __inline__ __m256i __DEFAULT_FN_ATTRS
5256 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5258 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5260 (__v4di) _mm256_setzero_si256 ());
/* mask / maskz load_epi64 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_movdqa64load128_mask / ..load256_mask with __P
 * cast to a non-const vector pointer (the cast drops the const of
 * `void const *`). The 256-bit maskz form visibly supplies
 * _mm256_setzero_si256().
 * NOTE(review): the remaining arguments and the braces are missing from this
 * listing. The builtin name suggests an aligned load -- confirm the
 * alignment requirement on __P. */
5263 static __inline__ __m128i __DEFAULT_FN_ATTRS
5264 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5266 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5272 static __inline__ __m128i __DEFAULT_FN_ATTRS
5273 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5275 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5282 static __inline__ __m256i __DEFAULT_FN_ATTRS
5283 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5285 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5291 static __inline__ __m256i __DEFAULT_FN_ATTRS
5292 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5294 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5296 _mm256_setzero_si256 (),
/* mask store_epi64 wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_movdqa64store128_mask / ..store256_mask with __P
 * cast to the 64-bit-lane vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing. The builtin name suggests an aligned
 * store -- confirm the alignment requirement on __P. */
5301 static __inline__ void __DEFAULT_FN_ATTRS
5302 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5304 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5309 static __inline__ void __DEFAULT_FN_ATTRS
5310 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5312 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
/* mask / maskz wrappers around _mm_movedup_pd and _mm256_movedup_pd.
 * Each passes __U, the unmasked movedup(__A) result and a fallback vector to
 * __builtin_ia32_selectpd_128 / __builtin_ia32_selectpd_256. The maskz forms
 * use _mm_setzero_pd() / _mm256_setzero_pd() as the fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __W) -- confirm. */
5317 static __inline__ __m128d __DEFAULT_FN_ATTRS
5318 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5320 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5321 (__v2df)_mm_movedup_pd(__A),
5325 static __inline__ __m128d __DEFAULT_FN_ATTRS
5326 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5329 (__v2df)_mm_movedup_pd(__A),
5330 (__v2df)_mm_setzero_pd());
5333 static __inline__ __m256d __DEFAULT_FN_ATTRS
5334 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5336 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5337 (__v4df)_mm256_movedup_pd(__A),
5341 static __inline__ __m256d __DEFAULT_FN_ATTRS
5342 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5344 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5345 (__v4df)_mm256_movedup_pd(__A),
5346 (__v4df)_mm256_setzero_pd());
/* mask / maskz set1_epi32 wrappers (128- and 256-bit).
 * Each builds a vector from the scalar __A with _mm_set1_epi32 /
 * _mm256_set1_epi32 and passes __M (without a cast), that vector and a
 * fallback vector to __builtin_ia32_selectd_128 / __builtin_ia32_selectd_256.
 * The maskz forms use an all-zero fallback.
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __O) -- confirm. */
5349 static __inline__ __m128i __DEFAULT_FN_ATTRS
5350 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5352 return (__m128i)__builtin_ia32_selectd_128(__M,
5353 (__v4si) _mm_set1_epi32(__A),
5357 static __inline__ __m128i __DEFAULT_FN_ATTRS
5358 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
5360 return (__m128i)__builtin_ia32_selectd_128(__M,
5361 (__v4si) _mm_set1_epi32(__A),
5362 (__v4si)_mm_setzero_si128());
5365 static __inline__ __m256i __DEFAULT_FN_ATTRS
5366 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5368 return (__m256i)__builtin_ia32_selectd_256(__M,
5369 (__v8si) _mm256_set1_epi32(__A),
5373 static __inline__ __m256i __DEFAULT_FN_ATTRS
5374 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5376 return (__m256i)__builtin_ia32_selectd_256(__M,
5377 (__v8si) _mm256_set1_epi32(__A),
5378 (__v8si)_mm256_setzero_si256());
/* mask / maskz set1_epi64 wrappers (128- and 256-bit).
 * Each builds a vector from the long long __A with _mm_set1_epi64x /
 * _mm256_set1_epi64x and passes __M (without a cast), that vector and a
 * fallback vector to __builtin_ia32_selectq_128 / __builtin_ia32_selectq_256.
 * The maskz forms use a zero fallback; the 128-bit one uses
 * _mm_setzero_si128() here, not _mm_setzero_di().
 * NOTE(review): the fallback operand of the mask forms and the braces are
 * missing from this listing (presumably __O) -- confirm. */
5383 static __inline__ __m128i __DEFAULT_FN_ATTRS
5384 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5386 return (__m128i) __builtin_ia32_selectq_128(__M,
5387 (__v2di) _mm_set1_epi64x(__A),
5391 static __inline__ __m128i __DEFAULT_FN_ATTRS
5392 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5394 return (__m128i) __builtin_ia32_selectq_128(__M,
5395 (__v2di) _mm_set1_epi64x(__A),
5396 (__v2di) _mm_setzero_si128());
5399 static __inline__ __m256i __DEFAULT_FN_ATTRS
5400 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5402 return (__m256i) __builtin_ia32_selectq_256(__M,
5403 (__v4di) _mm256_set1_epi64x(__A),
5407 static __inline__ __m256i __DEFAULT_FN_ATTRS
5408 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5410 return (__m256i) __builtin_ia32_selectq_256(__M,
5411 (__v4di) _mm256_set1_epi64x(__A),
5412 (__v4di) _mm256_setzero_si256());
/* fixupimm_pd macro family (128- and 256-bit; plain, mask, maskz).
 * The plain and mask forms expand to __builtin_ia32_fixupimmpd128_mask /
 * ..256_mask; the maskz forms expand to the matching ..._maskz builtin.
 * In every form (A) and (B) are cast to the double vector type, (C) to the
 * 64-bit integer vector type and (imm) to int. The maskz forms pass
 * (__mmask8)(U) as the last argument.
 * NOTE(review): the final mask argument of the plain and mask forms is cut
 * off in this listing -- confirm against a complete copy. */
5417 #define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5418 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5419 (__v2df)(__m128d)(B), \
5420 (__v2di)(__m128i)(C), (int)(imm), \
5423 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5424 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5425 (__v2df)(__m128d)(B), \
5426 (__v2di)(__m128i)(C), (int)(imm), \
5429 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
5430 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
5431 (__v2df)(__m128d)(B), \
5432 (__v2di)(__m128i)(C), \
5433 (int)(imm), (__mmask8)(U)); })
5435 #define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5436 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5437 (__v4df)(__m256d)(B), \
5438 (__v4di)(__m256i)(C), (int)(imm), \
5441 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5442 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5443 (__v4df)(__m256d)(B), \
5444 (__v4di)(__m256i)(C), (int)(imm), \
5447 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
5448 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
5449 (__v4df)(__m256d)(B), \
5450 (__v4di)(__m256i)(C), \
5451 (int)(imm), (__mmask8)(U)); })
/* fixupimm_ps macro family (128- and 256-bit; plain, mask, maskz).
 * The plain and mask forms expand to __builtin_ia32_fixupimmps128_mask /
 * ..256_mask; the maskz forms expand to the matching ..._maskz builtin.
 * In every form (A) and (B) are cast to the float vector type, (C) to the
 * 32-bit integer vector type and (imm) to int.
 * NOTE(review): the final (mask) argument of every macro here is cut off in
 * this listing -- confirm against a complete copy. */
5453 #define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5454 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5455 (__v4sf)(__m128)(B), \
5456 (__v4si)(__m128i)(C), (int)(imm), \
5459 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5460 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5461 (__v4sf)(__m128)(B), \
5462 (__v4si)(__m128i)(C), (int)(imm), \
5465 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5466 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5467 (__v4sf)(__m128)(B), \
5468 (__v4si)(__m128i)(C), (int)(imm), \
5471 #define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5472 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5473 (__v8sf)(__m256)(B), \
5474 (__v8si)(__m256i)(C), (int)(imm), \
5477 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5478 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5479 (__v8sf)(__m256)(B), \
5480 (__v8si)(__m256i)(C), (int)(imm), \
5483 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5484 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5485 (__v8sf)(__m256)(B), \
5486 (__v8si)(__m256i)(C), (int)(imm), \
/* mask / maskz load_pd wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loadapd128_mask / __builtin_ia32_loadapd256_mask
 * with __P cast to a non-const double-vector pointer (the cast drops the
 * const of `void const *`). The 256-bit maskz form visibly supplies
 * _mm256_setzero_pd().
 * NOTE(review): the remaining arguments and the braces are missing from this
 * listing. The builtin name suggests an aligned load -- confirm the
 * alignment requirement on __P. */
5489 static __inline__ __m128d __DEFAULT_FN_ATTRS
5490 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5492 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5497 static __inline__ __m128d __DEFAULT_FN_ATTRS
5498 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5500 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5506 static __inline__ __m256d __DEFAULT_FN_ATTRS
5507 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5509 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5514 static __inline__ __m256d __DEFAULT_FN_ATTRS
5515 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5517 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5519 _mm256_setzero_pd (),
/* mask / maskz load_ps wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loadaps128_mask / __builtin_ia32_loadaps256_mask
 * with __P cast to a non-const float-vector pointer (the cast drops the
 * const of `void const *`). The 256-bit maskz form visibly supplies
 * _mm256_setzero_ps().
 * NOTE(review): the remaining arguments and the braces are missing from this
 * listing. The builtin name suggests an aligned load -- confirm the
 * alignment requirement on __P. */
5523 static __inline__ __m128 __DEFAULT_FN_ATTRS
5524 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5526 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5531 static __inline__ __m128 __DEFAULT_FN_ATTRS
5532 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5534 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5540 static __inline__ __m256 __DEFAULT_FN_ATTRS
5541 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5543 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5548 static __inline__ __m256 __DEFAULT_FN_ATTRS
5549 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5551 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5553 _mm256_setzero_ps (),
/* mask / maskz loadu_epi64 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loaddqudi128_mask / ..256_mask with __P cast to a
 * non-const 64-bit-lane vector pointer (the cast drops the const of
 * `void const *`). The maskz forms also supply a zero vector.
 * NOTE(review): the remaining arguments (presumably __W and __U) and the
 * braces are missing from this listing -- confirm against a complete copy. */
5557 static __inline__ __m128i __DEFAULT_FN_ATTRS
5558 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5560 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5565 static __inline__ __m128i __DEFAULT_FN_ATTRS
5566 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5568 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5570 _mm_setzero_si128 (),
5574 static __inline__ __m256i __DEFAULT_FN_ATTRS
5575 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5577 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5582 static __inline__ __m256i __DEFAULT_FN_ATTRS
5583 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5585 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5587 _mm256_setzero_si256 (),
/* mask / maskz loadu_epi32 wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loaddqusi128_mask / ..256_mask with __P cast to a
 * non-const 32-bit-lane vector pointer (the cast drops the const of
 * `void const *`). The maskz forms also supply a zero vector.
 * NOTE(review): the remaining arguments (presumably __W and __U) and the
 * braces are missing from this listing -- confirm against a complete copy. */
5591 static __inline__ __m128i __DEFAULT_FN_ATTRS
5592 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5594 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5599 static __inline__ __m128i __DEFAULT_FN_ATTRS
5600 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5602 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5604 _mm_setzero_si128 (),
5608 static __inline__ __m256i __DEFAULT_FN_ATTRS
5609 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5611 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5616 static __inline__ __m256i __DEFAULT_FN_ATTRS
5617 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5619 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5621 _mm256_setzero_si256 (),
/* mask / maskz loadu_pd wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loadupd128_mask / __builtin_ia32_loadupd256_mask
 * with __P cast to a non-const double-vector pointer (the cast drops the
 * const of `void const *`). The 256-bit maskz form visibly supplies
 * _mm256_setzero_pd().
 * NOTE(review): the remaining arguments and the braces are missing from this
 * listing -- confirm against a complete copy. */
5625 static __inline__ __m128d __DEFAULT_FN_ATTRS
5626 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5628 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5633 static __inline__ __m128d __DEFAULT_FN_ATTRS
5634 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5636 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5642 static __inline__ __m256d __DEFAULT_FN_ATTRS
5643 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5645 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5650 static __inline__ __m256d __DEFAULT_FN_ATTRS
5651 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5653 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5655 _mm256_setzero_pd (),
/* mask / maskz loadu_ps wrappers (128- and 256-bit).
 * Each calls __builtin_ia32_loadups128_mask / __builtin_ia32_loadups256_mask
 * with __P cast to a non-const float-vector pointer (the cast drops the
 * const of `void const *`). The 256-bit maskz form visibly supplies
 * _mm256_setzero_ps().
 * NOTE(review): the remaining arguments and the braces are missing from this
 * listing -- confirm against a complete copy. */
5659 static __inline__ __m128 __DEFAULT_FN_ATTRS
5660 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5662 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5667 static __inline__ __m128 __DEFAULT_FN_ATTRS
5668 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5670 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5676 static __inline__ __m256 __DEFAULT_FN_ATTRS
5677 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5679 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5684 static __inline__ __m256 __DEFAULT_FN_ATTRS
5685 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5687 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5689 _mm256_setzero_ps (),
/* mask store_pd wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_storeapd128_mask / ..256_mask with __P cast to
 * the double-vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing. The builtin name suggests an aligned
 * store -- confirm the alignment requirement on __P. */
5693 static __inline__ void __DEFAULT_FN_ATTRS
5694 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5696 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5701 static __inline__ void __DEFAULT_FN_ATTRS
5702 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5704 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
/* mask store_ps wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_storeaps128_mask / ..256_mask with __P cast to
 * the float-vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing. The builtin name suggests an aligned
 * store -- confirm the alignment requirement on __P. */
5709 static __inline__ void __DEFAULT_FN_ATTRS
5710 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5712 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5717 static __inline__ void __DEFAULT_FN_ATTRS
5718 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5720 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
/* mask storeu_epi64 wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_storedqudi128_mask / ..256_mask with __P cast to
 * the 64-bit-lane vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing -- confirm against a complete copy. */
5725 static __inline__ void __DEFAULT_FN_ATTRS
5726 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5728 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5733 static __inline__ void __DEFAULT_FN_ATTRS
5734 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5736 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
/* mask storeu_epi32 wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_storedqusi128_mask / ..256_mask with __P cast to
 * the 32-bit-lane vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing -- confirm against a complete copy. */
5741 static __inline__ void __DEFAULT_FN_ATTRS
5742 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5744 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5749 static __inline__ void __DEFAULT_FN_ATTRS
5750 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5752 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
/* mask storeu_pd wrappers (128- and 256-bit); both return void.
 * Each calls __builtin_ia32_storeupd128_mask / ..256_mask with __P cast to
 * the double-vector pointer type.
 * NOTE(review): the remaining arguments (presumably __A and __U) and the
 * braces are missing from this listing -- confirm against a complete copy. */
5757 static __inline__ void __DEFAULT_FN_ATTRS
5758 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5760 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5765 static __inline__ void __DEFAULT_FN_ATTRS
5766 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5768 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5773 static __inline__ void __DEFAULT_FN_ATTRS
5774 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5776 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5781 static __inline__ void __DEFAULT_FN_ATTRS
5782 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5784 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5790 static __inline__ __m128d __DEFAULT_FN_ATTRS
5791 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5793 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5794 (__v2df)_mm_unpackhi_pd(__A, __B),
5798 static __inline__ __m128d __DEFAULT_FN_ATTRS
5799 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5801 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5802 (__v2df)_mm_unpackhi_pd(__A, __B),
5803 (__v2df)_mm_setzero_pd());
5806 static __inline__ __m256d __DEFAULT_FN_ATTRS
5807 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5810 (__v4df)_mm256_unpackhi_pd(__A, __B),
5814 static __inline__ __m256d __DEFAULT_FN_ATTRS
5815 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5817 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5818 (__v4df)_mm256_unpackhi_pd(__A, __B),
5819 (__v4df)_mm256_setzero_pd());
5822 static __inline__ __m128 __DEFAULT_FN_ATTRS
5823 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5825 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5826 (__v4sf)_mm_unpackhi_ps(__A, __B),
5830 static __inline__ __m128 __DEFAULT_FN_ATTRS
5831 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5833 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5834 (__v4sf)_mm_unpackhi_ps(__A, __B),
5835 (__v4sf)_mm_setzero_ps());
5838 static __inline__ __m256 __DEFAULT_FN_ATTRS
5839 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5841 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5842 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5846 static __inline__ __m256 __DEFAULT_FN_ATTRS
5847 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5849 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5850 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5851 (__v8sf)_mm256_setzero_ps());
5854 static __inline__ __m128d __DEFAULT_FN_ATTRS
5855 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5857 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5858 (__v2df)_mm_unpacklo_pd(__A, __B),
5862 static __inline__ __m128d __DEFAULT_FN_ATTRS
5863 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5865 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5866 (__v2df)_mm_unpacklo_pd(__A, __B),
5867 (__v2df)_mm_setzero_pd());
5870 static __inline__ __m256d __DEFAULT_FN_ATTRS
5871 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5873 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5874 (__v4df)_mm256_unpacklo_pd(__A, __B),
5878 static __inline__ __m256d __DEFAULT_FN_ATTRS
5879 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5881 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5882 (__v4df)_mm256_unpacklo_pd(__A, __B),
5883 (__v4df)_mm256_setzero_pd());
5886 static __inline__ __m128 __DEFAULT_FN_ATTRS
5887 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5889 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5890 (__v4sf)_mm_unpacklo_ps(__A, __B),
5894 static __inline__ __m128 __DEFAULT_FN_ATTRS
5895 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5897 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5898 (__v4sf)_mm_unpacklo_ps(__A, __B),
5899 (__v4sf)_mm_setzero_ps());
5902 static __inline__ __m256 __DEFAULT_FN_ATTRS
5903 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5905 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5906 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5910 static __inline__ __m256 __DEFAULT_FN_ATTRS
5911 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5913 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5914 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5915 (__v8sf)_mm256_setzero_ps());
5918 static __inline__ __m128d __DEFAULT_FN_ATTRS
5919 _mm_rcp14_pd (__m128d __A)
5921 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5927 static __inline__ __m128d __DEFAULT_FN_ATTRS
5928 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5930 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5935 static __inline__ __m128d __DEFAULT_FN_ATTRS
5936 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5938 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5944 static __inline__ __m256d __DEFAULT_FN_ATTRS
5945 _mm256_rcp14_pd (__m256d __A)
5947 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5949 _mm256_setzero_pd (),
5953 static __inline__ __m256d __DEFAULT_FN_ATTRS
5954 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5956 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5961 static __inline__ __m256d __DEFAULT_FN_ATTRS
5962 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5964 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5966 _mm256_setzero_pd (),
5970 static __inline__ __m128 __DEFAULT_FN_ATTRS
5971 _mm_rcp14_ps (__m128 __A)
5973 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5979 static __inline__ __m128 __DEFAULT_FN_ATTRS
5980 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5982 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5987 static __inline__ __m128 __DEFAULT_FN_ATTRS
5988 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
5990 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5996 static __inline__ __m256 __DEFAULT_FN_ATTRS
5997 _mm256_rcp14_ps (__m256 __A)
5999 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6001 _mm256_setzero_ps (),
6005 static __inline__ __m256 __DEFAULT_FN_ATTRS
6006 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6008 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6013 static __inline__ __m256 __DEFAULT_FN_ATTRS
6014 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6016 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6018 _mm256_setzero_ps (),
/* Masked variant of _mm_permute_pd: passes mask U, the result of
 * _mm_permute_pd(X, C) and the vector W to __builtin_ia32_selectpd_128 and
 * yields the result as __m128d.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6022 #define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
6023 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6024 (__v2df)_mm_permute_pd((X), (C)), \
6025 (__v2df)(__m128d)(W)); })
/* Zero-masked variant of _mm_permute_pd: passes mask U, the result of
 * _mm_permute_pd(X, C) and _mm_setzero_pd() to __builtin_ia32_selectpd_128
 * and yields the result as __m128d.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6027 #define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
6028 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6029 (__v2df)_mm_permute_pd((X), (C)), \
6030 (__v2df)_mm_setzero_pd()); })
/* Masked variant of _mm256_permute_pd: passes mask U, the result of
 * _mm256_permute_pd(X, C) and the vector W to __builtin_ia32_selectpd_256
 * and yields the result as __m256d.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6032 #define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
6033 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6034 (__v4df)_mm256_permute_pd((X), (C)), \
6035 (__v4df)(__m256d)(W)); })
/* Zero-masked variant of _mm256_permute_pd: passes mask U, the result of
 * _mm256_permute_pd(X, C) and _mm256_setzero_pd() to
 * __builtin_ia32_selectpd_256 and yields the result as __m256d.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6037 #define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
6038 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6039 (__v4df)_mm256_permute_pd((X), (C)), \
6040 (__v4df)_mm256_setzero_pd()); })
/* Masked variant of _mm_permute_ps: passes mask U, the result of
 * _mm_permute_ps(X, C) and the vector W to __builtin_ia32_selectps_128 and
 * yields the result as __m128.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6042 #define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
6043 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6044 (__v4sf)_mm_permute_ps((X), (C)), \
6045 (__v4sf)(__m128)(W)); })
/* Zero-masked variant of _mm_permute_ps: passes mask U, the result of
 * _mm_permute_ps(X, C) and _mm_setzero_ps() to __builtin_ia32_selectps_128
 * and yields the result as __m128.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6047 #define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
6048 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6049 (__v4sf)_mm_permute_ps((X), (C)), \
6050 (__v4sf)_mm_setzero_ps()); })
/* Masked variant of _mm256_permute_ps: passes mask U, the result of
 * _mm256_permute_ps(X, C) and the vector W to __builtin_ia32_selectps_256
 * and yields the result as __m256.
 * NOTE(review): presumably a set bit in U takes the permuted element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6052 #define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
6053 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6054 (__v8sf)_mm256_permute_ps((X), (C)), \
6055 (__v8sf)(__m256)(W)); })
/* Zero-masked variant of _mm256_permute_ps: passes mask U, the result of
 * _mm256_permute_ps(X, C) and _mm256_setzero_ps() to
 * __builtin_ia32_selectps_256 and yields the result as __m256.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6057 #define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
6058 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6059 (__v8sf)_mm256_permute_ps((X), (C)), \
6060 (__v8sf)_mm256_setzero_ps()); })
6062 static __inline__ __m128d __DEFAULT_FN_ATTRS
6063 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6065 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6066 (__v2df)_mm_permutevar_pd(__A, __C),
6070 static __inline__ __m128d __DEFAULT_FN_ATTRS
6071 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6073 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6074 (__v2df)_mm_permutevar_pd(__A, __C),
6075 (__v2df)_mm_setzero_pd());
6078 static __inline__ __m256d __DEFAULT_FN_ATTRS
6079 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6081 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6082 (__v4df)_mm256_permutevar_pd(__A, __C),
6086 static __inline__ __m256d __DEFAULT_FN_ATTRS
6087 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6089 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6090 (__v4df)_mm256_permutevar_pd(__A, __C),
6091 (__v4df)_mm256_setzero_pd());
6094 static __inline__ __m128 __DEFAULT_FN_ATTRS
6095 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6097 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6098 (__v4sf)_mm_permutevar_ps(__A, __C),
6102 static __inline__ __m128 __DEFAULT_FN_ATTRS
6103 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6105 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6106 (__v4sf)_mm_permutevar_ps(__A, __C),
6107 (__v4sf)_mm_setzero_ps());
6110 static __inline__ __m256 __DEFAULT_FN_ATTRS
6111 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6113 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6114 (__v8sf)_mm256_permutevar_ps(__A, __C),
6118 static __inline__ __m256 __DEFAULT_FN_ATTRS
6119 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6121 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6122 (__v8sf)_mm256_permutevar_ps(__A, __C),
6123 (__v8sf)_mm256_setzero_ps());
6126 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6127 _mm_test_epi32_mask (__m128i __A, __m128i __B)
6129 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
6132 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6133 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6135 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6140 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
6142 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6143 _mm256_setzero_si256());
6146 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6147 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6149 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6150 _mm256_setzero_si256());
6153 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6154 _mm_test_epi64_mask (__m128i __A, __m128i __B)
6156 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6160 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6162 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6167 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
6169 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6170 _mm256_setzero_si256());
6173 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6174 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6176 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6177 _mm256_setzero_si256());
6180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6181 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
6183 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6187 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6189 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6194 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6196 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6197 _mm256_setzero_si256());
6200 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6201 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6203 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6204 _mm256_setzero_si256());
6207 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6208 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6210 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_di());
6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6214 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6216 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6221 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6223 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6224 _mm256_setzero_si256());
6227 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6228 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6230 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6231 _mm256_setzero_si256());
6234 static __inline__ __m128i __DEFAULT_FN_ATTRS
6235 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6237 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6238 (__v4si)_mm_unpackhi_epi32(__A, __B),
6242 static __inline__ __m128i __DEFAULT_FN_ATTRS
6243 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6245 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6246 (__v4si)_mm_unpackhi_epi32(__A, __B),
6247 (__v4si)_mm_setzero_si128());
6250 static __inline__ __m256i __DEFAULT_FN_ATTRS
6251 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6253 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6254 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6258 static __inline__ __m256i __DEFAULT_FN_ATTRS
6259 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6262 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6263 (__v8si)_mm256_setzero_si256());
6266 static __inline__ __m128i __DEFAULT_FN_ATTRS
6267 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6269 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6270 (__v2di)_mm_unpackhi_epi64(__A, __B),
6274 static __inline__ __m128i __DEFAULT_FN_ATTRS
6275 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6277 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6278 (__v2di)_mm_unpackhi_epi64(__A, __B),
6279 (__v2di)_mm_setzero_di());
6282 static __inline__ __m256i __DEFAULT_FN_ATTRS
6283 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6286 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6290 static __inline__ __m256i __DEFAULT_FN_ATTRS
6291 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6293 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6294 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6295 (__v4di)_mm256_setzero_si256());
6298 static __inline__ __m128i __DEFAULT_FN_ATTRS
6299 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6302 (__v4si)_mm_unpacklo_epi32(__A, __B),
6306 static __inline__ __m128i __DEFAULT_FN_ATTRS
6307 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6309 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6310 (__v4si)_mm_unpacklo_epi32(__A, __B),
6311 (__v4si)_mm_setzero_si128());
6314 static __inline__ __m256i __DEFAULT_FN_ATTRS
6315 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6317 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6318 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6322 static __inline__ __m256i __DEFAULT_FN_ATTRS
6323 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6325 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6326 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6327 (__v8si)_mm256_setzero_si256());
6330 static __inline__ __m128i __DEFAULT_FN_ATTRS
6331 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6333 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6334 (__v2di)_mm_unpacklo_epi64(__A, __B),
6338 static __inline__ __m128i __DEFAULT_FN_ATTRS
6339 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6342 (__v2di)_mm_unpacklo_epi64(__A, __B),
6343 (__v2di)_mm_setzero_di());
6346 static __inline__ __m256i __DEFAULT_FN_ATTRS
6347 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6349 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6350 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6354 static __inline__ __m256i __DEFAULT_FN_ATTRS
6355 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6358 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6359 (__v4di)_mm256_setzero_si256());
6362 static __inline__ __m128i __DEFAULT_FN_ATTRS
6363 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6365 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6366 (__v4si)_mm_sra_epi32(__A, __B),
6370 static __inline__ __m128i __DEFAULT_FN_ATTRS
6371 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6373 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6374 (__v4si)_mm_sra_epi32(__A, __B),
6375 (__v4si)_mm_setzero_si128());
6378 static __inline__ __m256i __DEFAULT_FN_ATTRS
6379 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6381 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6382 (__v8si)_mm256_sra_epi32(__A, __B),
6386 static __inline__ __m256i __DEFAULT_FN_ATTRS
6387 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6389 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6390 (__v8si)_mm256_sra_epi32(__A, __B),
6391 (__v8si)_mm256_setzero_si256());
6394 static __inline__ __m128i __DEFAULT_FN_ATTRS
6395 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6397 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6398 (__v4si)_mm_srai_epi32(__A, __B),
6402 static __inline__ __m128i __DEFAULT_FN_ATTRS
6403 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6405 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6406 (__v4si)_mm_srai_epi32(__A, __B),
6407 (__v4si)_mm_setzero_si128());
6410 static __inline__ __m256i __DEFAULT_FN_ATTRS
6411 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6413 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6414 (__v8si)_mm256_srai_epi32(__A, __B),
6418 static __inline__ __m256i __DEFAULT_FN_ATTRS
6419 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6422 (__v8si)_mm256_srai_epi32(__A, __B),
6423 (__v8si)_mm256_setzero_si256());
6426 static __inline__ __m128i __DEFAULT_FN_ATTRS
6427 _mm_sra_epi64(__m128i __A, __m128i __B)
6429 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6432 static __inline__ __m128i __DEFAULT_FN_ATTRS
6433 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6435 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6436 (__v2di)_mm_sra_epi64(__A, __B), \
6440 static __inline__ __m128i __DEFAULT_FN_ATTRS
6441 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6443 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6444 (__v2di)_mm_sra_epi64(__A, __B), \
6445 (__v2di)_mm_setzero_di());
6448 static __inline__ __m256i __DEFAULT_FN_ATTRS
6449 _mm256_sra_epi64(__m256i __A, __m128i __B)
6451 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6454 static __inline__ __m256i __DEFAULT_FN_ATTRS
6455 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6457 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6458 (__v4di)_mm256_sra_epi64(__A, __B), \
6462 static __inline__ __m256i __DEFAULT_FN_ATTRS
6463 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6465 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6466 (__v4di)_mm256_sra_epi64(__A, __B), \
6467 (__v4di)_mm256_setzero_si256());
6470 static __inline__ __m128i __DEFAULT_FN_ATTRS
6471 _mm_srai_epi64(__m128i __A, int __imm)
6473 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6476 static __inline__ __m128i __DEFAULT_FN_ATTRS
6477 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6479 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6480 (__v2di)_mm_srai_epi64(__A, __imm), \
6484 static __inline__ __m128i __DEFAULT_FN_ATTRS
6485 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6487 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6488 (__v2di)_mm_srai_epi64(__A, __imm), \
6489 (__v2di)_mm_setzero_di());
6492 static __inline__ __m256i __DEFAULT_FN_ATTRS
6493 _mm256_srai_epi64(__m256i __A, int __imm)
6495 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6498 static __inline__ __m256i __DEFAULT_FN_ATTRS
6499 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6501 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6502 (__v4di)_mm256_srai_epi64(__A, __imm), \
6506 static __inline__ __m256i __DEFAULT_FN_ATTRS
6507 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6509 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6510 (__v4di)_mm256_srai_epi64(__A, __imm), \
6511 (__v4di)_mm256_setzero_si256());
6514 #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6515 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6516 (__v4si)(__m128i)(B), \
6517 (__v4si)(__m128i)(C), (int)(imm), \
6520 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6521 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6522 (__v4si)(__m128i)(B), \
6523 (__v4si)(__m128i)(C), (int)(imm), \
6526 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6527 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
6528 (__v4si)(__m128i)(B), \
6529 (__v4si)(__m128i)(C), (int)(imm), \
6532 #define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6533 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6534 (__v8si)(__m256i)(B), \
6535 (__v8si)(__m256i)(C), (int)(imm), \
6538 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6539 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6540 (__v8si)(__m256i)(B), \
6541 (__v8si)(__m256i)(C), (int)(imm), \
6544 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6545 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
6546 (__v8si)(__m256i)(B), \
6547 (__v8si)(__m256i)(C), (int)(imm), \
6550 #define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6551 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6552 (__v2di)(__m128i)(B), \
6553 (__v2di)(__m128i)(C), (int)(imm), \
6556 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6557 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6558 (__v2di)(__m128i)(B), \
6559 (__v2di)(__m128i)(C), (int)(imm), \
6562 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6563 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
6564 (__v2di)(__m128i)(B), \
6565 (__v2di)(__m128i)(C), (int)(imm), \
6568 #define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6569 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6570 (__v4di)(__m256i)(B), \
6571 (__v4di)(__m256i)(C), (int)(imm), \
6574 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6575 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6576 (__v4di)(__m256i)(B), \
6577 (__v4di)(__m256i)(C), (int)(imm), \
6580 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6581 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
6582 (__v4di)(__m256i)(B), \
6583 (__v4di)(__m256i)(C), (int)(imm), \
/* Builds a __m256 with __builtin_shufflevector over A and B, both viewed as
 * __v8sf. The first four indices are 0..3, each raised by 4 when bit 0 of
 * imm is set; the last four are 8..11, each raised by 4 when bit 1 of imm is
 * set. Only bits 0 and 1 of imm are read.
 * NOTE(review): assuming indices 0-7 address A and 8-15 address B, the low
 * half of the result is one 128-bit half of A and the high half is one
 * 128-bit half of B -- confirm against the __builtin_shufflevector docs. */
6588 #define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
6589 (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
6590 (__v8sf)(__m256)(B), \
6591 0 + ((((imm) >> 0) & 0x1) * 4), \
6592 1 + ((((imm) >> 0) & 0x1) * 4), \
6593 2 + ((((imm) >> 0) & 0x1) * 4), \
6594 3 + ((((imm) >> 0) & 0x1) * 4), \
6595 8 + ((((imm) >> 1) & 0x1) * 4), \
6596 9 + ((((imm) >> 1) & 0x1) * 4), \
6597 10 + ((((imm) >> 1) & 0x1) * 4), \
6598 11 + ((((imm) >> 1) & 0x1) * 4)); })
/* Masked variant of _mm256_shuffle_f32x4: passes mask U, the result of
 * _mm256_shuffle_f32x4(A, B, imm) and the vector W to
 * __builtin_ia32_selectps_256 and yields the result as __m256.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6600 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
6601 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6602 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6603 (__v8sf)(__m256)(W)); })
/* Zero-masked variant of _mm256_shuffle_f32x4: passes mask U, the result of
 * _mm256_shuffle_f32x4(A, B, imm) and _mm256_setzero_ps() to
 * __builtin_ia32_selectps_256 and yields the result as __m256.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6605 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
6606 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6607 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6608 (__v8sf)_mm256_setzero_ps()); })
/* Builds a __m256d with __builtin_shufflevector over A and B, both viewed as
 * __v4df. The first two indices are 0 and 1, each raised by 2 when bit 0 of
 * imm is set; the last two are 4 and 5, each raised by 2 when bit 1 of imm
 * is set. Only bits 0 and 1 of imm are read.
 * NOTE(review): assuming indices 0-3 address A and 4-7 address B, the low
 * half of the result is one 128-bit half of A and the high half is one
 * 128-bit half of B -- confirm against the __builtin_shufflevector docs. */
6610 #define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
6611 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
6612 (__v4df)(__m256d)(B), \
6613 0 + ((((imm) >> 0) & 0x1) * 2), \
6614 1 + ((((imm) >> 0) & 0x1) * 2), \
6615 4 + ((((imm) >> 1) & 0x1) * 2), \
6616 5 + ((((imm) >> 1) & 0x1) * 2)); })
/* Masked variant of _mm256_shuffle_f64x2: passes mask U, the result of
 * _mm256_shuffle_f64x2(A, B, imm) and the vector W to
 * __builtin_ia32_selectpd_256 and yields the result as __m256d.
 * W is converted through __m256d, the double-precision vector type that
 * matches the (__v4df) view and the macro's result type, so an argument of
 * the wrong vector type is not silently reinterpreted as floats first.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6618 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
6619 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6620 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6621 (__v4df)(__m256d)(W)); })
/* Zero-masked variant of _mm256_shuffle_f64x2: passes mask U, the result of
 * _mm256_shuffle_f64x2(A, B, imm) and _mm256_setzero_pd() to
 * __builtin_ia32_selectpd_256 and yields the result as __m256d.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6623 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
6624 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6625 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6626 (__v4df)_mm256_setzero_pd()); })
/* Builds a __m256i with __builtin_shufflevector over A and B. Although the
 * name says i32x4, both operands are viewed as __v4di (four 64-bit
 * elements), so each pair of indices moves 128 bits at a time. The first two
 * indices are 0 and 1, each raised by 2 when bit 0 of imm is set; the last
 * two are 4 and 5, each raised by 2 when bit 1 of imm is set. Only bits 0
 * and 1 of imm are read.
 * NOTE(review): assuming indices 0-3 address A and 4-7 address B, the low
 * half of the result is one 128-bit half of A and the high half is one
 * 128-bit half of B -- confirm against the __builtin_shufflevector docs. */
6628 #define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
6629 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
6630 (__v4di)(__m256i)(B), \
6631 0 + ((((imm) >> 0) & 0x1) * 2), \
6632 1 + ((((imm) >> 0) & 0x1) * 2), \
6633 4 + ((((imm) >> 1) & 0x1) * 2), \
6634 5 + ((((imm) >> 1) & 0x1) * 2)); })
/* Masked variant of _mm256_shuffle_i32x4: passes mask U, the result of
 * _mm256_shuffle_i32x4(A, B, imm) and the vector W to
 * __builtin_ia32_selectd_256 and yields the result as __m256i.
 * W is converted through __m256i, the integer vector type that matches the
 * (__v8si) view and the macro's result type, rather than through the
 * floating-point __m256 type.
 * NOTE(review): presumably a set bit in U takes the shuffled 32-bit element
 * and a clear bit keeps W's element -- the select builtin is not defined in
 * this file; confirm. */
6636 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
6637 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6638 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6639 (__v8si)(__m256i)(W)); })
/* Zero-masked variant of _mm256_shuffle_i32x4: passes mask U, the result of
 * _mm256_shuffle_i32x4(A, B, imm) and _mm256_setzero_si256() to
 * __builtin_ia32_selectd_256 and yields the result as __m256i.
 * NOTE(review): presumably 32-bit elements whose bit in U is clear come out
 * as zero -- confirm against the select builtin. */
6641 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
6642 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6643 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6644 (__v8si)_mm256_setzero_si256()); })
/* Builds a __m256i with __builtin_shufflevector over A and B, both viewed as
 * __v4di. The first two indices are 0 and 1, each raised by 2 when bit 0 of
 * imm is set; the last two are 4 and 5, each raised by 2 when bit 1 of imm
 * is set. Only bits 0 and 1 of imm are read.
 * NOTE(review): assuming indices 0-3 address A and 4-7 address B, the low
 * half of the result is one 128-bit half of A and the high half is one
 * 128-bit half of B -- confirm against the __builtin_shufflevector docs. */
6646 #define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
6647 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
6648 (__v4di)(__m256i)(B), \
6649 0 + ((((imm) >> 0) & 0x1) * 2), \
6650 1 + ((((imm) >> 0) & 0x1) * 2), \
6651 4 + ((((imm) >> 1) & 0x1) * 2), \
6652 5 + ((((imm) >> 1) & 0x1) * 2)); })
/* Masked variant of _mm256_shuffle_i64x2: passes mask U, the result of
 * _mm256_shuffle_i64x2(A, B, imm) and the vector W to
 * __builtin_ia32_selectq_256 and yields the result as __m256i.
 * W is converted through __m256i, the integer vector type that matches the
 * (__v4di) view and the macro's result type, rather than through the
 * floating-point __m256 type.
 * NOTE(review): presumably a set bit in U takes the shuffled 64-bit element
 * and a clear bit keeps W's element -- the select builtin is not defined in
 * this file; confirm. */
6654 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
6655 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6656 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6657 (__v4di)(__m256i)(W)); })
/* Zero-masked variant of _mm256_shuffle_i64x2: passes mask U, the result of
 * _mm256_shuffle_i64x2(A, B, imm) and _mm256_setzero_si256() to
 * __builtin_ia32_selectq_256 and yields the result as __m256i.
 * NOTE(review): presumably 64-bit elements whose bit in U is clear come out
 * as zero -- confirm against the select builtin. */
6660 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
6661 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6662 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6663 (__v4di)_mm256_setzero_si256()); })
/* Masked variant of _mm_shuffle_pd: passes mask U, the result of
 * _mm_shuffle_pd(A, B, M) and the vector W to __builtin_ia32_selectpd_128
 * and yields the result as __m128d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6665 #define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
6666 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6667 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6668 (__v2df)(__m128d)(W)); })
/* Zero-masked variant of _mm_shuffle_pd: passes mask U, the result of
 * _mm_shuffle_pd(A, B, M) and _mm_setzero_pd() to
 * __builtin_ia32_selectpd_128 and yields the result as __m128d.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6670 #define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
6671 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6672 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6673 (__v2df)_mm_setzero_pd()); })
/* Masked variant of _mm256_shuffle_pd: passes mask U, the result of
 * _mm256_shuffle_pd(A, B, M) and the vector W to
 * __builtin_ia32_selectpd_256 and yields the result as __m256d.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6675 #define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
6676 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6677 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6678 (__v4df)(__m256d)(W)); })
/* Zero-masked variant of _mm256_shuffle_pd: passes mask U, the result of
 * _mm256_shuffle_pd(A, B, M) and _mm256_setzero_pd() to
 * __builtin_ia32_selectpd_256 and yields the result as __m256d.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6680 #define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
6681 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6682 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6683 (__v4df)_mm256_setzero_pd()); })
/* Masked variant of _mm_shuffle_ps: passes mask U, the result of
 * _mm_shuffle_ps(A, B, M) and the vector W to __builtin_ia32_selectps_128
 * and yields the result as __m128.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6685 #define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
6686 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6687 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6688 (__v4sf)(__m128)(W)); })
/* Zero-masked variant of _mm_shuffle_ps: passes mask U, the result of
 * _mm_shuffle_ps(A, B, M) and _mm_setzero_ps() to
 * __builtin_ia32_selectps_128 and yields the result as __m128.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6690 #define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
6691 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6692 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6693 (__v4sf)_mm_setzero_ps()); })
/* Masked variant of _mm256_shuffle_ps: passes mask U, the result of
 * _mm256_shuffle_ps(A, B, M) and the vector W to
 * __builtin_ia32_selectps_256 and yields the result as __m256.
 * NOTE(review): presumably a set bit in U takes the shuffled element and a
 * clear bit keeps W's element -- the select builtin is not defined in this
 * file; confirm. */
6695 #define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
6696 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6697 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6698 (__v8sf)(__m256)(W)); })
/* Zero-masked variant of _mm256_shuffle_ps: passes mask U, the result of
 * _mm256_shuffle_ps(A, B, M) and _mm256_setzero_ps() to
 * __builtin_ia32_selectps_256 and yields the result as __m256.
 * NOTE(review): presumably elements whose bit in U is clear come out as
 * zero -- confirm against the select builtin. */
6700 #define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
6701 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6702 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6703 (__v8sf)_mm256_setzero_ps()); })
6705 static __inline__ __m128d __DEFAULT_FN_ATTRS
6706 _mm_rsqrt14_pd (__m128d __A)
6708 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6714 static __inline__ __m128d __DEFAULT_FN_ATTRS
6715 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6717 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6722 static __inline__ __m128d __DEFAULT_FN_ATTRS
6723 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6725 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6731 static __inline__ __m256d __DEFAULT_FN_ATTRS
6732 _mm256_rsqrt14_pd (__m256d __A)
6734 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6736 _mm256_setzero_pd (),
6740 static __inline__ __m256d __DEFAULT_FN_ATTRS
6741 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6743 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6748 static __inline__ __m256d __DEFAULT_FN_ATTRS
6749 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6751 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6753 _mm256_setzero_pd (),
6757 static __inline__ __m128 __DEFAULT_FN_ATTRS
6758 _mm_rsqrt14_ps (__m128 __A)
6760 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6766 static __inline__ __m128 __DEFAULT_FN_ATTRS
6767 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6769 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6774 static __inline__ __m128 __DEFAULT_FN_ATTRS
6775 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6777 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6783 static __inline__ __m256 __DEFAULT_FN_ATTRS
6784 _mm256_rsqrt14_ps (__m256 __A)
6786 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6788 _mm256_setzero_ps (),
6792 static __inline__ __m256 __DEFAULT_FN_ATTRS
6793 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6795 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6800 static __inline__ __m256 __DEFAULT_FN_ATTRS
6801 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6803 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6805 _mm256_setzero_ps (),
/* _mm256_broadcast_f32x4: repeats the four floats of __A in both halves of the result
 * (__builtin_shufflevector indices 0,1,2,3,0,1,2,3). The mask / maskz forms pick per element
 * between that broadcast and a fallback via __builtin_ia32_selectps_256 under __M; maskz falls
 * back to _mm256_setzero_ps().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6809 static __inline__ __m256 __DEFAULT_FN_ATTRS
6810 _mm256_broadcast_f32x4(__m128 __A)
6812 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6813 0, 1, 2, 3, 0, 1, 2, 3);
6816 static __inline__ __m256 __DEFAULT_FN_ATTRS
6817 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6819 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6820 (__v8sf)_mm256_broadcast_f32x4(__A),
6824 static __inline__ __m256 __DEFAULT_FN_ATTRS
6825 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6827 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6828 (__v8sf)_mm256_broadcast_f32x4(__A),
6829 (__v8sf)_mm256_setzero_ps());
/* _mm256_broadcast_i32x4: repeats the four 32-bit elements of __A in both halves of the result
 * (__builtin_shufflevector indices 0,1,2,3,0,1,2,3). The mask / maskz forms pick per element
 * between that broadcast and a fallback via __builtin_ia32_selectd_256 under __M; maskz falls
 * back to _mm256_setzero_si256().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6832 static __inline__ __m256i __DEFAULT_FN_ATTRS
6833 _mm256_broadcast_i32x4(__m128i __A)
6835 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6836 0, 1, 2, 3, 0, 1, 2, 3);
6839 static __inline__ __m256i __DEFAULT_FN_ATTRS
6840 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6842 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6843 (__v8si)_mm256_broadcast_i32x4(__A),
6847 static __inline__ __m256i __DEFAULT_FN_ATTRS
6848 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6850 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6851 (__v8si)_mm256_broadcast_i32x4(__A),
6852 (__v8si)_mm256_setzero_si256());
/* Masked _mm256_broadcastsd_pd: __builtin_ia32_selectpd_256 picks, under __M, between
 * _mm256_broadcastsd_pd(__A) and a fallback; the maskz form falls back to _mm256_setzero_pd().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6855 static __inline__ __m256d __DEFAULT_FN_ATTRS
6856 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6858 return (__m256d)__builtin_ia32_selectpd_256(__M,
6859 (__v4df) _mm256_broadcastsd_pd(__A),
6863 static __inline__ __m256d __DEFAULT_FN_ATTRS
6864 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6866 return (__m256d)__builtin_ia32_selectpd_256(__M,
6867 (__v4df) _mm256_broadcastsd_pd(__A),
6868 (__v4df) _mm256_setzero_pd());
/* Masked _mm_broadcastss_ps: __builtin_ia32_selectps_128 picks, under __M, between
 * _mm_broadcastss_ps(__A) and a fallback; the maskz form falls back to _mm_setzero_ps().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6871 static __inline__ __m128 __DEFAULT_FN_ATTRS
6872 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6874 return (__m128)__builtin_ia32_selectps_128(__M,
6875 (__v4sf) _mm_broadcastss_ps(__A),
6879 static __inline__ __m128 __DEFAULT_FN_ATTRS
6880 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6882 return (__m128)__builtin_ia32_selectps_128(__M,
6883 (__v4sf) _mm_broadcastss_ps(__A),
6884 (__v4sf) _mm_setzero_ps());
/* Masked _mm256_broadcastss_ps: __builtin_ia32_selectps_256 picks, under __M, between
 * _mm256_broadcastss_ps(__A) and a fallback; the maskz form falls back to _mm256_setzero_ps().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6887 static __inline__ __m256 __DEFAULT_FN_ATTRS
6888 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6890 return (__m256)__builtin_ia32_selectps_256(__M,
6891 (__v8sf) _mm256_broadcastss_ps(__A),
6895 static __inline__ __m256 __DEFAULT_FN_ATTRS
6896 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6898 return (__m256)__builtin_ia32_selectps_256(__M,
6899 (__v8sf) _mm256_broadcastss_ps(__A),
6900 (__v8sf) _mm256_setzero_ps());
/* Masked _mm_broadcastd_epi32: __builtin_ia32_selectd_128 picks, under __M, between
 * _mm_broadcastd_epi32(__A) and a fallback; the maskz form falls back to _mm_setzero_si128().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6903 static __inline__ __m128i __DEFAULT_FN_ATTRS
6904 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6906 return (__m128i)__builtin_ia32_selectd_128(__M,
6907 (__v4si) _mm_broadcastd_epi32(__A),
6911 static __inline__ __m128i __DEFAULT_FN_ATTRS
6912 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6914 return (__m128i)__builtin_ia32_selectd_128(__M,
6915 (__v4si) _mm_broadcastd_epi32(__A),
6916 (__v4si) _mm_setzero_si128());
/* Masked _mm256_broadcastd_epi32: __builtin_ia32_selectd_256 picks, under __M, between
 * _mm256_broadcastd_epi32(__A) and a fallback; the maskz form falls back to _mm256_setzero_si256().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6919 static __inline__ __m256i __DEFAULT_FN_ATTRS
6920 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6922 return (__m256i)__builtin_ia32_selectd_256(__M,
6923 (__v8si) _mm256_broadcastd_epi32(__A),
6927 static __inline__ __m256i __DEFAULT_FN_ATTRS
6928 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6930 return (__m256i)__builtin_ia32_selectd_256(__M,
6931 (__v8si) _mm256_broadcastd_epi32(__A),
6932 (__v8si) _mm256_setzero_si256());
/* Masked _mm_broadcastq_epi64: __builtin_ia32_selectq_128 picks, under __M, between
 * _mm_broadcastq_epi64(__A) and a fallback; the maskz form falls back to _mm_setzero_si128().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6935 static __inline__ __m128i __DEFAULT_FN_ATTRS
6936 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6938 return (__m128i)__builtin_ia32_selectq_128(__M,
6939 (__v2di) _mm_broadcastq_epi64(__A),
6943 static __inline__ __m128i __DEFAULT_FN_ATTRS
6944 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6946 return (__m128i)__builtin_ia32_selectq_128(__M,
6947 (__v2di) _mm_broadcastq_epi64(__A),
6948 (__v2di) _mm_setzero_si128());
/* Masked _mm256_broadcastq_epi64: __builtin_ia32_selectq_256 picks, under __M, between
 * _mm256_broadcastq_epi64(__A) and a fallback; the maskz form falls back to _mm256_setzero_si256().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably __O; confirm. */
6951 static __inline__ __m256i __DEFAULT_FN_ATTRS
6952 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6954 return (__m256i)__builtin_ia32_selectq_256(__M,
6955 (__v4di) _mm256_broadcastq_epi64(__A),
6959 static __inline__ __m256i __DEFAULT_FN_ATTRS
6960 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6962 return (__m256i)__builtin_ia32_selectq_256(__M,
6963 (__v4di) _mm256_broadcastq_epi64(__A),
6964 (__v4di) _mm256_setzero_si256());
/* _mm_cvtsepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovsdb128_mask, or to __builtin_ia32_pmovsdb128mem_mask for the store through __P.
 * Second operand: _mm_undefined_si128() (plain), __O with mask __M (mask), _mm_setzero_si128() (maskz).
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
6967 static __inline__ __m128i __DEFAULT_FN_ATTRS
6968 _mm_cvtsepi32_epi8 (__m128i __A)
6970 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6971 (__v16qi)_mm_undefined_si128(),
6975 static __inline__ __m128i __DEFAULT_FN_ATTRS
6976 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6978 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6979 (__v16qi) __O, __M);
6982 static __inline__ __m128i __DEFAULT_FN_ATTRS
6983 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6985 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6986 (__v16qi) _mm_setzero_si128 (),
6990 static __inline__ void __DEFAULT_FN_ATTRS
6991 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6993 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* _mm256_cvtsepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovsdb256_mask, or to __builtin_ia32_pmovsdb256mem_mask for the store through __P.
 * Second operand: _mm_undefined_si128() (plain), __O with mask __M (mask), _mm_setzero_si128() (maskz).
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
6996 static __inline__ __m128i __DEFAULT_FN_ATTRS
6997 _mm256_cvtsepi32_epi8 (__m256i __A)
6999 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7000 (__v16qi)_mm_undefined_si128(),
7004 static __inline__ __m128i __DEFAULT_FN_ATTRS
7005 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7007 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7008 (__v16qi) __O, __M);
7011 static __inline__ __m128i __DEFAULT_FN_ATTRS
7012 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7014 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7015 (__v16qi) _mm_setzero_si128 (),
7019 static __inline__ void __DEFAULT_FN_ATTRS
7020 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7022 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
/* _mm_cvtsepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovsdw128_mask, or to __builtin_ia32_pmovsdw128mem_mask for the store through __P.
 * The plain form here passes _mm_setzero_si128() where most sibling plain forms pass _mm_undefined_si128().
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7025 static __inline__ __m128i __DEFAULT_FN_ATTRS
7026 _mm_cvtsepi32_epi16 (__m128i __A)
7028 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7029 (__v8hi)_mm_setzero_si128 (),
7033 static __inline__ __m128i __DEFAULT_FN_ATTRS
7034 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7036 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7041 static __inline__ __m128i __DEFAULT_FN_ATTRS
7042 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7044 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7045 (__v8hi) _mm_setzero_si128 (),
7049 static __inline__ void __DEFAULT_FN_ATTRS
7050 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7052 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* _mm256_cvtsepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovsdw256_mask, or to __builtin_ia32_pmovsdw256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7055 static __inline__ __m128i __DEFAULT_FN_ATTRS
7056 _mm256_cvtsepi32_epi16 (__m256i __A)
7058 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7059 (__v8hi)_mm_undefined_si128(),
7063 static __inline__ __m128i __DEFAULT_FN_ATTRS
7064 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7066 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7070 static __inline__ __m128i __DEFAULT_FN_ATTRS
7071 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7073 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7074 (__v8hi) _mm_setzero_si128 (),
7078 static __inline__ void __DEFAULT_FN_ATTRS
7079 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7081 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* _mm_cvtsepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovsqb128_mask, or to __builtin_ia32_pmovsqb128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7084 static __inline__ __m128i __DEFAULT_FN_ATTRS
7085 _mm_cvtsepi64_epi8 (__m128i __A)
7087 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7088 (__v16qi)_mm_undefined_si128(),
7092 static __inline__ __m128i __DEFAULT_FN_ATTRS
7093 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7095 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7096 (__v16qi) __O, __M);
7099 static __inline__ __m128i __DEFAULT_FN_ATTRS
7100 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7102 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7103 (__v16qi) _mm_setzero_si128 (),
7107 static __inline__ void __DEFAULT_FN_ATTRS
7108 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7110 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* _mm256_cvtsepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovsqb256_mask, or to __builtin_ia32_pmovsqb256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7113 static __inline__ __m128i __DEFAULT_FN_ATTRS
7114 _mm256_cvtsepi64_epi8 (__m256i __A)
7116 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7117 (__v16qi)_mm_undefined_si128(),
7121 static __inline__ __m128i __DEFAULT_FN_ATTRS
7122 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7124 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7125 (__v16qi) __O, __M);
7128 static __inline__ __m128i __DEFAULT_FN_ATTRS
7129 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7131 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7132 (__v16qi) _mm_setzero_si128 (),
7136 static __inline__ void __DEFAULT_FN_ATTRS
7137 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7139 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* _mm_cvtsepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovsqd128_mask, or to __builtin_ia32_pmovsqd128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7142 static __inline__ __m128i __DEFAULT_FN_ATTRS
7143 _mm_cvtsepi64_epi32 (__m128i __A)
7145 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7146 (__v4si)_mm_undefined_si128(),
7150 static __inline__ __m128i __DEFAULT_FN_ATTRS
7151 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7153 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7157 static __inline__ __m128i __DEFAULT_FN_ATTRS
7158 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7160 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7161 (__v4si) _mm_setzero_si128 (),
7165 static __inline__ void __DEFAULT_FN_ATTRS
7166 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7168 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
/* _mm256_cvtsepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovsqd256_mask, or to __builtin_ia32_pmovsqd256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7171 static __inline__ __m128i __DEFAULT_FN_ATTRS
7172 _mm256_cvtsepi64_epi32 (__m256i __A)
7174 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7175 (__v4si)_mm_undefined_si128(),
7179 static __inline__ __m128i __DEFAULT_FN_ATTRS
7180 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7182 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7187 static __inline__ __m128i __DEFAULT_FN_ATTRS
7188 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7190 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7191 (__v4si) _mm_setzero_si128 (),
7195 static __inline__ void __DEFAULT_FN_ATTRS
7196 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7198 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
/* _mm_cvtsepi64_epi16 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovsqw128_mask, or to __builtin_ia32_pmovsqw128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7201 static __inline__ __m128i __DEFAULT_FN_ATTRS
7202 _mm_cvtsepi64_epi16 (__m128i __A)
7204 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7205 (__v8hi)_mm_undefined_si128(),
7209 static __inline__ __m128i __DEFAULT_FN_ATTRS
7210 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7212 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7216 static __inline__ __m128i __DEFAULT_FN_ATTRS
7217 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7219 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7220 (__v8hi) _mm_setzero_si128 (),
7224 static __inline__ void __DEFAULT_FN_ATTRS
7225 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7227 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
/* _mm256_cvtsepi64_epi16 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovsqw256_mask, or to __builtin_ia32_pmovsqw256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7230 static __inline__ __m128i __DEFAULT_FN_ATTRS
7231 _mm256_cvtsepi64_epi16 (__m256i __A)
7233 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7234 (__v8hi)_mm_undefined_si128(),
7238 static __inline__ __m128i __DEFAULT_FN_ATTRS
7239 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7241 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7245 static __inline__ __m128i __DEFAULT_FN_ATTRS
7246 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7248 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7249 (__v8hi) _mm_setzero_si128 (),
7253 static __inline__ void __DEFAULT_FN_ATTRS
7254 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7256 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* _mm_cvtusepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovusdb128_mask, or to __builtin_ia32_pmovusdb128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7259 static __inline__ __m128i __DEFAULT_FN_ATTRS
7260 _mm_cvtusepi32_epi8 (__m128i __A)
7262 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7263 (__v16qi)_mm_undefined_si128(),
7267 static __inline__ __m128i __DEFAULT_FN_ATTRS
7268 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7270 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7275 static __inline__ __m128i __DEFAULT_FN_ATTRS
7276 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7278 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7279 (__v16qi) _mm_setzero_si128 (),
7283 static __inline__ void __DEFAULT_FN_ATTRS
7284 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7286 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* _mm256_cvtusepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovusdb256_mask, or to __builtin_ia32_pmovusdb256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7289 static __inline__ __m128i __DEFAULT_FN_ATTRS
7290 _mm256_cvtusepi32_epi8 (__m256i __A)
7292 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7293 (__v16qi)_mm_undefined_si128(),
7297 static __inline__ __m128i __DEFAULT_FN_ATTRS
7298 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7300 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7305 static __inline__ __m128i __DEFAULT_FN_ATTRS
7306 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7308 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7309 (__v16qi) _mm_setzero_si128 (),
7313 static __inline__ void __DEFAULT_FN_ATTRS
7314 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7316 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
/* _mm_cvtusepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovusdw128_mask, or to __builtin_ia32_pmovusdw128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7319 static __inline__ __m128i __DEFAULT_FN_ATTRS
7320 _mm_cvtusepi32_epi16 (__m128i __A)
7322 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7323 (__v8hi)_mm_undefined_si128(),
7327 static __inline__ __m128i __DEFAULT_FN_ATTRS
7328 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7330 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7334 static __inline__ __m128i __DEFAULT_FN_ATTRS
7335 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7337 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7338 (__v8hi) _mm_setzero_si128 (),
7342 static __inline__ void __DEFAULT_FN_ATTRS
7343 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7345 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* _mm256_cvtusepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovusdw256_mask, or to __builtin_ia32_pmovusdw256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7348 static __inline__ __m128i __DEFAULT_FN_ATTRS
7349 _mm256_cvtusepi32_epi16 (__m256i __A)
7351 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7352 (__v8hi) _mm_undefined_si128(),
7356 static __inline__ __m128i __DEFAULT_FN_ATTRS
7357 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7359 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7363 static __inline__ __m128i __DEFAULT_FN_ATTRS
7364 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7366 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7367 (__v8hi) _mm_setzero_si128 (),
7371 static __inline__ void __DEFAULT_FN_ATTRS
7372 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7374 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* _mm_cvtusepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovusqb128_mask, or to __builtin_ia32_pmovusqb128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7377 static __inline__ __m128i __DEFAULT_FN_ATTRS
7378 _mm_cvtusepi64_epi8 (__m128i __A)
7380 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7381 (__v16qi)_mm_undefined_si128(),
7385 static __inline__ __m128i __DEFAULT_FN_ATTRS
7386 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7388 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7393 static __inline__ __m128i __DEFAULT_FN_ATTRS
7394 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7396 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7397 (__v16qi) _mm_setzero_si128 (),
7401 static __inline__ void __DEFAULT_FN_ATTRS
7402 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7404 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* _mm256_cvtusepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovusqb256_mask, or to __builtin_ia32_pmovusqb256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7407 static __inline__ __m128i __DEFAULT_FN_ATTRS
7408 _mm256_cvtusepi64_epi8 (__m256i __A)
7410 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7411 (__v16qi)_mm_undefined_si128(),
7415 static __inline__ __m128i __DEFAULT_FN_ATTRS
7416 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7418 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7423 static __inline__ __m128i __DEFAULT_FN_ATTRS
7424 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7426 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7427 (__v16qi) _mm_setzero_si128 (),
7431 static __inline__ void __DEFAULT_FN_ATTRS
7432 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7434 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* _mm_cvtusepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovusqd128_mask, or to __builtin_ia32_pmovusqd128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7437 static __inline__ __m128i __DEFAULT_FN_ATTRS
7438 _mm_cvtusepi64_epi32 (__m128i __A)
7440 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7441 (__v4si)_mm_undefined_si128(),
7445 static __inline__ __m128i __DEFAULT_FN_ATTRS
7446 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7448 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7452 static __inline__ __m128i __DEFAULT_FN_ATTRS
7453 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7455 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7456 (__v4si) _mm_setzero_si128 (),
7460 static __inline__ void __DEFAULT_FN_ATTRS
7461 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7463 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
/* _mm256_cvtusepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovusqd256_mask, or to __builtin_ia32_pmovusqd256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7466 static __inline__ __m128i __DEFAULT_FN_ATTRS
7467 _mm256_cvtusepi64_epi32 (__m256i __A)
7469 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7470 (__v4si)_mm_undefined_si128(),
7474 static __inline__ __m128i __DEFAULT_FN_ATTRS
7475 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7477 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7481 static __inline__ __m128i __DEFAULT_FN_ATTRS
7482 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7485 (__v4si) _mm_setzero_si128 (),
7489 static __inline__ void __DEFAULT_FN_ATTRS
7490 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7492 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
/* _mm_cvtusepi64_epi16 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovusqw128_mask, or to __builtin_ia32_pmovusqw128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7495 static __inline__ __m128i __DEFAULT_FN_ATTRS
7496 _mm_cvtusepi64_epi16 (__m128i __A)
7498 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7499 (__v8hi)_mm_undefined_si128(),
7503 static __inline__ __m128i __DEFAULT_FN_ATTRS
7504 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7506 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7510 static __inline__ __m128i __DEFAULT_FN_ATTRS
7511 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7513 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7514 (__v8hi) _mm_setzero_si128 (),
7518 static __inline__ void __DEFAULT_FN_ATTRS
7519 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7521 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
/* _mm256_cvtusepi64_epi16 (plain / mask / maskz): forward __A as __v4di to
 * __builtin_ia32_pmovusqw256_mask; plain passes _mm_undefined_si128(), maskz passes _mm_setzero_si128().
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7524 static __inline__ __m128i __DEFAULT_FN_ATTRS
7525 _mm256_cvtusepi64_epi16 (__m256i __A)
7527 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7528 (__v8hi)_mm_undefined_si128(),
7532 static __inline__ __m128i __DEFAULT_FN_ATTRS
7533 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7535 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7539 static __inline__ __m128i __DEFAULT_FN_ATTRS
7540 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7542 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7543 (__v8hi) _mm_setzero_si128 (),
/* Masked store wrapper: forwards __P (as __v8hi *), __A (as __v4di) and __M to
 * __builtin_ia32_pmovusqw256mem_mask. The builtin call is a plain statement: ISO C does
 * not allow `return <expr>;` in a function returning void, and every sibling
 * *_storeu_* wrapper in this header calls its builtin without `return`.
 * NOTE(review): the enclosing brace lines are not part of this listing. */
7547 static __inline__ void __DEFAULT_FN_ATTRS
7548 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7550 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* _mm_cvtepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovdb128_mask, or to __builtin_ia32_pmovdb128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7553 static __inline__ __m128i __DEFAULT_FN_ATTRS
7554 _mm_cvtepi32_epi8 (__m128i __A)
7556 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7557 (__v16qi)_mm_undefined_si128(),
7561 static __inline__ __m128i __DEFAULT_FN_ATTRS
7562 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7564 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7565 (__v16qi) __O, __M);
7568 static __inline__ __m128i __DEFAULT_FN_ATTRS
7569 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7571 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7573 _mm_setzero_si128 (),
7577 static __inline__ void __DEFAULT_FN_ATTRS
7578 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7580 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* _mm256_cvtepi32_epi8 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovdb256_mask, or to __builtin_ia32_pmovdb256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7583 static __inline__ __m128i __DEFAULT_FN_ATTRS
7584 _mm256_cvtepi32_epi8 (__m256i __A)
7586 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7587 (__v16qi)_mm_undefined_si128(),
7591 static __inline__ __m128i __DEFAULT_FN_ATTRS
7592 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7594 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7595 (__v16qi) __O, __M);
7598 static __inline__ __m128i __DEFAULT_FN_ATTRS
7599 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7601 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7602 (__v16qi) _mm_setzero_si128 (),
7606 static __inline__ void __DEFAULT_FN_ATTRS
7607 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7609 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
/* _mm_cvtepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v4si to
 * __builtin_ia32_pmovdw128_mask, or to __builtin_ia32_pmovdw128mem_mask for the store through __P.
 * The plain form here passes _mm_setzero_si128() where most sibling plain forms pass _mm_undefined_si128().
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7612 static __inline__ __m128i __DEFAULT_FN_ATTRS
7613 _mm_cvtepi32_epi16 (__m128i __A)
7615 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7616 (__v8hi) _mm_setzero_si128 (),
7620 static __inline__ __m128i __DEFAULT_FN_ATTRS
7621 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7623 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7627 static __inline__ __m128i __DEFAULT_FN_ATTRS
7628 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7630 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7631 (__v8hi) _mm_setzero_si128 (),
7635 static __inline__ void __DEFAULT_FN_ATTRS
7636 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7638 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* _mm256_cvtepi32_epi16 (plain / mask / maskz / masked store): forward __A as __v8si to
 * __builtin_ia32_pmovdw256_mask, or to __builtin_ia32_pmovdw256mem_mask for the store through __P.
 * The plain form here passes _mm_setzero_si128() where most sibling plain forms pass _mm_undefined_si128().
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7641 static __inline__ __m128i __DEFAULT_FN_ATTRS
7642 _mm256_cvtepi32_epi16 (__m256i __A)
7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7645 (__v8hi)_mm_setzero_si128 (),
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS
7650 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7656 static __inline__ __m128i __DEFAULT_FN_ATTRS
7657 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7659 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7660 (__v8hi) _mm_setzero_si128 (),
7664 static __inline__ void __DEFAULT_FN_ATTRS
7665 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7667 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* _mm_cvtepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovqb128_mask, or to __builtin_ia32_pmovqb128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7670 static __inline__ __m128i __DEFAULT_FN_ATTRS
7671 _mm_cvtepi64_epi8 (__m128i __A)
7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7674 (__v16qi) _mm_undefined_si128(),
7678 static __inline__ __m128i __DEFAULT_FN_ATTRS
7679 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7682 (__v16qi) __O, __M);
7685 static __inline__ __m128i __DEFAULT_FN_ATTRS
7686 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7688 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7689 (__v16qi) _mm_setzero_si128 (),
7693 static __inline__ void __DEFAULT_FN_ATTRS
7694 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7696 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* _mm256_cvtepi64_epi8 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovqb256_mask, or to __builtin_ia32_pmovqb256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7699 static __inline__ __m128i __DEFAULT_FN_ATTRS
7700 _mm256_cvtepi64_epi8 (__m256i __A)
7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7703 (__v16qi) _mm_undefined_si128(),
7707 static __inline__ __m128i __DEFAULT_FN_ATTRS
7708 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7711 (__v16qi) __O, __M);
7714 static __inline__ __m128i __DEFAULT_FN_ATTRS
7715 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7717 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7718 (__v16qi) _mm_setzero_si128 (),
7722 static __inline__ void __DEFAULT_FN_ATTRS
7723 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7725 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* _mm_cvtepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovqd128_mask, or to __builtin_ia32_pmovqd128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7728 static __inline__ __m128i __DEFAULT_FN_ATTRS
7729 _mm_cvtepi64_epi32 (__m128i __A)
7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7732 (__v4si)_mm_undefined_si128(),
7736 static __inline__ __m128i __DEFAULT_FN_ATTRS
7737 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7739 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7743 static __inline__ __m128i __DEFAULT_FN_ATTRS
7744 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7746 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7747 (__v4si) _mm_setzero_si128 (),
7751 static __inline__ void __DEFAULT_FN_ATTRS
7752 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7754 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
/* _mm256_cvtepi64_epi32 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovqd256_mask, or to __builtin_ia32_pmovqd256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7757 static __inline__ __m128i __DEFAULT_FN_ATTRS
7758 _mm256_cvtepi64_epi32 (__m256i __A)
7760 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7761 (__v4si) _mm_undefined_si128(),
7765 static __inline__ __m128i __DEFAULT_FN_ATTRS
7766 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7768 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7772 static __inline__ __m128i __DEFAULT_FN_ATTRS
7773 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7775 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
7776 (__v4si) _mm_setzero_si128 (),
7780 static __inline__ void __DEFAULT_FN_ATTRS
7781 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7783 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
/* _mm_cvtepi64_epi16 (plain / mask / maskz / masked store): forward __A as __v2di to
 * __builtin_ia32_pmovqw128_mask, or to __builtin_ia32_pmovqw128mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7786 static __inline__ __m128i __DEFAULT_FN_ATTRS
7787 _mm_cvtepi64_epi16 (__m128i __A)
7789 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7790 (__v8hi) _mm_undefined_si128(),
7794 static __inline__ __m128i __DEFAULT_FN_ATTRS
7795 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7797 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7802 static __inline__ __m128i __DEFAULT_FN_ATTRS
7803 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7805 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7806 (__v8hi) _mm_setzero_si128 (),
7810 static __inline__ void __DEFAULT_FN_ATTRS
7811 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7813 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
/* _mm256_cvtepi64_epi16 (plain / mask / maskz / masked store): forward __A as __v4di to
 * __builtin_ia32_pmovqw256_mask, or to __builtin_ia32_pmovqw256mem_mask for the store through __P.
 * NOTE(review): this listing omits some lines (braces, trailing operands) -- verify against the full header. */
7816 static __inline__ __m128i __DEFAULT_FN_ATTRS
7817 _mm256_cvtepi64_epi16 (__m256i __A)
7819 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7820 (__v8hi)_mm_undefined_si128(),
7824 static __inline__ __m128i __DEFAULT_FN_ATTRS
7825 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7827 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7831 static __inline__ __m128i __DEFAULT_FN_ATTRS
7832 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7834 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7835 (__v8hi) _mm_setzero_si128 (),
7839 static __inline__ void __DEFAULT_FN_ATTRS
7840 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7842 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* _mm256_extractf32x4_ps(A, imm): yields elements 4..7 of A when ((imm) & 1) is set, else
 * elements 0..3 (via __builtin_shufflevector). The mask / maskz macros pick per element between
 * that result and a fallback via __builtin_ia32_selectps_128 under U; maskz falls back to
 * _mm_setzero_ps().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably W; confirm. */
7845 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
7846 (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
7847 (__v8sf)_mm256_undefined_ps(), \
7848 ((imm) & 1) ? 4 : 0, \
7849 ((imm) & 1) ? 5 : 1, \
7850 ((imm) & 1) ? 6 : 2, \
7851 ((imm) & 1) ? 7 : 3); })
7853 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
7854 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7855 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
7858 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
7859 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
7860 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
7861 (__v4sf)_mm_setzero_ps()); })
/* _mm256_extracti32x4_epi32(A, imm): yields 32-bit elements 4..7 of A when ((imm) & 1) is set,
 * else elements 0..3 (via __builtin_shufflevector). A is an integer vector, so it is cast
 * through __m256i (not the float type __m256) before being viewed as __v8si; this matches the
 * (__m128i) result type and the float variant's (__m256) cast. */
7863 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
7864 (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A), \
7865 (__v8si)_mm256_undefined_si256(), \
7866 ((imm) & 1) ? 4 : 0, \
7867 ((imm) & 1) ? 5 : 1, \
7868 ((imm) & 1) ? 6 : 2, \
7869 ((imm) & 1) ? 7 : 3); })
/* Masked _mm256_extracti32x4_epi32: __builtin_ia32_selectd_128 picks, under U, between the
 * extracted 128-bit value and a fallback; the maskz form falls back to _mm_setzero_si128().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably W; confirm. */
7871 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
7872 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7873 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
7876 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
7877 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7878 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
7879 (__v4si)_mm_setzero_si128()); })
/* _mm256_insertf32x4(A, B, imm): shuffles A with B widened by _mm256_castps128_ps256 (shuffle
 * indices 8..11 are B's four floats). With ((imm) & 1) set the result is A[0..3] followed by B;
 * otherwise it is B followed by A[4..7]. The mask / maskz macros pick per element between that
 * result and a fallback via __builtin_ia32_selectps_256 under U; maskz falls back to _mm256_setzero_ps().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably W; confirm. */
7881 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
7882 (__m256)__builtin_shufflevector((__v8sf)(A), \
7883 (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
7884 ((imm) & 0x1) ? 0 : 8, \
7885 ((imm) & 0x1) ? 1 : 9, \
7886 ((imm) & 0x1) ? 2 : 10, \
7887 ((imm) & 0x1) ? 3 : 11, \
7888 ((imm) & 0x1) ? 8 : 4, \
7889 ((imm) & 0x1) ? 9 : 5, \
7890 ((imm) & 0x1) ? 10 : 6, \
7891 ((imm) & 0x1) ? 11 : 7); })
7893 #define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
7894 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7895 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7898 #define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
7899 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7900 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7901 (__v8sf)_mm256_setzero_ps()); })
/* _mm256_inserti32x4(A, B, imm): shuffles A with B widened by _mm256_castsi128_si256 (shuffle
 * indices 8..11 are B's four 32-bit elements). With ((imm) & 1) set the result is A[0..3]
 * followed by B; otherwise it is B followed by A[4..7]. The mask / maskz macros pick per element
 * between that result and a fallback via __builtin_ia32_selectd_256 under U; maskz falls back to
 * _mm256_setzero_si256().
 * NOTE(review): the mask form's fallback operand line is not in this listing -- presumably W; confirm. */
7903 #define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
7904 (__m256i)__builtin_shufflevector((__v8si)(A), \
7905 (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
7906 ((imm) & 0x1) ? 0 : 8, \
7907 ((imm) & 0x1) ? 1 : 9, \
7908 ((imm) & 0x1) ? 2 : 10, \
7909 ((imm) & 0x1) ? 3 : 11, \
7910 ((imm) & 0x1) ? 8 : 4, \
7911 ((imm) & 0x1) ? 9 : 5, \
7912 ((imm) & 0x1) ? 10 : 6, \
7913 ((imm) & 0x1) ? 11 : 7); })
7915 #define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
7916 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7917 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7920 #define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
7921 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7922 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7923 (__v8si)_mm256_setzero_si256()); })
/* _mm_getmant_pd (plain / mask / maskz): call __builtin_ia32_getmantpd128_mask on A with the
 * two selector arguments packed into one int immediate as ((C) << 2) | (B). The plain and maskz
 * forms pass _mm_setzero_pd() as the pass-through vector; the mask form passes W.
 * NOTE(review): the final (mask) operand lines are not in this listing -- verify against the full header. */
7925 #define _mm_getmant_pd(A, B, C) __extension__({\
7926 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7927 (int)(((C)<<2) | (B)), \
7928 (__v2df)_mm_setzero_pd(), \
7931 #define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
7932 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7933 (int)(((C)<<2) | (B)), \
7934 (__v2df)(__m128d)(W), \
7937 #define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
7938 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7939 (int)(((C)<<2) | (B)), \
7940 (__v2df)_mm_setzero_pd(), \
/* Unmasked getmant on a __m256d.  B and C are folded into one immediate as
 * (C << 2) | B; a zero vector and an all-ones mask fill the remaining
 * builtin operands. */
#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })
/* Masked getmant on a __m256d.  The immediate is (C << 2) | B; W is handed
 * to the builtin as the pass-through vector and U as the mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)); })
/* Zeroing getmant on a __m256d.  The immediate is (C << 2) | B; a zero
 * vector is the pass-through operand and U the mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)); })
/* Unmasked getmant on a __m128.  B and C are folded into one immediate as
 * (C << 2) | B; a zero vector and an all-ones mask fill the remaining
 * builtin operands. */
#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })
/* Masked getmant on a __m128.  The immediate is (C << 2) | B; W is handed
 * to the builtin as the pass-through vector and U as the mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)); })
/* Zeroing getmant on a __m128.  The immediate is (C << 2) | B; a zero
 * vector is the pass-through operand and U the mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)); })
/* Unmasked getmant on a __m256.  B and C are folded into one immediate as
 * (C << 2) | B; a zero vector and an all-ones mask fill the remaining
 * builtin operands. */
#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1); })
/* Masked getmant on a __m256.  The immediate is (C << 2) | B; W is handed
 * to the builtin as the pass-through vector and U as the mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })
/* Zeroing getmant on a __m256.  The immediate is (C << 2) | B; a zero
 * vector is the pass-through operand and U the mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })
/* Cast-only wrapper around __builtin_ia32_gather3div2df.  Operands go to the
 * builtin in the order v1_old (__v2df), addr (double const *),
 * index (__v2di), mask (__mmask8), scale (int); the result is an __m128d. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div2di.  Operands go to the
 * builtin in the order v1_old (__v2di), addr (long long const *),
 * index (__v2di), mask (__mmask8), scale (int); the result is an __m128i. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div4df.  Operands go to the
 * builtin in the order v1_old (__v4df), addr (double const *),
 * index (__v4di), mask (__mmask8), scale (int); the result is an __m256d. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div4di.  Operands go to the
 * builtin in the order v1_old (__v4di), addr (long long const *),
 * index (__v4di), mask (__mmask8), scale (int); the result is an __m256i. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div4sf.  Operands go to the
 * builtin in the order v1_old (__v4sf), addr (float const *),
 * index (__v2di), mask (__mmask8), scale (int); the result is an __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div4si.  Operands go to the
 * builtin in the order v1_old (__v4si), addr (int const *),
 * index (__v2di), mask (__mmask8), scale (int); the result is an __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div8sf.  Operands go to the
 * builtin in the order v1_old (__v4sf), addr (float const *),
 * index (__v4di), mask (__mmask8), scale (int).  Note that both v1_old and
 * the result are 128-bit (__m128) even though the index vector is 256-bit. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3div8si.  Operands go to the
 * builtin in the order v1_old (__v4si), addr (int const *),
 * index (__v4di), mask (__mmask8), scale (int).  Note that both v1_old and
 * the result are 128-bit (__m128i) even though the index vector is 256-bit. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv2df.  Operands go to the
 * builtin in the order v1_old (__v2df), addr (double const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m128d. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv2di.  Operands go to the
 * builtin in the order v1_old (__v2di), addr (long long const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m128i. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv4df.  Operands go to the
 * builtin in the order v1_old (__v4df), addr (double const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m256d. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv4di.  Operands go to the
 * builtin in the order v1_old (__v4di), addr (long long const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m256i. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv4sf.  Operands go to the
 * builtin in the order v1_old (__v4sf), addr (float const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m128. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv4si.  Operands go to the
 * builtin in the order v1_old (__v4si), addr (int const *),
 * index (__v4si), mask (__mmask8), scale (int); the result is an __m128i. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv8sf.  Operands go to the
 * builtin in the order v1_old (__v8sf), addr (float const *),
 * index (__v8si), mask (__mmask8), scale (int); the result is an __m256. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (float const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Cast-only wrapper around __builtin_ia32_gather3siv8si.  Operands go to the
 * builtin in the order v1_old (__v8si), addr (int const *),
 * index (__v8si), mask (__mmask8), scale (int); the result is an __m256i. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (int const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
/* Permutes the four doubles of X by constant C: result element i is
 * X[(C >> 2*i) & 3].  The second shuffle operand is an undefined vector and
 * is never selected because every index is below 4. */
#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v4df)(__m256d)(X), \
      (__v4df)_mm256_undefined_pd(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
/* Masked form of _mm256_permutex_pd: mask U, the permute result and W are
 * passed, in that order, to the selectpd builtin. */
#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
/* Zeroing form of _mm256_permutex_pd: mask U, the permute result and an
 * all-zero vector are passed, in that order, to the selectpd builtin. */
#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
/* Permutes the four 64-bit integers of X by constant C: result element i is
 * X[(C >> 2*i) & 3].  The second shuffle operand is an undefined vector and
 * is never selected because every index is below 4. */
#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(X), \
      (__v4di)_mm256_undefined_si256(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
/* Masked form of _mm256_permutex_epi64: mask U, the permute result and W are
 * passed, in that order, to the selectq builtin. */
#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)); })
/* Zeroing form of _mm256_permutex_epi64: mask U, the permute result and an
 * all-zero vector are passed, in that order, to the selectq builtin. */
#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()); })
8125 static __inline__ __m256d __DEFAULT_FN_ATTRS
8126 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8128 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8130 (__v4df) _mm256_undefined_si256 (),
8134 static __inline__ __m256d __DEFAULT_FN_ATTRS
8135 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8138 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8144 static __inline__ __m256d __DEFAULT_FN_ATTRS
8145 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8147 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8149 (__v4df) _mm256_setzero_pd (),
8153 static __inline__ __m256i __DEFAULT_FN_ATTRS
8154 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8156 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8158 (__v4di) _mm256_setzero_si256 (),
8162 static __inline__ __m256i __DEFAULT_FN_ATTRS
8163 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8165 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8167 (__v4di) _mm256_undefined_si256 (),
8171 static __inline__ __m256i __DEFAULT_FN_ATTRS
8172 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8175 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8181 static __inline__ __m256 __DEFAULT_FN_ATTRS
8182 _mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8185 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8191 static __inline__ __m256 __DEFAULT_FN_ATTRS
8192 _mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8194 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8196 (__v8sf) _mm256_setzero_ps (),
8200 static __inline__ __m256 __DEFAULT_FN_ATTRS
8201 _mm256_permutexvar_ps (__m256i __X, __m256 __Y)
8203 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8205 (__v8sf) _mm256_undefined_si256 (),
8209 static __inline__ __m256i __DEFAULT_FN_ATTRS
8210 _mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
8212 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8214 (__v8si) _mm256_setzero_si256 (),
8218 static __inline__ __m256i __DEFAULT_FN_ATTRS
8219 _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
8222 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8228 static __inline__ __m256i __DEFAULT_FN_ATTRS
8229 _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
8231 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8233 (__v8si) _mm256_undefined_si256(),
/* Shuffles the pair (B, A): B supplies shuffle indices 0-3 and A indices
 * 4-7.  The result is the four consecutive 32-bit elements starting at
 * index (imm & 3). */
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(A), \
      ((int)(imm) & 0x3) + 0, ((int)(imm) & 0x3) + 1, \
      ((int)(imm) & 0x3) + 2, ((int)(imm) & 0x3) + 3); })
/* Masked form of _mm_alignr_epi32: mask U, the alignr result and W are
 * passed, in that order, to the selectd builtin. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)); })
/* Zeroing form of _mm_alignr_epi32: mask U, the alignr result and an
 * all-zero vector are passed, in that order, to the selectd builtin. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()); })
/* Shuffles the pair (B, A): B supplies shuffle indices 0-7 and A indices
 * 8-15.  The result is the eight consecutive 32-bit elements starting at
 * index (imm & 7). */
#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(A), \
      ((int)(imm) & 0x7) + 0, ((int)(imm) & 0x7) + 1, \
      ((int)(imm) & 0x7) + 2, ((int)(imm) & 0x7) + 3, \
      ((int)(imm) & 0x7) + 4, ((int)(imm) & 0x7) + 5, \
      ((int)(imm) & 0x7) + 6, ((int)(imm) & 0x7) + 7); })
/* Masked form of _mm256_alignr_epi32: mask U, the alignr result and W are
 * passed, in that order, to the selectd builtin. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)); })
/* Zeroing form of _mm256_alignr_epi32: mask U, the alignr result and an
 * all-zero vector are passed, in that order, to the selectd builtin. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()); })
/* Shuffles the pair (B, A): B supplies shuffle indices 0-1 and A indices
 * 2-3.  The result is the two consecutive 64-bit elements starting at
 * index (imm & 1). */
#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(A), \
      ((int)(imm) & 0x1) + 0, ((int)(imm) & 0x1) + 1); })
/* Masked form of _mm_alignr_epi64: mask U, the alignr result and W are
 * passed, in that order, to the selectq builtin. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)); })
/* Zeroing form of _mm_alignr_epi64: mask U, the alignr result and the zero
 * vector from _mm_setzero_di() are passed, in that order, to the selectq
 * builtin. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_di()); })
/* Shuffles the pair (B, A): B supplies shuffle indices 0-3 and A indices
 * 4-7.  The result is the four consecutive 64-bit elements starting at
 * index (imm & 3). */
#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(A), \
      ((int)(imm) & 0x3) + 0, ((int)(imm) & 0x3) + 1, \
      ((int)(imm) & 0x3) + 2, ((int)(imm) & 0x3) + 3); })
/* Masked form of _mm256_alignr_epi64: mask U, the alignr result and W are
 * passed, in that order, to the selectq builtin. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)); })
/* Zeroing form of _mm256_alignr_epi64: mask U, the alignr result and an
 * all-zero vector are passed, in that order, to the selectq builtin. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()); })
8311 static __inline__ __m128 __DEFAULT_FN_ATTRS
8312 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8314 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8315 (__v4sf)_mm_movehdup_ps(__A),
8319 static __inline__ __m128 __DEFAULT_FN_ATTRS
8320 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8322 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8323 (__v4sf)_mm_movehdup_ps(__A),
8324 (__v4sf)_mm_setzero_ps());
8327 static __inline__ __m256 __DEFAULT_FN_ATTRS
8328 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8330 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8331 (__v8sf)_mm256_movehdup_ps(__A),
8335 static __inline__ __m256 __DEFAULT_FN_ATTRS
8336 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8338 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8339 (__v8sf)_mm256_movehdup_ps(__A),
8340 (__v8sf)_mm256_setzero_ps());
8343 static __inline__ __m128 __DEFAULT_FN_ATTRS
8344 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8346 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8347 (__v4sf)_mm_moveldup_ps(__A),
8351 static __inline__ __m128 __DEFAULT_FN_ATTRS
8352 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8354 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8355 (__v4sf)_mm_moveldup_ps(__A),
8356 (__v4sf)_mm_setzero_ps());
8359 static __inline__ __m256 __DEFAULT_FN_ATTRS
8360 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8362 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8363 (__v8sf)_mm256_moveldup_ps(__A),
8367 static __inline__ __m256 __DEFAULT_FN_ATTRS
8368 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8370 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8371 (__v8sf)_mm256_moveldup_ps(__A),
8372 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm256_shuffle_epi32: mask U, the shuffle result and W are
 * passed, in that order, to the selectd builtin. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)); })
/* Zeroing form of _mm256_shuffle_epi32: mask U, the shuffle result and an
 * all-zero vector are passed, in that order, to the selectd builtin. */
#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()); })
/* Masked form of _mm_shuffle_epi32: mask U, the shuffle result and W are
 * passed, in that order, to the selectd builtin. */
#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)); })
/* Zeroing form of _mm_shuffle_epi32: mask U, the shuffle result and an
 * all-zero vector are passed, in that order, to the selectd builtin. */
#define _mm_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()); })
8395 static __inline__ __m128d __DEFAULT_FN_ATTRS
8396 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8398 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8403 static __inline__ __m128d __DEFAULT_FN_ATTRS
8404 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8406 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8408 (__v2df) _mm_setzero_pd ());
8411 static __inline__ __m256d __DEFAULT_FN_ATTRS
8412 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8414 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8419 static __inline__ __m256d __DEFAULT_FN_ATTRS
8420 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8422 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8424 (__v4df) _mm256_setzero_pd ());
8427 static __inline__ __m128 __DEFAULT_FN_ATTRS
8428 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8430 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8435 static __inline__ __m128 __DEFAULT_FN_ATTRS
8436 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8438 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8440 (__v4sf) _mm_setzero_ps ());
8443 static __inline__ __m256 __DEFAULT_FN_ATTRS
8444 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8446 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8451 static __inline__ __m256 __DEFAULT_FN_ATTRS
8452 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8454 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8456 (__v8sf) _mm256_setzero_ps ());
8459 static __inline__ __m128 __DEFAULT_FN_ATTRS
8460 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8462 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8467 static __inline__ __m128 __DEFAULT_FN_ATTRS
8468 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8470 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8476 static __inline__ __m256 __DEFAULT_FN_ATTRS
8477 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8479 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8484 static __inline__ __m256 __DEFAULT_FN_ATTRS
8485 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8487 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8489 _mm256_setzero_ps (),
8493 static __inline __m128i __DEFAULT_FN_ATTRS
8494 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8496 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8501 static __inline __m128i __DEFAULT_FN_ATTRS
8502 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8504 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8505 (__v8hi) _mm_setzero_si128 (),
/* Masked single-to-half conversion with caller-supplied immediate I.  W is
 * the pass-through vector and U the mask given to the vcvtps2ph builtin. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)(__m128i)(W), \
                                         (__mmask8)(U)); })
/* Zeroing single-to-half conversion with caller-supplied immediate I.  A
 * zero vector is the pass-through operand and U the mask given to the
 * vcvtps2ph builtin. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })
8519 static __inline __m128i __DEFAULT_FN_ATTRS
8520 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8522 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8527 static __inline __m128i __DEFAULT_FN_ATTRS
8528 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8530 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8531 (__v8hi) _mm_setzero_si128(),
/* Masked single-to-half conversion of a __m256 with caller-supplied
 * immediate I.  W is the pass-through vector and U the mask given to the
 * vcvtps2ph256 builtin. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)); })
/* Zeroing single-to-half conversion of a __m256 with caller-supplied
 * immediate I.  A zero vector is the pass-through operand and U the mask
 * given to the vcvtps2ph256 builtin. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)); })
8545 #undef __DEFAULT_FN_ATTRS
8547 #endif /* __AVX512VLINTRIN_H */