/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
/* This header is meant to be pulled in through <immintrin.h>; refuse a direct
 * include, which is detected by the umbrella header's guard macro being
 * absent. */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
/* Attribute sets applied to the intrinsics below: always inlined, no debug
 * info, compiled for the "avx512vl" target, with the stated minimum vector
 * width (128 or 256). */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))

/* Narrow vector types: 4 bytes of short, 4 bytes of char, 2 bytes of char. */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
/* Fixed-predicate shorthands for _mm_cmp_epi32_mask and
 * _mm_mask_cmp_epi32_mask. Each name bakes in one _MM_CMPINT_* predicate; the
 * _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epi32_mask and
 * _mm256_mask_cmp_epi32_mask. Each name bakes in one _MM_CMPINT_* predicate;
 * the _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epu32_mask and
 * _mm_mask_cmp_epu32_mask. Each name bakes in one _MM_CMPINT_* predicate; the
 * _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epu32_mask and
 * _mm256_mask_cmp_epu32_mask. Each name bakes in one _MM_CMPINT_* predicate;
 * the _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epi64_mask and
 * _mm_mask_cmp_epi64_mask. Each name bakes in one _MM_CMPINT_* predicate; the
 * _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epi64_mask and
 * _mm256_mask_cmp_epi64_mask. Each name bakes in one _MM_CMPINT_* predicate;
 * the _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epu64_mask and
 * _mm_mask_cmp_epu64_mask. Each name bakes in one _MM_CMPINT_* predicate; the
 * _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epu64_mask and
 * _mm256_mask_cmp_epu64_mask. Each name bakes in one _MM_CMPINT_* predicate;
 * the _mask_ forms forward k unchanged as the leading mask operand. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
241 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244 (__v8si)_mm256_add_epi32(__A, __B),
248 static __inline__ __m256i __DEFAULT_FN_ATTRS256
249 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
251 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252 (__v8si)_mm256_add_epi32(__A, __B),
253 (__v8si)_mm256_setzero_si256());
256 static __inline__ __m256i __DEFAULT_FN_ATTRS256
257 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260 (__v4di)_mm256_add_epi64(__A, __B),
264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
265 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
267 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268 (__v4di)_mm256_add_epi64(__A, __B),
269 (__v4di)_mm256_setzero_si256());
272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
273 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
281 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
283 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284 (__v8si)_mm256_sub_epi32(__A, __B),
285 (__v8si)_mm256_setzero_si256());
288 static __inline__ __m256i __DEFAULT_FN_ATTRS256
289 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292 (__v4di)_mm256_sub_epi64(__A, __B),
296 static __inline__ __m256i __DEFAULT_FN_ATTRS256
297 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
299 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300 (__v4di)_mm256_sub_epi64(__A, __B),
301 (__v4di)_mm256_setzero_si256());
304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
305 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308 (__v4si)_mm_add_epi32(__A, __B),
312 static __inline__ __m128i __DEFAULT_FN_ATTRS128
313 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
315 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316 (__v4si)_mm_add_epi32(__A, __B),
317 (__v4si)_mm_setzero_si128());
320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
321 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324 (__v2di)_mm_add_epi64(__A, __B),
328 static __inline__ __m128i __DEFAULT_FN_ATTRS128
329 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
331 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332 (__v2di)_mm_add_epi64(__A, __B),
333 (__v2di)_mm_setzero_si128());
336 static __inline__ __m128i __DEFAULT_FN_ATTRS128
337 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340 (__v4si)_mm_sub_epi32(__A, __B),
344 static __inline__ __m128i __DEFAULT_FN_ATTRS128
345 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
347 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348 (__v4si)_mm_sub_epi32(__A, __B),
349 (__v4si)_mm_setzero_si128());
352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
353 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356 (__v2di)_mm_sub_epi64(__A, __B),
360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
361 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
363 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364 (__v2di)_mm_sub_epi64(__A, __B),
365 (__v2di)_mm_setzero_si128());
368 static __inline__ __m256i __DEFAULT_FN_ATTRS256
369 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372 (__v4di)_mm256_mul_epi32(__X, __Y),
376 static __inline__ __m256i __DEFAULT_FN_ATTRS256
377 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
379 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380 (__v4di)_mm256_mul_epi32(__X, __Y),
381 (__v4di)_mm256_setzero_si256());
384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
385 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
387 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388 (__v2di)_mm_mul_epi32(__X, __Y),
392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
393 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
395 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396 (__v2di)_mm_mul_epi32(__X, __Y),
397 (__v2di)_mm_setzero_si128());
400 static __inline__ __m256i __DEFAULT_FN_ATTRS256
401 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404 (__v4di)_mm256_mul_epu32(__X, __Y),
408 static __inline__ __m256i __DEFAULT_FN_ATTRS256
409 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
411 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412 (__v4di)_mm256_mul_epu32(__X, __Y),
413 (__v4di)_mm256_setzero_si256());
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420 (__v2di)_mm_mul_epu32(__X, __Y),
424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
425 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
427 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428 (__v2di)_mm_mul_epu32(__X, __Y),
429 (__v2di)_mm_setzero_si128());
432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
433 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
435 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436 (__v8si)_mm256_mullo_epi32(__A, __B),
437 (__v8si)_mm256_setzero_si256());
440 static __inline__ __m256i __DEFAULT_FN_ATTRS256
441 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
443 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
444 (__v8si)_mm256_mullo_epi32(__A, __B),
448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
449 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
451 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452 (__v4si)_mm_mullo_epi32(__A, __B),
453 (__v4si)_mm_setzero_si128());
456 static __inline__ __m128i __DEFAULT_FN_ATTRS128
457 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
459 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
460 (__v4si)_mm_mullo_epi32(__A, __B),
464 static __inline__ __m256i __DEFAULT_FN_ATTRS256
465 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
467 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
468 (__v8si)_mm256_and_si256(__A, __B),
472 static __inline__ __m256i __DEFAULT_FN_ATTRS256
473 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
475 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
478 static __inline__ __m128i __DEFAULT_FN_ATTRS128
479 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
481 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
482 (__v4si)_mm_and_si128(__A, __B),
486 static __inline__ __m128i __DEFAULT_FN_ATTRS128
487 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
489 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
492 static __inline__ __m256i __DEFAULT_FN_ATTRS256
493 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
495 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
496 (__v8si)_mm256_andnot_si256(__A, __B),
500 static __inline__ __m256i __DEFAULT_FN_ATTRS256
501 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
503 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
508 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
510 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
511 (__v4si)_mm_andnot_si128(__A, __B),
515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
516 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
518 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
521 static __inline__ __m256i __DEFAULT_FN_ATTRS256
522 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
524 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
525 (__v8si)_mm256_or_si256(__A, __B),
529 static __inline__ __m256i __DEFAULT_FN_ATTRS256
530 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
532 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
535 static __inline__ __m128i __DEFAULT_FN_ATTRS128
536 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
538 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
539 (__v4si)_mm_or_si128(__A, __B),
543 static __inline__ __m128i __DEFAULT_FN_ATTRS128
544 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
546 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
549 static __inline__ __m256i __DEFAULT_FN_ATTRS256
550 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
552 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
553 (__v8si)_mm256_xor_si256(__A, __B),
557 static __inline__ __m256i __DEFAULT_FN_ATTRS256
558 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
560 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
563 static __inline__ __m128i __DEFAULT_FN_ATTRS128
564 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
567 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
568 (__v4si)_mm_xor_si128(__A, __B),
572 static __inline__ __m128i __DEFAULT_FN_ATTRS128
573 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
575 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
578 static __inline__ __m256i __DEFAULT_FN_ATTRS256
579 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
581 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
582 (__v4di)_mm256_and_si256(__A, __B),
586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
587 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
589 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
592 static __inline__ __m128i __DEFAULT_FN_ATTRS128
593 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
595 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
596 (__v2di)_mm_and_si128(__A, __B),
600 static __inline__ __m128i __DEFAULT_FN_ATTRS128
601 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
603 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
606 static __inline__ __m256i __DEFAULT_FN_ATTRS256
607 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
609 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
610 (__v4di)_mm256_andnot_si256(__A, __B),
614 static __inline__ __m256i __DEFAULT_FN_ATTRS256
615 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
617 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
622 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
624 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
625 (__v2di)_mm_andnot_si128(__A, __B),
629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
630 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
632 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
635 static __inline__ __m256i __DEFAULT_FN_ATTRS256
636 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
638 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
639 (__v4di)_mm256_or_si256(__A, __B),
643 static __inline__ __m256i __DEFAULT_FN_ATTRS256
644 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
646 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
649 static __inline__ __m128i __DEFAULT_FN_ATTRS128
650 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
652 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
653 (__v2di)_mm_or_si128(__A, __B),
657 static __inline__ __m128i __DEFAULT_FN_ATTRS128
658 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
660 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
663 static __inline__ __m256i __DEFAULT_FN_ATTRS256
664 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
666 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
667 (__v4di)_mm256_xor_si256(__A, __B),
671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
672 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
674 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
677 static __inline__ __m128i __DEFAULT_FN_ATTRS128
678 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
682 (__v2di)_mm_xor_si128(__A, __B),
686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
687 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
689 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Generic 32-bit integer compares on 128-bit vectors. a and b are cast to
 * __v4si, p to int, and handed to the signed (cmpd128) or unsigned (ucmpd128)
 * builtin. The unmasked forms pass an all-ones mask; the _mask_ forms pass m.
 * Each expansion is parenthesized so the leading cast cannot bind to
 * neighbouring operators at the use site. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic 32-bit integer compares on 256-bit vectors. a and b are cast to
 * __v8si, p to int, and handed to the signed (cmpd256) or unsigned (ucmpd256)
 * builtin. The unmasked forms pass an all-ones mask; the _mask_ forms pass m.
 * Each expansion is parenthesized so the leading cast cannot bind to
 * neighbouring operators at the use site. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic 64-bit integer compares on 128-bit vectors. a and b are cast to
 * __v2di, p to int, and handed to the signed (cmpq128) or unsigned (ucmpq128)
 * builtin. The unmasked forms pass an all-ones mask; the _mask_ forms pass m.
 * Each expansion is parenthesized so the leading cast cannot bind to
 * neighbouring operators at the use site. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic 64-bit integer compares on 256-bit vectors. a and b are cast to
 * __v4di, p to int, and handed to the signed (cmpq256) or unsigned (ucmpq256)
 * builtin. The unmasked forms pass an all-ones mask; the _mask_ forms pass m.
 * Each expansion is parenthesized so the leading cast cannot bind to
 * neighbouring operators at the use site. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic floating-point compares on 256-bit vectors. The ps forms cast a and
 * b to __v8sf, the pd forms to __v4df; p is cast to int. The unmasked forms
 * pass an all-ones mask to the builtin; the _mask_ forms pass m. Each
 * expansion is parenthesized so the leading cast cannot bind to neighbouring
 * operators at the use site. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic floating-point compares on 128-bit vectors. The ps forms cast a and
 * b to __v4sf, the pd forms to __v2df; p is cast to int. The unmasked forms
 * pass an all-ones mask to the builtin; the _mask_ forms pass m. Each
 * expansion is parenthesized so the leading cast cannot bind to neighbouring
 * operators at the use site. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
812 static __inline__ __m128d __DEFAULT_FN_ATTRS128
813 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
815 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
816 __builtin_ia32_vfmaddpd ((__v2df) __A,
822 static __inline__ __m128d __DEFAULT_FN_ATTRS128
823 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
825 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
826 __builtin_ia32_vfmaddpd ((__v2df) __A,
832 static __inline__ __m128d __DEFAULT_FN_ATTRS128
833 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
835 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
836 __builtin_ia32_vfmaddpd ((__v2df) __A,
839 (__v2df)_mm_setzero_pd());
842 static __inline__ __m128d __DEFAULT_FN_ATTRS128
843 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
845 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
846 __builtin_ia32_vfmaddpd ((__v2df) __A,
852 static __inline__ __m128d __DEFAULT_FN_ATTRS128
853 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
855 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
856 __builtin_ia32_vfmaddpd ((__v2df) __A,
859 (__v2df)_mm_setzero_pd());
862 static __inline__ __m128d __DEFAULT_FN_ATTRS128
863 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
865 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
866 __builtin_ia32_vfmaddpd (-(__v2df) __A,
872 static __inline__ __m128d __DEFAULT_FN_ATTRS128
873 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
875 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
876 __builtin_ia32_vfmaddpd (-(__v2df) __A,
879 (__v2df)_mm_setzero_pd());
882 static __inline__ __m128d __DEFAULT_FN_ATTRS128
883 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
885 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
886 __builtin_ia32_vfmaddpd (-(__v2df) __A,
889 (__v2df)_mm_setzero_pd());
892 static __inline__ __m256d __DEFAULT_FN_ATTRS256
893 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
895 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
896 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
902 static __inline__ __m256d __DEFAULT_FN_ATTRS256
903 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
905 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
906 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
912 static __inline__ __m256d __DEFAULT_FN_ATTRS256
913 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
915 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
916 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
919 (__v4df)_mm256_setzero_pd());
922 static __inline__ __m256d __DEFAULT_FN_ATTRS256
923 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
925 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
926 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
932 static __inline__ __m256d __DEFAULT_FN_ATTRS256
933 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
935 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
936 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
939 (__v4df)_mm256_setzero_pd());
/* mask3 form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddpd256 result whose first operand is -__A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
942 static __inline__ __m256d __DEFAULT_FN_ATTRS256
943 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
945 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
946 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddpd256 result (first
 * operand -__A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256.
 * NOTE(review): the remaining FMA operands are not visible here. */
952 static __inline__ __m256d __DEFAULT_FN_ATTRS256
953 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
955 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
956 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
959 (__v4df)_mm256_setzero_pd());
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddpd256 result (first
 * operand -__A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256.
 * NOTE(review): the sign of the remaining FMA operands is not visible here. */
962 static __inline__ __m256d __DEFAULT_FN_ATTRS256
963 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
965 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
966 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
969 (__v4df)_mm256_setzero_pd());
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
972 static __inline__ __m128 __DEFAULT_FN_ATTRS128
973 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
975 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
976 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
982 static __inline__ __m128 __DEFAULT_FN_ATTRS128
983 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
985 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
986 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps result (first operand
 * __A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the remaining FMA operands are not visible here. */
992 static __inline__ __m128 __DEFAULT_FN_ATTRS128
993 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
995 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
996 __builtin_ia32_vfmaddps ((__v4sf) __A,
999 (__v4sf)_mm_setzero_ps());
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1002 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1003 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1005 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1006 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps result (first operand
 * __A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the sign of the remaining FMA operands is not visible here. */
1012 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1013 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1015 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1016 __builtin_ia32_vfmaddps ((__v4sf) __A,
1019 (__v4sf)_mm_setzero_ps());
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is -__A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
1022 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1023 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1025 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1026 __builtin_ia32_vfmaddps (-(__v4sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps result (first operand
 * -__A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the remaining FMA operands are not visible here. */
1032 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1033 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1035 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1036 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1039 (__v4sf)_mm_setzero_ps());
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps result (first operand
 * -__A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the sign of the remaining FMA operands is not visible here. */
1042 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1043 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1045 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1046 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1049 (__v4sf)_mm_setzero_ps());
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
1052 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1053 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1055 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1056 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
1062 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1063 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1065 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1066 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps256 result (first
 * operand __A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the remaining FMA operands are not visible here. */
1072 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1073 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1075 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1076 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1079 (__v8sf)_mm256_setzero_ps());
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1082 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1083 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1085 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1086 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps256 result (first
 * operand __A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the sign of the remaining FMA operands is not visible here. */
1092 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1093 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1095 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1096 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1099 (__v8sf)_mm256_setzero_ps());
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is -__A.
 * NOTE(review): other FMA operands and the pass-through vector are not visible here. */
1102 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1103 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1105 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1106 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps256 result (first
 * operand -__A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the remaining FMA operands are not visible here. */
1112 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1113 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1115 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1116 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1119 (__v8sf)_mm256_setzero_ps());
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddps256 result (first
 * operand -__A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the sign of the remaining FMA operands is not visible here. */
1122 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1123 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1125 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1126 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1129 (__v8sf)_mm256_setzero_ps());
/* Masked form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1132 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1133 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1135 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1136 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* mask3 form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1142 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1143 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1145 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1146 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubpd result (first
 * operand __A) and _mm_setzero_pd() to __builtin_ia32_selectpd_128.
 * NOTE(review): the remaining operands are not visible here. */
1152 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1153 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1155 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1156 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1159 (__v2df)_mm_setzero_pd());
/* Masked form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1162 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1163 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1165 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1166 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubpd result (first
 * operand __A) and _mm_setzero_pd() to __builtin_ia32_selectpd_128.
 * NOTE(review): the sign of the remaining operands is not visible here. */
1172 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1173 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1175 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1176 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1179 (__v2df)_mm_setzero_pd());
/* Masked form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd256 result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1182 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1183 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1185 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1186 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* mask3 form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd256 result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1192 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1193 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1195 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1196 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubpd256 result (first
 * operand __A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256.
 * NOTE(review): the remaining operands are not visible here. */
1202 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1203 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1205 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1206 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1209 (__v4df)_mm256_setzero_pd());
/* Masked form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1212 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1213 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1215 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1216 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubpd256 result (first
 * operand __A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256.
 * NOTE(review): the sign of the remaining operands is not visible here. */
1222 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1223 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1225 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1226 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1229 (__v4df)_mm256_setzero_pd());
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubps result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1232 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1233 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1235 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1236 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubps result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1242 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1243 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1245 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1246 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubps result (first
 * operand __A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the remaining operands are not visible here. */
1252 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1253 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1255 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1256 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1259 (__v4sf)_mm_setzero_ps());
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubps result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1262 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1263 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1265 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1266 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubps result (first
 * operand __A) and _mm_setzero_ps() to __builtin_ia32_selectps_128.
 * NOTE(review): the sign of the remaining operands is not visible here. */
1272 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1273 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1275 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1276 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1279 (__v4sf)_mm_setzero_ps());
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubps256 result whose first operand is __A.
 * NOTE(review): the tail of the parameter list, the other operands and the
 * pass-through vector are not visible here - confirm against the full header. */
1282 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1283 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1286 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1287 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubps256 result whose first operand is __A.
 * NOTE(review): other operands and the pass-through vector are not visible here. */
1293 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1294 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1296 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1297 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubps256 result (first
 * operand __A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the remaining operands are not visible here. */
1303 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1304 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1306 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1307 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1310 (__v8sf)_mm256_setzero_ps());
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubps256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1313 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1314 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1316 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1317 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Zero-masked form: passes __U, a __builtin_ia32_vfmaddsubps256 result (first
 * operand __A) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): the sign of the remaining operands is not visible here. */
1323 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1324 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1326 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1327 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1330 (__v8sf)_mm256_setzero_ps());
/* mask3 form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddpd result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1333 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1334 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1336 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1337 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* mask3 form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddpd256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1343 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1344 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1346 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1347 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1356 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1366 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1367 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* mask3 form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1373 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1374 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1376 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1377 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* mask3 form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubpd256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1383 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1384 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1386 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1387 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddsubps result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1393 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1394 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1396 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1397 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddsubps256 result whose first operand is __A.
 * NOTE(review): the sign of the remaining operands and the pass-through vector
 * are not visible here - confirm. */
1403 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1404 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1406 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1407 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Masked form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddpd result whose first operand is __A (not negated here).
 * NOTE(review): the negation implied by the name must sit on an operand that
 * is not visible in this excerpt - confirm. */
1413 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1414 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1416 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1417 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddpd256 result whose first operand is __A (not negated here).
 * NOTE(review): the negation implied by the name must sit on an operand that
 * is not visible in this excerpt - confirm. */
1423 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1424 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1426 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1427 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A (not negated here).
 * NOTE(review): the negation implied by the name must sit on an operand that
 * is not visible in this excerpt - confirm. */
1433 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1434 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1436 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1437 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A (not negated here).
 * NOTE(review): the negation implied by the name must sit on an operand that
 * is not visible in this excerpt - confirm. */
1443 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1444 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1446 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1447 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddpd result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1453 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1454 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1456 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1457 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* mask3 form: __builtin_ia32_selectpd_128 keyed on __U over a
 * __builtin_ia32_vfmaddpd result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1463 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1464 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1466 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1467 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddpd256 result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1473 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1474 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1476 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1477 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* mask3 form: __builtin_ia32_selectpd_256 keyed on __U over a
 * __builtin_ia32_vfmaddpd256 result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1483 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1484 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1486 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1487 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1493 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1494 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1496 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1497 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* mask3 form: __builtin_ia32_selectps_128 keyed on __U over a
 * __builtin_ia32_vfmaddps result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1503 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1504 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1506 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1507 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1513 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1514 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1516 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1517 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* mask3 form: __builtin_ia32_selectps_256 keyed on __U over a
 * __builtin_ia32_vfmaddps256 result whose first operand is __A (not negated here).
 * NOTE(review): operand signs and the pass-through vector are not visible in
 * this excerpt - confirm. */
1523 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1524 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1526 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1527 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked add: passes __U and _mm_add_pd(__A, __B) to __builtin_ia32_selectpd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1533 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1534 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1535 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1536 (__v2df)_mm_add_pd(__A, __B),
/* Zero-masked add: passes __U, _mm_add_pd(__A, __B) and _mm_setzero_pd() to
 * __builtin_ia32_selectpd_128 and returns the selected vector. */
1540 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1541 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1542 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1543 (__v2df)_mm_add_pd(__A, __B),
1544 (__v2df)_mm_setzero_pd());
/* Masked add: passes __U and _mm256_add_pd(__A, __B) to __builtin_ia32_selectpd_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1547 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1548 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1549 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1550 (__v4df)_mm256_add_pd(__A, __B),
/* Zero-masked add: passes __U, _mm256_add_pd(__A, __B) and _mm256_setzero_pd()
 * to __builtin_ia32_selectpd_256 and returns the selected vector. */
1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1556 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1557 (__v4df)_mm256_add_pd(__A, __B),
1558 (__v4df)_mm256_setzero_pd());
/* Masked add: passes __U and _mm_add_ps(__A, __B) to __builtin_ia32_selectps_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1561 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1562 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1563 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1564 (__v4sf)_mm_add_ps(__A, __B),
/* Zero-masked add: passes __U, _mm_add_ps(__A, __B) and _mm_setzero_ps() to
 * __builtin_ia32_selectps_128 and returns the selected vector. */
1568 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1569 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1570 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1571 (__v4sf)_mm_add_ps(__A, __B),
1572 (__v4sf)_mm_setzero_ps());
/* Masked add: passes __U and _mm256_add_ps(__A, __B) to __builtin_ia32_selectps_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1576 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1577 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1578 (__v8sf)_mm256_add_ps(__A, __B),
/* Zero-masked add: passes __U, _mm256_add_ps(__A, __B) and _mm256_setzero_ps()
 * to __builtin_ia32_selectps_256 and returns the selected vector. */
1582 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1583 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1584 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1585 (__v8sf)_mm256_add_ps(__A, __B),
1586 (__v8sf)_mm256_setzero_ps());
/* Blend of two __m128i inputs through __builtin_ia32_selectd_128 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1589 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1590 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1591 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
/* Blend of two __m256i inputs through __builtin_ia32_selectd_256 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1596 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1597 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1598 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
/* Blend of two __m128d inputs through __builtin_ia32_selectpd_128 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1603 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1604 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1605 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
/* Blend of two __m256d inputs through __builtin_ia32_selectpd_256 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1610 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1611 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1612 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
/* Blend of two __m128 inputs through __builtin_ia32_selectps_128 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1617 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1618 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1619 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
/* Blend of two __m256 inputs through __builtin_ia32_selectps_256 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1624 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1625 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1626 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
/* Blend of two __m128i inputs through __builtin_ia32_selectq_128 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1631 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1632 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1633 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
/* Blend of two __m256i inputs through __builtin_ia32_selectq_256 keyed on __U.
 * NOTE(review): the order in which __A and __W are passed is not visible here. */
1638 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1640 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* Wraps __builtin_ia32_compressdf128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1645 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1646 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1647 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
/* Wraps __builtin_ia32_compressdf128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably a zero vector and __U) are not
 * visible here. */
1652 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1653 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1654 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
/* Wraps __builtin_ia32_compressdf256_mask with __A (as __v4df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1660 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1661 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1662 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
/* Zero-masked form: calls __builtin_ia32_compressdf256_mask with __A and a
 * _mm256_setzero_pd() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1667 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1668 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1669 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1671 _mm256_setzero_pd (),
/* Wraps __builtin_ia32_compressdi128_mask with __A (as __v2di) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1675 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1676 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1677 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_compressdi128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1682 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1683 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1684 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1686 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_compressdi256_mask with __A (as __v4di) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1690 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1691 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1692 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_compressdi256_mask with __A and a
 * _mm256_setzero_si256() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1697 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1698 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1699 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1701 _mm256_setzero_si256 (),
/* Wraps __builtin_ia32_compresssf128_mask with __A (as __v4sf) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1705 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1706 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1707 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_compresssf128_mask with __A (as __v4sf) as first argument.
 * NOTE(review): remaining arguments (presumably a zero vector and __U) are not
 * visible here. */
1712 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1713 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1714 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_compresssf256_mask with __A (as __v8sf) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1720 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1721 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1722 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
/* Zero-masked form: calls __builtin_ia32_compresssf256_mask with __A and a
 * _mm256_setzero_ps() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1727 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1728 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1729 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1731 _mm256_setzero_ps (),
/* Wraps __builtin_ia32_compresssi128_mask with __A (as __v4si) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1735 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1737 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
/* Zero-masked form: calls __builtin_ia32_compresssi128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1742 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1744 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1746 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_compresssi256_mask with __A (as __v8si) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1750 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1751 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1752 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
/* Zero-masked form: calls __builtin_ia32_compresssi256_mask with __A and a
 * _mm256_setzero_si256() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1757 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1758 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1759 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1761 _mm256_setzero_si256 (),
/* Store wrapper (returns void): forwards __P, cast to __v2df *, to
 * __builtin_ia32_compressstoredf128_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1765 static __inline__ void __DEFAULT_FN_ATTRS128
1766 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1767 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v4df *, to
 * __builtin_ia32_compressstoredf256_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1772 static __inline__ void __DEFAULT_FN_ATTRS256
1773 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1774 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v2di *, to
 * __builtin_ia32_compressstoredi128_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1779 static __inline__ void __DEFAULT_FN_ATTRS128
1780 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1781 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v4di *, to
 * __builtin_ia32_compressstoredi256_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1786 static __inline__ void __DEFAULT_FN_ATTRS256
1787 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1788 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v4sf *, to
 * __builtin_ia32_compressstoresf128_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1793 static __inline__ void __DEFAULT_FN_ATTRS128
1794 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1795 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v8sf *, to
 * __builtin_ia32_compressstoresf256_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1800 static __inline__ void __DEFAULT_FN_ATTRS256
1801 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1802 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v4si *, to
 * __builtin_ia32_compressstoresi128_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1807 static __inline__ void __DEFAULT_FN_ATTRS128
1808 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1809 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
/* Store wrapper (returns void): forwards __P, cast to __v8si *, to
 * __builtin_ia32_compressstoresi256_mask.
 * NOTE(review): the data and mask arguments are not visible here. */
1814 static __inline__ void __DEFAULT_FN_ATTRS256
1815 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1816 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
/* Masked conversion: passes __U and _mm_cvtepi32_pd(__A) to __builtin_ia32_selectpd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1821 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1822 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1823 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1824 (__v2df)_mm_cvtepi32_pd(__A),
/* Zero-masked conversion: passes __U, _mm_cvtepi32_pd(__A) and _mm_setzero_pd()
 * to __builtin_ia32_selectpd_128 and returns the selected vector. */
1828 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1829 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1830 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1831 (__v2df)_mm_cvtepi32_pd(__A),
1832 (__v2df)_mm_setzero_pd());
/* Masked conversion of a __m128i input to __m256d: passes __U and
 * _mm256_cvtepi32_pd(__A) to __builtin_ia32_selectpd_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1835 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1836 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1837 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1838 (__v4df)_mm256_cvtepi32_pd(__A),
/* Zero-masked conversion of a __m128i input to __m256d: passes __U,
 * _mm256_cvtepi32_pd(__A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256. */
1842 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1843 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1844 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1845 (__v4df)_mm256_cvtepi32_pd(__A),
1846 (__v4df)_mm256_setzero_pd());
/* Masked conversion: passes __U and _mm_cvtepi32_ps(__A) to __builtin_ia32_selectps_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1849 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1850 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1851 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1852 (__v4sf)_mm_cvtepi32_ps(__A),
/* Zero-masked conversion: passes __U, _mm_cvtepi32_ps(__A) and _mm_setzero_ps()
 * to __builtin_ia32_selectps_128 and returns the selected vector. */
1856 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1857 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1858 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1859 (__v4sf)_mm_cvtepi32_ps(__A),
1860 (__v4sf)_mm_setzero_ps());
/* Masked conversion: passes __U and _mm256_cvtepi32_ps(__A) to __builtin_ia32_selectps_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1863 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1864 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1865 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1866 (__v8sf)_mm256_cvtepi32_ps(__A),
/* Zero-masked conversion: passes __U, _mm256_cvtepi32_ps(__A) and
 * _mm256_setzero_ps() to __builtin_ia32_selectps_256. */
1870 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1871 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1872 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1873 (__v8sf)_mm256_cvtepi32_ps(__A),
1874 (__v8sf)_mm256_setzero_ps());
/* Wraps __builtin_ia32_cvtpd2dq128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1877 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1878 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1879 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
/* Zero-masked form: calls __builtin_ia32_cvtpd2dq128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1885 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1886 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1888 _mm_setzero_si128 (),
/* Masked conversion of a __m256d input to a __m128i result: passes __U and
 * _mm256_cvtpd_epi32(__A) to the 128-bit __builtin_ia32_selectd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1892 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1893 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1894 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1895 (__v4si)_mm256_cvtpd_epi32(__A),
/* Zero-masked conversion of a __m256d input to a __m128i result: passes __U,
 * _mm256_cvtpd_epi32(__A) and _mm_setzero_si128() to __builtin_ia32_selectd_128. */
1899 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1900 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1901 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1902 (__v4si)_mm256_cvtpd_epi32(__A),
1903 (__v4si)_mm_setzero_si128());
/* Wraps __builtin_ia32_cvtpd2ps_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1906 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1907 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1908 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
/* Wraps __builtin_ia32_cvtpd2ps_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably a zero vector and __U) are not
 * visible here. */
1913 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1914 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1915 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
/* Masked conversion of a __m256d input to a __m128 result: passes __U and
 * _mm256_cvtpd_ps(__A) to the 128-bit __builtin_ia32_selectps_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1921 static __inline__ __m128 __DEFAULT_FN_ATTRS256
1922 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
1923 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1924 (__v4sf)_mm256_cvtpd_ps(__A),
/* Zero-masked conversion of a __m256d input to a __m128 result: passes __U,
 * _mm256_cvtpd_ps(__A) and _mm_setzero_ps() to __builtin_ia32_selectps_128. */
1928 static __inline__ __m128 __DEFAULT_FN_ATTRS256
1929 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
1930 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1931 (__v4sf)_mm256_cvtpd_ps(__A),
1932 (__v4sf)_mm_setzero_ps());
/* Unmasked entry point (takes no mask parameter): calls
 * __builtin_ia32_cvtpd2udq128_mask with __A and a _mm_setzero_si128() vector.
 * NOTE(review): the mask constant passed to the builtin is not visible here. */
1935 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1936 _mm_cvtpd_epu32 (__m128d __A) {
1937 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1939 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_cvtpd2udq128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1943 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1944 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
1945 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
/* Zero-masked form: calls __builtin_ia32_cvtpd2udq128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1950 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1951 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
1952 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
1954 _mm_setzero_si128 (),
/* Unmasked entry point returning __m128i from a __m256d input: calls
 * __builtin_ia32_cvtpd2udq256_mask with __A and a _mm_setzero_si128() vector.
 * NOTE(review): the mask constant passed to the builtin is not visible here. */
1958 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1959 _mm256_cvtpd_epu32 (__m256d __A) {
1960 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1962 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_cvtpd2udq256_mask with __A (as __v4df) as first argument;
 * the result type is __m128i.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
1966 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1967 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
1968 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
/* Zero-masked form: calls __builtin_ia32_cvtpd2udq256_mask with __A and a
 * _mm_setzero_si128() vector among its arguments; the result type is __m128i.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
1975 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
1977 _mm_setzero_si128 (),
/* Masked conversion: passes __U and _mm_cvtps_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1981 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1982 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
1983 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1984 (__v4si)_mm_cvtps_epi32(__A),
/* Zero-masked conversion: passes __U, _mm_cvtps_epi32(__A) and
 * _mm_setzero_si128() to __builtin_ia32_selectd_128. */
1988 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1989 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
1990 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1991 (__v4si)_mm_cvtps_epi32(__A),
1992 (__v4si)_mm_setzero_si128());
/* Masked conversion: passes __U and _mm256_cvtps_epi32(__A) to __builtin_ia32_selectd_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
1995 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1996 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
1997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
1998 (__v8si)_mm256_cvtps_epi32(__A),
/* Zero-masked conversion: passes __U, _mm256_cvtps_epi32(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256. */
2002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2003 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2004 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2005 (__v8si)_mm256_cvtps_epi32(__A),
2006 (__v8si)_mm256_setzero_si256());
/* Masked conversion: passes __U and _mm_cvtps_pd(__A) to __builtin_ia32_selectpd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
2009 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2010 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2011 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2012 (__v2df)_mm_cvtps_pd(__A),
/* Zero-masked conversion: passes __U, _mm_cvtps_pd(__A) and _mm_setzero_pd()
 * to __builtin_ia32_selectpd_128. */
2016 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2017 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2018 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2019 (__v2df)_mm_cvtps_pd(__A),
2020 (__v2df)_mm_setzero_pd());
/* Masked conversion of a __m128 input to __m256d: passes __U and
 * _mm256_cvtps_pd(__A) to __builtin_ia32_selectpd_256.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
2023 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2024 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2025 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2026 (__v4df)_mm256_cvtps_pd(__A),
/* Zero-masked conversion of a __m128 input to __m256d: passes __U,
 * _mm256_cvtps_pd(__A) and _mm256_setzero_pd() to __builtin_ia32_selectpd_256. */
2030 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2031 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2032 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2033 (__v4df)_mm256_cvtps_pd(__A),
2034 (__v4df)_mm256_setzero_pd());
/* Unmasked entry point (takes no mask parameter): calls
 * __builtin_ia32_cvtps2udq128_mask with __A and a _mm_setzero_si128() vector.
 * NOTE(review): the mask constant passed to the builtin is not visible here. */
2037 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2038 _mm_cvtps_epu32 (__m128 __A) {
2039 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2041 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_cvtps2udq128_mask with __A (as __v4sf) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
2045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2046 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2047 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
/* Zero-masked form: calls __builtin_ia32_cvtps2udq128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
2052 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2054 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2056 _mm_setzero_si128 (),
/* Unmasked entry point (takes no mask parameter): calls
 * __builtin_ia32_cvtps2udq256_mask with __A and a _mm256_setzero_si256() vector.
 * NOTE(review): the mask constant passed to the builtin is not visible here. */
2060 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2061 _mm256_cvtps_epu32 (__m256 __A) {
2062 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2064 _mm256_setzero_si256 (),
/* Wraps __builtin_ia32_cvtps2udq256_mask with __A (as __v8sf) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
2068 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2069 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2070 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
/* Zero-masked form: calls __builtin_ia32_cvtps2udq256_mask with __A and a
 * _mm256_setzero_si256() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
2075 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2076 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2077 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2079 _mm256_setzero_si256 (),
/* Wraps __builtin_ia32_cvttpd2dq128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
2083 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2085 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
/* Zero-masked form: calls __builtin_ia32_cvttpd2dq128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
2090 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2091 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2092 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2094 _mm_setzero_si128 (),
/* Masked conversion of a __m256d input to a __m128i result: passes __U and
 * _mm256_cvttpd_epi32(__A) to the 128-bit __builtin_ia32_selectd_128.
 * NOTE(review): the pass-through operand (presumably __W) is not visible here. */
2098 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2099 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2100 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2101 (__v4si)_mm256_cvttpd_epi32(__A),
/* Zero-masked conversion of a __m256d input to a __m128i result: passes __U,
 * _mm256_cvttpd_epi32(__A) and _mm_setzero_si128() to __builtin_ia32_selectd_128. */
2105 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2106 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2107 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2108 (__v4si)_mm256_cvttpd_epi32(__A),
2109 (__v4si)_mm_setzero_si128());
/* Unmasked entry point (takes no mask parameter): calls
 * __builtin_ia32_cvttpd2udq128_mask with __A and a _mm_setzero_si128() vector.
 * NOTE(review): the mask constant passed to the builtin is not visible here. */
2112 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2113 _mm_cvttpd_epu32 (__m128d __A) {
2114 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2116 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_cvttpd2udq128_mask with __A (as __v2df) as first argument.
 * NOTE(review): remaining arguments (presumably __W and __U) are not visible here. */
2120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2121 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2122 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
/* Zero-masked form: calls __builtin_ia32_cvttpd2udq128_mask with __A and a
 * _mm_setzero_si128() vector among its arguments.
 * NOTE(review): the mask argument is not visible here (presumably __U). */
2127 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2128 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2129 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2131 _mm_setzero_si128 (),
/* Takes no mask parameter.  Wraps __builtin_ia32_cvttpd2udq256_mask, passing
 * __A (as __v4df) and a 128-bit zero vector from _mm_setzero_si128().
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2135 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2136 _mm256_cvttpd_epu32 (__m256d __A) {
2137 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2139 _mm_setzero_si128 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_cvttpd2udq256_mask;
 * only the first argument (__A as __v4df) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2143 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2144 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2145 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
/* Wraps __builtin_ia32_cvttpd2udq256_mask with __A (as __v4df) and a zero
 * vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2150 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2151 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2152 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2154 _mm_setzero_si128 (),
/* Masked form of _mm_cvttps_epi32: passes mask __U and the converted vector
 * (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2158 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2159 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2160 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2161 (__v4si)_mm_cvttps_epi32(__A),
2165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2167 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2168 (__v4si)_mm_cvttps_epi32(__A),
2169 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvttps_epi32: passes mask __U and the converted
 * vector (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2172 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2173 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2174 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2175 (__v8si)_mm256_cvttps_epi32(__A),
2179 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2180 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2181 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2182 (__v8si)_mm256_cvttps_epi32(__A),
2183 (__v8si)_mm256_setzero_si256());
/* Takes no mask parameter.  Wraps __builtin_ia32_cvttps2udq128_mask, passing
 * __A (as __v4sf) and a zero vector from _mm_setzero_si128().
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2186 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2187 _mm_cvttps_epu32 (__m128 __A) {
2188 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2190 _mm_setzero_si128 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_cvttps2udq128_mask;
 * only the first argument (__A as __v4sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2194 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2195 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2196 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
/* Wraps __builtin_ia32_cvttps2udq128_mask with __A (as __v4sf) and a zero
 * vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2203 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2205 _mm_setzero_si128 (),
/* Takes no mask parameter.  Wraps __builtin_ia32_cvttps2udq256_mask, passing
 * __A (as __v8sf) and a zero vector from _mm256_setzero_si256().
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2209 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2210 _mm256_cvttps_epu32 (__m256 __A) {
2211 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2213 _mm256_setzero_si256 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_cvttps2udq256_mask;
 * only the first argument (__A as __v8sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2217 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2218 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2219 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
/* Wraps __builtin_ia32_cvttps2udq256_mask with __A (as __v8sf) and a zero
 * vector from _mm256_setzero_si256(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2224 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2225 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2226 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2228 _mm256_setzero_si256 (),
2232 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2233 _mm_cvtepu32_pd (__m128i __A) {
2234 return (__m128d) __builtin_convertvector(
2235 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
/* Masked form of _mm_cvtepu32_pd: passes mask __U and the converted vector
 * (as __v2df) to __builtin_ia32_selectpd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2238 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2239 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2240 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2241 (__v2df)_mm_cvtepu32_pd(__A),
2245 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2246 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2247 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2248 (__v2df)_mm_cvtepu32_pd(__A),
2249 (__v2df)_mm_setzero_pd());
2252 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2253 _mm256_cvtepu32_pd (__m128i __A) {
2254 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
/* Masked form of _mm256_cvtepu32_pd: passes mask __U and the converted vector
 * (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2257 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2258 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2259 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2260 (__v4df)_mm256_cvtepu32_pd(__A),
2264 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2265 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2266 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2267 (__v4df)_mm256_cvtepu32_pd(__A),
2268 (__v4df)_mm256_setzero_pd());
2271 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2272 _mm_cvtepu32_ps (__m128i __A) {
2273 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
/* Masked form of _mm_cvtepu32_ps: passes mask __U and the converted vector
 * (as __v4sf) to __builtin_ia32_selectps_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2276 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2277 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2278 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2279 (__v4sf)_mm_cvtepu32_ps(__A),
2283 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2284 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2285 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2286 (__v4sf)_mm_cvtepu32_ps(__A),
2287 (__v4sf)_mm_setzero_ps());
2290 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2291 _mm256_cvtepu32_ps (__m256i __A) {
2292 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
/* Masked form of _mm256_cvtepu32_ps: passes mask __U and the converted vector
 * (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2295 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2296 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2297 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2298 (__v8sf)_mm256_cvtepu32_ps(__A),
2302 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2303 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2304 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2305 (__v8sf)_mm256_cvtepu32_ps(__A),
2306 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm_div_pd: passes mask __U and _mm_div_pd(__A, __B) (as
 * __v2df) to __builtin_ia32_selectpd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2309 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2310 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2311 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2312 (__v2df)_mm_div_pd(__A, __B),
2316 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2317 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2318 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2319 (__v2df)_mm_div_pd(__A, __B),
2320 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_div_pd: passes mask __U and _mm256_div_pd(__A, __B)
 * (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2323 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2324 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2325 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2326 (__v4df)_mm256_div_pd(__A, __B),
2330 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2331 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2332 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2333 (__v4df)_mm256_div_pd(__A, __B),
2334 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_div_ps: passes mask __U and _mm_div_ps(__A, __B) (as
 * __v4sf) to __builtin_ia32_selectps_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2337 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2338 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2339 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2340 (__v4sf)_mm_div_ps(__A, __B),
2344 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2345 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2346 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2347 (__v4sf)_mm_div_ps(__A, __B),
2348 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_div_ps: passes mask __U and _mm256_div_ps(__A, __B)
 * (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2351 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2352 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2353 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2354 (__v8sf)_mm256_div_ps(__A, __B),
2358 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2359 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2360 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2361 (__v8sf)_mm256_div_ps(__A, __B),
2362 (__v8sf)_mm256_setzero_ps());
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expanddf128_mask; only
 * the first argument (__A as __v2df) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2365 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2366 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2367 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Takes mask __U and __A.  Wraps __builtin_ia32_expanddf128_mask; only the
 * first argument (__A as __v2df) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2372 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2373 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2374 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expanddf256_mask; only
 * the first argument (__A as __v4df) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2380 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2381 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2382 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
/* Wraps __builtin_ia32_expanddf256_mask with __A (as __v4df) and a zero
 * vector from _mm256_setzero_pd(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2387 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2388 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2389 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2391 _mm256_setzero_pd (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expanddi128_mask; only
 * the first argument (__A as __v2di) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2395 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2396 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2397 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
/* Wraps __builtin_ia32_expanddi128_mask with __A (as __v2di) and a zero
 * vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2402 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2403 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2404 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2406 _mm_setzero_si128 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expanddi256_mask; only
 * the first argument (__A as __v4di) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2410 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2411 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2412 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
/* Wraps __builtin_ia32_expanddi256_mask with __A (as __v4di) and a zero
 * vector from _mm256_setzero_si256(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2417 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2418 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2419 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2421 _mm256_setzero_si256 (),
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloaddf128_mask;
 * only the first argument (__P cast to __v2df *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2425 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2426 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2427 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Takes mask __U and a pointer __P.  Wraps __builtin_ia32_expandloaddf128_mask;
 * only the first argument (__P cast to __v2df *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2433 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2434 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2435 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloaddf256_mask;
 * only the first argument (__P cast to __v4df *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2442 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2443 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2444 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
/* Wraps __builtin_ia32_expandloaddf256_mask with __P (cast to __v4df *) and a
 * zero vector from _mm256_setzero_pd(); __U is presumably the trailing argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2450 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2451 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2452 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2454 _mm256_setzero_pd (),
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloaddi128_mask;
 * only the first argument (__P cast to __v2di *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2460 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2461 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
/* Wraps __builtin_ia32_expandloaddi128_mask with __P (cast to __v2di *) and a
 * zero vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2467 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2468 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2469 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2471 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_expandloaddi256_mask; only the first argument (__P cast
 * to __v4di *) is visible.
 * NOTE(review): the parameter list is cut off after __U in this listing (the
 * declaration of __P is not shown), and the call tail and closing brace are
 * missing as well.  If __P is const-qualified like its siblings, the cast
 * drops that qualifier -- confirm. */
2476 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2477 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2479 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
/* Wraps __builtin_ia32_expandloaddi256_mask with __P (cast to __v4di *) and a
 * zero vector from _mm256_setzero_si256(); __U is presumably the trailing
 * argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2485 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2486 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2487 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2489 _mm256_setzero_si256 (),
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloadsf128_mask;
 * only the first argument (__P cast to __v4sf *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2494 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2495 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2496 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Takes mask __U and a pointer __P.  Wraps __builtin_ia32_expandloadsf128_mask;
 * only the first argument (__P cast to __v4sf *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2501 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2502 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2503 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloadsf256_mask;
 * only the first argument (__P cast to __v8sf *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2510 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2511 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2512 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
/* Wraps __builtin_ia32_expandloadsf256_mask with __P (cast to __v8sf *) and a
 * zero vector from _mm256_setzero_ps(); __U is presumably the trailing argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2517 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2518 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2519 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2521 _mm256_setzero_ps (),
/* Takes __W, __U and a pointer __P.  Wraps __builtin_ia32_expandloadsi128_mask;
 * only the first argument (__P cast to __v4si *) is visible.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail
 * (presumably __W, __U) and closing brace are missing from this listing. */
2526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2527 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2528 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
/* Wraps __builtin_ia32_expandloadsi128_mask with __P (cast to __v4si *) and a
 * zero vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2534 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2535 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2536 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2538 _mm_setzero_si128 (),
/* Wraps __builtin_ia32_expandloadsi256_mask; only the first argument (__P cast
 * to __v8si *) is visible.
 * NOTE(review): the parameter list is cut off after __U in this listing (the
 * declaration of __P is not shown), and the call tail and closing brace are
 * missing as well.  If __P is const-qualified like its siblings, the cast
 * drops that qualifier -- confirm. */
2542 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2543 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2545 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
/* Wraps __builtin_ia32_expandloadsi256_mask with __P (cast to __v8si *) and a
 * zero vector from _mm256_setzero_si256(); __U is presumably the trailing
 * argument.
 * NOTE(review): the cast drops the const qualifier of __P.  Call tail and
 * closing brace are missing from this listing. */
2551 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2552 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2553 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2555 _mm256_setzero_si256 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expandsf128_mask; only
 * the first argument (__A as __v4sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2560 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2561 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2562 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Takes mask __U and __A.  Wraps __builtin_ia32_expandsf128_mask; only the
 * first argument (__A as __v4sf) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2567 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2568 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2569 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expandsf256_mask; only
 * the first argument (__A as __v8sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2576 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2577 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
/* Wraps __builtin_ia32_expandsf256_mask with __A (as __v8sf) and a zero
 * vector from _mm256_setzero_ps(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2582 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2583 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2584 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2586 _mm256_setzero_ps (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expandsi128_mask; only
 * the first argument (__A as __v4si) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2590 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2591 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2592 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
/* Wraps __builtin_ia32_expandsi128_mask with __A (as __v4si) and a zero
 * vector from _mm_setzero_si128(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2598 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2599 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2601 _mm_setzero_si128 (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_expandsi256_mask; only
 * the first argument (__A as __v8si) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2605 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2606 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2607 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
/* Wraps __builtin_ia32_expandsi256_mask with __A (as __v8si) and a zero
 * vector from _mm256_setzero_si256(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2612 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2613 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2614 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2616 _mm256_setzero_si256 (),
/* Takes no mask parameter.  Wraps __builtin_ia32_getexppd128_mask; only the
 * first argument (__A as __v2df) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2620 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2621 _mm_getexp_pd (__m128d __A) {
2622 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_getexppd128_mask; only
 * the first argument (__A as __v2df) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2628 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2629 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2630 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Takes mask __U and __A.  Wraps __builtin_ia32_getexppd128_mask; only the
 * first argument (__A as __v2df) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2635 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2636 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2637 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
/* Takes no mask parameter.  Wraps __builtin_ia32_getexppd256_mask, passing
 * __A (as __v4df) and a zero vector from _mm256_setzero_pd().
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2643 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2644 _mm256_getexp_pd (__m256d __A) {
2645 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2647 _mm256_setzero_pd (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_getexppd256_mask; only
 * the first argument (__A as __v4df) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2651 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2652 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2653 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
/* Wraps __builtin_ia32_getexppd256_mask with __A (as __v4df) and a zero
 * vector from _mm256_setzero_pd(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2658 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2659 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2660 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2662 _mm256_setzero_pd (),
/* Takes no mask parameter.  Wraps __builtin_ia32_getexpps128_mask; only the
 * first argument (__A as __v4sf) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2666 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2667 _mm_getexp_ps (__m128 __A) {
2668 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_getexpps128_mask; only
 * the first argument (__A as __v4sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2674 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2675 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2676 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Takes mask __U and __A.  Wraps __builtin_ia32_getexpps128_mask; only the
 * first argument (__A as __v4sf) is visible.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2681 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2682 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2683 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
/* Takes no mask parameter.  Wraps __builtin_ia32_getexpps256_mask, passing
 * __A (as __v8sf) and a zero vector from _mm256_setzero_ps().
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2689 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2690 _mm256_getexp_ps (__m256 __A) {
2691 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2693 _mm256_setzero_ps (),
/* Takes __W and __U besides __A.  Wraps __builtin_ia32_getexpps256_mask; only
 * the first argument (__A as __v8sf) is visible.
 * NOTE(review): call tail (presumably __W, __U) and closing brace are missing. */
2697 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2698 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2699 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
/* Wraps __builtin_ia32_getexpps256_mask with __A (as __v8sf) and a zero
 * vector from _mm256_setzero_ps(); __U is presumably the trailing argument.
 * NOTE(review): the call tail and closing brace are missing from this listing. */
2704 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2705 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2706 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2708 _mm256_setzero_ps (),
/* Masked form of _mm_max_pd: passes mask __U and _mm_max_pd(__A, __B) (as
 * __v2df) to __builtin_ia32_selectpd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2712 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2713 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2714 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2715 (__v2df)_mm_max_pd(__A, __B),
2719 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2720 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2721 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2722 (__v2df)_mm_max_pd(__A, __B),
2723 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_max_pd: passes mask __U and _mm256_max_pd(__A, __B)
 * (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2726 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2727 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2728 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2729 (__v4df)_mm256_max_pd(__A, __B),
2733 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2734 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2735 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2736 (__v4df)_mm256_max_pd(__A, __B),
2737 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_max_ps: passes mask __U and _mm_max_ps(__A, __B) (as
 * __v4sf) to __builtin_ia32_selectps_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2740 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2741 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2742 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2743 (__v4sf)_mm_max_ps(__A, __B),
2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2749 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2750 (__v4sf)_mm_max_ps(__A, __B),
2751 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_max_ps: passes mask __U and _mm256_max_ps(__A, __B)
 * (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2754 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2755 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2756 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2757 (__v8sf)_mm256_max_ps(__A, __B),
2761 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2762 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2763 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2764 (__v8sf)_mm256_max_ps(__A, __B),
2765 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm_min_pd: passes mask __U and _mm_min_pd(__A, __B) (as
 * __v2df) to __builtin_ia32_selectpd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2768 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2769 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2770 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2771 (__v2df)_mm_min_pd(__A, __B),
2775 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2776 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2777 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2778 (__v2df)_mm_min_pd(__A, __B),
2779 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_min_pd: passes mask __U and _mm256_min_pd(__A, __B)
 * (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2782 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2783 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2784 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2785 (__v4df)_mm256_min_pd(__A, __B),
2789 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2790 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2791 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2792 (__v4df)_mm256_min_pd(__A, __B),
2793 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_min_ps: passes mask __U and _mm_min_ps(__A, __B) (as
 * __v4sf) to __builtin_ia32_selectps_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2796 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2797 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2798 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2799 (__v4sf)_mm_min_ps(__A, __B),
2803 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2804 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2805 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2806 (__v4sf)_mm_min_ps(__A, __B),
2807 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_min_ps: passes mask __U and _mm256_min_ps(__A, __B)
 * (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2810 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2811 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2812 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2813 (__v8sf)_mm256_min_ps(__A, __B),
2817 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2818 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2819 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2820 (__v8sf)_mm256_min_ps(__A, __B),
2821 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm_mul_pd: passes mask __U and _mm_mul_pd(__A, __B) (as
 * __v2df) to __builtin_ia32_selectpd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2825 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2826 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2827 (__v2df)_mm_mul_pd(__A, __B),
2831 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2832 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2833 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2834 (__v2df)_mm_mul_pd(__A, __B),
2835 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_mul_pd: passes mask __U and _mm256_mul_pd(__A, __B)
 * (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2838 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2839 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2840 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2841 (__v4df)_mm256_mul_pd(__A, __B),
2845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2846 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2847 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2848 (__v4df)_mm256_mul_pd(__A, __B),
2849 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_mul_ps: passes mask __U and _mm_mul_ps(__A, __B) (as
 * __v4sf) to __builtin_ia32_selectps_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2852 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2853 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2854 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2855 (__v4sf)_mm_mul_ps(__A, __B),
2859 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2860 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2861 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2862 (__v4sf)_mm_mul_ps(__A, __B),
2863 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_mul_ps: passes mask __U and _mm256_mul_ps(__A, __B)
 * (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2866 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2867 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2868 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2869 (__v8sf)_mm256_mul_ps(__A, __B),
2873 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2874 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2875 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2876 (__v8sf)_mm256_mul_ps(__A, __B),
2877 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm_abs_epi32: passes mask __U and _mm_abs_epi32(__A) (as
 * __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2880 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2881 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2882 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2883 (__v4si)_mm_abs_epi32(__A),
2887 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2888 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2889 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2890 (__v4si)_mm_abs_epi32(__A),
2891 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_abs_epi32: passes mask __U and _mm256_abs_epi32(__A)
 * (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2894 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2895 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2896 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2897 (__v8si)_mm256_abs_epi32(__A),
2901 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2902 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2903 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2904 (__v8si)_mm256_abs_epi32(__A),
2905 (__v8si)_mm256_setzero_si256());
2908 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2909 _mm_abs_epi64 (__m128i __A) {
2910 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
/* Masked form of _mm_abs_epi64: passes mask __U and _mm_abs_epi64(__A) (as
 * __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2913 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2914 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2915 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2916 (__v2di)_mm_abs_epi64(__A),
2920 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2921 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
2922 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2923 (__v2di)_mm_abs_epi64(__A),
2924 (__v2di)_mm_setzero_si128());
2927 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2928 _mm256_abs_epi64 (__m256i __A) {
2929 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
/* Masked form of _mm256_abs_epi64: passes mask __U and _mm256_abs_epi64(__A)
 * (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2932 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2933 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2934 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2935 (__v4di)_mm256_abs_epi64(__A),
2939 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2940 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
2941 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
2942 (__v4di)_mm256_abs_epi64(__A),
2943 (__v4di)_mm256_setzero_si256());
2946 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2947 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
2948 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2949 (__v4si)_mm_max_epi32(__A, __B),
2950 (__v4si)_mm_setzero_si128());
/* Masked form of _mm_max_epi32: passes mask __M and _mm_max_epi32(__A, __B)
 * (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2953 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2954 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2955 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
2956 (__v4si)_mm_max_epi32(__A, __B),
2960 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2961 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
2962 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2963 (__v8si)_mm256_max_epi32(__A, __B),
2964 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm256_max_epi32: passes mask __M and
 * _mm256_max_epi32(__A, __B) (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2967 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2968 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
2969 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
2970 (__v8si)_mm256_max_epi32(__A, __B),
2974 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2975 _mm_max_epi64 (__m128i __A, __m128i __B) {
2976 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
2979 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2980 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
2981 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2982 (__v2di)_mm_max_epi64(__A, __B),
2983 (__v2di)_mm_setzero_si128());
/* Masked form of _mm_max_epi64: passes mask __M and _mm_max_epi64(__A, __B)
 * (as __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
2986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2987 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
2988 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
2989 (__v2di)_mm_max_epi64(__A, __B),
2993 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2994 _mm256_max_epi64 (__m256i __A, __m256i __B) {
2995 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
2998 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2999 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3000 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3001 (__v4di)_mm256_max_epi64(__A, __B),
3002 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm256_max_epi64: passes mask __M and
 * _mm256_max_epi64(__A, __B) (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3005 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3006 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3007 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3008 (__v4di)_mm256_max_epi64(__A, __B),
3012 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3013 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3014 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3015 (__v4si)_mm_max_epu32(__A, __B),
3016 (__v4si)_mm_setzero_si128());
/* Masked form of _mm_max_epu32: passes mask __M and _mm_max_epu32(__A, __B)
 * (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3019 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3020 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3021 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3022 (__v4si)_mm_max_epu32(__A, __B),
3026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3027 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3028 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3029 (__v8si)_mm256_max_epu32(__A, __B),
3030 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm256_max_epu32: passes mask __M and
 * _mm256_max_epu32(__A, __B) (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3033 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3034 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3035 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3036 (__v8si)_mm256_max_epu32(__A, __B),
3040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3041 _mm_max_epu64 (__m128i __A, __m128i __B) {
3042 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3046 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3047 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3048 (__v2di)_mm_max_epu64(__A, __B),
3049 (__v2di)_mm_setzero_si128());
/* Masked form of _mm_max_epu64: passes mask __M and _mm_max_epu64(__A, __B)
 * (as __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3052 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3053 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3054 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3055 (__v2di)_mm_max_epu64(__A, __B),
3059 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3060 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3061 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3064 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3065 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3066 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3067 (__v4di)_mm256_max_epu64(__A, __B),
3068 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm256_max_epu64: passes mask __M and
 * _mm256_max_epu64(__A, __B) (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3071 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3072 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3073 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3074 (__v4di)_mm256_max_epu64(__A, __B),
3078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3079 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3080 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3081 (__v4si)_mm_min_epi32(__A, __B),
3082 (__v4si)_mm_setzero_si128());
/* Masked form of _mm_min_epi32: passes mask __M and _mm_min_epi32(__A, __B)
 * (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3086 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3087 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3088 (__v4si)_mm_min_epi32(__A, __B),
3092 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3093 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3094 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3095 (__v8si)_mm256_min_epi32(__A, __B),
3096 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm256_min_epi32: passes mask __M and
 * _mm256_min_epi32(__A, __B) (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3099 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3100 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3102 (__v8si)_mm256_min_epi32(__A, __B),
3106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3107 _mm_min_epi64 (__m128i __A, __m128i __B) {
3108 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
/* Masked form of _mm_min_epi64: passes mask __M and _mm_min_epi64(__A, __B)
 * (as __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3111 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3112 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3113 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3114 (__v2di)_mm_min_epi64(__A, __B),
3118 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3119 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3120 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3121 (__v2di)_mm_min_epi64(__A, __B),
3122 (__v2di)_mm_setzero_si128());
3125 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3126 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3127 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
/* Masked form of _mm256_min_epi64: passes mask __M and
 * _mm256_min_epi64(__A, __B) (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3131 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3132 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3133 (__v4di)_mm256_min_epi64(__A, __B),
3137 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3138 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3140 (__v4di)_mm256_min_epi64(__A, __B),
3141 (__v4di)_mm256_setzero_si256());
3144 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3145 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3146 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3147 (__v4si)_mm_min_epu32(__A, __B),
3148 (__v4si)_mm_setzero_si128());
/* Masked form of _mm_min_epu32: passes mask __M and _mm_min_epu32(__A, __B)
 * (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3151 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3152 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3153 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3154 (__v4si)_mm_min_epu32(__A, __B),
3158 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3159 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3160 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3161 (__v8si)_mm256_min_epu32(__A, __B),
3162 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm256_min_epu32: passes mask __M and
 * _mm256_min_epu32(__A, __B) (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): the third select operand (presumably __W) and the closing
 * brace are missing from this listing. */
3165 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3166 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3167 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3168 (__v8si)_mm256_min_epu32(__A, __B),
/* Returns __builtin_ia32_pminuq128 applied to __A and __B (both viewed as
 * __v2di), cast to __m128i. Presumably the per-lane unsigned 64-bit minimum --
 * confirm against the Intel Intrinsics Guide. */
3172 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3173 _mm_min_epu64 (__m128i __A, __m128i __B) {
3174 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
/* Masked form of _mm_min_epu64: passes mask __M and _mm_min_epu64(__A, __B)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
3177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3178 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3179 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3180 (__v2di)_mm_min_epu64(__A, __B),
/* Zero-masked form of _mm_min_epu64: passes mask __M, _mm_min_epu64(__A, __B)
 * and _mm_setzero_si128() to __builtin_ia32_selectq_128 and returns the result
 * as __m128i. Presumably lanes with a clear mask bit become zero -- confirm. */
3184 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3185 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3186 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3187 (__v2di)_mm_min_epu64(__A, __B),
3188 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_pminuq256 applied to __A and __B (both viewed as
 * __v4di), cast to __m256i. Presumably the per-lane unsigned 64-bit minimum --
 * confirm against the Intel Intrinsics Guide. */
3191 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3192 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3193 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
/* Masked form of _mm256_min_epu64: passes mask __M and
 * _mm256_min_epu64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
3196 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3197 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3199 (__v4di)_mm256_min_epu64(__A, __B),
/* Zero-masked form of _mm256_min_epu64: passes mask __M,
 * _mm256_min_epu64(__A, __B) and _mm256_setzero_si256() to
 * __builtin_ia32_selectq_256 and returns the result as __m256i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3203 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3204 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3205 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3206 (__v4di)_mm256_min_epu64(__A, __B),
3207 (__v4di)_mm256_setzero_si256());
/* Expands to a __builtin_ia32_rndscalepd_128_mask call on (A) viewed as
 * __v2df, with _mm_setzero_pd() as a vector operand; result cast to __m128d.
 * NOTE(review): the arguments carrying imm and the mask are not visible here
 * -- confirm against the complete header. */
3210 #define _mm_roundscale_pd(A, imm) \
3211 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3213 (__v2df)_mm_setzero_pd(), \
/* Expands to a __builtin_ia32_rndscalepd_128_mask call on (A) viewed as
 * __v2df, with (W) as a vector operand; result cast to __m128d.
 * NOTE(review): the arguments carrying imm and U are not visible here --
 * confirm against the complete header. */
3217 #define _mm_mask_roundscale_pd(W, U, A, imm) \
3218 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3220 (__v2df)(__m128d)(W), \
/* Expands to a __builtin_ia32_rndscalepd_128_mask call on (A) viewed as
 * __v2df, with _mm_setzero_pd() as a vector operand; result cast to __m128d.
 * NOTE(review): the arguments carrying imm and U are not visible here --
 * confirm against the complete header. */
3224 #define _mm_maskz_roundscale_pd(U, A, imm) \
3225 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3227 (__v2df)_mm_setzero_pd(), \
/* Expands to a __builtin_ia32_rndscalepd_256_mask call on (A) viewed as
 * __v4df, with _mm256_setzero_pd() as a vector operand; result cast to
 * __m256d.
 * NOTE(review): the arguments carrying imm and the mask are not visible here
 * -- confirm against the complete header. */
3231 #define _mm256_roundscale_pd(A, imm) \
3232 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3234 (__v4df)_mm256_setzero_pd(), \
/* Expands to a __builtin_ia32_rndscalepd_256_mask call on (A) viewed as
 * __v4df, with (W) as a vector operand; result cast to __m256d.
 * NOTE(review): the arguments carrying imm and U are not visible here --
 * confirm against the complete header. */
3238 #define _mm256_mask_roundscale_pd(W, U, A, imm) \
3239 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3241 (__v4df)(__m256d)(W), \
/* Expands to a __builtin_ia32_rndscalepd_256_mask call on (A) viewed as
 * __v4df, with _mm256_setzero_pd() as a vector operand; result cast to
 * __m256d.
 * NOTE(review): the arguments carrying imm and U are not visible here --
 * confirm against the complete header. */
3245 #define _mm256_maskz_roundscale_pd(U, A, imm) \
3246 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3248 (__v4df)_mm256_setzero_pd(), \
/* Expands to a __builtin_ia32_rndscaleps_128_mask call on (A) viewed as
 * __v4sf, with imm as int and _mm_setzero_ps() as a vector operand; result
 * cast to __m128.
 * NOTE(review): the final (mask) argument is not visible here -- confirm
 * against the complete header. */
3251 #define _mm_roundscale_ps(A, imm) \
3252 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3253 (__v4sf)_mm_setzero_ps(), \
/* Expands to a __builtin_ia32_rndscaleps_128_mask call on (A) viewed as
 * __v4sf, with imm as int and (W) as a vector operand; result cast to __m128.
 * NOTE(review): the final argument is not visible here; presumably the mask U
 * -- confirm against the complete header. */
3257 #define _mm_mask_roundscale_ps(W, U, A, imm) \
3258 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3259 (__v4sf)(__m128)(W), \
/* Expands to a __builtin_ia32_rndscaleps_128_mask call on (A) viewed as
 * __v4sf, with imm as int and _mm_setzero_ps() as a vector operand; result
 * cast to __m128.
 * NOTE(review): the final argument is not visible here; presumably the mask U
 * -- confirm against the complete header. */
3263 #define _mm_maskz_roundscale_ps(U, A, imm) \
3264 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3265 (__v4sf)_mm_setzero_ps(), \
/* Expands to a __builtin_ia32_rndscaleps_256_mask call on (A) viewed as
 * __v8sf, with imm as int and _mm256_setzero_ps() as a vector operand; result
 * cast to __m256.
 * NOTE(review): the final (mask) argument is not visible here -- confirm
 * against the complete header. */
3268 #define _mm256_roundscale_ps(A, imm) \
3269 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3270 (__v8sf)_mm256_setzero_ps(), \
/* Expands to a __builtin_ia32_rndscaleps_256_mask call on (A) viewed as
 * __v8sf, with imm as int and (W) as a vector operand; result cast to __m256.
 * NOTE(review): the final argument is not visible here; presumably the mask U
 * -- confirm against the complete header. */
3273 #define _mm256_mask_roundscale_ps(W, U, A, imm) \
3274 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3275 (__v8sf)(__m256)(W), \
/* Expands to a __builtin_ia32_rndscaleps_256_mask call on (A) viewed as
 * __v8sf, with imm as int and _mm256_setzero_ps() as a vector operand; result
 * cast to __m256.
 * NOTE(review): the final argument is not visible here; presumably the mask U
 * -- confirm against the complete header. */
3279 #define _mm256_maskz_roundscale_ps(U, A, imm) \
3280 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3281 (__v8sf)_mm256_setzero_ps(), \
/* Returns the result of __builtin_ia32_scalefpd128_mask, called with __A (as
 * __v2df) as its first argument, cast to __m128d.
 * NOTE(review): the remaining call arguments are not visible here -- confirm
 * against the complete header. */
3284 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3285 _mm_scalef_pd (__m128d __A, __m128d __B) {
3286 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Masked scalef wrapper taking __W, __U and __A; returns the result of
 * __builtin_ia32_scalefpd128_mask, called with __A (as __v2df) first, cast to
 * __m128d.
 * NOTE(review): the end of the parameter list and the remaining call
 * arguments are not visible here -- confirm against the complete header. */
3293 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3294 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3296 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Mask-taking (__U) scalef wrapper; returns the result of
 * __builtin_ia32_scalefpd128_mask, called with __A (as __v2df) first, cast to
 * __m128d.
 * NOTE(review): the remaining call arguments are not visible here -- confirm
 * against the complete header. */
3302 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3303 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3304 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Returns the result of __builtin_ia32_scalefpd256_mask, called with __A (as
 * __v4df) first and _mm256_setzero_pd() as a later argument, cast to __m256d.
 * NOTE(review): the other call arguments are not visible here -- confirm
 * against the complete header. */
3311 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3312 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3313 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3316 _mm256_setzero_pd (),
/* Masked scalef wrapper taking __W, __U and __A; returns the result of
 * __builtin_ia32_scalefpd256_mask, called with __A (as __v4df) first, cast to
 * __m256d.
 * NOTE(review): the end of the parameter list and the remaining call
 * arguments are not visible here -- confirm against the complete header. */
3320 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3321 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3323 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
/* Mask-taking (__U) scalef wrapper; returns the result of
 * __builtin_ia32_scalefpd256_mask, called with __A (as __v4df) first and
 * _mm256_setzero_pd() as a later argument, cast to __m256d.
 * NOTE(review): the other call arguments are not visible here -- confirm
 * against the complete header. */
3329 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3330 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3331 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3334 _mm256_setzero_pd (),
/* Returns the result of __builtin_ia32_scalefps128_mask, called with __A (as
 * __v4sf) as its first argument, cast to __m128.
 * NOTE(review): the remaining call arguments are not visible here -- confirm
 * against the complete header. */
3338 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3339 _mm_scalef_ps (__m128 __A, __m128 __B) {
3340 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Masked scalef wrapper taking __W, __U, __A and __B; returns the result of
 * __builtin_ia32_scalefps128_mask, called with __A (as __v4sf) first, cast to
 * __m128.
 * NOTE(review): the remaining call arguments are not visible here -- confirm
 * against the complete header. */
3347 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3348 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3349 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Mask-taking (__U) scalef wrapper; returns the result of
 * __builtin_ia32_scalefps128_mask, called with __A (as __v4sf) first, cast to
 * __m128.
 * NOTE(review): the remaining call arguments are not visible here -- confirm
 * against the complete header. */
3355 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3356 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3357 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Returns the result of __builtin_ia32_scalefps256_mask, called with __A (as
 * __v8sf) first and _mm256_setzero_ps() as a later argument, cast to __m256.
 * NOTE(review): the other call arguments are not visible here -- confirm
 * against the complete header. */
3364 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3365 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3366 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3369 _mm256_setzero_ps (),
/* Masked scalef wrapper taking __W, __U and __A; returns the result of
 * __builtin_ia32_scalefps256_mask, called with __A (as __v8sf) first, cast to
 * __m256.
 * NOTE(review): the end of the parameter list and the remaining call
 * arguments are not visible here -- confirm against the complete header. */
3373 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3374 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3376 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* Mask-taking (__U) scalef wrapper; returns the result of
 * __builtin_ia32_scalefps256_mask, called with __A (as __v8sf) first and
 * _mm256_setzero_ps() as a later argument, cast to __m256.
 * NOTE(review): the other call arguments are not visible here -- confirm
 * against the complete header. */
3382 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3383 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3384 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3387 _mm256_setzero_ps (),
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv2df with addr as
 * double *, an all-ones mask ((__mmask8)-1), index as __v2di, v1 as __v2df
 * and scale as int. */
3391 #define _mm_i64scatter_pd(addr, index, v1, scale) \
3392 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
3393 (__v2di)(__m128i)(index), \
3394 (__v2df)(__m128d)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv2df with addr as
 * double *, mask as __mmask8, index as __v2di, v1 as __v2df and scale as
 * int. */
3396 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3397 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
3398 (__v2di)(__m128i)(index), \
3399 (__v2df)(__m128d)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv2di with addr as
 * long long *, an all-ones mask ((__mmask8)-1), index as __v2di, v1 as __v2di
 * and scale as int. */
3401 #define _mm_i64scatter_epi64(addr, index, v1, scale) \
3402 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
3403 (__v2di)(__m128i)(index), \
3404 (__v2di)(__m128i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv2di with addr as
 * long long *, mask as __mmask8, index as __v2di, v1 as __v2di and scale as
 * int. */
3406 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3407 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
3408 (__v2di)(__m128i)(index), \
3409 (__v2di)(__m128i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv4df with addr as
 * double *, an all-ones mask ((__mmask8)-1), index as __v4di, v1 as __v4df
 * and scale as int. */
3411 #define _mm256_i64scatter_pd(addr, index, v1, scale) \
3412 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
3413 (__v4di)(__m256i)(index), \
3414 (__v4df)(__m256d)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv4df with addr as
 * double *, mask as __mmask8, index as __v4di, v1 as __v4df and scale as
 * int. */
3416 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3417 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
3418 (__v4di)(__m256i)(index), \
3419 (__v4df)(__m256d)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv4di with addr as
 * long long *, an all-ones mask ((__mmask8)-1), index as __v4di, v1 as __v4di
 * and scale as int. */
3421 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \
3422 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
3423 (__v4di)(__m256i)(index), \
3424 (__v4di)(__m256i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv4di with addr as
 * long long *, mask as __mmask8, index as __v4di, v1 as __v4di and scale as
 * int. */
3426 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3427 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
3428 (__v4di)(__m256i)(index), \
3429 (__v4di)(__m256i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv4sf with addr as
 * float *, an all-ones mask ((__mmask8)-1), index as __v2di and v1 as __v4sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3431 #define _mm_i64scatter_ps(addr, index, v1, scale) \
3432 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
3433 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Masked scatter: expands to __builtin_ia32_scatterdiv4sf with addr as
 * float *, mask as __mmask8, index as __v2di and v1 as __v4sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3436 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3437 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
3438 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv4si with addr as
 * int *, an all-ones mask ((__mmask8)-1), index as __v2di, v1 as __v4si and
 * scale as int. */
3441 #define _mm_i64scatter_epi32(addr, index, v1, scale) \
3442 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
3443 (__v2di)(__m128i)(index), \
3444 (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv4si with addr as
 * int *, mask as __mmask8, index as __v2di, v1 as __v4si and scale as int. */
3446 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3447 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
3448 (__v2di)(__m128i)(index), \
3449 (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv8sf with addr as
 * float *, an all-ones mask ((__mmask8)-1), index as __v4di (256-bit) and v1
 * as __v4sf (128-bit).
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3451 #define _mm256_i64scatter_ps(addr, index, v1, scale) \
3452 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
3453 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Masked scatter: expands to __builtin_ia32_scatterdiv8sf with addr as
 * float *, mask as __mmask8, index as __v4di (256-bit) and v1 as __v4sf
 * (128-bit).
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3456 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3457 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
3458 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked scatter: expands to __builtin_ia32_scatterdiv8si with addr as
 * int *, an all-ones mask ((__mmask8)-1), index as __v4di (256-bit), v1 as
 * __v4si (128-bit) and scale as int. */
3461 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \
3462 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
3463 (__v4di)(__m256i)(index), \
3464 (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scatterdiv8si with addr as
 * int *, mask as __mmask8, index as __v4di (256-bit), v1 as __v4si (128-bit)
 * and scale as int. */
3466 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3467 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
3468 (__v4di)(__m256i)(index), \
3469 (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv2df with addr as
 * double *, an all-ones mask ((__mmask8)-1), index as __v4si, v1 as __v2df
 * and scale as int. */
3471 #define _mm_i32scatter_pd(addr, index, v1, scale) \
3472 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
3473 (__v4si)(__m128i)(index), \
3474 (__v2df)(__m128d)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv2df with addr as
 * double *, mask as __mmask8, index as __v4si, v1 as __v2df and scale as
 * int. */
3476 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3477 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
3478 (__v4si)(__m128i)(index), \
3479 (__v2df)(__m128d)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv2di with addr as
 * long long *, an all-ones mask ((__mmask8)-1), index as __v4si, v1 as __v2di
 * and scale as int. */
3481 #define _mm_i32scatter_epi64(addr, index, v1, scale) \
3482 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
3483 (__v4si)(__m128i)(index), \
3484 (__v2di)(__m128i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv2di with addr as
 * long long *, mask as __mmask8, index as __v4si, v1 as __v2di and scale as
 * int. */
3486 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3487 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
3488 (__v4si)(__m128i)(index), \
3489 (__v2di)(__m128i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv4df with addr as
 * double *, an all-ones mask ((__mmask8)-1), index as __v4si (128-bit), v1 as
 * __v4df (256-bit) and scale as int. */
3491 #define _mm256_i32scatter_pd(addr, index, v1, scale) \
3492 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
3493 (__v4si)(__m128i)(index), \
3494 (__v4df)(__m256d)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv4df with addr as
 * double *, mask as __mmask8, index as __v4si (128-bit), v1 as __v4df
 * (256-bit) and scale as int. */
3496 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3497 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
3498 (__v4si)(__m128i)(index), \
3499 (__v4df)(__m256d)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv4di with addr as
 * long long *, an all-ones mask ((__mmask8)-1), index as __v4si (128-bit), v1
 * as __v4di (256-bit) and scale as int. */
3501 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
3502 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
3503 (__v4si)(__m128i)(index), \
3504 (__v4di)(__m256i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv4di with addr as
 * long long *, mask as __mmask8, index as __v4si (128-bit), v1 as __v4di
 * (256-bit) and scale as int. */
3506 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3507 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
3508 (__v4si)(__m128i)(index), \
3509 (__v4di)(__m256i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv4sf with addr as
 * float *, an all-ones mask ((__mmask8)-1), index as __v4si and v1 as __v4sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3511 #define _mm_i32scatter_ps(addr, index, v1, scale) \
3512 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
3513 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Masked scatter: expands to __builtin_ia32_scattersiv4sf with addr as
 * float *, mask as __mmask8, index as __v4si and v1 as __v4sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3516 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3517 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
3518 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Unmasked scatter: expands to __builtin_ia32_scattersiv4si with addr as
 * int *, an all-ones mask ((__mmask8)-1), index as __v4si, v1 as __v4si and
 * scale as int. */
3521 #define _mm_i32scatter_epi32(addr, index, v1, scale) \
3522 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
3523 (__v4si)(__m128i)(index), \
3524 (__v4si)(__m128i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv4si with addr as
 * int *, mask as __mmask8, index as __v4si, v1 as __v4si and scale as int. */
3526 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3527 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
3528 (__v4si)(__m128i)(index), \
3529 (__v4si)(__m128i)(v1), (int)(scale))
/* Unmasked scatter: expands to __builtin_ia32_scattersiv8sf with addr as
 * float *, an all-ones mask ((__mmask8)-1), index as __v8si and v1 as __v8sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3531 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
3532 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
3533 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Masked scatter: expands to __builtin_ia32_scattersiv8sf with addr as
 * float *, mask as __mmask8, index as __v8si and v1 as __v8sf.
 * NOTE(review): the final argument line is not visible here; presumably
 * (int)(scale) -- confirm against the complete header. */
3536 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3537 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
3538 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Unmasked scatter: expands to __builtin_ia32_scattersiv8si with addr as
 * int *, an all-ones mask ((__mmask8)-1), index as __v8si, v1 as __v8si and
 * scale as int. */
3541 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \
3542 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
3543 (__v8si)(__m256i)(index), \
3544 (__v8si)(__m256i)(v1), (int)(scale))
/* Masked scatter: expands to __builtin_ia32_scattersiv8si with addr as
 * int *, mask as __mmask8, index as __v8si, v1 as __v8si and scale as int. */
3546 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3547 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
3548 (__v8si)(__m256i)(index), \
3549 (__v8si)(__m256i)(v1), (int)(scale))
/* Masked form of _mm_sqrt_pd: passes mask __U and _mm_sqrt_pd(__A) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2df)__W -- confirm against the complete header. */
3551 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3552 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3553 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3554 (__v2df)_mm_sqrt_pd(__A),
/* Zero-masked form of _mm_sqrt_pd: passes mask __U, _mm_sqrt_pd(__A) and
 * _mm_setzero_pd() to __builtin_ia32_selectpd_128 and returns the result as
 * __m128d. Presumably lanes with a clear mask bit become zero -- confirm. */
3558 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3559 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3560 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3561 (__v2df)_mm_sqrt_pd(__A),
3562 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_sqrt_pd: passes mask __U and _mm256_sqrt_pd(__A) to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4df)__W -- confirm against the complete header. */
3565 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3566 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3567 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3568 (__v4df)_mm256_sqrt_pd(__A),
/* Zero-masked form of _mm256_sqrt_pd: passes mask __U, _mm256_sqrt_pd(__A)
 * and _mm256_setzero_pd() to __builtin_ia32_selectpd_256 and returns the
 * result as __m256d. Presumably lanes with a clear mask bit become zero --
 * confirm. */
3572 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3573 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3574 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3575 (__v4df)_mm256_sqrt_pd(__A),
3576 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_sqrt_ps: passes mask __U and _mm_sqrt_ps(__A) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4sf)__W -- confirm against the complete header. */
3579 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3580 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3581 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3582 (__v4sf)_mm_sqrt_ps(__A),
/* Zero-masked form of _mm_sqrt_ps: passes mask __U, _mm_sqrt_ps(__A) and
 * _mm_setzero_ps() to __builtin_ia32_selectps_128 and returns the result as
 * __m128. Presumably lanes with a clear mask bit become zero -- confirm. */
3586 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3587 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3588 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3589 (__v4sf)_mm_sqrt_ps(__A),
3590 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_sqrt_ps: passes mask __U and _mm256_sqrt_ps(__A) to
 * __builtin_ia32_selectps_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8sf)__W -- confirm against the complete header. */
3593 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3594 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3595 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3596 (__v8sf)_mm256_sqrt_ps(__A),
/* Zero-masked form of _mm256_sqrt_ps: passes mask __U, _mm256_sqrt_ps(__A)
 * and _mm256_setzero_ps() to __builtin_ia32_selectps_256 and returns the
 * result as __m256. Presumably lanes with a clear mask bit become zero --
 * confirm. */
3600 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3601 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3602 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3603 (__v8sf)_mm256_sqrt_ps(__A),
3604 (__v8sf)_mm256_setzero_ps());
/* Masked form of _mm_sub_pd: passes mask __U and _mm_sub_pd(__A, __B) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2df)__W -- confirm against the complete header. */
3607 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3608 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3609 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3610 (__v2df)_mm_sub_pd(__A, __B),
/* Zero-masked form of _mm_sub_pd: passes mask __U, _mm_sub_pd(__A, __B) and
 * _mm_setzero_pd() to __builtin_ia32_selectpd_128 and returns the result as
 * __m128d. Presumably lanes with a clear mask bit become zero -- confirm. */
3614 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3615 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3617 (__v2df)_mm_sub_pd(__A, __B),
3618 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_sub_pd: passes mask __U and _mm256_sub_pd(__A, __B)
 * to __builtin_ia32_selectpd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4df)__W -- confirm against the complete header. */
3621 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3622 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3623 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3624 (__v4df)_mm256_sub_pd(__A, __B),
/* Zero-masked form of _mm256_sub_pd: passes mask __U,
 * _mm256_sub_pd(__A, __B) and _mm256_setzero_pd() to
 * __builtin_ia32_selectpd_256 and returns the result as __m256d.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3628 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3629 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3631 (__v4df)_mm256_sub_pd(__A, __B),
3632 (__v4df)_mm256_setzero_pd());
/* Masked form of _mm_sub_ps: passes mask __U and _mm_sub_ps(__A, __B) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4sf)__W -- confirm against the complete header. */
3635 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3636 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3637 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3638 (__v4sf)_mm_sub_ps(__A, __B),
/* Zero-masked form of _mm_sub_ps: passes mask __U, _mm_sub_ps(__A, __B) and
 * _mm_setzero_ps() to __builtin_ia32_selectps_128 and returns the result as
 * __m128. Presumably lanes with a clear mask bit become zero -- confirm. */
3642 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3643 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3645 (__v4sf)_mm_sub_ps(__A, __B),
3646 (__v4sf)_mm_setzero_ps());
/* Masked form of _mm256_sub_ps: passes mask __U and _mm256_sub_ps(__A, __B)
 * to __builtin_ia32_selectps_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8sf)__W -- confirm against the complete header. */
3649 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3650 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3651 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3652 (__v8sf)_mm256_sub_ps(__A, __B),
/* Zero-masked form of _mm256_sub_ps: passes mask __U,
 * _mm256_sub_ps(__A, __B) and _mm256_setzero_ps() to
 * __builtin_ia32_selectps_256 and returns the result as __m256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3656 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3657 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3658 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3659 (__v8sf)_mm256_sub_ps(__A, __B),
3660 (__v8sf)_mm256_setzero_ps());
/* Returns __builtin_ia32_vpermi2vard128 called with __A and __I (both as
 * __v4si), cast to __m128i.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v4si)__B -- confirm against the complete header. */
3663 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3664 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3665 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
/* Masked form: passes mask __U and _mm_permutex2var_epi32(__A, __I, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v4si)__A -- confirm against
 * the complete header. */
3669 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3670 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3672 return (__m128i)__builtin_ia32_selectd_128(__U,
3673 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
/* "mask2" form: passes mask __U and _mm_permutex2var_epi32(__A, __I, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v4si)__I -- confirm against
 * the complete header. */
3677 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3678 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3680 return (__m128i)__builtin_ia32_selectd_128(__U,
3681 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
/* Zero-masked form: passes mask __U, _mm_permutex2var_epi32(__A, __I, __B)
 * and _mm_setzero_si128() to __builtin_ia32_selectd_128 and returns the
 * result as __m128i.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3685 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3686 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3688 return (__m128i)__builtin_ia32_selectd_128(__U,
3689 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3690 (__v4si)_mm_setzero_si128());
/* Returns __builtin_ia32_vpermi2vard256 called with __A and __I (both as
 * __v8si), cast to __m256i.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v8si)__B -- confirm against the complete header. */
3693 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3694 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3695 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
/* Masked form: passes mask __U and _mm256_permutex2var_epi32(__A, __I, __B)
 * to __builtin_ia32_selectd_256.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v8si)__A -- confirm against
 * the complete header. */
3699 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3700 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3702 return (__m256i)__builtin_ia32_selectd_256(__U,
3703 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
/* "mask2" form: passes mask __U and _mm256_permutex2var_epi32(__A, __I, __B)
 * to __builtin_ia32_selectd_256.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v8si)__I -- confirm against
 * the complete header. */
3707 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3708 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3710 return (__m256i)__builtin_ia32_selectd_256(__U,
3711 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
/* Zero-masked form: passes mask __U,
 * _mm256_permutex2var_epi32(__A, __I, __B) and _mm256_setzero_si256() to
 * __builtin_ia32_selectd_256 and returns the result as __m256i.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3715 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3716 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3718 return (__m256i)__builtin_ia32_selectd_256(__U,
3719 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3720 (__v8si)_mm256_setzero_si256());
/* Returns __builtin_ia32_vpermi2varpd128 called with __A (as __v2df) and __I
 * (as __v2di), cast to __m128d.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v2df)__B -- confirm against the complete header. */
3723 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3724 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3725 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
/* Masked form: passes mask __U and _mm_permutex2var_pd(__A, __I, __B) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2df)__A -- confirm against the complete header. */
3729 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3730 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3731 return (__m128d)__builtin_ia32_selectpd_128(__U,
3732 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
/* "mask2" form: passes mask __U, _mm_permutex2var_pd(__A, __I, __B) and the
 * index vector __I (cast through __m128d to __v2df) to
 * __builtin_ia32_selectpd_128, returning the result as __m128d. Presumably
 * lanes with a clear mask bit keep the bits of __I -- confirm. */
3736 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3737 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3738 return (__m128d)__builtin_ia32_selectpd_128(__U,
3739 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3740 (__v2df)(__m128d)__I);
/* Zero-masked form: passes mask __U, _mm_permutex2var_pd(__A, __I, __B) and
 * _mm_setzero_pd() to __builtin_ia32_selectpd_128 and returns the result as
 * __m128d. Presumably lanes with a clear mask bit become zero -- confirm. */
3743 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3744 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3745 return (__m128d)__builtin_ia32_selectpd_128(__U,
3746 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3747 (__v2df)_mm_setzero_pd());
/* Returns __builtin_ia32_vpermi2varpd256 called with __A (as __v4df) and __I
 * (as __v4di), cast to __m256d.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v4df)__B -- confirm against the complete header. */
3750 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3751 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3752 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
/* Masked form: passes mask __U and _mm256_permutex2var_pd(__A, __I, __B) to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v4df)__A -- confirm against
 * the complete header. */
3756 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3757 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3759 return (__m256d)__builtin_ia32_selectpd_256(__U,
3760 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
/* "mask2" form: passes mask __U, _mm256_permutex2var_pd(__A, __I, __B) and
 * the index vector __I (cast through __m256d to __v4df) to
 * __builtin_ia32_selectpd_256, returning the result as __m256d.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3764 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3765 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3767 return (__m256d)__builtin_ia32_selectpd_256(__U,
3768 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3769 (__v4df)(__m256d)__I);
/* Zero-masked form: passes mask __U, _mm256_permutex2var_pd(__A, __I, __B)
 * and _mm256_setzero_pd() to __builtin_ia32_selectpd_256 and returns the
 * result as __m256d.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3772 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3773 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3775 return (__m256d)__builtin_ia32_selectpd_256(__U,
3776 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3777 (__v4df)_mm256_setzero_pd());
/* Returns __builtin_ia32_vpermi2varps128 called with __A (as __v4sf) and __I
 * (as __v4si), cast to __m128.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v4sf)__B -- confirm against the complete header. */
3780 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3781 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3782 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
/* Masked form: passes mask __U and _mm_permutex2var_ps(__A, __I, __B) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4sf)__A -- confirm against the complete header. */
3786 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3787 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3788 return (__m128)__builtin_ia32_selectps_128(__U,
3789 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
/* "mask2" form: passes mask __U, _mm_permutex2var_ps(__A, __I, __B) and the
 * index vector __I (cast through __m128 to __v4sf) to
 * __builtin_ia32_selectps_128, returning the result as __m128. Presumably
 * lanes with a clear mask bit keep the bits of __I -- confirm. */
3793 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3794 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3795 return (__m128)__builtin_ia32_selectps_128(__U,
3796 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3797 (__v4sf)(__m128)__I);
/* Zero-masked form: passes mask __U, _mm_permutex2var_ps(__A, __I, __B) and
 * _mm_setzero_ps() to __builtin_ia32_selectps_128 and returns the result as
 * __m128. Presumably lanes with a clear mask bit become zero -- confirm. */
3800 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3801 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3802 return (__m128)__builtin_ia32_selectps_128(__U,
3803 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3804 (__v4sf)_mm_setzero_ps());
/* Returns __builtin_ia32_vpermi2varps256 called with __A (as __v8sf) and __I
 * (as __v8si), cast to __m256.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v8sf)__B -- confirm against the complete header. */
3807 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3808 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3809 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
/* Masked form: passes mask __U and _mm256_permutex2var_ps(__A, __I, __B) to
 * __builtin_ia32_selectps_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8sf)__A -- confirm against the complete header. */
3813 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3814 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3815 return (__m256)__builtin_ia32_selectps_256(__U,
3816 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
/* "mask2" form: passes mask __U, _mm256_permutex2var_ps(__A, __I, __B) and
 * the index vector __I (cast through __m256 to __v8sf) to
 * __builtin_ia32_selectps_256, returning the result as __m256.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3820 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3821 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3823 return (__m256)__builtin_ia32_selectps_256(__U,
3824 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3825 (__v8sf)(__m256)__I);
/* Zero-masked form: passes mask __U, _mm256_permutex2var_ps(__A, __I, __B)
 * and _mm256_setzero_ps() to __builtin_ia32_selectps_256 and returns the
 * result as __m256.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3828 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3829 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3831 return (__m256)__builtin_ia32_selectps_256(__U,
3832 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3833 (__v8sf)_mm256_setzero_ps());
/* Returns __builtin_ia32_vpermi2varq128 called with __A and __I (both as
 * __v2di), cast to __m128i.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v2di)__B -- confirm against the complete header. */
3836 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3837 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3838 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
/* Masked form: passes mask __U and _mm_permutex2var_epi64(__A, __I, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v2di)__A -- confirm against
 * the complete header. */
3842 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3843 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3845 return (__m128i)__builtin_ia32_selectq_128(__U,
3846 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
/* "mask2" form: passes mask __U and _mm_permutex2var_epi64(__A, __I, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v2di)__I -- confirm against
 * the complete header. */
3850 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3851 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3853 return (__m128i)__builtin_ia32_selectq_128(__U,
3854 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
/* Zero-masked form: passes mask __U, _mm_permutex2var_epi64(__A, __I, __B)
 * and _mm_setzero_si128() to __builtin_ia32_selectq_128 and returns the
 * result as __m128i.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3858 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3859 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3861 return (__m128i)__builtin_ia32_selectq_128(__U,
3862 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3863 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_vpermi2varq256 called with __A and __I (both as
 * __v4di), cast to __m256i.
 * NOTE(review): the final call argument is not visible here; presumably
 * (__v4di)__B -- confirm against the complete header. */
3867 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3868 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3869 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
/* Masked form: passes mask __U and _mm256_permutex2var_epi64(__A, __I, __B)
 * to __builtin_ia32_selectq_256.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v4di)__A -- confirm against
 * the complete header. */
3873 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3874 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3876 return (__m256i)__builtin_ia32_selectq_256(__U,
3877 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
/* "mask2" form: passes mask __U and _mm256_permutex2var_epi64(__A, __I, __B)
 * to __builtin_ia32_selectq_256.
 * NOTE(review): the end of the parameter list and the last select operand are
 * not visible here; presumably the operand is (__v4di)__I -- confirm against
 * the complete header. */
3881 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3882 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3884 return (__m256i)__builtin_ia32_selectq_256(__U,
3885 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
/* Zero-masked form: passes mask __U,
 * _mm256_permutex2var_epi64(__A, __I, __B) and _mm256_setzero_si256() to
 * __builtin_ia32_selectq_256 and returns the result as __m256i.
 * NOTE(review): the end of the parameter list is not visible here; presumably
 * it declares __B -- confirm against the complete header. */
3889 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3890 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3892 return (__m256i)__builtin_ia32_selectq_256(__U,
3893 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3894 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepi8_epi32: passes mask __U and
 * _mm_cvtepi8_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4si)__W -- confirm against the complete header. */
3897 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3898 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3900 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3901 (__v4si)_mm_cvtepi8_epi32(__A),
/* Zero-masked form of _mm_cvtepi8_epi32: passes mask __U,
 * _mm_cvtepi8_epi32(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectd_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3905 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3906 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
3908 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3909 (__v4si)_mm_cvtepi8_epi32(__A),
3910 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvtepi8_epi32 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepi8_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8si)__W -- confirm against the complete header. */
3913 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3914 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3916 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3917 (__v8si)_mm256_cvtepi8_epi32(__A),
/* Zero-masked form of _mm256_cvtepi8_epi32 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepi8_epi32(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3921 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3922 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
3924 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3925 (__v8si)_mm256_cvtepi8_epi32(__A),
3926 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_cvtepi8_epi64: passes mask __U and
 * _mm_cvtepi8_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
3929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3930 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
3932 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3933 (__v2di)_mm_cvtepi8_epi64(__A),
/* Zero-masked form of _mm_cvtepi8_epi64: passes mask __U,
 * _mm_cvtepi8_epi64(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3937 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
3940 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3941 (__v2di)_mm_cvtepi8_epi64(__A),
3942 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepi8_epi64 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepi8_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
3945 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3946 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
3948 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3949 (__v4di)_mm256_cvtepi8_epi64(__A),
/* Zero-masked form of _mm256_cvtepi8_epi64 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepi8_epi64(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3953 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3954 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
3956 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3957 (__v4di)_mm256_cvtepi8_epi64(__A),
3958 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepi32_epi64: passes mask __U and
 * _mm_cvtepi32_epi64(__X) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
3961 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3962 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
3964 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3965 (__v2di)_mm_cvtepi32_epi64(__X),
/* Zero-masked form of _mm_cvtepi32_epi64: passes mask __U,
 * _mm_cvtepi32_epi64(__X) and _mm_setzero_si128() to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3969 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3970 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
3972 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3973 (__v2di)_mm_cvtepi32_epi64(__X),
3974 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepi32_epi64 (128-bit source __X, 256-bit result):
 * passes mask __U and _mm256_cvtepi32_epi64(__X) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
3977 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3978 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
3980 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3981 (__v4di)_mm256_cvtepi32_epi64(__X),
/* Zero-masked form of _mm256_cvtepi32_epi64 (128-bit source __X, 256-bit
 * result): passes mask __U, _mm256_cvtepi32_epi64(__X) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
3985 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3986 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
3988 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3989 (__v4di)_mm256_cvtepi32_epi64(__X),
3990 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepi16_epi32: passes mask __U and
 * _mm_cvtepi16_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4si)__W -- confirm against the complete header. */
3993 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3994 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3996 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3997 (__v4si)_mm_cvtepi16_epi32(__A),
/* Zero-masked form of _mm_cvtepi16_epi32: passes mask __U,
 * _mm_cvtepi16_epi32(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectd_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4002 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4004 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4005 (__v4si)_mm_cvtepi16_epi32(__A),
4006 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvtepi16_epi32 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepi16_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8si)__W -- confirm against the complete header. */
4009 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4010 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4012 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4013 (__v8si)_mm256_cvtepi16_epi32(__A),
/* Zero-masked form of _mm256_cvtepi16_epi32 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepi16_epi32(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4017 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4018 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4020 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4021 (__v8si)_mm256_cvtepi16_epi32(__A),
4022 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_cvtepi16_epi64: passes mask __U and
 * _mm_cvtepi16_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
4025 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4026 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4028 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4029 (__v2di)_mm_cvtepi16_epi64(__A),
/* Zero-masked form of _mm_cvtepi16_epi64: passes mask __U,
 * _mm_cvtepi16_epi64(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4033 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4034 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4036 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4037 (__v2di)_mm_cvtepi16_epi64(__A),
4038 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepi16_epi64 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepi16_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
4041 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4042 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4045 (__v4di)_mm256_cvtepi16_epi64(__A),
/* Zero-masked form of _mm256_cvtepi16_epi64 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepi16_epi64(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4049 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4050 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4052 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4053 (__v4di)_mm256_cvtepi16_epi64(__A),
4054 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu8_epi32: passes mask __U and
 * _mm_cvtepu8_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4si)__W -- confirm against the complete header. */
4058 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4059 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4061 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4062 (__v4si)_mm_cvtepu8_epi32(__A),
/* Zero-masked form of _mm_cvtepu8_epi32: passes mask __U,
 * _mm_cvtepu8_epi32(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectd_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4066 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4067 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4069 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4070 (__v4si)_mm_cvtepu8_epi32(__A),
4071 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu8_epi32 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepu8_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8si)__W -- confirm against the complete header. */
4074 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4075 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4077 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4078 (__v8si)_mm256_cvtepu8_epi32(__A),
/* Zero-masked form of _mm256_cvtepu8_epi32 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepu8_epi32(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4082 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4083 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4086 (__v8si)_mm256_cvtepu8_epi32(__A),
4087 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu8_epi64: passes mask __U and
 * _mm_cvtepu8_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
4090 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4091 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4093 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4094 (__v2di)_mm_cvtepu8_epi64(__A),
/* Zero-masked form of _mm_cvtepu8_epi64: passes mask __U,
 * _mm_cvtepu8_epi64(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4098 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4099 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4101 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4102 (__v2di)_mm_cvtepu8_epi64(__A),
4103 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu8_epi64 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepu8_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
4106 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4107 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4109 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4110 (__v4di)_mm256_cvtepu8_epi64(__A),
/* Zero-masked form of _mm256_cvtepu8_epi64 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepu8_epi64(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4114 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4115 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4117 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4118 (__v4di)_mm256_cvtepu8_epi64(__A),
4119 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu32_epi64: passes mask __U and
 * _mm_cvtepu32_epi64(__X) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
4122 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4123 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4125 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4126 (__v2di)_mm_cvtepu32_epi64(__X),
/* Zero-masked form of _mm_cvtepu32_epi64: passes mask __U,
 * _mm_cvtepu32_epi64(__X) and _mm_setzero_si128() to
 * __builtin_ia32_selectq_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4130 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4131 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4133 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4134 (__v2di)_mm_cvtepu32_epi64(__X),
4135 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu32_epi64 (128-bit source __X, 256-bit result):
 * passes mask __U and _mm256_cvtepu32_epi64(__X) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4di)__W -- confirm against the complete header. */
4138 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4139 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4141 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4142 (__v4di)_mm256_cvtepu32_epi64(__X),
/* Zero-masked form of _mm256_cvtepu32_epi64 (128-bit source __X, 256-bit
 * result): passes mask __U, _mm256_cvtepu32_epi64(__X) and
 * _mm256_setzero_si256() to __builtin_ia32_selectq_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4146 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4147 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4149 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4150 (__v4di)_mm256_cvtepu32_epi64(__X),
4151 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu16_epi32: passes mask __U and
 * _mm_cvtepu16_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v4si)__W -- confirm against the complete header. */
4154 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4155 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4157 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4158 (__v4si)_mm_cvtepu16_epi32(__A),
/* Zero-masked form of _mm_cvtepu16_epi32: passes mask __U,
 * _mm_cvtepu16_epi32(__A) and _mm_setzero_si128() to
 * __builtin_ia32_selectd_128 and returns the result as __m128i.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4162 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4163 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4165 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4166 (__v4si)_mm_cvtepu16_epi32(__A),
4167 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu16_epi32 (128-bit source __A, 256-bit result):
 * passes mask __U and _mm256_cvtepu16_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v8si)__W -- confirm against the complete header. */
4170 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4171 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4173 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4174 (__v8si)_mm256_cvtepu16_epi32(__A),
/* Zero-masked form of _mm256_cvtepu16_epi32 (128-bit source __A, 256-bit
 * result): passes mask __U, _mm256_cvtepu16_epi32(__A) and
 * _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * Presumably lanes with a clear mask bit become zero -- confirm. */
4178 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4179 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4181 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4182 (__v8si)_mm256_cvtepu16_epi32(__A),
4183 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_cvtepu16_epi64: passes mask __U and
 * _mm_cvtepu16_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is not visible here; presumably
 * (__v2di)__W -- confirm against the complete header. */
4186 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4187 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4189 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4190 (__v2di)_mm_cvtepu16_epi64(__A),
/* Zero-masked form of _mm_cvtepu16_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, the converted __A, and _mm_setzero_si128() as its last operand. */
4194 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4195 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4197 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4198 (__v2di)_mm_cvtepu16_epi64(__A),
4199 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_cvtepu16_epi64 (128-bit __A, 256-bit result): passes
 * mask __U and the converted __A to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4202 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4203 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4205 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4206 (__v4di)_mm256_cvtepu16_epi64(__A),
/* Zero-masked form of _mm256_cvtepu16_epi64: __builtin_ia32_selectq_256
 * receives mask __U, the converted __A, and _mm256_setzero_si256() as its
 * last operand. */
4210 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4211 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4214 (__v4di)_mm256_cvtepu16_epi64(__A),
4215 (__v4di)_mm256_setzero_si256());
/* Wraps __builtin_ia32_prold128: a is cast to __v4si, b to int, and the
 * result to __m128i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4219 #define _mm_rol_epi32(a, b) \
4220 ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
/* Masked _mm_rol_epi32: __builtin_ia32_selectd_128 is given mask u, the
 * result of _mm_rol_epi32(a, b), and w as its last operand. The expansion is
 * fully parenthesized so the __m128i cast stays inside it. */
4222 #define _mm_mask_rol_epi32(w, u, a, b) \
4223 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4224 (__v4si)_mm_rol_epi32((a), (b)), \
4225 (__v4si)(__m128i)(w)))
/* Zero-masked _mm_rol_epi32: __builtin_ia32_selectd_128 is given mask u, the
 * result of _mm_rol_epi32(a, b), and _mm_setzero_si128() as its last operand.
 * The expansion is fully parenthesized so the __m128i cast stays inside it. */
4227 #define _mm_maskz_rol_epi32(u, a, b) \
4228 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4229 (__v4si)_mm_rol_epi32((a), (b)), \
4230 (__v4si)_mm_setzero_si128()))
/* Wraps __builtin_ia32_prold256: a is cast to __v8si, b to int, and the
 * result to __m256i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4232 #define _mm256_rol_epi32(a, b) \
4233 ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
/* Masked _mm256_rol_epi32: __builtin_ia32_selectd_256 is given mask u, the
 * result of _mm256_rol_epi32(a, b), and w as its last operand. The expansion
 * is fully parenthesized so the __m256i cast stays inside it. */
4235 #define _mm256_mask_rol_epi32(w, u, a, b) \
4236 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4237 (__v8si)_mm256_rol_epi32((a), (b)), \
4238 (__v8si)(__m256i)(w)))
/* Zero-masked _mm256_rol_epi32: __builtin_ia32_selectd_256 is given mask u,
 * the result of _mm256_rol_epi32(a, b), and _mm256_setzero_si256() as its
 * last operand. The expansion is fully parenthesized so the __m256i cast
 * stays inside it. */
4240 #define _mm256_maskz_rol_epi32(u, a, b) \
4241 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4242 (__v8si)_mm256_rol_epi32((a), (b)), \
4243 (__v8si)_mm256_setzero_si256()))
/* Wraps __builtin_ia32_prolq128: a is cast to __v2di, b to int, and the
 * result to __m128i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4245 #define _mm_rol_epi64(a, b) \
4246 ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
/* Masked _mm_rol_epi64: __builtin_ia32_selectq_128 is given mask u, the
 * result of _mm_rol_epi64(a, b), and w as its last operand. The expansion is
 * fully parenthesized so the __m128i cast stays inside it. */
4248 #define _mm_mask_rol_epi64(w, u, a, b) \
4249 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4250 (__v2di)_mm_rol_epi64((a), (b)), \
4251 (__v2di)(__m128i)(w)))
/* Zero-masked _mm_rol_epi64: __builtin_ia32_selectq_128 is given mask u, the
 * result of _mm_rol_epi64(a, b), and _mm_setzero_si128() as its last operand.
 * The expansion is fully parenthesized so the __m128i cast stays inside it. */
4253 #define _mm_maskz_rol_epi64(u, a, b) \
4254 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4255 (__v2di)_mm_rol_epi64((a), (b)), \
4256 (__v2di)_mm_setzero_si128()))
/* Wraps __builtin_ia32_prolq256: a is cast to __v4di, b to int, and the
 * result to __m256i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4258 #define _mm256_rol_epi64(a, b) \
4259 ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
/* Masked _mm256_rol_epi64: __builtin_ia32_selectq_256 is given mask u, the
 * result of _mm256_rol_epi64(a, b), and w as its last operand. The expansion
 * is fully parenthesized so the __m256i cast stays inside it. */
4261 #define _mm256_mask_rol_epi64(w, u, a, b) \
4262 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4263 (__v4di)_mm256_rol_epi64((a), (b)), \
4264 (__v4di)(__m256i)(w)))
/* Zero-masked _mm256_rol_epi64: __builtin_ia32_selectq_256 is given mask u,
 * the result of _mm256_rol_epi64(a, b), and _mm256_setzero_si256() as its
 * last operand. The expansion is fully parenthesized so the __m256i cast
 * stays inside it. */
4266 #define _mm256_maskz_rol_epi64(u, a, b) \
4267 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4268 (__v4di)_mm256_rol_epi64((a), (b)), \
4269 (__v4di)_mm256_setzero_si256()))
/* Forwards __A and __B (both cast to __v4si) to __builtin_ia32_prolvd128 and
 * returns the result as __m128i. */
4271 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4272 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4274 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
/* Masked form of _mm_rolv_epi32: passes mask __U and _mm_rolv_epi32(__A, __B)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4277 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4278 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4280 return (__m128i)__builtin_ia32_selectd_128(__U,
4281 (__v4si)_mm_rolv_epi32(__A, __B),
/* Zero-masked form of _mm_rolv_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_rolv_epi32(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4285 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4286 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4288 return (__m128i)__builtin_ia32_selectd_128(__U,
4289 (__v4si)_mm_rolv_epi32(__A, __B),
4290 (__v4si)_mm_setzero_si128());
/* Forwards __A and __B (both cast to __v8si) to __builtin_ia32_prolvd256 and
 * returns the result as __m256i. */
4293 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4294 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4296 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
/* Masked form of _mm256_rolv_epi32: passes mask __U and
 * _mm256_rolv_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4299 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4300 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4302 return (__m256i)__builtin_ia32_selectd_256(__U,
4303 (__v8si)_mm256_rolv_epi32(__A, __B),
/* Zero-masked form of _mm256_rolv_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_rolv_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4307 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4308 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4310 return (__m256i)__builtin_ia32_selectd_256(__U,
4311 (__v8si)_mm256_rolv_epi32(__A, __B),
4312 (__v8si)_mm256_setzero_si256());
/* Forwards __A and __B (both cast to __v2di) to __builtin_ia32_prolvq128 and
 * returns the result as __m128i. */
4315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4316 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4318 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
/* Masked form of _mm_rolv_epi64: passes mask __U and _mm_rolv_epi64(__A, __B)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4321 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4322 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4324 return (__m128i)__builtin_ia32_selectq_128(__U,
4325 (__v2di)_mm_rolv_epi64(__A, __B),
/* Zero-masked form of _mm_rolv_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_rolv_epi64(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4329 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4330 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4332 return (__m128i)__builtin_ia32_selectq_128(__U,
4333 (__v2di)_mm_rolv_epi64(__A, __B),
4334 (__v2di)_mm_setzero_si128());
/* Forwards __A and __B (both cast to __v4di) to __builtin_ia32_prolvq256 and
 * returns the result as __m256i. */
4337 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4338 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4340 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
/* Masked form of _mm256_rolv_epi64: passes mask __U and
 * _mm256_rolv_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4343 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4344 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4346 return (__m256i)__builtin_ia32_selectq_256(__U,
4347 (__v4di)_mm256_rolv_epi64(__A, __B),
/* Zero-masked form of _mm256_rolv_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_rolv_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4351 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4352 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4354 return (__m256i)__builtin_ia32_selectq_256(__U,
4355 (__v4di)_mm256_rolv_epi64(__A, __B),
4356 (__v4di)_mm256_setzero_si256());
/* Wraps __builtin_ia32_prord128: a is cast to __v4si, b to int, and the
 * result to __m128i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4359 #define _mm_ror_epi32(a, b) \
4360 ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
/* Masked _mm_ror_epi32: __builtin_ia32_selectd_128 is given mask u, the
 * result of _mm_ror_epi32(a, b), and w as its last operand. The expansion is
 * fully parenthesized so the __m128i cast stays inside it. */
4362 #define _mm_mask_ror_epi32(w, u, a, b) \
4363 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4364 (__v4si)_mm_ror_epi32((a), (b)), \
4365 (__v4si)(__m128i)(w)))
/* Zero-masked _mm_ror_epi32: __builtin_ia32_selectd_128 is given mask u, the
 * result of _mm_ror_epi32(a, b), and _mm_setzero_si128() as its last operand.
 * The expansion is fully parenthesized so the __m128i cast stays inside it. */
4367 #define _mm_maskz_ror_epi32(u, a, b) \
4368 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4369 (__v4si)_mm_ror_epi32((a), (b)), \
4370 (__v4si)_mm_setzero_si128()))
/* Wraps __builtin_ia32_prord256: a is cast to __v8si, b to int, and the
 * result to __m256i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4372 #define _mm256_ror_epi32(a, b) \
4373 ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
/* Masked _mm256_ror_epi32: __builtin_ia32_selectd_256 is given mask u, the
 * result of _mm256_ror_epi32(a, b), and w as its last operand. The expansion
 * is fully parenthesized so the __m256i cast stays inside it. */
4375 #define _mm256_mask_ror_epi32(w, u, a, b) \
4376 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4377 (__v8si)_mm256_ror_epi32((a), (b)), \
4378 (__v8si)(__m256i)(w)))
/* Zero-masked _mm256_ror_epi32: __builtin_ia32_selectd_256 is given mask u,
 * the result of _mm256_ror_epi32(a, b), and _mm256_setzero_si256() as its
 * last operand. The expansion is fully parenthesized so the __m256i cast
 * stays inside it. */
4380 #define _mm256_maskz_ror_epi32(u, a, b) \
4381 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4382 (__v8si)_mm256_ror_epi32((a), (b)), \
4383 (__v8si)_mm256_setzero_si256()))
/* Wraps __builtin_ia32_prorq128: a is cast to __v2di, b to int, and the
 * result to __m128i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4385 #define _mm_ror_epi64(a, b) \
4386 ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
/* Masked _mm_ror_epi64: __builtin_ia32_selectq_128 is given mask u, the
 * result of _mm_ror_epi64(a, b), and w as its last operand. The expansion is
 * fully parenthesized so the __m128i cast stays inside it. */
4388 #define _mm_mask_ror_epi64(w, u, a, b) \
4389 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4390 (__v2di)_mm_ror_epi64((a), (b)), \
4391 (__v2di)(__m128i)(w)))
/* Zero-masked _mm_ror_epi64: __builtin_ia32_selectq_128 is given mask u, the
 * result of _mm_ror_epi64(a, b), and _mm_setzero_si128() as its last operand.
 * The expansion is fully parenthesized so the __m128i cast stays inside it. */
4393 #define _mm_maskz_ror_epi64(u, a, b) \
4394 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4395 (__v2di)_mm_ror_epi64((a), (b)), \
4396 (__v2di)_mm_setzero_si128()))
/* Wraps __builtin_ia32_prorq256: a is cast to __v4di, b to int, and the
 * result to __m256i. The whole expansion is parenthesized so the result cast
 * is applied before any operator written after the macro invocation. */
4398 #define _mm256_ror_epi64(a, b) \
4399 ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
/* Masked _mm256_ror_epi64: __builtin_ia32_selectq_256 is given mask u, the
 * result of _mm256_ror_epi64(a, b), and w as its last operand. The expansion
 * is fully parenthesized so the __m256i cast stays inside it. */
4401 #define _mm256_mask_ror_epi64(w, u, a, b) \
4402 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4403 (__v4di)_mm256_ror_epi64((a), (b)), \
4404 (__v4di)(__m256i)(w)))
/* Zero-masked _mm256_ror_epi64: __builtin_ia32_selectq_256 is given mask u,
 * the result of _mm256_ror_epi64(a, b), and _mm256_setzero_si256() as its
 * last operand. The expansion is fully parenthesized so the __m256i cast
 * stays inside it. */
4406 #define _mm256_maskz_ror_epi64(u, a, b) \
4407 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4408 (__v4di)_mm256_ror_epi64((a), (b)), \
4409 (__v4di)_mm256_setzero_si256()))
/* Masked form of _mm_sll_epi32: passes mask __U and _mm_sll_epi32(__A, __B)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4411 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4412 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4414 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4415 (__v4si)_mm_sll_epi32(__A, __B),
/* Zero-masked form of _mm_sll_epi32: __builtin_ia32_selectd_128 receives mask
 * __U, _mm_sll_epi32(__A, __B), and _mm_setzero_si128() as its last operand. */
4419 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4420 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4422 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4423 (__v4si)_mm_sll_epi32(__A, __B),
4424 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_sll_epi32 (256-bit __A, 128-bit __B): passes mask __U
 * and _mm256_sll_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4427 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4428 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4430 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4431 (__v8si)_mm256_sll_epi32(__A, __B),
/* Zero-masked form of _mm256_sll_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_sll_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4435 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4436 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4438 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4439 (__v8si)_mm256_sll_epi32(__A, __B),
4440 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_slli_epi32 (integer count __B): passes mask __U and
 * _mm_slli_epi32(__A, __B) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4443 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4444 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4446 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4447 (__v4si)_mm_slli_epi32(__A, __B),
/* Zero-masked form of _mm_slli_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_slli_epi32(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4451 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4452 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4454 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4455 (__v4si)_mm_slli_epi32(__A, __B),
4456 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_slli_epi32 (integer count __B): passes mask __U and
 * _mm256_slli_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4459 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4460 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4462 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4463 (__v8si)_mm256_slli_epi32(__A, __B),
/* Zero-masked form of _mm256_slli_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_slli_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4467 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4468 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4470 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4471 (__v8si)_mm256_slli_epi32(__A, __B),
4472 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_sll_epi64: passes mask __U and _mm_sll_epi64(__A, __B)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4475 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4476 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4478 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4479 (__v2di)_mm_sll_epi64(__A, __B),
/* Zero-masked form of _mm_sll_epi64: __builtin_ia32_selectq_128 receives mask
 * __U, _mm_sll_epi64(__A, __B), and _mm_setzero_si128() as its last operand. */
4483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4484 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4486 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4487 (__v2di)_mm_sll_epi64(__A, __B),
4488 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_sll_epi64 (256-bit __A, 128-bit __B): passes mask __U
 * and _mm256_sll_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4491 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4492 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4494 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4495 (__v4di)_mm256_sll_epi64(__A, __B),
/* Zero-masked form of _mm256_sll_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_sll_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4499 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4500 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4502 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4503 (__v4di)_mm256_sll_epi64(__A, __B),
4504 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_slli_epi64 (integer count __B): passes mask __U and
 * _mm_slli_epi64(__A, __B) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4508 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4510 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4511 (__v2di)_mm_slli_epi64(__A, __B),
/* Zero-masked form of _mm_slli_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_slli_epi64(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4516 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4518 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4519 (__v2di)_mm_slli_epi64(__A, __B),
4520 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_slli_epi64 (integer count __B): passes mask __U and
 * _mm256_slli_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4523 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4524 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4526 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4527 (__v4di)_mm256_slli_epi64(__A, __B),
/* Zero-masked form of _mm256_slli_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_slli_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4531 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4532 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4534 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4535 (__v4di)_mm256_slli_epi64(__A, __B),
4536 (__v4di)_mm256_setzero_si256());
/* Forwards __A and __B (both cast to __v4si) to __builtin_ia32_prorvd128 and
 * returns the result as __m128i. */
4539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4540 _mm_rorv_epi32 (__m128i __A, __m128i __B)
4542 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
/* Masked form of _mm_rorv_epi32: passes mask __U and _mm_rorv_epi32(__A, __B)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4545 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4546 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4548 return (__m128i)__builtin_ia32_selectd_128(__U,
4549 (__v4si)_mm_rorv_epi32(__A, __B),
/* Zero-masked form of _mm_rorv_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_rorv_epi32(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4553 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4554 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4556 return (__m128i)__builtin_ia32_selectd_128(__U,
4557 (__v4si)_mm_rorv_epi32(__A, __B),
4558 (__v4si)_mm_setzero_si128());
/* Forwards __A and __B (both cast to __v8si) to __builtin_ia32_prorvd256 and
 * returns the result as __m256i. */
4561 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4562 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
4564 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
/* Masked form of _mm256_rorv_epi32: passes mask __U and
 * _mm256_rorv_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4567 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4568 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4570 return (__m256i)__builtin_ia32_selectd_256(__U,
4571 (__v8si)_mm256_rorv_epi32(__A, __B),
/* Zero-masked form of _mm256_rorv_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_rorv_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4575 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4576 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4578 return (__m256i)__builtin_ia32_selectd_256(__U,
4579 (__v8si)_mm256_rorv_epi32(__A, __B),
4580 (__v8si)_mm256_setzero_si256());
/* Forwards __A and __B (both cast to __v2di) to __builtin_ia32_prorvq128 and
 * returns the result as __m128i. */
4583 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4584 _mm_rorv_epi64 (__m128i __A, __m128i __B)
4586 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
/* Masked form of _mm_rorv_epi64: passes mask __U and _mm_rorv_epi64(__A, __B)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4589 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4590 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4592 return (__m128i)__builtin_ia32_selectq_128(__U,
4593 (__v2di)_mm_rorv_epi64(__A, __B),
/* Zero-masked form of _mm_rorv_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_rorv_epi64(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4598 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4600 return (__m128i)__builtin_ia32_selectq_128(__U,
4601 (__v2di)_mm_rorv_epi64(__A, __B),
4602 (__v2di)_mm_setzero_si128());
/* Forwards __A and __B (both cast to __v4di) to __builtin_ia32_prorvq256 and
 * returns the result as __m256i. */
4605 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4606 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
4608 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
/* Masked form of _mm256_rorv_epi64: passes mask __U and
 * _mm256_rorv_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4611 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4612 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4614 return (__m256i)__builtin_ia32_selectq_256(__U,
4615 (__v4di)_mm256_rorv_epi64(__A, __B),
/* Zero-masked form of _mm256_rorv_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_rorv_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4619 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4620 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4622 return (__m256i)__builtin_ia32_selectq_256(__U,
4623 (__v4di)_mm256_rorv_epi64(__A, __B),
4624 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_sllv_epi64: passes mask __U and _mm_sllv_epi64(__X, __Y)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4627 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4628 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4630 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4631 (__v2di)_mm_sllv_epi64(__X, __Y),
/* Zero-masked form of _mm_sllv_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_sllv_epi64(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4635 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4636 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4638 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4639 (__v2di)_mm_sllv_epi64(__X, __Y),
4640 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_sllv_epi64: passes mask __U and
 * _mm256_sllv_epi64(__X, __Y) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4643 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4644 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4646 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4647 (__v4di)_mm256_sllv_epi64(__X, __Y),
/* Zero-masked form of _mm256_sllv_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_sllv_epi64(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4651 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4652 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4654 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4655 (__v4di)_mm256_sllv_epi64(__X, __Y),
4656 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_sllv_epi32: passes mask __U and _mm_sllv_epi32(__X, __Y)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4659 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4660 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4662 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4663 (__v4si)_mm_sllv_epi32(__X, __Y),
/* Zero-masked form of _mm_sllv_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_sllv_epi32(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4667 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4668 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4670 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4671 (__v4si)_mm_sllv_epi32(__X, __Y),
4672 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_sllv_epi32: passes mask __U and
 * _mm256_sllv_epi32(__X, __Y) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4675 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4676 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4678 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4679 (__v8si)_mm256_sllv_epi32(__X, __Y),
/* Zero-masked form of _mm256_sllv_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_sllv_epi32(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4683 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4684 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4686 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4687 (__v8si)_mm256_sllv_epi32(__X, __Y),
4688 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srlv_epi64: passes mask __U and _mm_srlv_epi64(__X, __Y)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4691 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4692 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4694 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4695 (__v2di)_mm_srlv_epi64(__X, __Y),
/* Zero-masked form of _mm_srlv_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_srlv_epi64(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4699 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4700 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4702 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4703 (__v2di)_mm_srlv_epi64(__X, __Y),
4704 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_srlv_epi64: passes mask __U and
 * _mm256_srlv_epi64(__X, __Y) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4707 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4708 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4710 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4711 (__v4di)_mm256_srlv_epi64(__X, __Y),
/* Zero-masked form of _mm256_srlv_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_srlv_epi64(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4715 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4716 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4718 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4719 (__v4di)_mm256_srlv_epi64(__X, __Y),
4720 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srlv_epi32: passes mask __U and _mm_srlv_epi32(__X, __Y)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4723 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4724 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4726 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4727 (__v4si)_mm_srlv_epi32(__X, __Y),
/* Zero-masked form of _mm_srlv_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_srlv_epi32(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4731 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4732 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4734 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4735 (__v4si)_mm_srlv_epi32(__X, __Y),
4736 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srlv_epi32: passes mask __U and
 * _mm256_srlv_epi32(__X, __Y) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4739 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4740 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4742 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4743 (__v8si)_mm256_srlv_epi32(__X, __Y),
/* Zero-masked form of _mm256_srlv_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_srlv_epi32(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4747 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4748 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4750 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4751 (__v8si)_mm256_srlv_epi32(__X, __Y),
4752 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srl_epi32: passes mask __U and _mm_srl_epi32(__A, __B)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4755 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4756 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4759 (__v4si)_mm_srl_epi32(__A, __B),
/* Zero-masked form of _mm_srl_epi32: __builtin_ia32_selectd_128 receives mask
 * __U, _mm_srl_epi32(__A, __B), and _mm_setzero_si128() as its last operand. */
4763 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4764 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4766 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4767 (__v4si)_mm_srl_epi32(__A, __B),
4768 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srl_epi32 (256-bit __A, 128-bit __B): passes mask __U
 * and _mm256_srl_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4771 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4772 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4774 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4775 (__v8si)_mm256_srl_epi32(__A, __B),
/* Zero-masked form of _mm256_srl_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_srl_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4779 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4780 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4782 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4783 (__v8si)_mm256_srl_epi32(__A, __B),
4784 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srli_epi32 (integer count __B): passes mask __U and
 * _mm_srli_epi32(__A, __B) to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4787 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4788 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4790 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4791 (__v4si)_mm_srli_epi32(__A, __B),
/* Zero-masked form of _mm_srli_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_srli_epi32(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4795 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4796 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4798 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4799 (__v4si)_mm_srli_epi32(__A, __B),
4800 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srli_epi32 (integer count __B): passes mask __U and
 * _mm256_srli_epi32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4803 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4804 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4806 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4807 (__v8si)_mm256_srli_epi32(__A, __B),
/* Zero-masked form of _mm256_srli_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_srli_epi32(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4811 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4812 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4814 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4815 (__v8si)_mm256_srli_epi32(__A, __B),
4816 (__v8si)_mm256_setzero_si256());
/* Masked form of _mm_srl_epi64: passes mask __U and _mm_srl_epi64(__A, __B)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4819 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4820 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4822 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4823 (__v2di)_mm_srl_epi64(__A, __B),
/* Zero-masked form of _mm_srl_epi64: __builtin_ia32_selectq_128 receives mask
 * __U, _mm_srl_epi64(__A, __B), and _mm_setzero_si128() as its last operand. */
4827 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4828 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4830 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4831 (__v2di)_mm_srl_epi64(__A, __B),
4832 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_srl_epi64 (256-bit __A, 128-bit __B): passes mask __U
 * and _mm256_srl_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4835 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4836 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4838 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4839 (__v4di)_mm256_srl_epi64(__A, __B),
/* Zero-masked form of _mm256_srl_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_srl_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4843 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4844 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4846 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4847 (__v4di)_mm256_srl_epi64(__A, __B),
4848 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srli_epi64 (integer count __B): passes mask __U and
 * _mm_srli_epi64(__A, __B) to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4851 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4852 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4854 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4855 (__v2di)_mm_srli_epi64(__A, __B),
/* Zero-masked form of _mm_srli_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_srli_epi64(__A, __B), and _mm_setzero_si128() as its last
 * operand. */
4859 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4860 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
4862 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4863 (__v2di)_mm_srli_epi64(__A, __B),
4864 (__v2di)_mm_setzero_si128());
/* Masked form of _mm256_srli_epi64 (integer count __B): passes mask __U and
 * _mm256_srli_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4867 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4868 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4870 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4871 (__v4di)_mm256_srli_epi64(__A, __B),
/* Zero-masked form of _mm256_srli_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_srli_epi64(__A, __B), and _mm256_setzero_si256() as its
 * last operand. */
4875 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4876 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
4878 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4879 (__v4di)_mm256_srli_epi64(__A, __B),
4880 (__v4di)_mm256_setzero_si256());
/* Masked form of _mm_srav_epi32: passes mask __U and _mm_srav_epi32(__X, __Y)
 * to __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4883 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4884 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4886 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4887 (__v4si)_mm_srav_epi32(__X, __Y),
/* Zero-masked form of _mm_srav_epi32: __builtin_ia32_selectd_128 receives
 * mask __U, _mm_srav_epi32(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4891 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4892 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4894 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4895 (__v4si)_mm_srav_epi32(__X, __Y),
4896 (__v4si)_mm_setzero_si128());
/* Masked form of _mm256_srav_epi32: passes mask __U and
 * _mm256_srav_epi32(__X, __Y) to __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4899 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4900 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4902 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4903 (__v8si)_mm256_srav_epi32(__X, __Y),
/* Zero-masked form of _mm256_srav_epi32: __builtin_ia32_selectd_256 receives
 * mask __U, _mm256_srav_epi32(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4907 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4908 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4910 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4911 (__v8si)_mm256_srav_epi32(__X, __Y),
4912 (__v8si)_mm256_setzero_si256());
/* Forwards __X and __Y (both cast to __v2di) to __builtin_ia32_psravq128 and
 * returns the result as __m128i. */
4915 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4916 _mm_srav_epi64(__m128i __X, __m128i __Y)
4918 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
/* Masked form of _mm_srav_epi64: passes mask __U and _mm_srav_epi64(__X, __Y)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4921 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4922 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4924 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4925 (__v2di)_mm_srav_epi64(__X, __Y),
/* Zero-masked form of _mm_srav_epi64: __builtin_ia32_selectq_128 receives
 * mask __U, _mm_srav_epi64(__X, __Y), and _mm_setzero_si128() as its last
 * operand. */
4929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4930 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4932 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4933 (__v2di)_mm_srav_epi64(__X, __Y),
4934 (__v2di)_mm_setzero_si128());
/* Forwards __X and __Y (both cast to __v4di) to __builtin_ia32_psravq256 and
 * returns the result as __m256i. */
4937 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4938 _mm256_srav_epi64(__m256i __X, __m256i __Y)
4940 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
/* Masked form of _mm256_srav_epi64: passes mask __U and
 * _mm256_srav_epi64(__X, __Y) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
4943 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4944 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4946 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4947 (__v4di)_mm256_srav_epi64(__X, __Y),
/* Zero-masked form of _mm256_srav_epi64: __builtin_ia32_selectq_256 receives
 * mask __U, _mm256_srav_epi64(__X, __Y), and _mm256_setzero_si256() as its
 * last operand. */
4951 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4952 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
4954 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4955 (__v4di)_mm256_srav_epi64(__X, __Y),
4956 (__v4di)_mm256_setzero_si256());
/* Masked move: returns the result of __builtin_ia32_selectd_128 keyed on
 * mask __U.
 * NOTE(review): the two vector operands are presumably __A then __W --
 * confirm. */
4959 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4960 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4962 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
/* Zero-masked move: __builtin_ia32_selectd_128 keyed on mask __U with
 * _mm_setzero_si128() as its last operand.
 * NOTE(review): the middle operand is presumably __A -- confirm. */
4967 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4968 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
4970 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
4972 (__v4si) _mm_setzero_si128 ());
/* Masked move: returns the result of __builtin_ia32_selectd_256 keyed on
 * mask __U.
 * NOTE(review): the two vector operands are presumably __A then __W --
 * confirm. */
4976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4977 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
4979 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
/* Zero-masked move: __builtin_ia32_selectd_256 keyed on mask __U with
 * _mm256_setzero_si256() as its last operand.
 * NOTE(review): the middle operand is presumably __A -- confirm. */
4984 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4985 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
4987 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
4989 (__v8si) _mm256_setzero_si256 ());
/* Masked load: calls __builtin_ia32_movdqa32load128_mask with __P cast to
 * __v4si *.
 * NOTE(review): remaining arguments are presumably __W and __U, and the
 * builtin's alignment requirement on __P is not stated here -- confirm. */
4992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4993 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
4995 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
/* Zero-masked load: calls __builtin_ia32_movdqa32load128_mask with __P cast
 * to __v4si * and _mm_setzero_si128() as the second argument.
 * NOTE(review): the final argument is presumably the mask __U -- confirm. */
5001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5002 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5004 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5006 _mm_setzero_si128 (),
/* Masked load: calls __builtin_ia32_movdqa32load256_mask with __P cast to
 * __v8si *.
 * NOTE(review): remaining arguments are presumably __W and __U, and the
 * builtin's alignment requirement on __P is not stated here -- confirm. */
5011 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5012 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5014 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
/* Zero-masked load: calls __builtin_ia32_movdqa32load256_mask with __P cast
 * to __v8si * and _mm256_setzero_si256() as the second argument.
 * NOTE(review): the final argument is presumably the mask __U -- confirm. */
5020 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5021 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5023 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5025 _mm256_setzero_si256 (),
/* Masked store: calls __builtin_ia32_movdqa32store128_mask with __P cast to
 * __v4si *; returns nothing.
 * NOTE(review): remaining arguments are presumably __A and __U -- confirm. */
5030 static __inline__ void __DEFAULT_FN_ATTRS128
5031 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5033 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
/* Masked store: calls __builtin_ia32_movdqa32store256_mask with __P cast to
 * __v8si *; returns nothing.
 * NOTE(review): remaining arguments are presumably __A and __U -- confirm. */
5038 static __inline__ void __DEFAULT_FN_ATTRS256
5039 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5041 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
/* Masked move: returns the result of __builtin_ia32_selectq_128 keyed on
 * mask __U.
 * NOTE(review): the two vector operands are presumably __A then __W --
 * confirm. */
5046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5047 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5049 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
/* Zero-masked move: __builtin_ia32_selectq_128 keyed on mask __U with
 * _mm_setzero_si128() as its last operand.
 * NOTE(review): the middle operand is presumably __A -- confirm. */
5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5057 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5059 (__v2di) _mm_setzero_si128 ());
/* Masked move: returns the result of __builtin_ia32_selectq_256 keyed on
 * mask __U.
 * NOTE(review): the two vector operands are presumably __A then __W --
 * confirm. */
5062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5063 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5065 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* Zero-masked move: __builtin_ia32_selectq_256 keyed on mask __U with
 * _mm256_setzero_si256() as its last operand.
 * NOTE(review): the middle operand is presumably __A -- confirm. */
5070 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5071 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5073 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5075 (__v4di) _mm256_setzero_si256 ());
/* Masked load: calls __builtin_ia32_movdqa64load128_mask with __P cast to
 * __v2di *.
 * NOTE(review): remaining arguments are presumably __W and __U, and the
 * builtin's alignment requirement on __P is not stated here -- confirm. */
5078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5079 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5081 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
/* Zero-masked load: calls __builtin_ia32_movdqa64load128_mask with __P cast
 * to __v2di * and _mm_setzero_si128() as the second argument.
 * NOTE(review): the final argument is presumably the mask __U -- confirm. */
5087 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5088 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5090 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5092 _mm_setzero_si128 (),
/* Masked load: calls __builtin_ia32_movdqa64load256_mask with __P cast to
 * __v4di *.
 * NOTE(review): remaining arguments are presumably __W and __U, and the
 * builtin's alignment requirement on __P is not stated here -- confirm. */
5097 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5098 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5100 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
/* Zero-masked load: calls __builtin_ia32_movdqa64load256_mask with __P cast
 * to __v4di * and _mm256_setzero_si256() as the second argument.
 * NOTE(review): the final argument is presumably the mask __U -- confirm. */
5106 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5107 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5109 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5111 _mm256_setzero_si256 (),
/* Masked store: calls __builtin_ia32_movdqa64store128_mask with __P cast to
 * __v2di *; returns nothing.
 * NOTE(review): remaining arguments are presumably __A and __U -- confirm. */
5116 static __inline__ void __DEFAULT_FN_ATTRS128
5117 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5119 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
/* Masked store: calls __builtin_ia32_movdqa64store256_mask with __P cast to
 * __v4di *; returns nothing.
 * NOTE(review): remaining arguments are presumably __A and __U -- confirm. */
5124 static __inline__ void __DEFAULT_FN_ATTRS256
5125 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5127 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
/* Masked form of _mm_movedup_pd: passes mask __U and _mm_movedup_pd(__A) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
5132 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5133 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5135 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5136 (__v2df)_mm_movedup_pd(__A),
/* Zero-masked form of _mm_movedup_pd: __builtin_ia32_selectpd_128 receives
 * mask __U, _mm_movedup_pd(__A), and _mm_setzero_pd() as its last operand. */
5140 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5141 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5143 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5144 (__v2df)_mm_movedup_pd(__A),
5145 (__v2df)_mm_setzero_pd());
/* Masked form of _mm256_movedup_pd: passes mask __U and
 * _mm256_movedup_pd(__A) to __builtin_ia32_selectpd_256.
 * NOTE(review): the last select operand is presumably __W -- confirm. */
5148 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5149 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5151 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5152 (__v4df)_mm256_movedup_pd(__A),
/* Zero-masked form of _mm256_movedup_pd: __builtin_ia32_selectpd_256 receives
 * mask __U, _mm256_movedup_pd(__A), and _mm256_setzero_pd() as its last
 * operand. */
5156 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5157 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5159 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5160 (__v4df)_mm256_movedup_pd(__A),
5161 (__v4df)_mm256_setzero_pd());
/* Masked broadcast: passes mask __M and _mm_set1_epi32(__A) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the last select operand is presumably __O -- confirm. */
5164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5165 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5167 return (__m128i)__builtin_ia32_selectd_128(__M,
5168 (__v4si) _mm_set1_epi32(__A),
/* Zero-masked broadcast: __builtin_ia32_selectd_128 receives mask __M,
 * _mm_set1_epi32(__A), and _mm_setzero_si128() as its last operand. */
5172 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5173 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
5175 return (__m128i)__builtin_ia32_selectd_128(__M,
5176 (__v4si) _mm_set1_epi32(__A),
5177 (__v4si)_mm_setzero_si128());
/* Masked broadcast: passes mask __M and _mm256_set1_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the last select operand is presumably __O -- confirm. */
5180 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5181 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5183 return (__m256i)__builtin_ia32_selectd_256(__M,
5184 (__v8si) _mm256_set1_epi32(__A),
/* Zero-masked broadcast: __builtin_ia32_selectd_256 receives mask __M,
 * _mm256_set1_epi32(__A), and _mm256_setzero_si256() as its last operand. */
5188 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5189 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5191 return (__m256i)__builtin_ia32_selectd_256(__M,
5192 (__v8si) _mm256_set1_epi32(__A),
5193 (__v8si)_mm256_setzero_si256());
/* Masked broadcast of a long long: passes mask __M and _mm_set1_epi64x(__A)
 * to __builtin_ia32_selectq_128.
 * NOTE(review): the last select operand is presumably __O -- confirm. */
5197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5198 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5200 return (__m128i) __builtin_ia32_selectq_128(__M,
5201 (__v2di) _mm_set1_epi64x(__A),
/* Zero-masked broadcast of a long long: __builtin_ia32_selectq_128 receives
 * mask __M, _mm_set1_epi64x(__A), and _mm_setzero_si128() as its last
 * operand. */
5205 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5206 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5208 return (__m128i) __builtin_ia32_selectq_128(__M,
5209 (__v2di) _mm_set1_epi64x(__A),
5210 (__v2di) _mm_setzero_si128());
/* Masked broadcast of a long long: passes mask __M and
 * _mm256_set1_epi64x(__A) to __builtin_ia32_selectq_256.
 * NOTE(review): the last select operand is presumably __O -- confirm. */
5213 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5214 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5216 return (__m256i) __builtin_ia32_selectq_256(__M,
5217 (__v4di) _mm256_set1_epi64x(__A),
/* Zero-masked broadcast of a long long: __builtin_ia32_selectq_256 receives
 * mask __M, _mm256_set1_epi64x(__A), and _mm256_setzero_si256() as its last
 * operand. */
5221 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5222 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5224 return (__m256i) __builtin_ia32_selectq_256(__M,
5225 (__v4di) _mm256_set1_epi64x(__A),
5226 (__v4di) _mm256_setzero_si256());
/* Wraps __builtin_ia32_fixupimmpd128_mask with A and B cast to __v2df, C cast
 * to __v2di, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably an all-ones
 * __mmask8 -- confirm. */
5229 #define _mm_fixupimm_pd(A, B, C, imm) \
5230 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5231 (__v2df)(__m128d)(B), \
5232 (__v2di)(__m128i)(C), (int)(imm), \
/* Wraps __builtin_ia32_fixupimmpd128_mask with A and B cast to __v2df, C cast
 * to __v2di, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably U -- confirm. */
5235 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
5236 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5237 (__v2df)(__m128d)(B), \
5238 (__v2di)(__m128i)(C), (int)(imm), \
/* Wraps __builtin_ia32_fixupimmpd128_maskz with A and B cast to __v2df, C
 * cast to __v2di, imm cast to int, and U cast to __mmask8. The whole
 * expansion is parenthesized so the __m128d cast is applied before any
 * operator written after the macro invocation. */
5241 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
5242 ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
5243 (__v2df)(__m128d)(B), \
5244 (__v2di)(__m128i)(C), \
5245 (int)(imm), (__mmask8)(U)))
/* Wraps __builtin_ia32_fixupimmpd256_mask with A and B cast to __v4df, C cast
 * to __v4di, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably an all-ones
 * __mmask8 -- confirm. */
5247 #define _mm256_fixupimm_pd(A, B, C, imm) \
5248 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5249 (__v4df)(__m256d)(B), \
5250 (__v4di)(__m256i)(C), (int)(imm), \
/* Wraps __builtin_ia32_fixupimmpd256_mask with A and B cast to __v4df, C cast
 * to __v4di, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably U -- confirm. */
5253 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
5254 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5255 (__v4df)(__m256d)(B), \
5256 (__v4di)(__m256i)(C), (int)(imm), \
/* Wraps __builtin_ia32_fixupimmpd256_maskz with A and B cast to __v4df, C
 * cast to __v4di, imm cast to int, and U cast to __mmask8. The whole
 * expansion is parenthesized so the __m256d cast is applied before any
 * operator written after the macro invocation. */
5259 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
5260 ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
5261 (__v4df)(__m256d)(B), \
5262 (__v4di)(__m256i)(C), \
5263 (int)(imm), (__mmask8)(U)))
/* Wraps __builtin_ia32_fixupimmps128_mask with A and B cast to __v4sf, C cast
 * to __v4si, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably an all-ones
 * __mmask8 -- confirm. */
5265 #define _mm_fixupimm_ps(A, B, C, imm) \
5266 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5267 (__v4sf)(__m128)(B), \
5268 (__v4si)(__m128i)(C), (int)(imm), \
/* Wraps __builtin_ia32_fixupimmps128_mask with A and B cast to __v4sf, C cast
 * to __v4si, and imm cast to int.
 * NOTE(review): the trailing mask argument is presumably U -- confirm. */
5271 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
5272 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5273 (__v4sf)(__m128)(B), \
5274 (__v4si)(__m128i)(C), (int)(imm), \
/* Zero-masked single-precision form: forwards A, B (as __v4sf), C (as __v4si)
 * and imm (as int) to __builtin_ia32_fixupimmps128_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
5277 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
5278 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5279 (__v4sf)(__m128)(B), \
5280 (__v4si)(__m128i)(C), (int)(imm), \
/* Unmasked 256-bit single-precision form: forwards A, B (as __v8sf), C (as
 * __v8si) and imm (as int) to __builtin_ia32_fixupimmps256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
5283 #define _mm256_fixupimm_ps(A, B, C, imm) \
5284 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5285 (__v8sf)(__m256)(B), \
5286 (__v8si)(__m256i)(C), (int)(imm), \
/* Masked 256-bit single-precision form: forwards A, B (as __v8sf), C (as
 * __v8si) and imm (as int) to __builtin_ia32_fixupimmps256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
5289 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
5290 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5291 (__v8sf)(__m256)(B), \
5292 (__v8si)(__m256i)(C), (int)(imm), \
/* Zero-masked 256-bit single-precision form: forwards A, B (as __v8sf), C (as
 * __v8si) and imm (as int) to __builtin_ia32_fixupimmps256_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
5295 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
5296 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5297 (__v8sf)(__m256)(B), \
5298 (__v8si)(__m256i)(C), (int)(imm), \
/* Masked load of a __m128d from __P via __builtin_ia32_loadapd128_mask. The
 * cast to (__v2df *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U); alignment requirements of __P are not shown
 * here -- confirm. */
5301 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5302 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5304 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
/* Zero-masked load of a __m128d from __P via __builtin_ia32_loadapd128_mask.
 * The cast to (__v2df *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably a zero vector and __U) -- confirm. */
5309 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5310 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5312 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
/* Masked load of a __m256d from __P via __builtin_ia32_loadapd256_mask. The
 * cast to (__v4df *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5318 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5319 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5321 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
/* Zero-masked load of a __m256d from __P via __builtin_ia32_loadapd256_mask,
 * with _mm256_setzero_pd() supplied as one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5326 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5327 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5329 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5331 _mm256_setzero_pd (),
/* Masked load of a __m128 from __P via __builtin_ia32_loadaps128_mask. The
 * cast to (__v4sf *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5335 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5336 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5338 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
/* Zero-masked load of a __m128 from __P via __builtin_ia32_loadaps128_mask.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably a zero vector and __U) -- confirm. */
5343 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5344 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5346 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
/* Masked load of a __m256 from __P via __builtin_ia32_loadaps256_mask. The
 * cast to (__v8sf *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5352 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5353 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5355 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
/* Zero-masked load of a __m256 from __P via __builtin_ia32_loadaps256_mask,
 * with _mm256_setzero_ps() supplied as one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5360 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5361 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5363 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5365 _mm256_setzero_ps (),
/* Masked load of 64-bit integer lanes into a __m128i from __P via
 * __builtin_ia32_loaddqudi128_mask. The cast to (__v2di *) drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5369 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5370 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5372 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
/* Zero-masked load of 64-bit integer lanes into a __m128i from __P via
 * __builtin_ia32_loaddqudi128_mask, with _mm_setzero_si128() supplied as one
 * of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5377 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5378 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5380 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5382 _mm_setzero_si128 (),
/* Masked load of 64-bit integer lanes into a __m256i from __P via
 * __builtin_ia32_loaddqudi256_mask. The cast to (__v4di *) drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5386 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5387 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5389 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
/* Zero-masked load of 64-bit integer lanes into a __m256i from __P via
 * __builtin_ia32_loaddqudi256_mask, with _mm256_setzero_si256() supplied as
 * one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5395 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5397 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5399 _mm256_setzero_si256 (),
/* Masked load of 32-bit integer lanes into a __m128i from __P via
 * __builtin_ia32_loaddqusi128_mask. The cast to (__v4si *) drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5403 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5404 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5406 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
/* Zero-masked load of 32-bit integer lanes into a __m128i from __P via
 * __builtin_ia32_loaddqusi128_mask, with _mm_setzero_si128() supplied as one
 * of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5411 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5412 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5414 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5416 _mm_setzero_si128 (),
/* Masked load of 32-bit integer lanes into a __m256i from __P via
 * __builtin_ia32_loaddqusi256_mask. The cast to (__v8si *) drops the const
 * qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5420 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5421 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5423 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
/* Zero-masked load of 32-bit integer lanes into a __m256i from __P via
 * __builtin_ia32_loaddqusi256_mask, with _mm256_setzero_si256() supplied as
 * one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5428 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5429 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5431 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5433 _mm256_setzero_si256 (),
/* Masked load of a __m128d from __P via __builtin_ia32_loadupd128_mask. The
 * cast to (__v2df *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5437 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5438 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5440 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
/* Zero-masked load of a __m128d from __P via __builtin_ia32_loadupd128_mask.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably a zero vector and __U) -- confirm. */
5445 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5446 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5448 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
/* Masked load of a __m256d from __P via __builtin_ia32_loadupd256_mask. The
 * cast to (__v4df *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5454 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5455 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5457 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
/* Zero-masked load of a __m256d from __P via __builtin_ia32_loadupd256_mask,
 * with _mm256_setzero_pd() supplied as one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5462 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5463 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5465 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5467 _mm256_setzero_pd (),
/* Masked load of a __m128 from __P via __builtin_ia32_loadups128_mask. The
 * cast to (__v4sf *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5471 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5472 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5474 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
/* Zero-masked load of a __m128 from __P via __builtin_ia32_loadups128_mask.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably a zero vector and __U) -- confirm. */
5479 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5480 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5482 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
/* Masked load of a __m256 from __P via __builtin_ia32_loadups256_mask. The
 * cast to (__v8sf *) drops the const qualifier of __P.
 * NOTE(review): the remaining operands are missing from this listing
 * (presumably __W and __U) -- confirm. */
5488 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5489 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5491 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
/* Zero-masked load of a __m256 from __P via __builtin_ia32_loadups256_mask,
 * with _mm256_setzero_ps() supplied as one of the vector operands.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5496 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5497 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5499 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5501 _mm256_setzero_ps (),
/* Masked store to __P (viewed as __v2df *) via
 * __builtin_ia32_storeapd128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5505 static __inline__ void __DEFAULT_FN_ATTRS128
5506 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5508 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
/* Masked store to __P (viewed as __v4df *) via
 * __builtin_ia32_storeapd256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5513 static __inline__ void __DEFAULT_FN_ATTRS256
5514 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5516 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
/* Masked store to __P (viewed as __v4sf *) via
 * __builtin_ia32_storeaps128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5521 static __inline__ void __DEFAULT_FN_ATTRS128
5522 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5524 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
/* Masked store to __P (viewed as __v8sf *) via
 * __builtin_ia32_storeaps256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5529 static __inline__ void __DEFAULT_FN_ATTRS256
5530 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5532 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
/* Masked store to __P (viewed as __v2di *) via
 * __builtin_ia32_storedqudi128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5537 static __inline__ void __DEFAULT_FN_ATTRS128
5538 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5540 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
/* Masked store to __P (viewed as __v4di *) via
 * __builtin_ia32_storedqudi256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5545 static __inline__ void __DEFAULT_FN_ATTRS256
5546 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5548 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
/* Masked store to __P (viewed as __v4si *) via
 * __builtin_ia32_storedqusi128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5553 static __inline__ void __DEFAULT_FN_ATTRS128
5554 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5556 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
/* Masked store to __P (viewed as __v8si *) via
 * __builtin_ia32_storedqusi256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5561 static __inline__ void __DEFAULT_FN_ATTRS256
5562 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5564 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
/* Masked store to __P (viewed as __v2df *) via
 * __builtin_ia32_storeupd128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5569 static __inline__ void __DEFAULT_FN_ATTRS128
5570 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5572 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
/* Masked store to __P (viewed as __v4df *) via
 * __builtin_ia32_storeupd256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5577 static __inline__ void __DEFAULT_FN_ATTRS256
5578 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5580 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
/* Masked store to __P (viewed as __v4sf *) via
 * __builtin_ia32_storeups128_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5585 static __inline__ void __DEFAULT_FN_ATTRS128
5586 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5588 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
/* Masked store to __P (viewed as __v8sf *) via
 * __builtin_ia32_storeups256_mask; returns nothing.
 * NOTE(review): the value and mask operands are missing from this listing
 * (presumably __A and __U) -- confirm. */
5593 static __inline__ void __DEFAULT_FN_ATTRS256
5594 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5596 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
/* Masked unpackhi: passes mask __U and _mm_unpackhi_pd(__A, __B) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2df)__W) -- confirm. */
5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5605 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5606 (__v2df)_mm_unpackhi_pd(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectpd_128 chooses under mask __U
 * between _mm_unpackhi_pd(__A, __B) and the zero vector from
 * _mm_setzero_pd(). */
5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5613 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5614 (__v2df)_mm_unpackhi_pd(__A, __B),
5615 (__v2df)_mm_setzero_pd());
/* Masked unpackhi: passes mask __U and _mm256_unpackhi_pd(__A, __B) to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4df)__W) -- confirm. */
5618 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5619 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5621 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5622 (__v4df)_mm256_unpackhi_pd(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectpd_256 chooses under mask __U
 * between _mm256_unpackhi_pd(__A, __B) and the zero vector from
 * _mm256_setzero_pd(). */
5626 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5627 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5629 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5630 (__v4df)_mm256_unpackhi_pd(__A, __B),
5631 (__v4df)_mm256_setzero_pd());
/* Masked unpackhi: passes mask __U and _mm_unpackhi_ps(__A, __B) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4sf)__W) -- confirm. */
5634 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5635 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5637 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5638 (__v4sf)_mm_unpackhi_ps(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectps_128 chooses under mask __U
 * between _mm_unpackhi_ps(__A, __B) and the zero vector from
 * _mm_setzero_ps(). */
5642 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5643 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5645 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5646 (__v4sf)_mm_unpackhi_ps(__A, __B),
5647 (__v4sf)_mm_setzero_ps());
/* Masked unpackhi: passes mask __U and _mm256_unpackhi_ps(__A, __B) to
 * __builtin_ia32_selectps_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8sf)__W) -- confirm. */
5650 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5651 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5653 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5654 (__v8sf)_mm256_unpackhi_ps(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectps_256 chooses under mask __U
 * between _mm256_unpackhi_ps(__A, __B) and the zero vector from
 * _mm256_setzero_ps(). */
5658 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5659 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5661 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5662 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5663 (__v8sf)_mm256_setzero_ps());
/* Masked unpacklo: passes mask __U and _mm_unpacklo_pd(__A, __B) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2df)__W) -- confirm. */
5666 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5667 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5669 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5670 (__v2df)_mm_unpacklo_pd(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectpd_128 chooses under mask __U
 * between _mm_unpacklo_pd(__A, __B) and the zero vector from
 * _mm_setzero_pd(). */
5674 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5675 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5677 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5678 (__v2df)_mm_unpacklo_pd(__A, __B),
5679 (__v2df)_mm_setzero_pd());
/* Masked unpacklo: passes mask __U and _mm256_unpacklo_pd(__A, __B) to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4df)__W) -- confirm. */
5682 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5683 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5685 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5686 (__v4df)_mm256_unpacklo_pd(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectpd_256 chooses under mask __U
 * between _mm256_unpacklo_pd(__A, __B) and the zero vector from
 * _mm256_setzero_pd(). */
5690 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5691 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5693 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5694 (__v4df)_mm256_unpacklo_pd(__A, __B),
5695 (__v4df)_mm256_setzero_pd());
/* Masked unpacklo: passes mask __U and _mm_unpacklo_ps(__A, __B) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4sf)__W) -- confirm. */
5698 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5699 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5701 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5702 (__v4sf)_mm_unpacklo_ps(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectps_128 chooses under mask __U
 * between _mm_unpacklo_ps(__A, __B) and the zero vector from
 * _mm_setzero_ps(). */
5706 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5707 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5709 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5710 (__v4sf)_mm_unpacklo_ps(__A, __B),
5711 (__v4sf)_mm_setzero_ps());
/* Masked unpacklo: passes mask __U and _mm256_unpacklo_ps(__A, __B) to
 * __builtin_ia32_selectps_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8sf)__W) -- confirm. */
5714 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5715 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5717 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5718 (__v8sf)_mm256_unpacklo_ps(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectps_256 chooses under mask __U
 * between _mm256_unpacklo_ps(__A, __B) and the zero vector from
 * _mm256_setzero_ps(). */
5722 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5723 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5725 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5726 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5727 (__v8sf)_mm256_setzero_ps());
/* Unmasked rcp14: forwards __A (as __v2df) to __builtin_ia32_rcp14pd128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably a zero vector and an all-ones mask); the "14" in the
 * name presumably refers to the approximation precision -- confirm. */
5730 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5731 _mm_rcp14_pd (__m128d __A)
5733 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Masked rcp14: forwards __A (as __v2df) to __builtin_ia32_rcp14pd128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably __W and __U) -- confirm. */
5739 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5740 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5742 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Zero-masked rcp14: forwards __A (as __v2df) to
 * __builtin_ia32_rcp14pd128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably a zero vector and __U) -- confirm. */
5747 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5748 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5750 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
/* Unmasked rcp14: forwards __A (as __v4df) and _mm256_setzero_pd() to
 * __builtin_ia32_rcp14pd256_mask.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
5756 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5757 _mm256_rcp14_pd (__m256d __A)
5759 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5761 _mm256_setzero_pd (),
/* Masked rcp14: forwards __A (as __v4df) to __builtin_ia32_rcp14pd256_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably __W and __U) -- confirm. */
5765 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5766 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5768 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
/* Zero-masked rcp14: forwards __A (as __v4df) and _mm256_setzero_pd() to
 * __builtin_ia32_rcp14pd256_mask.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5773 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5774 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5776 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5778 _mm256_setzero_pd (),
/* Unmasked rcp14: forwards __A (as __v4sf) to __builtin_ia32_rcp14ps128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably a zero vector and an all-ones mask) -- confirm. */
5782 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5783 _mm_rcp14_ps (__m128 __A)
5785 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Masked rcp14: forwards __A (as __v4sf) to __builtin_ia32_rcp14ps128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably __W and __U) -- confirm. */
5791 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5792 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5794 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Zero-masked rcp14: forwards __A (as __v4sf) to
 * __builtin_ia32_rcp14ps128_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably a zero vector and __U) -- confirm. */
5799 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5800 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
5802 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
/* Unmasked rcp14: forwards __A (as __v8sf) and _mm256_setzero_ps() to
 * __builtin_ia32_rcp14ps256_mask.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
5808 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5809 _mm256_rcp14_ps (__m256 __A)
5811 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5813 _mm256_setzero_ps (),
/* Masked rcp14: forwards __A (as __v8sf) to __builtin_ia32_rcp14ps256_mask.
 * NOTE(review): the pass-through and mask operands are missing from this
 * listing (presumably __W and __U) -- confirm. */
5817 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5818 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
5820 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
/* Zero-masked rcp14: forwards __A (as __v8sf) and _mm256_setzero_ps() to
 * __builtin_ia32_rcp14ps256_mask.
 * NOTE(review): the trailing mask operand is missing from this listing
 * (presumably __U) -- confirm. */
5825 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5826 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
5828 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
5830 _mm256_setzero_ps (),
/* Masked permute: __builtin_ia32_selectpd_128 chooses under mask U between
 * _mm_permute_pd(X, C) and W; result is cast to __m128d. */
5834 #define _mm_mask_permute_pd(W, U, X, C) \
5835 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
5836 (__v2df)_mm_permute_pd((X), (C)), \
5837 (__v2df)(__m128d)(W))
/* Zero-masked permute: __builtin_ia32_selectpd_128 chooses under mask U
 * between _mm_permute_pd(X, C) and the zero vector from _mm_setzero_pd(). */
5839 #define _mm_maskz_permute_pd(U, X, C) \
5840 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
5841 (__v2df)_mm_permute_pd((X), (C)), \
5842 (__v2df)_mm_setzero_pd())
/* Masked permute: __builtin_ia32_selectpd_256 chooses under mask U between
 * _mm256_permute_pd(X, C) and W; result is cast to __m256d. */
5844 #define _mm256_mask_permute_pd(W, U, X, C) \
5845 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
5846 (__v4df)_mm256_permute_pd((X), (C)), \
5847 (__v4df)(__m256d)(W))
/* Zero-masked permute: __builtin_ia32_selectpd_256 chooses under mask U
 * between _mm256_permute_pd(X, C) and the zero vector from
 * _mm256_setzero_pd(). */
5849 #define _mm256_maskz_permute_pd(U, X, C) \
5850 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
5851 (__v4df)_mm256_permute_pd((X), (C)), \
5852 (__v4df)_mm256_setzero_pd())
/* Masked permute: __builtin_ia32_selectps_128 chooses under mask U between
 * _mm_permute_ps(X, C) and W; result is cast to __m128. */
5854 #define _mm_mask_permute_ps(W, U, X, C) \
5855 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
5856 (__v4sf)_mm_permute_ps((X), (C)), \
5857 (__v4sf)(__m128)(W))
/* Zero-masked permute: __builtin_ia32_selectps_128 chooses under mask U
 * between _mm_permute_ps(X, C) and the zero vector from _mm_setzero_ps(). */
5859 #define _mm_maskz_permute_ps(U, X, C) \
5860 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
5861 (__v4sf)_mm_permute_ps((X), (C)), \
5862 (__v4sf)_mm_setzero_ps())
/* Masked permute: __builtin_ia32_selectps_256 chooses under mask U between
 * _mm256_permute_ps(X, C) and W; result is cast to __m256. */
5864 #define _mm256_mask_permute_ps(W, U, X, C) \
5865 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
5866 (__v8sf)_mm256_permute_ps((X), (C)), \
5867 (__v8sf)(__m256)(W))
/* Zero-masked permute: __builtin_ia32_selectps_256 chooses under mask U
 * between _mm256_permute_ps(X, C) and the zero vector from
 * _mm256_setzero_ps(). */
5869 #define _mm256_maskz_permute_ps(U, X, C) \
5870 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
5871 (__v8sf)_mm256_permute_ps((X), (C)), \
5872 (__v8sf)_mm256_setzero_ps())
/* Masked permutevar: passes mask __U and _mm_permutevar_pd(__A, __C) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2df)__W) -- confirm. */
5874 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5875 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
5877 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5878 (__v2df)_mm_permutevar_pd(__A, __C),
/* Zero-masked permutevar: __builtin_ia32_selectpd_128 chooses under mask __U
 * between _mm_permutevar_pd(__A, __C) and the zero vector from
 * _mm_setzero_pd(). */
5882 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5883 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
5885 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5886 (__v2df)_mm_permutevar_pd(__A, __C),
5887 (__v2df)_mm_setzero_pd());
/* Masked permutevar: passes mask __U and _mm256_permutevar_pd(__A, __C) to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4df)__W) -- confirm. */
5890 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5891 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
5893 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5894 (__v4df)_mm256_permutevar_pd(__A, __C),
/* Zero-masked permutevar: __builtin_ia32_selectpd_256 chooses under mask __U
 * between _mm256_permutevar_pd(__A, __C) and the zero vector from
 * _mm256_setzero_pd(). */
5898 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5899 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
5901 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5902 (__v4df)_mm256_permutevar_pd(__A, __C),
5903 (__v4df)_mm256_setzero_pd());
/* Masked permutevar: passes mask __U and _mm_permutevar_ps(__A, __C) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4sf)__W) -- confirm. */
5906 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5907 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
5909 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5910 (__v4sf)_mm_permutevar_ps(__A, __C),
/* Zero-masked permutevar: __builtin_ia32_selectps_128 chooses under mask __U
 * between _mm_permutevar_ps(__A, __C) and the zero vector from
 * _mm_setzero_ps(). */
5914 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5915 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
5917 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5918 (__v4sf)_mm_permutevar_ps(__A, __C),
5919 (__v4sf)_mm_setzero_ps());
/* Masked permutevar: passes mask __U and _mm256_permutevar_ps(__A, __C) to
 * __builtin_ia32_selectps_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8sf)__W) -- confirm. */
5922 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5923 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
5925 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5926 (__v8sf)_mm256_permutevar_ps(__A, __C),
/* Zero-masked permutevar: __builtin_ia32_selectps_256 chooses under mask __U
 * between _mm256_permutevar_ps(__A, __C) and the zero vector from
 * _mm256_setzero_ps(). */
5930 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5931 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
5933 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5934 (__v8sf)_mm256_permutevar_ps(__A, __C),
5935 (__v8sf)_mm256_setzero_ps());
/* Returns the __mmask8 from _mm_cmpneq_epi32_mask comparing
 * _mm_and_si128(__A, __B) against the zero vector, i.e. it tests the bitwise
 * AND of the inputs for "not equal to zero" per 32-bit element. */
5938 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5939 _mm_test_epi32_mask (__m128i __A, __m128i __B)
5941 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
/* Masked variant: returns _mm_mask_cmpneq_epi32_mask with incoming mask __U,
 * comparing _mm_and_si128(__A, __B) against the zero vector. */
5944 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5945 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5947 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
5948 _mm_setzero_si128());
/* Returns the __mmask8 from _mm256_cmpneq_epi32_mask comparing
 * _mm256_and_si256(__A, __B) against the zero vector. */
5951 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5952 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
5954 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
5955 _mm256_setzero_si256());
/* Masked variant: returns _mm256_mask_cmpneq_epi32_mask with incoming mask
 * __U, comparing _mm256_and_si256(__A, __B) against the zero vector. */
5958 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5959 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5961 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
5962 _mm256_setzero_si256());
/* Returns the __mmask8 from _mm_cmpneq_epi64_mask comparing
 * _mm_and_si128(__A, __B) against the zero vector, i.e. it tests the bitwise
 * AND of the inputs for "not equal to zero" per 64-bit element. */
5965 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5966 _mm_test_epi64_mask (__m128i __A, __m128i __B)
5968 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
/* Masked variant: returns _mm_mask_cmpneq_epi64_mask with incoming mask __U,
 * comparing _mm_and_si128(__A, __B) against the zero vector. */
5971 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5972 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5974 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
5975 _mm_setzero_si128());
/* Returns the __mmask8 from _mm256_cmpneq_epi64_mask comparing
 * _mm256_and_si256(__A, __B) against the zero vector. */
5978 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5979 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
5981 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
5982 _mm256_setzero_si256());
/* Masked variant: returns _mm256_mask_cmpneq_epi64_mask with incoming mask
 * __U, comparing _mm256_and_si256(__A, __B) against the zero vector. */
5985 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
5986 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5988 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
5989 _mm256_setzero_si256());
/* Returns the __mmask8 from _mm_cmpeq_epi32_mask comparing
 * _mm_and_si128(__A, __B) against the zero vector, i.e. it tests the bitwise
 * AND of the inputs for "equal to zero" per 32-bit element. */
5992 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5993 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
5995 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
/* Masked variant: returns _mm_mask_cmpeq_epi32_mask with incoming mask __U,
 * comparing _mm_and_si128(__A, __B) against the zero vector. */
5998 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
5999 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6001 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6002 _mm_setzero_si128());
/* Returns the __mmask8 from _mm256_cmpeq_epi32_mask comparing
 * _mm256_and_si256(__A, __B) against the zero vector. */
6005 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6006 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6008 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6009 _mm256_setzero_si256());
/* Masked variant: returns _mm256_mask_cmpeq_epi32_mask with incoming mask
 * __U, comparing _mm256_and_si256(__A, __B) against the zero vector. */
6012 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6013 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6015 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6016 _mm256_setzero_si256());
/* Returns the __mmask8 from _mm_cmpeq_epi64_mask comparing
 * _mm_and_si128(__A, __B) against the zero vector, i.e. it tests the bitwise
 * AND of the inputs for "equal to zero" per 64-bit element. */
6019 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6020 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6022 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
/* Masked variant: returns _mm_mask_cmpeq_epi64_mask with incoming mask __U,
 * comparing _mm_and_si128(__A, __B) against the zero vector. */
6025 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6026 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6028 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6029 _mm_setzero_si128());
/* Returns the __mmask8 from _mm256_cmpeq_epi64_mask comparing
 * _mm256_and_si256(__A, __B) against the zero vector. */
6032 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6033 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6035 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6036 _mm256_setzero_si256());
/* Masked variant: returns _mm256_mask_cmpeq_epi64_mask with incoming mask
 * __U, comparing _mm256_and_si256(__A, __B) against the zero vector. */
6039 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6040 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6042 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6043 _mm256_setzero_si256());
/* Masked unpackhi: passes mask __U and _mm_unpackhi_epi32(__A, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4si)__W) -- confirm. */
6046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6047 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6049 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6050 (__v4si)_mm_unpackhi_epi32(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectd_128 chooses under mask __U
 * between _mm_unpackhi_epi32(__A, __B) and the zero vector from
 * _mm_setzero_si128(). */
6054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6055 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6057 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6058 (__v4si)_mm_unpackhi_epi32(__A, __B),
6059 (__v4si)_mm_setzero_si128());
/* Masked unpackhi: passes mask __U and _mm256_unpackhi_epi32(__A, __B) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8si)__W) -- confirm. */
6062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6063 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6065 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6066 (__v8si)_mm256_unpackhi_epi32(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectd_256 chooses under mask __U
 * between _mm256_unpackhi_epi32(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). */
6070 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6071 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6073 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6074 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6075 (__v8si)_mm256_setzero_si256());
/* Masked unpackhi: passes mask __U and _mm_unpackhi_epi64(__A, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2di)__W) -- confirm. */
6078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6079 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6081 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6082 (__v2di)_mm_unpackhi_epi64(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectq_128 chooses under mask __U
 * between _mm_unpackhi_epi64(__A, __B) and the zero vector from
 * _mm_setzero_si128(). */
6086 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6087 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6089 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6090 (__v2di)_mm_unpackhi_epi64(__A, __B),
6091 (__v2di)_mm_setzero_si128());
/* Masked unpackhi: passes mask __U and _mm256_unpackhi_epi64(__A, __B) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4di)__W) -- confirm. */
6094 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6095 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6097 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6098 (__v4di)_mm256_unpackhi_epi64(__A, __B),
/* Zero-masked unpackhi: __builtin_ia32_selectq_256 chooses under mask __U
 * between _mm256_unpackhi_epi64(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). */
6102 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6103 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6105 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6106 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6107 (__v4di)_mm256_setzero_si256());
/* Masked unpacklo: passes mask __U and _mm_unpacklo_epi32(__A, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4si)__W) -- confirm. */
6110 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6111 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6113 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6114 (__v4si)_mm_unpacklo_epi32(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectd_128 chooses under mask __U
 * between _mm_unpacklo_epi32(__A, __B) and the zero vector from
 * _mm_setzero_si128(). */
6118 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6119 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6121 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6122 (__v4si)_mm_unpacklo_epi32(__A, __B),
6123 (__v4si)_mm_setzero_si128());
/* Masked unpacklo: passes mask __U and _mm256_unpacklo_epi32(__A, __B) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8si)__W) -- confirm. */
6126 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6127 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6129 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6130 (__v8si)_mm256_unpacklo_epi32(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectd_256 chooses under mask __U
 * between _mm256_unpacklo_epi32(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). */
6134 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6135 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6137 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6138 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6139 (__v8si)_mm256_setzero_si256());
/* Masked unpacklo: passes mask __U and _mm_unpacklo_epi64(__A, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2di)__W) -- confirm. */
6142 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6143 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6145 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6146 (__v2di)_mm_unpacklo_epi64(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectq_128 chooses under mask __U
 * between _mm_unpacklo_epi64(__A, __B) and the zero vector from
 * _mm_setzero_si128(). */
6150 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6151 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6153 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6154 (__v2di)_mm_unpacklo_epi64(__A, __B),
6155 (__v2di)_mm_setzero_si128());
/* Masked unpacklo: passes mask __U and _mm256_unpacklo_epi64(__A, __B) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4di)__W) -- confirm. */
6158 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6159 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6161 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6162 (__v4di)_mm256_unpacklo_epi64(__A, __B),
/* Zero-masked unpacklo: __builtin_ia32_selectq_256 chooses under mask __U
 * between _mm256_unpacklo_epi64(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). */
6166 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6167 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6169 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6170 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6171 (__v4di)_mm256_setzero_si256());
/* Masked sra: passes mask __U and _mm_sra_epi32(__A, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4si)__W) -- confirm. */
6174 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6175 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6177 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6178 (__v4si)_mm_sra_epi32(__A, __B),
/* Zero-masked sra: __builtin_ia32_selectd_128 chooses under mask __U between
 * _mm_sra_epi32(__A, __B) and the zero vector from _mm_setzero_si128(). */
6182 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6183 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6185 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6186 (__v4si)_mm_sra_epi32(__A, __B),
6187 (__v4si)_mm_setzero_si128());
/* Masked sra: passes mask __U and _mm256_sra_epi32(__A, __B) to
 * __builtin_ia32_selectd_256. Note the count operand __B is a __m128i even
 * though the data is 256-bit.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8si)__W) -- confirm. */
6190 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6191 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6193 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6194 (__v8si)_mm256_sra_epi32(__A, __B),
/* Zero-masked sra: __builtin_ia32_selectd_256 chooses under mask __U between
 * _mm256_sra_epi32(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). The count operand __B is a __m128i. */
6198 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6199 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6201 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6202 (__v8si)_mm256_sra_epi32(__A, __B),
6203 (__v8si)_mm256_setzero_si256());
/* Masked srai: passes mask __U and _mm_srai_epi32(__A, __B) to
 * __builtin_ia32_selectd_128; __B is a plain int count.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4si)__W) -- confirm. */
6206 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6207 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6209 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6210 (__v4si)_mm_srai_epi32(__A, __B),
/* Zero-masked srai: __builtin_ia32_selectd_128 chooses under mask __U between
 * _mm_srai_epi32(__A, __B) and the zero vector from _mm_setzero_si128(). */
6214 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6215 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6217 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6218 (__v4si)_mm_srai_epi32(__A, __B),
6219 (__v4si)_mm_setzero_si128());
/* Masked srai: passes mask __U and _mm256_srai_epi32(__A, __B) to
 * __builtin_ia32_selectd_256; __B is a plain int count.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v8si)__W) -- confirm. */
6222 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6223 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6225 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6226 (__v8si)_mm256_srai_epi32(__A, __B),
/* Zero-masked srai: __builtin_ia32_selectd_256 chooses under mask __U between
 * _mm256_srai_epi32(__A, __B) and the zero vector from
 * _mm256_setzero_si256(). */
6230 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6231 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6233 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6234 (__v8si)_mm256_srai_epi32(__A, __B),
6235 (__v8si)_mm256_setzero_si256());
/* Returns __builtin_ia32_psraq128 applied to __A and __B, both viewed as
 * __v2di, cast back to __m128i. */
6238 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6239 _mm_sra_epi64(__m128i __A, __m128i __B)
6241 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
/* Masked sra: passes mask __U and _mm_sra_epi64(__A, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2di)__W) -- confirm. The trailing backslashes are line
 * continuations inside a function body, not a macro; they look unnecessary. */
6244 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6245 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6247 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6248 (__v2di)_mm_sra_epi64(__A, __B), \
/* Zero-masked sra: __builtin_ia32_selectq_128 chooses under mask __U between
 * _mm_sra_epi64(__A, __B) and the zero vector from _mm_setzero_si128().
 * NOTE(review): the trailing backslashes are line continuations inside a
 * function body, not a macro; they look unnecessary. */
6252 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6253 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6255 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6256 (__v2di)_mm_sra_epi64(__A, __B), \
6257 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_psraq256 applied to __A (as __v4di) and the 128-bit
 * operand __B (as __v2di), cast back to __m256i. */
6260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6261 _mm256_sra_epi64(__m256i __A, __m128i __B)
6263 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
/* Masked sra: passes mask __U and _mm256_sra_epi64(__A, __B) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4di)__W) -- confirm. The trailing backslashes are line
 * continuations inside a function body, not a macro; they look unnecessary. */
6266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6267 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6269 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6270 (__v4di)_mm256_sra_epi64(__A, __B), \
/* Zero-masked sra: __builtin_ia32_selectq_256 chooses under mask __U between
 * _mm256_sra_epi64(__A, __B) and the zero vector from
 * _mm256_setzero_si256().
 * NOTE(review): the trailing backslashes are line continuations inside a
 * function body, not a macro; they look unnecessary. */
6274 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6275 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6278 (__v4di)_mm256_sra_epi64(__A, __B), \
6279 (__v4di)_mm256_setzero_si256());
/* Returns __builtin_ia32_psraqi128 applied to __A (as __v2di) and the int
 * count __imm, cast back to __m128i. */
6282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6283 _mm_srai_epi64(__m128i __A, int __imm)
6285 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
/* Masked srai: passes mask __U and _mm_srai_epi64(__A, __imm) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v2di)__W) -- confirm. The trailing backslashes are line
 * continuations inside a function body, not a macro; they look unnecessary. */
6288 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6289 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6291 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6292 (__v2di)_mm_srai_epi64(__A, __imm), \
/* Zero-masked srai: __builtin_ia32_selectq_128 chooses under mask __U between
 * _mm_srai_epi64(__A, __imm) and the zero vector from _mm_setzero_si128().
 * NOTE(review): the trailing backslashes are line continuations inside a
 * function body, not a macro; they look unnecessary. */
6296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6297 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6299 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6300 (__v2di)_mm_srai_epi64(__A, __imm), \
6301 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_psraqi256 applied to __A (as __v4di) and the int
 * count __imm, cast back to __m256i. */
6304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6305 _mm256_srai_epi64(__m256i __A, int __imm)
6307 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
/* Masked srai: passes mask __U and _mm256_srai_epi64(__A, __imm) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the fallback operand is missing from this listing
 * (presumably (__v4di)__W) -- confirm. The trailing backslashes are line
 * continuations inside a function body, not a macro; they look unnecessary. */
6310 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6311 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6313 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6314 (__v4di)_mm256_srai_epi64(__A, __imm), \
/* Zero-masked srai: __builtin_ia32_selectq_256 chooses under mask __U between
 * _mm256_srai_epi64(__A, __imm) and the zero vector from
 * _mm256_setzero_si256().
 * NOTE(review): the trailing backslashes are line continuations inside a
 * function body, not a macro; they look unnecessary. */
6318 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6319 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6321 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6322 (__v4di)_mm256_srai_epi64(__A, __imm), \
6323 (__v4di)_mm256_setzero_si256());
/* Unmasked form: forwards A, B, C (as __v4si) and imm (as int) to
 * __builtin_ia32_pternlogd128_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
6326 #define _mm_ternarylogic_epi32(A, B, C, imm) \
6327 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6328 (__v4si)(__m128i)(B), \
6329 (__v4si)(__m128i)(C), (int)(imm), \
/* Masked form: forwards A, B, C (as __v4si) and imm (as int) to
 * __builtin_ia32_pternlogd128_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6332 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
6333 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6334 (__v4si)(__m128i)(B), \
6335 (__v4si)(__m128i)(C), (int)(imm), \
/* Zero-masked form: forwards A, B, C (as __v4si) and imm (as int) to
 * __builtin_ia32_pternlogd128_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6338 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6339 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
6340 (__v4si)(__m128i)(B), \
6341 (__v4si)(__m128i)(C), (int)(imm), \
/* Unmasked 256-bit form: forwards A, B, C (as __v8si) and imm (as int) to
 * __builtin_ia32_pternlogd256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
6344 #define _mm256_ternarylogic_epi32(A, B, C, imm) \
6345 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6346 (__v8si)(__m256i)(B), \
6347 (__v8si)(__m256i)(C), (int)(imm), \
/* Masked 256-bit form: forwards A, B, C (as __v8si) and imm (as int) to
 * __builtin_ia32_pternlogd256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6350 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
6351 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6352 (__v8si)(__m256i)(B), \
6353 (__v8si)(__m256i)(C), (int)(imm), \
/* Zero-masked 256-bit form: forwards A, B, C (as __v8si) and imm (as int) to
 * __builtin_ia32_pternlogd256_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6356 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6357 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
6358 (__v8si)(__m256i)(B), \
6359 (__v8si)(__m256i)(C), (int)(imm), \
/* Unmasked form: forwards A, B, C (as __v2di) and imm (as int) to
 * __builtin_ia32_pternlogq128_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
6362 #define _mm_ternarylogic_epi64(A, B, C, imm) \
6363 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6364 (__v2di)(__m128i)(B), \
6365 (__v2di)(__m128i)(C), (int)(imm), \
/* Masked form: forwards A, B, C (as __v2di) and imm (as int) to
 * __builtin_ia32_pternlogq128_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6368 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
6369 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6370 (__v2di)(__m128i)(B), \
6371 (__v2di)(__m128i)(C), (int)(imm), \
/* Zero-masked form: forwards A, B, C (as __v2di) and imm (as int) to
 * __builtin_ia32_pternlogq128_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6374 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6375 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
6376 (__v2di)(__m128i)(B), \
6377 (__v2di)(__m128i)(C), (int)(imm), \
/* Unmasked 256-bit form: forwards A, B, C (as __v4di) and imm (as int) to
 * __builtin_ia32_pternlogq256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably an all-ones mask) -- confirm. */
6380 #define _mm256_ternarylogic_epi64(A, B, C, imm) \
6381 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6382 (__v4di)(__m256i)(B), \
6383 (__v4di)(__m256i)(C), (int)(imm), \
/* Masked 256-bit form: forwards A, B, C (as __v4di) and imm (as int) to
 * __builtin_ia32_pternlogq256_mask.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6386 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
6387 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6388 (__v4di)(__m256i)(B), \
6389 (__v4di)(__m256i)(C), (int)(imm), \
/* Zero-masked 256-bit form: forwards A, B, C (as __v4di) and imm (as int) to
 * __builtin_ia32_pternlogq256_maskz.
 * NOTE(review): the trailing mask argument is missing from this listing
 * (presumably (__mmask8)(U)) -- confirm. */
6392 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6393 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
6394 (__v4di)(__m256i)(B), \
6395 (__v4di)(__m256i)(C), (int)(imm), \
/* Expands to __builtin_ia32_shuf_f32x4_256 on A and B (as __v8sf) with the
 * int selector imm; result is cast to __m256. */
6400 #define _mm256_shuffle_f32x4(A, B, imm) \
6401 (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
6402 (__v8sf)(__m256)(B), (int)(imm))
/* Masked shuffle: __builtin_ia32_selectps_256 chooses under mask U between
 * _mm256_shuffle_f32x4(A, B, imm) and W. */
6404 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
6405 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6406 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6407 (__v8sf)(__m256)(W))
/* Zero-masked _mm256_shuffle_f32x4: __builtin_ia32_selectps_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
/* Expands to __builtin_ia32_shuf_f64x2_256 on A and B (viewed as __v4df) with
 * the immediate imm, yielding an __m256d.
 * The whole expansion is parenthesized so that a postfix operator written after
 * the macro call applies to the cast result rather than to the builtin call. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
/* Merge-masked _mm256_shuffle_f64x2: __builtin_ia32_selectpd_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))
/* Zero-masked _mm256_shuffle_f64x2: __builtin_ia32_selectpd_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
/* Expands to __builtin_ia32_shuf_i32x4_256 on A and B (viewed as __v8si) with
 * the immediate imm, yielding an __m256i.
 * The whole expansion is parenthesized so that a postfix operator written after
 * the macro call applies to the cast result rather than to the builtin call. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
/* Merge-masked _mm256_shuffle_i32x4: __builtin_ia32_selectd_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))
/* Zero-masked _mm256_shuffle_i32x4: __builtin_ia32_selectd_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
/* Expands to __builtin_ia32_shuf_i64x2_256 on A and B (viewed as __v4di) with
 * the immediate imm, yielding an __m256i.
 * The whole expansion is parenthesized so that a postfix operator written after
 * the macro call applies to the cast result rather than to the builtin call. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
/* Merge-masked _mm256_shuffle_i64x2: __builtin_ia32_selectq_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))
/* Zero-masked _mm256_shuffle_i64x2: __builtin_ia32_selectq_256 chooses, per
 * lane and under mask U, between the shuffle of A and B and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
/* Merge-masked _mm_shuffle_pd: __builtin_ia32_selectpd_128 chooses, per lane
 * and under mask U, between _mm_shuffle_pd(A, B, M) and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
/* Zero-masked _mm_shuffle_pd: __builtin_ia32_selectpd_128 chooses, per lane
 * and under mask U, between _mm_shuffle_pd(A, B, M) and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
/* Merge-masked _mm256_shuffle_pd: __builtin_ia32_selectpd_256 chooses, per lane
 * and under mask U, between _mm256_shuffle_pd(A, B, M) and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
/* Zero-masked _mm256_shuffle_pd: __builtin_ia32_selectpd_256 chooses, per lane
 * and under mask U, between _mm256_shuffle_pd(A, B, M) and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
/* Merge-masked _mm_shuffle_ps: __builtin_ia32_selectps_128 chooses, per lane
 * and under mask U, between _mm_shuffle_ps(A, B, M) and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
/* Zero-masked _mm_shuffle_ps: __builtin_ia32_selectps_128 chooses, per lane
 * and under mask U, between _mm_shuffle_ps(A, B, M) and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
/* Merge-masked _mm256_shuffle_ps: __builtin_ia32_selectps_256 chooses, per lane
 * and under mask U, between _mm256_shuffle_ps(A, B, M) and the vector W.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
/* Zero-masked _mm256_shuffle_ps: __builtin_ia32_selectps_256 chooses, per lane
 * and under mask U, between _mm256_shuffle_ps(A, B, M) and an all-zero vector.
 * The whole expansion is parenthesized so it behaves as a single operand. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
/* Returns, as __m128d, the result of __builtin_ia32_rsqrt14pd128_mask applied
 * to __A (viewed as __v2df). The builtin's remaining arguments are not visible
 * in this excerpt. */
6497 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6498 _mm_rsqrt14_pd (__m128d __A)
6500 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
/* Masked form: calls __builtin_ia32_rsqrt14pd128_mask on __A. The arguments
 * carrying __W and __U are not visible in this excerpt - presumably the
 * pass-through vector and the write mask; TODO confirm. */
6506 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6507 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6509 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
/* Zero-masked form: uses the same __builtin_ia32_rsqrt14pd128_mask builtin on
 * __A. Remaining arguments are not visible in this excerpt - presumably a zero
 * vector and __U; TODO confirm. */
6514 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6515 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6517 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
/* Returns, as __m256d, the result of __builtin_ia32_rsqrt14pd256_mask applied
 * to __A (viewed as __v4df) with _mm256_setzero_pd() as the next operand. The
 * trailing mask argument is not visible in this excerpt. */
6523 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6524 _mm256_rsqrt14_pd (__m256d __A)
6526 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6528 _mm256_setzero_pd (),
/* Masked form: calls __builtin_ia32_rsqrt14pd256_mask on __A. The arguments
 * carrying __W and __U are not visible in this excerpt - presumably the
 * pass-through vector and the write mask; TODO confirm. */
6532 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6533 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6535 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
/* Zero-masked form: calls __builtin_ia32_rsqrt14pd256_mask on __A with
 * _mm256_setzero_pd() as the next operand. The mask argument (presumably __U)
 * is not visible in this excerpt. */
6540 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6541 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6543 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6545 _mm256_setzero_pd (),
/* Returns, as __m128, the result of __builtin_ia32_rsqrt14ps128_mask applied
 * to __A (viewed as __v4sf). The builtin's remaining arguments are not visible
 * in this excerpt. */
6549 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6550 _mm_rsqrt14_ps (__m128 __A)
6552 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
/* Masked form: calls __builtin_ia32_rsqrt14ps128_mask on __A. The arguments
 * carrying __W and __U are not visible in this excerpt - presumably the
 * pass-through vector and the write mask; TODO confirm. */
6558 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6559 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6561 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
/* Zero-masked form: uses the same __builtin_ia32_rsqrt14ps128_mask builtin on
 * __A. Remaining arguments are not visible in this excerpt - presumably a zero
 * vector and __U; TODO confirm. */
6566 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6567 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6569 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
/* Returns, as __m256, the result of __builtin_ia32_rsqrt14ps256_mask applied
 * to __A (viewed as __v8sf) with _mm256_setzero_ps() as the next operand. The
 * trailing mask argument is not visible in this excerpt. */
6575 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6576 _mm256_rsqrt14_ps (__m256 __A)
6578 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6580 _mm256_setzero_ps (),
/* Masked form: calls __builtin_ia32_rsqrt14ps256_mask on __A. The arguments
 * carrying __W and __U are not visible in this excerpt - presumably the
 * pass-through vector and the write mask; TODO confirm. */
6584 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6585 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6587 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
/* Zero-masked form: calls __builtin_ia32_rsqrt14ps256_mask on __A with
 * _mm256_setzero_ps() as the next operand. The mask argument (presumably __U)
 * is not visible in this excerpt. */
6592 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6593 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6595 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6597 _mm256_setzero_ps (),
/* Builds an 8-float vector from __A with __builtin_shufflevector: indices
 * 0,1,2,3,0,1,2,3 place the four floats of __A in both the low and the high
 * half of the result. */
6601 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6602 _mm256_broadcast_f32x4(__m128 __A)
6604 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6605 0, 1, 2, 3, 0, 1, 2, 3);
/* Masked form: feeds mask __M and _mm256_broadcast_f32x4(__A) to
 * __builtin_ia32_selectps_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6608 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6609 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6611 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6612 (__v8sf)_mm256_broadcast_f32x4(__A),
/* Zero-masked form: __builtin_ia32_selectps_256 chooses, per lane and under
 * mask __M, between _mm256_broadcast_f32x4(__A) and an all-zero vector. */
6616 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6617 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6619 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6620 (__v8sf)_mm256_broadcast_f32x4(__A),
6621 (__v8sf)_mm256_setzero_ps());
/* Builds an 8 x 32-bit integer vector from __A with __builtin_shufflevector:
 * indices 0,1,2,3,0,1,2,3 place the four elements of __A in both the low and
 * the high half of the result. */
6624 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6625 _mm256_broadcast_i32x4(__m128i __A)
6627 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6628 0, 1, 2, 3, 0, 1, 2, 3);
/* Masked form: feeds mask __M and _mm256_broadcast_i32x4(__A) to
 * __builtin_ia32_selectd_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6632 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6634 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6635 (__v8si)_mm256_broadcast_i32x4(__A),
/* Zero-masked form: __builtin_ia32_selectd_256 chooses, per lane and under
 * mask __M, between _mm256_broadcast_i32x4(__A) and an all-zero vector. */
6639 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6640 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6642 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6643 (__v8si)_mm256_broadcast_i32x4(__A),
6644 (__v8si)_mm256_setzero_si256());
/* Masked form: feeds mask __M and _mm256_broadcastsd_pd(__A) to
 * __builtin_ia32_selectpd_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6647 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6648 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6650 return (__m256d)__builtin_ia32_selectpd_256(__M,
6651 (__v4df) _mm256_broadcastsd_pd(__A),
/* Zero-masked form: __builtin_ia32_selectpd_256 chooses, per lane and under
 * mask __M, between _mm256_broadcastsd_pd(__A) and an all-zero vector. */
6655 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6656 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6658 return (__m256d)__builtin_ia32_selectpd_256(__M,
6659 (__v4df) _mm256_broadcastsd_pd(__A),
6660 (__v4df) _mm256_setzero_pd());
/* Masked form: feeds mask __M and _mm_broadcastss_ps(__A) to
 * __builtin_ia32_selectps_128. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6663 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6664 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6666 return (__m128)__builtin_ia32_selectps_128(__M,
6667 (__v4sf) _mm_broadcastss_ps(__A),
/* Zero-masked form: __builtin_ia32_selectps_128 chooses, per lane and under
 * mask __M, between _mm_broadcastss_ps(__A) and an all-zero vector. */
6671 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6672 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6674 return (__m128)__builtin_ia32_selectps_128(__M,
6675 (__v4sf) _mm_broadcastss_ps(__A),
6676 (__v4sf) _mm_setzero_ps());
/* Masked form: feeds mask __M and _mm256_broadcastss_ps(__A) to
 * __builtin_ia32_selectps_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6679 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6680 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6682 return (__m256)__builtin_ia32_selectps_256(__M,
6683 (__v8sf) _mm256_broadcastss_ps(__A),
/* Zero-masked form: __builtin_ia32_selectps_256 chooses, per lane and under
 * mask __M, between _mm256_broadcastss_ps(__A) and an all-zero vector. */
6687 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6688 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6690 return (__m256)__builtin_ia32_selectps_256(__M,
6691 (__v8sf) _mm256_broadcastss_ps(__A),
6692 (__v8sf) _mm256_setzero_ps());
/* Masked form: feeds mask __M and _mm_broadcastd_epi32(__A) to
 * __builtin_ia32_selectd_128. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6695 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6696 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6698 return (__m128i)__builtin_ia32_selectd_128(__M,
6699 (__v4si) _mm_broadcastd_epi32(__A),
/* Zero-masked form: __builtin_ia32_selectd_128 chooses, per lane and under
 * mask __M, between _mm_broadcastd_epi32(__A) and an all-zero vector. */
6703 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6704 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6706 return (__m128i)__builtin_ia32_selectd_128(__M,
6707 (__v4si) _mm_broadcastd_epi32(__A),
6708 (__v4si) _mm_setzero_si128());
/* Masked form: feeds mask __M and _mm256_broadcastd_epi32(__A) to
 * __builtin_ia32_selectd_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6711 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6712 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6714 return (__m256i)__builtin_ia32_selectd_256(__M,
6715 (__v8si) _mm256_broadcastd_epi32(__A),
/* Zero-masked form: __builtin_ia32_selectd_256 chooses, per lane and under
 * mask __M, between _mm256_broadcastd_epi32(__A) and an all-zero vector. */
6719 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6720 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6722 return (__m256i)__builtin_ia32_selectd_256(__M,
6723 (__v8si) _mm256_broadcastd_epi32(__A),
6724 (__v8si) _mm256_setzero_si256());
/* Masked form: feeds mask __M and _mm_broadcastq_epi64(__A) to
 * __builtin_ia32_selectq_128. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6727 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6728 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6730 return (__m128i)__builtin_ia32_selectq_128(__M,
6731 (__v2di) _mm_broadcastq_epi64(__A),
/* Zero-masked form: __builtin_ia32_selectq_128 chooses, per lane and under
 * mask __M, between _mm_broadcastq_epi64(__A) and an all-zero vector. */
6735 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6736 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6738 return (__m128i)__builtin_ia32_selectq_128(__M,
6739 (__v2di) _mm_broadcastq_epi64(__A),
6740 (__v2di) _mm_setzero_si128());
/* Masked form: feeds mask __M and _mm256_broadcastq_epi64(__A) to
 * __builtin_ia32_selectq_256. The last select operand is not visible in this
 * excerpt; presumably it is __O - TODO confirm. */
6743 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6744 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6746 return (__m256i)__builtin_ia32_selectq_256(__M,
6747 (__v4di) _mm256_broadcastq_epi64(__A),
/* Zero-masked form: __builtin_ia32_selectq_256 chooses, per lane and under
 * mask __M, between _mm256_broadcastq_epi64(__A) and an all-zero vector. */
6751 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6752 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6754 return (__m256i)__builtin_ia32_selectq_256(__M,
6755 (__v4di) _mm256_broadcastq_epi64(__A),
6756 (__v4di) _mm256_setzero_si256());
/* Passes __A (as __v4si) to __builtin_ia32_pmovsdb128_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6759 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6760 _mm_cvtsepi32_epi8 (__m128i __A)
6762 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6763 (__v16qi)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovsdb128_mask with source __A (as __v4si), second
 * operand __O (as __v16qi) and mask __M. */
6767 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6768 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6770 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6771 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovsdb128_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6774 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6775 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6777 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6778 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v4si) and mask __M to
 * __builtin_ia32_pmovsdb128mem_mask; returns nothing. */
6782 static __inline__ void __DEFAULT_FN_ATTRS128
6783 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6785 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6788 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6789 _mm256_cvtsepi32_epi8 (__m256i __A)
6791 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6792 (__v16qi)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovsdb256_mask with source __A (as __v8si), second
 * operand __O (as __v16qi) and mask __M. */
6796 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6797 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6799 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6800 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovsdb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6803 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6804 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
6806 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6807 (__v16qi) _mm_setzero_si128 (),
6811 static __inline__ void __DEFAULT_FN_ATTRS128
6812 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
6814 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
/* Passes __A (as __v4si) to __builtin_ia32_pmovsdw128_mask with an all-zero
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt.
 * NOTE(review): this passes _mm_setzero_si128() where _mm256_cvtsepi32_epi16
 * passes _mm_undefined_si128() - confirm whether the difference is intended. */
6817 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6818 _mm_cvtsepi32_epi16 (__m128i __A)
6820 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6821 (__v8hi)_mm_setzero_si128 (),
/* Masked form: calls __builtin_ia32_pmovsdw128_mask on __A (as __v4si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
6825 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6826 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
6828 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsdw128_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6833 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6834 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
6836 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
6837 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v4si) and mask __M to
 * __builtin_ia32_pmovsdw128mem_mask; returns nothing. */
6841 static __inline__ void __DEFAULT_FN_ATTRS128
6842 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
6844 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* Passes __A (as __v8si) to __builtin_ia32_pmovsdw256_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6847 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6848 _mm256_cvtsepi32_epi16 (__m256i __A)
6850 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6851 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovsdw256_mask on __A (as __v8si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
6855 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6856 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
6858 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsdw256_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6862 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6863 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
6865 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
6866 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v8si) and mask __M to
 * __builtin_ia32_pmovsdw256mem_mask; returns nothing. */
6870 static __inline__ void __DEFAULT_FN_ATTRS256
6871 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
6873 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovsqb128_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6876 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6877 _mm_cvtsepi64_epi8 (__m128i __A)
6879 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6880 (__v16qi)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovsqb128_mask with source __A (as __v2di), second
 * operand __O (as __v16qi) and mask __M. */
6884 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6885 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6887 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6888 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovsqb128_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6891 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6892 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
6894 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
6895 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovsqb128mem_mask; returns nothing. */
6899 static __inline__ void __DEFAULT_FN_ATTRS128
6900 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6902 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovsqb256_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6905 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6906 _mm256_cvtsepi64_epi8 (__m256i __A)
6908 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6909 (__v16qi)_mm_undefined_si128(),
/* Calls __builtin_ia32_pmovsqb256_mask with source __A (as __v4di), second
 * operand __O (as __v16qi) and mask __M. */
6913 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6914 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
6916 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6917 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovsqb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6920 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6921 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
6923 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
6924 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovsqb256mem_mask; returns nothing. */
6928 static __inline__ void __DEFAULT_FN_ATTRS256
6929 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
6931 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovsqd128_mask with an undefined
 * __v4si as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6934 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6935 _mm_cvtsepi64_epi32 (__m128i __A)
6937 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6938 (__v4si)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovsqd128_mask on __A (as __v2di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
6942 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6943 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6945 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsqd128_mask on __A with an
 * all-zero __v4si as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6949 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6950 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
6952 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
6953 (__v4si) _mm_setzero_si128 (),
/* Hands pointer __P (as __v4si *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovsqd128mem_mask; returns nothing. */
6957 static __inline__ void __DEFAULT_FN_ATTRS128
6958 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
6960 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovsqd256_mask with an undefined
 * __v4si as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6963 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6964 _mm256_cvtsepi64_epi32 (__m256i __A)
6966 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6967 (__v4si)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovsqd256_mask on __A (as __v4di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
6971 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6972 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
6974 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsqd256_mask on __A with an
 * all-zero __v4si as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
6979 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6980 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
6982 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
6983 (__v4si) _mm_setzero_si128 (),
/* Hands pointer __P (as __v4si *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovsqd256mem_mask; returns nothing. */
6987 static __inline__ void __DEFAULT_FN_ATTRS256
6988 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
6990 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovsqw128_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
6993 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6994 _mm_cvtsepi64_epi16 (__m128i __A)
6996 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
6997 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovsqw128_mask on __A (as __v2di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7002 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7004 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsqw128_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7009 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7011 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7012 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovsqw128mem_mask; returns nothing. */
7016 static __inline__ void __DEFAULT_FN_ATTRS128
7017 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7019 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovsqw256_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7022 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7023 _mm256_cvtsepi64_epi16 (__m256i __A)
7025 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7026 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovsqw256_mask on __A (as __v4di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7030 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7031 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7033 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovsqw256_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7037 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7038 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7040 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7041 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovsqw256mem_mask; returns nothing. */
7045 static __inline__ void __DEFAULT_FN_ATTRS256
7046 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7048 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* Passes __A (as __v4si) to __builtin_ia32_pmovusdb128_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7051 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7052 _mm_cvtusepi32_epi8 (__m128i __A)
7054 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7055 (__v16qi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusdb128_mask on __A (as __v4si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7059 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7060 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7062 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusdb128_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7067 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7068 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7070 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7071 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v4si) and mask __M to
 * __builtin_ia32_pmovusdb128mem_mask; returns nothing. */
7075 static __inline__ void __DEFAULT_FN_ATTRS128
7076 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7078 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* Passes __A (as __v8si) to __builtin_ia32_pmovusdb256_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7081 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7082 _mm256_cvtusepi32_epi8 (__m256i __A)
7084 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7085 (__v16qi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusdb256_mask on __A (as __v8si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7089 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7090 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7092 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusdb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7097 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7098 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7100 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7101 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v8si) and mask __M to
 * __builtin_ia32_pmovusdb256mem_mask; returns nothing. */
7105 static __inline__ void __DEFAULT_FN_ATTRS256
7106 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7108 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
/* Passes __A (as __v4si) to __builtin_ia32_pmovusdw128_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7111 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7112 _mm_cvtusepi32_epi16 (__m128i __A)
7114 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7115 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusdw128_mask on __A (as __v4si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7119 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7120 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7122 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusdw128_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7126 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7127 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7129 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7130 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v4si) and mask __M to
 * __builtin_ia32_pmovusdw128mem_mask; returns nothing. */
7134 static __inline__ void __DEFAULT_FN_ATTRS128
7135 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7137 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* Passes __A (as __v8si) to __builtin_ia32_pmovusdw256_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7140 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7141 _mm256_cvtusepi32_epi16 (__m256i __A)
7143 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7144 (__v8hi) _mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusdw256_mask on __A (as __v8si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7148 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7149 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7151 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusdw256_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7155 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7156 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7158 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7159 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v8si) and mask __M to
 * __builtin_ia32_pmovusdw256mem_mask; returns nothing. */
7163 static __inline__ void __DEFAULT_FN_ATTRS256
7164 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7166 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovusqb128_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7169 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7170 _mm_cvtusepi64_epi8 (__m128i __A)
7172 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7173 (__v16qi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqb128_mask on __A (as __v2di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7178 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7180 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqb128_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7186 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7188 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7189 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovusqb128mem_mask; returns nothing. */
7193 static __inline__ void __DEFAULT_FN_ATTRS128
7194 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7196 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovusqb256_mask with an undefined
 * __v16qi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7199 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7200 _mm256_cvtusepi64_epi8 (__m256i __A)
7202 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7203 (__v16qi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqb256_mask on __A (as __v4di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7207 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7208 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7210 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7215 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7216 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7218 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7219 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovusqb256mem_mask; returns nothing. */
7223 static __inline__ void __DEFAULT_FN_ATTRS256
7224 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7226 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovusqd128_mask with an undefined
 * __v4si as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7229 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7230 _mm_cvtusepi64_epi32 (__m128i __A)
7232 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7233 (__v4si)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqd128_mask on __A (as __v2di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7237 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7238 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7240 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqd128_mask on __A with an
 * all-zero __v4si as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7244 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7245 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7247 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7248 (__v4si) _mm_setzero_si128 (),
/* Hands pointer __P (as __v4si *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovusqd128mem_mask; returns nothing. */
7252 static __inline__ void __DEFAULT_FN_ATTRS128
7253 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7255 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovusqd256_mask with an undefined
 * __v4si as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7258 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7259 _mm256_cvtusepi64_epi32 (__m256i __A)
7261 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7262 (__v4si)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqd256_mask on __A (as __v4di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7266 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7267 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7269 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqd256_mask on __A with an
 * all-zero __v4si as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7273 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7274 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7276 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7277 (__v4si) _mm_setzero_si128 (),
/* Hands pointer __P (as __v4si *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovusqd256mem_mask; returns nothing. */
7281 static __inline__ void __DEFAULT_FN_ATTRS256
7282 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7284 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
/* Passes __A (as __v2di) to __builtin_ia32_pmovusqw128_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7287 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7288 _mm_cvtusepi64_epi16 (__m128i __A)
7290 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7291 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqw128_mask on __A (as __v2di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7295 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7296 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7298 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqw128_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7302 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7303 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7305 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7306 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovusqw128mem_mask; returns nothing. */
7310 static __inline__ void __DEFAULT_FN_ATTRS128
7311 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7313 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
/* Passes __A (as __v4di) to __builtin_ia32_pmovusqw256_mask with an undefined
 * __v8hi as second operand. The trailing mask argument is not visible in this
 * excerpt. */
7316 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7317 _mm256_cvtusepi64_epi16 (__m256i __A)
7319 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7320 (__v8hi)_mm_undefined_si128(),
/* Masked form: calls __builtin_ia32_pmovusqw256_mask on __A (as __v4di). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7324 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7325 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7327 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
/* Zero-masked form: calls __builtin_ia32_pmovusqw256_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7331 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7332 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7334 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7335 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovusqw256mem_mask; returns nothing. */
7339 static __inline__ void __DEFAULT_FN_ATTRS256
7340 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7342 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* Converts the four 32-bit elements of __A to a __v4qi with
 * __builtin_convertvector, then widens to 16 bytes with
 * __builtin_shufflevector: indices 0-3 take the converted bytes and every
 * remaining index (4-7) selects an element of the all-zero __v4qi. */
7345 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7346 _mm_cvtepi32_epi8 (__m128i __A)
7348 return (__m128i)__builtin_shufflevector(
7349 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7350 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
/* Calls __builtin_ia32_pmovdb128_mask with source __A (as __v4si), second
 * operand __O (as __v16qi) and mask __M. */
7353 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7354 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7356 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7357 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovdb128_mask on __A with
 * _mm_setzero_si128() as second operand. The mask argument (presumably __M)
 * is not visible in this excerpt. */
7360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7361 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7363 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7365 _mm_setzero_si128 (),
7369 static __inline__ void __DEFAULT_FN_ATTRS256
7370 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7372 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* Converts the eight 32-bit elements of __A to a __v8qi with
 * __builtin_convertvector, then combines it with an all-zero __v8qi through
 * __builtin_shufflevector: indices 0-7 take the converted bytes and indices
 * 8-11 select zero elements. The trailing indices are not visible in this
 * excerpt. */
7375 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7376 _mm256_cvtepi32_epi8 (__m256i __A)
7378 return (__m128i)__builtin_shufflevector(
7379 __builtin_convertvector((__v8si)__A, __v8qi),
7380 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
/* Calls __builtin_ia32_pmovdb256_mask with source __A (as __v8si), second
 * operand __O (as __v16qi) and mask __M. */
7384 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7385 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7387 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7388 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovdb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7391 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7392 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7394 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7395 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v8si) and mask __M to
 * __builtin_ia32_pmovdb256mem_mask; returns nothing. */
7399 static __inline__ void __DEFAULT_FN_ATTRS256
7400 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7402 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
/* Converts the four 32-bit elements of __A to a __v4hi with
 * __builtin_convertvector and combines it with an all-zero __v4hi through
 * __builtin_shufflevector. Only the first two shuffle indices (0, 1) are
 * visible in this excerpt. */
7405 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7406 _mm_cvtepi32_epi16 (__m128i __A)
7408 return (__m128i)__builtin_shufflevector(
7409 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
/* Masked form: calls __builtin_ia32_pmovdw128_mask on __A (as __v4si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7413 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7414 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7416 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovdw128_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7420 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7421 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7423 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7424 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v4si) and mask __M to
 * __builtin_ia32_pmovdw128mem_mask; returns nothing. */
7428 static __inline__ void __DEFAULT_FN_ATTRS128
7429 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7431 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
/* Converts the eight 32-bit elements of __A to a __v8hi with
 * __builtin_convertvector and returns it as an __m128i. */
7434 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7435 _mm256_cvtepi32_epi16 (__m256i __A)
7437 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
/* Masked form: calls __builtin_ia32_pmovdw256_mask on __A (as __v8si). The
 * arguments carrying __O and __M are not visible in this excerpt - presumably
 * pass-through and mask; TODO confirm. */
7440 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7441 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7443 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
/* Zero-masked form: calls __builtin_ia32_pmovdw256_mask on __A with an
 * all-zero __v8hi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7447 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7448 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7450 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7451 (__v8hi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v8hi *), source __A (as __v8si) and mask __M to
 * __builtin_ia32_pmovdw256mem_mask; returns nothing. */
7455 static __inline__ void __DEFAULT_FN_ATTRS256
7456 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7458 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
/* Converts the two 64-bit elements of __A to a __v2qi with
 * __builtin_convertvector, then widens to 16 bytes with
 * __builtin_shufflevector: indices 0-1 take the converted bytes and every
 * remaining index (2 or 3) selects an element of the all-zero __v2qi. */
7461 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7462 _mm_cvtepi64_epi8 (__m128i __A)
7464 return (__m128i)__builtin_shufflevector(
7465 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7466 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
/* Calls __builtin_ia32_pmovqb128_mask with source __A (as __v2di), second
 * operand __O (as __v16qi) and mask __M. */
7469 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7470 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7472 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7473 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovqb128_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7476 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7477 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7479 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7480 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v2di) and mask __M to
 * __builtin_ia32_pmovqb128mem_mask; returns nothing. */
7484 static __inline__ void __DEFAULT_FN_ATTRS128
7485 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7487 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
/* Converts the four 64-bit elements of __A to a __v4qi with
 * __builtin_convertvector, then widens to 16 bytes with
 * __builtin_shufflevector: indices 0-3 take the converted bytes and every
 * remaining index (4-7) selects an element of the all-zero __v4qi. */
7490 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7491 _mm256_cvtepi64_epi8 (__m256i __A)
7493 return (__m128i)__builtin_shufflevector(
7494 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7495 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
/* Calls __builtin_ia32_pmovqb256_mask with source __A (as __v4di), second
 * operand __O (as __v16qi) and mask __M. */
7498 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7499 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7501 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7502 (__v16qi) __O, __M);
/* Zero-masked form: calls __builtin_ia32_pmovqb256_mask on __A with an
 * all-zero __v16qi as second operand. The mask argument (presumably __M) is
 * not visible in this excerpt. */
7505 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7506 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7508 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7509 (__v16qi) _mm_setzero_si128 (),
/* Hands pointer __P (as __v16qi *), source __A (as __v4di) and mask __M to
 * __builtin_ia32_pmovqb256mem_mask; returns nothing. */
7513 static __inline__ void __DEFAULT_FN_ATTRS256
7514 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7516 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
/* Converts the two 64-bit elements of __A to a __v2si with
 * __builtin_convertvector, then widens to four elements with
 * __builtin_shufflevector: indices 0-1 take the converted values and indices
 * 2-3 select the elements of the all-zero __v2si. */
7519 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7520 _mm_cvtepi64_epi32 (__m128i __A)
7522 return (__m128i)__builtin_shufflevector(
7523 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7527 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7529 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7533 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7534 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7536 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7537 (__v4si) _mm_setzero_si128 (),
7541 static __inline__ void __DEFAULT_FN_ATTRS128
7542 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7544 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7547 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7548 _mm256_cvtepi64_epi32 (__m256i __A)
7550 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7553 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7554 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7556 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7557 (__v4si)_mm256_cvtepi64_epi32(__A),
7561 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7562 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7564 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7565 (__v4si)_mm256_cvtepi64_epi32(__A),
7566 (__v4si)_mm_setzero_si128());
7569 static __inline__ void __DEFAULT_FN_ATTRS256
7570 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7572 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7575 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7576 _mm_cvtepi64_epi16 (__m128i __A)
7578 return (__m128i)__builtin_shufflevector(
7579 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7583 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7584 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7586 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7592 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7594 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7595 (__v8hi) _mm_setzero_si128 (),
7599 static __inline__ void __DEFAULT_FN_ATTRS128
7600 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7602 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7605 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7606 _mm256_cvtepi64_epi16 (__m256i __A)
7608 return (__m128i)__builtin_shufflevector(
7609 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7613 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7614 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7616 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7620 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7621 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7623 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7624 (__v8hi) _mm_setzero_si128 (),
7628 static __inline__ void __DEFAULT_FN_ATTRS256
7629 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7631 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7634 #define _mm256_extractf32x4_ps(A, imm) \
7635 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7637 (__v4sf)_mm_undefined_ps(), \
7640 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7641 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7643 (__v4sf)(__m128)(W), \
7646 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7647 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7649 (__v4sf)_mm_setzero_ps(), \
7652 #define _mm256_extracti32x4_epi32(A, imm) \
7653 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7655 (__v4si)_mm_undefined_si128(), \
7658 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7659 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7661 (__v4si)(__m128i)(W), \
7664 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7665 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7667 (__v4si)_mm_setzero_si128(), \
/* Expands to __builtin_ia32_insertf32x4_256 with A cast to __v8sf, B cast to
 * __v4sf and imm cast to int; the result is cast to __m256.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * by B and must be a compile-time constant -- confirm against the builtin. */
7670 #define _mm256_insertf32x4(A, B, imm) \
7671 (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
7672 (__v4sf)(__m128)(B), (int)(imm))
/* Masked form of _mm256_insertf32x4: hands the 8-bit mask U, the insert
 * result (as __v8sf) and W (as __v8sf) to __builtin_ia32_selectps_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7674 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \
7675 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7676 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7677 (__v8sf)(__m256)(W))
/* Zero-masked form of _mm256_insertf32x4: hands the 8-bit mask U, the insert
 * result (as __v8sf) and _mm256_setzero_ps() to __builtin_ia32_selectps_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7679 #define _mm256_maskz_insertf32x4(U, A, B, imm) \
7680 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
7681 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
7682 (__v8sf)_mm256_setzero_ps())
/* Integer counterpart of the f32x4 insert: expands to
 * __builtin_ia32_inserti32x4_256 with A cast to __v8si, B cast to __v4si and
 * imm cast to int; the result is cast to __m256i.
 * NOTE(review): imm presumably selects which 128-bit half of A is replaced
 * by B -- confirm against the builtin. */
7684 #define _mm256_inserti32x4(A, B, imm) \
7685 (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
7686 (__v4si)(__m128i)(B), (int)(imm))
/* Masked form of _mm256_inserti32x4: hands the 8-bit mask U, the insert
 * result (as __v8si) and W (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7688 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \
7689 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7690 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7691 (__v8si)(__m256i)(W))
/* Zero-masked form of _mm256_inserti32x4: hands the 8-bit mask U, the insert
 * result (as __v8si) and _mm256_setzero_si256() to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7693 #define _mm256_maskz_inserti32x4(U, A, B, imm) \
7694 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7695 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
7696 (__v8si)_mm256_setzero_si256())
7698 #define _mm_getmant_pd(A, B, C) \
7699 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7700 (int)(((C)<<2) | (B)), \
7701 (__v2df)_mm_setzero_pd(), \
7704 #define _mm_mask_getmant_pd(W, U, A, B, C) \
7705 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7706 (int)(((C)<<2) | (B)), \
7707 (__v2df)(__m128d)(W), \
7710 #define _mm_maskz_getmant_pd(U, A, B, C) \
7711 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7712 (int)(((C)<<2) | (B)), \
7713 (__v2df)_mm_setzero_pd(), \
7716 #define _mm256_getmant_pd(A, B, C) \
7717 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7718 (int)(((C)<<2) | (B)), \
7719 (__v4df)_mm256_setzero_pd(), \
7722 #define _mm256_mask_getmant_pd(W, U, A, B, C) \
7723 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7724 (int)(((C)<<2) | (B)), \
7725 (__v4df)(__m256d)(W), \
7728 #define _mm256_maskz_getmant_pd(U, A, B, C) \
7729 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7730 (int)(((C)<<2) | (B)), \
7731 (__v4df)_mm256_setzero_pd(), \
7734 #define _mm_getmant_ps(A, B, C) \
7735 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7736 (int)(((C)<<2) | (B)), \
7737 (__v4sf)_mm_setzero_ps(), \
7740 #define _mm_mask_getmant_ps(W, U, A, B, C) \
7741 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7742 (int)(((C)<<2) | (B)), \
7743 (__v4sf)(__m128)(W), \
7746 #define _mm_maskz_getmant_ps(U, A, B, C) \
7747 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7748 (int)(((C)<<2) | (B)), \
7749 (__v4sf)_mm_setzero_ps(), \
7752 #define _mm256_getmant_ps(A, B, C) \
7753 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7754 (int)(((C)<<2) | (B)), \
7755 (__v8sf)_mm256_setzero_ps(), \
7758 #define _mm256_mask_getmant_ps(W, U, A, B, C) \
7759 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7760 (int)(((C)<<2) | (B)), \
7761 (__v8sf)(__m256)(W), \
7764 #define _mm256_maskz_getmant_ps(U, A, B, C) \
7765 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7766 (int)(((C)<<2) | (B)), \
7767 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_gather3div2df with v1_old as __v2df, addr as
 * double const *, index as __v2di, mask as __mmask8 and scale as int; the
 * result is cast to __m128d.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7770 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7771 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
7772 (double const *)(addr), \
7773 (__v2di)(__m128i)(index), \
7774 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div2di with v1_old as __v2di, addr as
 * long long const *, index as __v2di, mask as __mmask8 and scale as int; the
 * result is cast to __m128i.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7776 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7777 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
7778 (long long const *)(addr), \
7779 (__v2di)(__m128i)(index), \
7780 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div4df with v1_old as __v4df, addr as
 * double const *, index as __v4di, mask as __mmask8 and scale as int; the
 * result is cast to __m256d.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7782 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
7783 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
7784 (double const *)(addr), \
7785 (__v4di)(__m256i)(index), \
7786 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div4di with v1_old as __v4di, addr as
 * long long const *, index as __v4di, mask as __mmask8 and scale as int; the
 * result is cast to __m256i.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7788 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7789 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
7790 (long long const *)(addr), \
7791 (__v4di)(__m256i)(index), \
7792 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div4sf with v1_old as __v4sf, addr as
 * float const *, index as __v2di, mask as __mmask8 and scale as int; the
 * result is cast to __m128.
 * NOTE(review): the index vector holds only two 64-bit elements while the
 * result type has four floats; presumably only the low two result elements
 * are gathered -- confirm in the intrinsics reference. */
7794 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7795 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
7796 (float const *)(addr), \
7797 (__v2di)(__m128i)(index), \
7798 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div4si with v1_old as __v4si, addr as
 * int const *, index as __v2di, mask as __mmask8 and scale as int; the
 * result is cast to __m128i.
 * NOTE(review): the index vector holds only two 64-bit elements while the
 * result type has four ints; presumably only the low two result elements
 * are gathered -- confirm in the intrinsics reference. */
7800 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7801 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
7802 (int const *)(addr), \
7803 (__v2di)(__m128i)(index), \
7804 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div8sf with v1_old as __v4sf, addr as
 * float const *, index as __v4di, mask as __mmask8 and scale as int.
 * Although the index comes from a 256-bit vector, v1_old and the result are
 * 128-bit (__m128): four 64-bit indices pair with four floats.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear -- confirm in the intrinsics reference. */
7806 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
7807 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
7808 (float const *)(addr), \
7809 (__v4di)(__m256i)(index), \
7810 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3div8si with v1_old as __v4si, addr as
 * int const *, index as __v4di, mask as __mmask8 and scale as int.
 * Although the index comes from a 256-bit vector, v1_old and the result are
 * 128-bit (__m128i): four 64-bit indices pair with four ints.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear -- confirm in the intrinsics reference. */
7812 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7813 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
7814 (int const *)(addr), \
7815 (__v4di)(__m256i)(index), \
7816 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv2df with v1_old as __v2df, addr as
 * double const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128d.
 * NOTE(review): the index vector has four 32-bit elements but the result
 * has two doubles; presumably only the low two indices are used -- confirm
 * in the intrinsics reference. */
7818 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
7819 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
7820 (double const *)(addr), \
7821 (__v4si)(__m128i)(index), \
7822 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv2di with v1_old as __v2di, addr as
 * long long const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128i.
 * NOTE(review): the index vector has four 32-bit elements but the result
 * has two 64-bit elements; presumably only the low two indices are used --
 * confirm in the intrinsics reference. */
7824 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7825 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
7826 (long long const *)(addr), \
7827 (__v4si)(__m128i)(index), \
7828 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv4df with v1_old as __v4df, addr as
 * double const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m256d. The index is a 128-bit vector (__m128i): four
 * 32-bit indices pair with four doubles.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear -- confirm in the intrinsics reference. */
7830 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
7831 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
7832 (double const *)(addr), \
7833 (__v4si)(__m128i)(index), \
7834 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv4di with v1_old as __v4di, addr as
 * long long const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m256i. The index is a 128-bit vector (__m128i): four
 * 32-bit indices pair with four 64-bit elements.
 * NOTE(review): presumably v1_old supplies the elements whose mask bit is
 * clear -- confirm in the intrinsics reference. */
7836 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7837 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
7838 (long long const *)(addr), \
7839 (__v4si)(__m128i)(index), \
7840 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv4sf with v1_old as __v4sf, addr as
 * float const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7842 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
7843 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
7844 (float const *)(addr), \
7845 (__v4si)(__m128i)(index), \
7846 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv4si with v1_old as __v4si, addr as
 * int const *, index as __v4si, mask as __mmask8 and scale as int; the
 * result is cast to __m128i.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7848 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7849 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
7850 (int const *)(addr), \
7851 (__v4si)(__m128i)(index), \
7852 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv8sf with v1_old as __v8sf, addr as
 * float const *, index as __v8si, mask as __mmask8 and scale as int; the
 * result is cast to __m256.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7854 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
7855 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
7856 (float const *)(addr), \
7857 (__v8si)(__m256i)(index), \
7858 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_gather3siv8si with v1_old as __v8si, addr as
 * int const *, index as __v8si, mask as __mmask8 and scale as int; the
 * result is cast to __m256i.
 * NOTE(review): presumably a masked gather in which v1_old supplies the
 * elements whose mask bit is clear -- confirm in the intrinsics reference. */
7860 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7861 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
7862 (int const *)(addr), \
7863 (__v8si)(__m256i)(index), \
7864 (__mmask8)(mask), (int)(scale))
/* Expands to __builtin_ia32_permdf256 with X cast to __v4df and C cast to
 * int; the result is cast to __m256d.
 * NOTE(review): C is presumably an immediate encoding the source element for
 * each of the four doubles -- confirm in the intrinsics reference. */
7866 #define _mm256_permutex_pd(X, C) \
7867 (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))
/* Masked form of _mm256_permutex_pd: hands the 8-bit mask U, the permute
 * result (as __v4df) and W (as __v4df) to __builtin_ia32_selectpd_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7869 #define _mm256_mask_permutex_pd(W, U, X, C) \
7870 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7871 (__v4df)_mm256_permutex_pd((X), (C)), \
7872 (__v4df)(__m256d)(W))
/* Zero-masked form of _mm256_permutex_pd: hands the 8-bit mask U, the
 * permute result (as __v4df) and _mm256_setzero_pd() to
 * __builtin_ia32_selectpd_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7874 #define _mm256_maskz_permutex_pd(U, X, C) \
7875 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
7876 (__v4df)_mm256_permutex_pd((X), (C)), \
7877 (__v4df)_mm256_setzero_pd())
/* Expands to __builtin_ia32_permdi256 with X cast to __v4di and C cast to
 * int; the result is cast to __m256i.
 * NOTE(review): C is presumably an immediate encoding the source element for
 * each of the four 64-bit integers -- confirm in the intrinsics reference. */
7879 #define _mm256_permutex_epi64(X, C) \
7880 (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))
/* Masked form of _mm256_permutex_epi64: hands the 8-bit mask U, the permute
 * result (as __v4di) and W (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7882 #define _mm256_mask_permutex_epi64(W, U, X, C) \
7883 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
7884 (__v4di)_mm256_permutex_epi64((X), (C)), \
7885 (__v4di)(__m256i)(W))
/* Zero-masked form of _mm256_permutex_epi64: hands the 8-bit mask U, the
 * permute result (as __v4di) and _mm256_setzero_si256() to
 * __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7887 #define _mm256_maskz_permutex_epi64(U, X, C) \
7888 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
7889 (__v4di)_mm256_permutex_epi64((X), (C)), \
7890 (__v4di)_mm256_setzero_si256())
7892 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7893 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
7895 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
7898 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7899 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
7902 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7903 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7907 static __inline__ __m256d __DEFAULT_FN_ATTRS256
7908 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
7910 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
7911 (__v4df)_mm256_permutexvar_pd(__X, __Y),
7912 (__v4df)_mm256_setzero_pd());
7915 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7916 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
7918 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
7921 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7922 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
7924 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7925 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
7926 (__v4di)_mm256_setzero_si256());
7929 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7930 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
7933 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
7934 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
/* Alias for _mm256_permutevar8x32_ps with the argument order swapped: the
 * first argument here (A) is passed second and B is passed first.
 * _mm256_permutevar8x32_ps itself is defined outside the visible part of
 * this header. */
7938 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
7940 static __inline__ __m256 __DEFAULT_FN_ATTRS256
7941 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
7943 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7944 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7948 static __inline__ __m256 __DEFAULT_FN_ATTRS256
7949 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
7951 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
7952 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
7953 (__v8sf)_mm256_setzero_ps());
/* Alias for _mm256_permutevar8x32_epi32 with the argument order swapped: the
 * first argument here (A) is passed second and B is passed first.
 * _mm256_permutevar8x32_epi32 itself is defined outside the visible part of
 * this header. */
7956 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
7958 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7959 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
7962 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7963 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7967 static __inline__ __m256i __DEFAULT_FN_ATTRS256
7968 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
7970 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
7971 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
7972 (__v8si)_mm256_setzero_si256());
/* Expands to __builtin_ia32_alignd128 with A and B cast to __v4si and imm
 * cast to int; the result is cast to __m128i.
 * NOTE(review): presumably concatenates A (high) with B (low) and shifts
 * right by imm 32-bit elements -- confirm in the intrinsics reference. */
7975 #define _mm_alignr_epi32(A, B, imm) \
7976 (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
7977 (__v4si)(__m128i)(B), (int)(imm))
/* Masked form of _mm_alignr_epi32: hands the 8-bit mask U, the align result
 * (as __v4si) and W (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7979 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \
7980 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7981 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
7982 (__v4si)(__m128i)(W))
/* Zero-masked form of _mm_alignr_epi32: hands the 8-bit mask U, the align
 * result (as __v4si) and _mm_setzero_si128() to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7984 #define _mm_maskz_alignr_epi32(U, A, B, imm) \
7985 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7986 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
7987 (__v4si)_mm_setzero_si128())
/* Expands to __builtin_ia32_alignd256 with A and B cast to __v8si and imm
 * cast to int; the result is cast to __m256i.
 * NOTE(review): presumably concatenates A (high) with B (low) and shifts
 * right by imm 32-bit elements -- confirm in the intrinsics reference. */
7989 #define _mm256_alignr_epi32(A, B, imm) \
7990 (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
7991 (__v8si)(__m256i)(B), (int)(imm))
/* Masked form of _mm256_alignr_epi32: hands the 8-bit mask U, the align
 * result (as __v8si) and W (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
7993 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
7994 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
7995 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
7996 (__v8si)(__m256i)(W))
/* Zero-masked form of _mm256_alignr_epi32: hands the 8-bit mask U, the align
 * result (as __v8si) and _mm256_setzero_si256() to
 * __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
7998 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \
7999 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8000 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
8001 (__v8si)_mm256_setzero_si256())
/* Expands to __builtin_ia32_alignq128 with A and B cast to __v2di and imm
 * cast to int; the result is cast to __m128i.
 * NOTE(review): presumably concatenates A (high) with B (low) and shifts
 * right by imm 64-bit elements -- confirm in the intrinsics reference. */
8003 #define _mm_alignr_epi64(A, B, imm) \
8004 (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
8005 (__v2di)(__m128i)(B), (int)(imm))
/* Masked form of _mm_alignr_epi64: hands the 8-bit mask U, the align result
 * (as __v2di) and W (as __v2di) to __builtin_ia32_selectq_128.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
8007 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \
8008 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8009 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8010 (__v2di)(__m128i)(W))
/* Zero-masked form of _mm_alignr_epi64: hands the 8-bit mask U, the align
 * result (as __v2di) and _mm_setzero_si128() to __builtin_ia32_selectq_128.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
8012 #define _mm_maskz_alignr_epi64(U, A, B, imm) \
8013 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
8014 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
8015 (__v2di)_mm_setzero_si128())
/* Expands to __builtin_ia32_alignq256 with A and B cast to __v4di and imm
 * cast to int; the result is cast to __m256i.
 * NOTE(review): presumably concatenates A (high) with B (low) and shifts
 * right by imm 64-bit elements -- confirm in the intrinsics reference. */
8017 #define _mm256_alignr_epi64(A, B, imm) \
8018 (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
8019 (__v4di)(__m256i)(B), (int)(imm))
/* Masked form of _mm256_alignr_epi64: hands the 8-bit mask U, the align
 * result (as __v4di) and W (as __v4di) to __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
8021 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
8022 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8023 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8024 (__v4di)(__m256i)(W))
/* Zero-masked form of _mm256_alignr_epi64: hands the 8-bit mask U, the align
 * result (as __v4di) and _mm256_setzero_si256() to
 * __builtin_ia32_selectq_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
8026 #define _mm256_maskz_alignr_epi64(U, A, B, imm) \
8027 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
8028 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
8029 (__v4di)_mm256_setzero_si256())
8031 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8032 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8034 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8035 (__v4sf)_mm_movehdup_ps(__A),
8039 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8040 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8042 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8043 (__v4sf)_mm_movehdup_ps(__A),
8044 (__v4sf)_mm_setzero_ps());
8047 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8048 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8050 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8051 (__v8sf)_mm256_movehdup_ps(__A),
8055 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8056 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8058 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8059 (__v8sf)_mm256_movehdup_ps(__A),
8060 (__v8sf)_mm256_setzero_ps());
8063 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8064 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8066 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8067 (__v4sf)_mm_moveldup_ps(__A),
8071 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8072 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8074 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8075 (__v4sf)_mm_moveldup_ps(__A),
8076 (__v4sf)_mm_setzero_ps());
8079 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8080 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8082 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8083 (__v8sf)_mm256_moveldup_ps(__A),
8087 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8088 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8090 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8091 (__v8sf)_mm256_moveldup_ps(__A),
8092 (__v8sf)_mm256_setzero_ps());
/* Masked wrapper around _mm256_shuffle_epi32 (defined outside the visible
 * part of this header): hands the 8-bit mask U, the shuffle result (as
 * __v8si) and W (as __v8si) to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
8095 #define _mm256_mask_shuffle_epi32(W, U, A, I) \
8096 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8097 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8098 (__v8si)(__m256i)(W))
/* Zero-masked wrapper around _mm256_shuffle_epi32 (defined outside the
 * visible part of this header): hands the 8-bit mask U, the shuffle result
 * (as __v8si) and _mm256_setzero_si256() to __builtin_ia32_selectd_256.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
8100 #define _mm256_maskz_shuffle_epi32(U, A, I) \
8101 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
8102 (__v8si)_mm256_shuffle_epi32((A), (I)), \
8103 (__v8si)_mm256_setzero_si256())
/* Masked wrapper around _mm_shuffle_epi32 (defined outside the visible part
 * of this header): hands the 8-bit mask U, the shuffle result (as __v4si)
 * and W (as __v4si) to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements whose bit in U is clear are taken from
 * W -- confirm against the select builtin. */
8105 #define _mm_mask_shuffle_epi32(W, U, A, I) \
8106 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8107 (__v4si)_mm_shuffle_epi32((A), (I)), \
8108 (__v4si)(__m128i)(W))
/* Zero-masked wrapper around _mm_shuffle_epi32 (defined outside the visible
 * part of this header): hands the 8-bit mask U, the shuffle result (as
 * __v4si) and _mm_setzero_si128() to __builtin_ia32_selectd_128.
 * NOTE(review): presumably elements whose bit in U is clear become zero --
 * confirm against the select builtin. */
8110 #define _mm_maskz_shuffle_epi32(U, A, I) \
8111 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
8112 (__v4si)_mm_shuffle_epi32((A), (I)), \
8113 (__v4si)_mm_setzero_si128())
8115 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8116 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8118 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8123 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8124 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8126 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8128 (__v2df) _mm_setzero_pd ());
8131 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8132 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8134 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8139 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8140 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8142 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8144 (__v4df) _mm256_setzero_pd ());
8147 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8148 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8150 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8155 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8156 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8158 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8160 (__v4sf) _mm_setzero_ps ());
8163 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8164 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8166 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8171 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8172 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8174 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8176 (__v8sf) _mm256_setzero_ps ());
8179 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8180 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8182 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8187 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8188 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8190 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8196 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8197 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8199 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8204 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8205 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8207 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8209 _mm256_setzero_ps (),
8213 static __inline __m128i __DEFAULT_FN_ATTRS128
8214 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8216 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8221 static __inline __m128i __DEFAULT_FN_ATTRS128
8222 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8224 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8225 (__v8hi) _mm_setzero_si128 (),
8229 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
8230 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8231 (__v8hi)(__m128i)(W), \
8234 #define _mm_maskz_cvt_roundps_ph(U, A, I) \
8235 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8236 (__v8hi)_mm_setzero_si128(), \
8239 static __inline __m128i __DEFAULT_FN_ATTRS256
8240 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8242 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8247 static __inline __m128i __DEFAULT_FN_ATTRS256
8248 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8250 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8251 (__v8hi) _mm_setzero_si128(),
8254 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
8255 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8256 (__v8hi)(__m128i)(W), \
8259 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \
8260 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8261 (__v8hi)_mm_setzero_si128(), \
8265 #undef __DEFAULT_FN_ATTRS128
8266 #undef __DEFAULT_FN_ATTRS256
8268 #endif /* __AVX512VLINTRIN_H */