/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512VLINTRIN_H
29 #define __AVX512VLINTRIN_H
31 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
32 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
34 typedef short __v2hi __attribute__((__vector_size__(4)));
35 typedef char __v4qi __attribute__((__vector_size__(4)));
36 typedef char __v2qi __attribute__((__vector_size__(2)));
/* Named-predicate shorthands for 128-bit signed 32-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm_cmp_epi32_mask and appends the
 * matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm_mask_cmp_epi32_mask and pass the mask k as the first argument. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 256-bit signed 32-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm256_cmp_epi32_mask and appends
 * the matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm256_mask_cmp_epi32_mask and pass the mask k as the first argument. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 128-bit unsigned 32-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm_cmp_epu32_mask and appends the
 * matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm_mask_cmp_epu32_mask and pass the mask k as the first argument. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 256-bit unsigned 32-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm256_cmp_epu32_mask and appends
 * the matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm256_mask_cmp_epu32_mask and pass the mask k as the first argument. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 128-bit signed 64-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm_cmp_epi64_mask and appends the
 * matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm_mask_cmp_epi64_mask and pass the mask k as the first argument. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 256-bit signed 64-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm256_cmp_epi64_mask and appends
 * the matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm256_mask_cmp_epi64_mask and pass the mask k as the first argument. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 128-bit unsigned 64-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm_cmp_epu64_mask and appends the
 * matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm_mask_cmp_epu64_mask and pass the mask k as the first argument. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Named-predicate shorthands for 256-bit unsigned 64-bit integer compares.
 *
 * Each macro forwards A and B unchanged to _mm256_cmp_epu64_mask and appends
 * the matching _MM_CMPINT_* predicate.  The _mask_ forms forward to
 * _mm256_mask_cmp_epu64_mask and pass the mask k as the first argument. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
241 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
244 (__v8si)_mm256_add_epi32(__A, __B),
248 static __inline__ __m256i __DEFAULT_FN_ATTRS256
249 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
251 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
252 (__v8si)_mm256_add_epi32(__A, __B),
253 (__v8si)_mm256_setzero_si256());
256 static __inline__ __m256i __DEFAULT_FN_ATTRS256
257 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
260 (__v4di)_mm256_add_epi64(__A, __B),
264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
265 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
267 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
268 (__v4di)_mm256_add_epi64(__A, __B),
269 (__v4di)_mm256_setzero_si256());
272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
273 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
276 (__v8si)_mm256_sub_epi32(__A, __B),
280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
281 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
283 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
284 (__v8si)_mm256_sub_epi32(__A, __B),
285 (__v8si)_mm256_setzero_si256());
288 static __inline__ __m256i __DEFAULT_FN_ATTRS256
289 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
292 (__v4di)_mm256_sub_epi64(__A, __B),
296 static __inline__ __m256i __DEFAULT_FN_ATTRS256
297 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
299 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
300 (__v4di)_mm256_sub_epi64(__A, __B),
301 (__v4di)_mm256_setzero_si256());
304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
305 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
308 (__v4si)_mm_add_epi32(__A, __B),
312 static __inline__ __m128i __DEFAULT_FN_ATTRS128
313 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
315 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
316 (__v4si)_mm_add_epi32(__A, __B),
317 (__v4si)_mm_setzero_si128());
320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
321 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
324 (__v2di)_mm_add_epi64(__A, __B),
328 static __inline__ __m128i __DEFAULT_FN_ATTRS128
329 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
331 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
332 (__v2di)_mm_add_epi64(__A, __B),
333 (__v2di)_mm_setzero_si128());
336 static __inline__ __m128i __DEFAULT_FN_ATTRS128
337 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
340 (__v4si)_mm_sub_epi32(__A, __B),
344 static __inline__ __m128i __DEFAULT_FN_ATTRS128
345 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
347 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
348 (__v4si)_mm_sub_epi32(__A, __B),
349 (__v4si)_mm_setzero_si128());
352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
353 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
356 (__v2di)_mm_sub_epi64(__A, __B),
360 static __inline__ __m128i __DEFAULT_FN_ATTRS128
361 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
363 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
364 (__v2di)_mm_sub_epi64(__A, __B),
365 (__v2di)_mm_setzero_si128());
368 static __inline__ __m256i __DEFAULT_FN_ATTRS256
369 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
372 (__v4di)_mm256_mul_epi32(__X, __Y),
376 static __inline__ __m256i __DEFAULT_FN_ATTRS256
377 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
379 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
380 (__v4di)_mm256_mul_epi32(__X, __Y),
381 (__v4di)_mm256_setzero_si256());
384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
385 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
387 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
388 (__v2di)_mm_mul_epi32(__X, __Y),
392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
393 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
395 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
396 (__v2di)_mm_mul_epi32(__X, __Y),
397 (__v2di)_mm_setzero_si128());
400 static __inline__ __m256i __DEFAULT_FN_ATTRS256
401 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
404 (__v4di)_mm256_mul_epu32(__X, __Y),
408 static __inline__ __m256i __DEFAULT_FN_ATTRS256
409 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
411 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
412 (__v4di)_mm256_mul_epu32(__X, __Y),
413 (__v4di)_mm256_setzero_si256());
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
420 (__v2di)_mm_mul_epu32(__X, __Y),
424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
425 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
427 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
428 (__v2di)_mm_mul_epu32(__X, __Y),
429 (__v2di)_mm_setzero_si128());
432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
433 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
435 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
436 (__v8si)_mm256_mullo_epi32(__A, __B),
437 (__v8si)_mm256_setzero_si256());
440 static __inline__ __m256i __DEFAULT_FN_ATTRS256
441 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
443 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
444 (__v8si)_mm256_mullo_epi32(__A, __B),
448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
449 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
451 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
452 (__v4si)_mm_mullo_epi32(__A, __B),
453 (__v4si)_mm_setzero_si128());
456 static __inline__ __m128i __DEFAULT_FN_ATTRS128
457 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
459 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
460 (__v4si)_mm_mullo_epi32(__A, __B),
464 static __inline__ __m256i __DEFAULT_FN_ATTRS256
465 _mm256_and_epi32(__m256i __a, __m256i __b)
467 return (__m256i)((__v8su)__a & (__v8su)__b);
470 static __inline__ __m256i __DEFAULT_FN_ATTRS256
471 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
473 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
474 (__v8si)_mm256_and_epi32(__A, __B),
478 static __inline__ __m256i __DEFAULT_FN_ATTRS256
479 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
481 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
484 static __inline__ __m128i __DEFAULT_FN_ATTRS128
485 _mm_and_epi32(__m128i __a, __m128i __b)
487 return (__m128i)((__v4su)__a & (__v4su)__b);
490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
491 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
493 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
494 (__v4si)_mm_and_epi32(__A, __B),
498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
499 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
501 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
504 static __inline__ __m256i __DEFAULT_FN_ATTRS256
505 _mm256_andnot_epi32(__m256i __A, __m256i __B)
507 return (__m256i)(~(__v8su)__A & (__v8su)__B);
510 static __inline__ __m256i __DEFAULT_FN_ATTRS256
511 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
513 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
514 (__v8si)_mm256_andnot_epi32(__A, __B),
518 static __inline__ __m256i __DEFAULT_FN_ATTRS256
519 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
521 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
525 static __inline__ __m128i __DEFAULT_FN_ATTRS128
526 _mm_andnot_epi32(__m128i __A, __m128i __B)
528 return (__m128i)(~(__v4su)__A & (__v4su)__B);
531 static __inline__ __m128i __DEFAULT_FN_ATTRS128
532 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
534 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
535 (__v4si)_mm_andnot_epi32(__A, __B),
539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
540 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
542 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
545 static __inline__ __m256i __DEFAULT_FN_ATTRS256
546 _mm256_or_epi32(__m256i __a, __m256i __b)
548 return (__m256i)((__v8su)__a | (__v8su)__b);
551 static __inline__ __m256i __DEFAULT_FN_ATTRS256
552 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
554 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
555 (__v8si)_mm256_or_epi32(__A, __B),
559 static __inline__ __m256i __DEFAULT_FN_ATTRS256
560 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
562 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
565 static __inline__ __m128i __DEFAULT_FN_ATTRS128
566 _mm_or_epi32(__m128i __a, __m128i __b)
568 return (__m128i)((__v4su)__a | (__v4su)__b);
571 static __inline__ __m128i __DEFAULT_FN_ATTRS128
572 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
574 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
575 (__v4si)_mm_or_epi32(__A, __B),
579 static __inline__ __m128i __DEFAULT_FN_ATTRS128
580 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
582 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
585 static __inline__ __m256i __DEFAULT_FN_ATTRS256
586 _mm256_xor_epi32(__m256i __a, __m256i __b)
588 return (__m256i)((__v8su)__a ^ (__v8su)__b);
591 static __inline__ __m256i __DEFAULT_FN_ATTRS256
592 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
594 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
595 (__v8si)_mm256_xor_epi32(__A, __B),
599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
600 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
602 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
605 static __inline__ __m128i __DEFAULT_FN_ATTRS128
606 _mm_xor_epi32(__m128i __a, __m128i __b)
608 return (__m128i)((__v4su)__a ^ (__v4su)__b);
611 static __inline__ __m128i __DEFAULT_FN_ATTRS128
612 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
614 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
615 (__v4si)_mm_xor_epi32(__A, __B),
619 static __inline__ __m128i __DEFAULT_FN_ATTRS128
620 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
622 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
625 static __inline__ __m256i __DEFAULT_FN_ATTRS256
626 _mm256_and_epi64(__m256i __a, __m256i __b)
628 return (__m256i)((__v4du)__a & (__v4du)__b);
631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
632 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
634 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
635 (__v4di)_mm256_and_epi64(__A, __B),
639 static __inline__ __m256i __DEFAULT_FN_ATTRS256
640 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
642 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
645 static __inline__ __m128i __DEFAULT_FN_ATTRS128
646 _mm_and_epi64(__m128i __a, __m128i __b)
648 return (__m128i)((__v2du)__a & (__v2du)__b);
651 static __inline__ __m128i __DEFAULT_FN_ATTRS128
652 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
654 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
655 (__v2di)_mm_and_epi64(__A, __B),
659 static __inline__ __m128i __DEFAULT_FN_ATTRS128
660 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
662 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
665 static __inline__ __m256i __DEFAULT_FN_ATTRS256
666 _mm256_andnot_epi64(__m256i __A, __m256i __B)
668 return (__m256i)(~(__v4du)__A & (__v4du)__B);
671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
672 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
674 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
675 (__v4di)_mm256_andnot_epi64(__A, __B),
679 static __inline__ __m256i __DEFAULT_FN_ATTRS256
680 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
682 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
687 _mm_andnot_epi64(__m128i __A, __m128i __B)
689 return (__m128i)(~(__v2du)__A & (__v2du)__B);
692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
693 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
695 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
696 (__v2di)_mm_andnot_epi64(__A, __B),
700 static __inline__ __m128i __DEFAULT_FN_ATTRS128
701 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
703 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
707 _mm256_or_epi64(__m256i __a, __m256i __b)
709 return (__m256i)((__v4du)__a | (__v4du)__b);
712 static __inline__ __m256i __DEFAULT_FN_ATTRS256
713 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
715 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
716 (__v4di)_mm256_or_epi64(__A, __B),
720 static __inline__ __m256i __DEFAULT_FN_ATTRS256
721 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
723 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
727 _mm_or_epi64(__m128i __a, __m128i __b)
729 return (__m128i)((__v2du)__a | (__v2du)__b);
732 static __inline__ __m128i __DEFAULT_FN_ATTRS128
733 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
735 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
736 (__v2di)_mm_or_epi64(__A, __B),
740 static __inline__ __m128i __DEFAULT_FN_ATTRS128
741 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
743 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
747 _mm256_xor_epi64(__m256i __a, __m256i __b)
749 return (__m256i)((__v4du)__a ^ (__v4du)__b);
752 static __inline__ __m256i __DEFAULT_FN_ATTRS256
753 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
755 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
756 (__v4di)_mm256_xor_epi64(__A, __B),
760 static __inline__ __m256i __DEFAULT_FN_ATTRS256
761 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
763 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
766 static __inline__ __m128i __DEFAULT_FN_ATTRS128
767 _mm_xor_epi64(__m128i __a, __m128i __b)
769 return (__m128i)((__v2du)__a ^ (__v2du)__b);
772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
773 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
776 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
777 (__v2di)_mm_xor_epi64(__A, __B),
781 static __inline__ __m128i __DEFAULT_FN_ATTRS128
782 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
784 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
/* Generic 32-bit integer compares producing an __mmask8.
 *
 * a, b are the vectors to compare and p is the predicate, cast to int and
 * handed to the compare builtin (cmpd* for signed, ucmpd* for unsigned).
 * The unmasked forms pass an all-ones mask, (__mmask8)-1, as the builtin's
 * last argument; the _mask_ forms pass the caller's mask m instead.
 * The whole expansion is parenthesized so the cast binds as one unit at the
 * use site.
 * NOTE(review): the final mask argument was missing from this copy and was
 * restored to match the upstream Clang header; confirm against upstream. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Generic 64-bit integer compares producing an __mmask8.
 *
 * a, b are the vectors to compare and p is the predicate, cast to int and
 * handed to the compare builtin (cmpq* for signed, ucmpq* for unsigned).
 * The unmasked forms pass an all-ones mask, (__mmask8)-1, as the builtin's
 * last argument; the _mask_ forms pass the caller's mask m instead.
 * The whole expansion is parenthesized so the cast binds as one unit at the
 * use site.
 * NOTE(review): the final mask argument was missing from this copy and was
 * restored to match the upstream Clang header; confirm against upstream. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))

/* Generic floating-point compares (ps = float lanes, pd = double lanes)
 * producing an __mmask8.
 *
 * a, b are the vectors to compare and p is the predicate, cast to int and
 * handed to the cmpps or cmppd builtin.  The unmasked forms pass an all-ones
 * mask, (__mmask8)-1, as the builtin's last argument; the _mask_ forms pass
 * the caller's mask m instead.  The whole expansion is parenthesized so the
 * cast binds as one unit at the use site.
 * NOTE(review): the final mask argument was missing from this copy and was
 * restored to match the upstream Clang header; confirm against upstream. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))

907 static __inline__ __m128d __DEFAULT_FN_ATTRS128
908 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
910 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
911 __builtin_ia32_vfmaddpd ((__v2df) __A,
917 static __inline__ __m128d __DEFAULT_FN_ATTRS128
918 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
920 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
921 __builtin_ia32_vfmaddpd ((__v2df) __A,
927 static __inline__ __m128d __DEFAULT_FN_ATTRS128
928 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
930 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
931 __builtin_ia32_vfmaddpd ((__v2df) __A,
934 (__v2df)_mm_setzero_pd());
937 static __inline__ __m128d __DEFAULT_FN_ATTRS128
938 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
940 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
941 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Zero-masked fused multiply-subtract on __m128d: __builtin_ia32_selectpd_128
 * chooses, under mask __U, between the __builtin_ia32_vfmaddpd result (first
 * operand __A) and _mm_setzero_pd().
 * NOTE(review): the operand negation that would make this a subtract is
 * elided in this listing -- confirm against the full header. */
947 static __inline__ __m128d __DEFAULT_FN_ATTRS128
948 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
950 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
951 __builtin_ia32_vfmaddpd ((__v2df) __A,
954 (__v2df)_mm_setzero_pd());
/* Masked (mask3 form) negated fused multiply-add on __m128d: __A is negated
 * before being passed to __builtin_ia32_vfmaddpd, and the result is selected
 * under mask __U.
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
957 static __inline__ __m128d __DEFAULT_FN_ATTRS128
958 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
960 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
961 __builtin_ia32_vfmaddpd (-(__v2df) __A,
/* Zero-masked negated fused multiply-add on __m128d: __A is negated before
 * being passed to __builtin_ia32_vfmaddpd; __builtin_ia32_selectpd_128 then
 * chooses, under mask __U, between that result and _mm_setzero_pd().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
967 static __inline__ __m128d __DEFAULT_FN_ATTRS128
968 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
970 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
971 __builtin_ia32_vfmaddpd (-(__v2df) __A,
974 (__v2df)_mm_setzero_pd());
/* Zero-masked negated fused multiply-subtract on __m128d: __A is negated
 * before being passed to __builtin_ia32_vfmaddpd; __builtin_ia32_selectpd_128
 * then chooses, under mask __U, between that result and _mm_setzero_pd().
 * NOTE(review): the remaining operands (presumably including a negated __C)
 * are elided in this listing -- confirm against the full header. */
977 static __inline__ __m128d __DEFAULT_FN_ATTRS128
978 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
980 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
981 __builtin_ia32_vfmaddpd (-(__v2df) __A,
984 (__v2df)_mm_setzero_pd());
/* Masked fused multiply-add on __m256d: selects, under mask __U, the
 * __builtin_ia32_vfmaddpd256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
987 static __inline__ __m256d __DEFAULT_FN_ATTRS256
988 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
990 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
991 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked (mask3 form) fused multiply-add on __m256d: selects, under mask __U,
 * the __builtin_ia32_vfmaddpd256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
997 static __inline__ __m256d __DEFAULT_FN_ATTRS256
998 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1000 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1001 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Zero-masked fused multiply-add on __m256d: __builtin_ia32_selectpd_256
 * chooses, under mask __U, between the __builtin_ia32_vfmaddpd256 result
 * (first operand __A) and _mm256_setzero_pd().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1007 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1008 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1010 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1011 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1014 (__v4df)_mm256_setzero_pd());
/* Masked fused multiply-subtract on __m256d, built on
 * __builtin_ia32_vfmaddpd256 (first operand __A) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __A) are elided in this listing -- confirm. */
1017 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1018 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1020 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1021 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Zero-masked fused multiply-subtract on __m256d: __builtin_ia32_selectpd_256
 * chooses, under mask __U, between the __builtin_ia32_vfmaddpd256 result
 * (first operand __A) and _mm256_setzero_pd().
 * NOTE(review): the operand negation that would make this a subtract is
 * elided in this listing -- confirm against the full header. */
1027 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1028 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1030 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1031 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1034 (__v4df)_mm256_setzero_pd());
/* Masked (mask3 form) negated fused multiply-add on __m256d: __A is negated
 * before being passed to __builtin_ia32_vfmaddpd256, and the result is
 * selected under mask __U.
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1037 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1038 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1040 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1041 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
/* Zero-masked negated fused multiply-add on __m256d: __A is negated before
 * being passed to __builtin_ia32_vfmaddpd256; __builtin_ia32_selectpd_256
 * then chooses, under mask __U, between that result and _mm256_setzero_pd().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1047 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1048 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1050 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1051 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1054 (__v4df)_mm256_setzero_pd());
/* Zero-masked negated fused multiply-subtract on __m256d: __A is negated
 * before being passed to __builtin_ia32_vfmaddpd256;
 * __builtin_ia32_selectpd_256 then chooses, under mask __U, between that
 * result and _mm256_setzero_pd().
 * NOTE(review): the remaining operands (presumably including a negated __C)
 * are elided in this listing -- confirm against the full header. */
1057 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1058 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1060 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1061 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1064 (__v4df)_mm256_setzero_pd());
/* Masked fused multiply-add on __m128: selects, under mask __U, the
 * __builtin_ia32_vfmaddps result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
1067 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1068 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1070 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1071 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked (mask3 form) fused multiply-add on __m128: selects, under mask __U,
 * the __builtin_ia32_vfmaddps result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1077 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1078 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1080 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1081 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Zero-masked fused multiply-add on __m128: __builtin_ia32_selectps_128
 * chooses, under mask __U, between the __builtin_ia32_vfmaddps result (first
 * operand __A) and _mm_setzero_ps().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1087 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1088 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1090 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1091 __builtin_ia32_vfmaddps ((__v4sf) __A,
1094 (__v4sf)_mm_setzero_ps());
/* Masked fused multiply-subtract on __m128, built on __builtin_ia32_vfmaddps
 * (first operand __A) and __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __A) are elided in this listing -- confirm. */
1097 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1098 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1100 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1101 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Zero-masked fused multiply-subtract on __m128: __builtin_ia32_selectps_128
 * chooses, under mask __U, between the __builtin_ia32_vfmaddps result (first
 * operand __A) and _mm_setzero_ps().
 * NOTE(review): the operand negation that would make this a subtract is
 * elided in this listing -- confirm against the full header. */
1107 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1108 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1110 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1111 __builtin_ia32_vfmaddps ((__v4sf) __A,
1114 (__v4sf)_mm_setzero_ps());
/* Masked (mask3 form) negated fused multiply-add on __m128: __A is negated
 * before being passed to __builtin_ia32_vfmaddps, and the result is selected
 * under mask __U.
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1117 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1118 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1120 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1121 __builtin_ia32_vfmaddps (-(__v4sf) __A,
/* Zero-masked negated fused multiply-add on __m128: __A is negated before
 * being passed to __builtin_ia32_vfmaddps; __builtin_ia32_selectps_128 then
 * chooses, under mask __U, between that result and _mm_setzero_ps().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1127 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1130 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1131 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1134 (__v4sf)_mm_setzero_ps());
/* Zero-masked negated fused multiply-subtract on __m128: __A is negated
 * before being passed to __builtin_ia32_vfmaddps; __builtin_ia32_selectps_128
 * then chooses, under mask __U, between that result and _mm_setzero_ps().
 * NOTE(review): the remaining operands (presumably including a negated __C)
 * are elided in this listing -- confirm against the full header. */
1137 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1138 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1140 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1141 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1144 (__v4sf)_mm_setzero_ps());
/* Masked fused multiply-add on __m256: selects, under mask __U, the
 * __builtin_ia32_vfmaddps256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
1147 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1148 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1150 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1151 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked (mask3 form) fused multiply-add on __m256: selects, under mask __U,
 * the __builtin_ia32_vfmaddps256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1157 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1158 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1160 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1161 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Zero-masked fused multiply-add on __m256: __builtin_ia32_selectps_256
 * chooses, under mask __U, between the __builtin_ia32_vfmaddps256 result
 * (first operand __A) and _mm256_setzero_ps().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1167 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1168 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1170 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1171 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1174 (__v8sf)_mm256_setzero_ps());
/* Masked fused multiply-subtract on __m256, built on
 * __builtin_ia32_vfmaddps256 (first operand __A) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __A) are elided in this listing -- confirm. */
1177 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1178 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1180 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1181 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Zero-masked fused multiply-subtract on __m256: __builtin_ia32_selectps_256
 * chooses, under mask __U, between the __builtin_ia32_vfmaddps256 result
 * (first operand __A) and _mm256_setzero_ps().
 * NOTE(review): the operand negation that would make this a subtract is
 * elided in this listing -- confirm against the full header. */
1187 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1188 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1190 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1191 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1194 (__v8sf)_mm256_setzero_ps());
/* Masked (mask3 form) negated fused multiply-add on __m256: __A is negated
 * before being passed to __builtin_ia32_vfmaddps256, and the result is
 * selected under mask __U.
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1197 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1198 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1200 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1201 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
/* Zero-masked negated fused multiply-add on __m256: __A is negated before
 * being passed to __builtin_ia32_vfmaddps256; __builtin_ia32_selectps_256
 * then chooses, under mask __U, between that result and _mm256_setzero_ps().
 * NOTE(review): the remaining FMA operands are elided in this listing. */
1207 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1208 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1210 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1211 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1214 (__v8sf)_mm256_setzero_ps());
/* Zero-masked negated fused multiply-subtract on __m256: __A is negated
 * before being passed to __builtin_ia32_vfmaddps256;
 * __builtin_ia32_selectps_256 then chooses, under mask __U, between that
 * result and _mm256_setzero_ps().
 * NOTE(review): the remaining operands (presumably including a negated __C)
 * are elided in this listing -- confirm against the full header. */
1217 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1218 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1220 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1221 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1224 (__v8sf)_mm256_setzero_ps());
/* Masked fmaddsub on __m128d: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubpd result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
1227 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1228 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1230 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1231 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Masked (mask3 form) fmaddsub on __m128d: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubpd result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1237 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1238 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1240 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1241 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Zero-masked fmaddsub on __m128d: __builtin_ia32_selectpd_128 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubpd result (first operand __A)
 * and _mm_setzero_pd().
 * NOTE(review): the remaining operands are elided in this listing. */
1247 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1248 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1250 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1251 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1254 (__v2df)_mm_setzero_pd());
/* Masked fmsubadd on __m128d, built on __builtin_ia32_vfmaddsubpd (first
 * operand __A) and __builtin_ia32_selectpd_128 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __A) are elided here -- confirm. */
1257 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1258 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1260 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1261 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Zero-masked fmsubadd on __m128d: __builtin_ia32_selectpd_128 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubpd result (first operand __A)
 * and _mm_setzero_pd().
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) is elided in this listing -- confirm against the full header. */
1267 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1268 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1270 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1271 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1274 (__v2df)_mm_setzero_pd());
/* Masked fmaddsub on __m256d: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubpd256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
1277 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1278 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1280 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1281 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Masked (mask3 form) fmaddsub on __m256d: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubpd256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1287 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1288 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1290 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1291 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Zero-masked fmaddsub on __m256d: __builtin_ia32_selectpd_256 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubpd256 result (first operand
 * __A) and _mm256_setzero_pd().
 * NOTE(review): the remaining operands are elided in this listing. */
1297 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1298 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1300 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1301 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1304 (__v4df)_mm256_setzero_pd());
/* Masked fmsubadd on __m256d, built on __builtin_ia32_vfmaddsubpd256 (first
 * operand __A) and __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __A) are elided here -- confirm. */
1307 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1308 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1310 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1311 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Zero-masked fmsubadd on __m256d: __builtin_ia32_selectpd_256 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubpd256 result (first operand
 * __A) and _mm256_setzero_pd().
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) is elided in this listing -- confirm against the full header. */
1317 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1318 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1320 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1321 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1324 (__v4df)_mm256_setzero_pd());
/* Masked fmaddsub on __m128: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubps result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __A)
 * are elided in this listing -- confirm against the full header. */
1327 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1328 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1330 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1331 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Masked (mask3 form) fmaddsub on __m128: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubps result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1337 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1338 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1340 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1341 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Zero-masked fmaddsub on __m128: __builtin_ia32_selectps_128 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubps result (first operand __A)
 * and _mm_setzero_ps().
 * NOTE(review): the remaining operands are elided in this listing. */
1347 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1348 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1350 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1351 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1354 (__v4sf)_mm_setzero_ps());
/* Masked fmsubadd on __m128, built on __builtin_ia32_vfmaddsubps (first
 * operand __A) and __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __A) are elided here -- confirm. */
1357 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1358 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1360 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1361 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Zero-masked fmsubadd on __m128: __builtin_ia32_selectps_128 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubps result (first operand __A)
 * and _mm_setzero_ps().
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) is elided in this listing -- confirm against the full header. */
1367 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1368 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1370 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1371 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1374 (__v4sf)_mm_setzero_ps());
/* Masked fmaddsub on __m256: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubps256 result (first operand __A).
 * NOTE(review): the tail of the parameter list, the other operands and the
 * select fallback (presumably __A) are elided in this listing -- confirm. */
1377 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1378 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1381 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1382 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Masked (mask3 form) fmaddsub on __m256: selects, under mask __U, the
 * __builtin_ia32_vfmaddsubps256 result (first operand __A).
 * NOTE(review): the other operands and the select fallback (presumably __C)
 * are elided in this listing -- confirm against the full header. */
1388 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1389 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1391 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1392 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Zero-masked fmaddsub on __m256: __builtin_ia32_selectps_256 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubps256 result (first operand
 * __A) and _mm256_setzero_ps().
 * NOTE(review): the remaining operands are elided in this listing. */
1398 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1399 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1401 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1402 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1405 (__v8sf)_mm256_setzero_ps());
/* Masked fmsubadd on __m256, built on __builtin_ia32_vfmaddsubps256 (first
 * operand __A) and __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __A) are elided here -- confirm. */
1408 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1409 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1411 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1412 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Zero-masked fmsubadd on __m256: __builtin_ia32_selectps_256 chooses, under
 * mask __U, between the __builtin_ia32_vfmaddsubps256 result (first operand
 * __A) and _mm256_setzero_ps().
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) is elided in this listing -- confirm against the full header. */
1418 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1419 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1421 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1422 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1425 (__v8sf)_mm256_setzero_ps());
/* Masked (mask3 form) fused multiply-subtract on __m128d, built on
 * __builtin_ia32_vfmaddpd (first operand __A) and __builtin_ia32_selectpd_128
 * under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __C) are elided in this listing -- confirm. */
1428 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1429 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1431 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1432 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked (mask3 form) fused multiply-subtract on __m256d, built on
 * __builtin_ia32_vfmaddpd256 (first operand __A) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __C) are elided in this listing -- confirm. */
1438 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1439 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1441 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1442 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked (mask3 form) fused multiply-subtract on __m128, built on
 * __builtin_ia32_vfmaddps (first operand __A) and __builtin_ia32_selectps_128
 * under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __C) are elided in this listing -- confirm. */
1448 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1449 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1451 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1452 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked (mask3 form) fused multiply-subtract on __m256, built on
 * __builtin_ia32_vfmaddps256 (first operand __A) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): the operand negation that would make this a subtract and the
 * select fallback (presumably __C) are elided in this listing -- confirm. */
1458 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1459 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1461 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1462 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked (mask3 form) fmsubadd on __m128d, built on
 * __builtin_ia32_vfmaddsubpd (first operand __A) and
 * __builtin_ia32_selectpd_128 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __C) are elided here -- confirm. */
1468 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1469 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1471 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1472 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
/* Masked (mask3 form) fmsubadd on __m256d, built on
 * __builtin_ia32_vfmaddsubpd256 (first operand __A) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __C) are elided here -- confirm. */
1478 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1479 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1481 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1482 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
/* Masked (mask3 form) fmsubadd on __m128, built on
 * __builtin_ia32_vfmaddsubps (first operand __A) and
 * __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __C) are elided here -- confirm. */
1488 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1489 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1491 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1492 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
/* Masked (mask3 form) fmsubadd on __m256, built on
 * __builtin_ia32_vfmaddsubps256 (first operand __A) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): whatever turns fmaddsub into fmsubadd (presumably a negated
 * __C) and the select fallback (presumably __C) are elided here -- confirm. */
1498 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1499 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1501 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1502 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
/* Masked negated fused multiply-add on __m128d, built on
 * __builtin_ia32_vfmaddpd (first operand __A, not negated) and
 * __builtin_ia32_selectpd_128 under mask __U.
 * NOTE(review): the operand negation implied by "fnmadd" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1508 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1509 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1511 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1512 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked negated fused multiply-add on __m256d, built on
 * __builtin_ia32_vfmaddpd256 (first operand __A, not negated) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): the operand negation implied by "fnmadd" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1518 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1519 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1521 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1522 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked negated fused multiply-add on __m128, built on
 * __builtin_ia32_vfmaddps (first operand __A, not negated) and
 * __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): the operand negation implied by "fnmadd" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1528 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1529 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1531 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1532 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked negated fused multiply-add on __m256, built on
 * __builtin_ia32_vfmaddps256 (first operand __A, not negated) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): the operand negation implied by "fnmadd" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1538 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1539 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1541 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1542 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked negated fused multiply-subtract on __m128d, built on
 * __builtin_ia32_vfmaddpd (first operand __A, not negated) and
 * __builtin_ia32_selectpd_128 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1548 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1549 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1551 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1552 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked (mask3 form) negated fused multiply-subtract on __m128d, built on
 * __builtin_ia32_vfmaddpd (first operand __A, not negated) and
 * __builtin_ia32_selectpd_128 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __C) are elided in this listing -- confirm. */
1558 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1559 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1561 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1562 __builtin_ia32_vfmaddpd ((__v2df) __A,
/* Masked negated fused multiply-subtract on __m256d, built on
 * __builtin_ia32_vfmaddpd256 (first operand __A, not negated) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1568 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1569 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1571 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1572 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked (mask3 form) negated fused multiply-subtract on __m256d, built on
 * __builtin_ia32_vfmaddpd256 (first operand __A, not negated) and
 * __builtin_ia32_selectpd_256 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __C) are elided in this listing -- confirm. */
1578 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1579 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1581 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1582 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
/* Masked negated fused multiply-subtract on __m128, built on
 * __builtin_ia32_vfmaddps (first operand __A, not negated) and
 * __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1588 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1589 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1591 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1592 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked (mask3 form) negated fused multiply-subtract on __m128, built on
 * __builtin_ia32_vfmaddps (first operand __A, not negated) and
 * __builtin_ia32_selectps_128 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __C) are elided in this listing -- confirm. */
1598 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1599 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1601 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1602 __builtin_ia32_vfmaddps ((__v4sf) __A,
/* Masked negated fused multiply-subtract on __m256, built on
 * __builtin_ia32_vfmaddps256 (first operand __A, not negated) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __A) are elided in this listing -- confirm. */
1608 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1609 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1611 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1612 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked (mask3 form) negated fused multiply-subtract on __m256, built on
 * __builtin_ia32_vfmaddps256 (first operand __A, not negated) and
 * __builtin_ia32_selectps_256 under mask __U.
 * NOTE(review): the operand negations implied by "fnmsub" and the select
 * fallback (presumably __C) are elided in this listing -- confirm. */
1618 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1619 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1621 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1622 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
/* Masked add on __m128d: __builtin_ia32_selectpd_128 picks, under mask __U,
 * the result of _mm_add_pd(__A, __B).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1628 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1629 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1630 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1631 (__v2df)_mm_add_pd(__A, __B),
/* Zero-masked add on __m128d: __builtin_ia32_selectpd_128 chooses, under mask
 * __U, between _mm_add_pd(__A, __B) and _mm_setzero_pd(). */
1635 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1636 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1637 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1638 (__v2df)_mm_add_pd(__A, __B),
1639 (__v2df)_mm_setzero_pd());
/* Masked add on __m256d: __builtin_ia32_selectpd_256 picks, under mask __U,
 * the result of _mm256_add_pd(__A, __B).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1642 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1643 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1644 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1645 (__v4df)_mm256_add_pd(__A, __B),
/* Zero-masked add on __m256d: __builtin_ia32_selectpd_256 chooses, under mask
 * __U, between _mm256_add_pd(__A, __B) and _mm256_setzero_pd(). */
1649 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1650 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1651 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1652 (__v4df)_mm256_add_pd(__A, __B),
1653 (__v4df)_mm256_setzero_pd());
/* Masked add on __m128: __builtin_ia32_selectps_128 picks, under mask __U,
 * the result of _mm_add_ps(__A, __B).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1656 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1657 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1658 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1659 (__v4sf)_mm_add_ps(__A, __B),
/* Zero-masked add on __m128: __builtin_ia32_selectps_128 chooses, under mask
 * __U, between _mm_add_ps(__A, __B) and _mm_setzero_ps(). */
1663 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1664 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1665 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1666 (__v4sf)_mm_add_ps(__A, __B),
1667 (__v4sf)_mm_setzero_ps());
/* Masked add on __m256: __builtin_ia32_selectps_256 picks, under mask __U,
 * the result of _mm256_add_ps(__A, __B).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1670 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1671 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1672 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1673 (__v8sf)_mm256_add_ps(__A, __B),
/* Zero-masked add on __m256: __builtin_ia32_selectps_256 chooses, under mask
 * __U, between _mm256_add_ps(__A, __B) and _mm256_setzero_ps(). */
1677 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1678 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1679 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1680 (__v8sf)_mm256_add_ps(__A, __B),
1681 (__v8sf)_mm256_setzero_ps());
/* Blend of two __m128i vectors under mask __U via __builtin_ia32_selectd_128.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1685 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1686 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
/* Blend of two __m256i vectors under mask __U via __builtin_ia32_selectd_256.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1691 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1692 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1693 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
/* Blend of two __m128d vectors under mask __U via __builtin_ia32_selectpd_128.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1698 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1699 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1700 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
/* Blend of two __m256d vectors under mask __U via __builtin_ia32_selectpd_256.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1705 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1706 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1707 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
/* Blend of two __m128 vectors under mask __U via __builtin_ia32_selectps_128.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1712 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1713 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1714 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
/* Blend of two __m256 vectors under mask __U via __builtin_ia32_selectps_256.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1719 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1720 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1721 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
/* Blend of two __m128i vectors under mask __U via __builtin_ia32_selectq_128.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1727 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1728 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
/* Blend of two __m256i vectors under mask __U via __builtin_ia32_selectq_256.
 * NOTE(review): the vector operands of the select are elided in this listing;
 * which of __A / __W is taken for set mask bits must be confirmed against the
 * full header. */
1733 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1734 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1735 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
/* Masked compress of __m128d __A via __builtin_ia32_compressdf128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1740 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1741 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1742 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
/* Zero-masked compress of __m128d __A via __builtin_ia32_compressdf128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably a zero vector and the mask __U -- confirm. */
1747 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1748 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1749 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
/* Masked compress of __m256d __A via __builtin_ia32_compressdf256_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1755 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1756 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1757 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
/* Zero-masked compress of __m256d __A via __builtin_ia32_compressdf256_mask,
 * with _mm256_setzero_pd() among the builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1762 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1763 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1764 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1766 _mm256_setzero_pd (),
/* Masked compress of __m128i __A (viewed as __v2di) via
 * __builtin_ia32_compressdi128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1770 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1771 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1772 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
/* Zero-masked compress of __m128i __A (viewed as __v2di) via
 * __builtin_ia32_compressdi128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1777 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1778 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1779 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1781 _mm_setzero_si128 (),
/* Masked compress of __m256i __A (viewed as __v4di) via
 * __builtin_ia32_compressdi256_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1785 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1786 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1787 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
/* Zero-masked compress of __m256i __A (viewed as __v4di) via
 * __builtin_ia32_compressdi256_mask, with _mm256_setzero_si256() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1792 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1793 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
1794 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1796 _mm256_setzero_si256 (),
/* Masked compress of __m128 __A via __builtin_ia32_compresssf128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1800 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1801 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1802 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
/* Zero-masked compress of __m128 __A via __builtin_ia32_compresssf128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably a zero vector and the mask __U -- confirm. */
1807 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1808 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
1809 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
/* Masked compress of __m256 __A via __builtin_ia32_compresssf256_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1815 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1816 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1817 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
/* Zero-masked compress of __m256 __A via __builtin_ia32_compresssf256_mask,
 * with _mm256_setzero_ps() among the builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1822 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1823 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
1824 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1826 _mm256_setzero_ps (),
/* Masked compress of __m128i __A (viewed as __v4si) via
 * __builtin_ia32_compresssi128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1830 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1831 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1832 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
/* Zero-masked compress of __m128i __A (viewed as __v4si) via
 * __builtin_ia32_compresssi128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1837 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1838 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
1839 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1841 _mm_setzero_si128 (),
/* Masked compress of __m256i __A (viewed as __v8si) via
 * __builtin_ia32_compresssi256_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1845 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1846 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1847 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
/* Zero-masked compress of __m256i __A (viewed as __v8si) via
 * __builtin_ia32_compresssi256_mask, with _mm256_setzero_si256() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1852 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1853 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
1854 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1856 _mm256_setzero_si256 (),
/* Masked compress-store: calls __builtin_ia32_compressstoredf128_mask with
 * __P cast to __v2df *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1860 static __inline__ void __DEFAULT_FN_ATTRS128
1861 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1862 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoredf256_mask with
 * __P cast to __v4df *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1867 static __inline__ void __DEFAULT_FN_ATTRS256
1868 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1869 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoredi128_mask with
 * __P cast to __v2di *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1874 static __inline__ void __DEFAULT_FN_ATTRS128
1875 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1876 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoredi256_mask with
 * __P cast to __v4di *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1881 static __inline__ void __DEFAULT_FN_ATTRS256
1882 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1883 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoresf128_mask with
 * __P cast to __v4sf *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1888 static __inline__ void __DEFAULT_FN_ATTRS128
1889 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1890 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoresf256_mask with
 * __P cast to __v8sf *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1895 static __inline__ void __DEFAULT_FN_ATTRS256
1896 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1897 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoresi128_mask with
 * __P cast to __v4si *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1902 static __inline__ void __DEFAULT_FN_ATTRS128
1903 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1904 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
/* Masked compress-store: calls __builtin_ia32_compressstoresi256_mask with
 * __P cast to __v8si *. Returns nothing.
 * NOTE(review): the remaining arguments are elided in this listing;
 * presumably the source vector __A and the mask __U -- confirm. */
1909 static __inline__ void __DEFAULT_FN_ATTRS256
1910 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1911 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
/* Masked int32 -> double conversion: __builtin_ia32_selectpd_128 picks, under
 * mask __U, the result of _mm_cvtepi32_pd(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1916 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1917 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1918 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1919 (__v2df)_mm_cvtepi32_pd(__A),
/* Zero-masked int32 -> double conversion: __builtin_ia32_selectpd_128
 * chooses, under mask __U, between _mm_cvtepi32_pd(__A) and
 * _mm_setzero_pd(). */
1923 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1924 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1925 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1926 (__v2df)_mm_cvtepi32_pd(__A),
1927 (__v2df)_mm_setzero_pd());
/* Masked int32 -> double conversion from __m128i to __m256d:
 * __builtin_ia32_selectpd_256 picks, under mask __U, the result of
 * _mm256_cvtepi32_pd(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1930 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1931 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1932 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1933 (__v4df)_mm256_cvtepi32_pd(__A),
/* Zero-masked int32 -> double conversion from __m128i to __m256d:
 * __builtin_ia32_selectpd_256 chooses, under mask __U, between
 * _mm256_cvtepi32_pd(__A) and _mm256_setzero_pd(). */
1937 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1938 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
1939 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1940 (__v4df)_mm256_cvtepi32_pd(__A),
1941 (__v4df)_mm256_setzero_pd());
/* Masked int32 -> float conversion: __builtin_ia32_selectps_128 picks, under
 * mask __U, the result of _mm_cvtepi32_ps(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1944 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1945 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1946 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1947 (__v4sf)_mm_cvtepi32_ps(__A),
/* Zero-masked int32 -> float conversion: __builtin_ia32_selectps_128 chooses,
 * under mask __U, between _mm_cvtepi32_ps(__A) and _mm_setzero_ps(). */
1951 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1952 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
1953 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1954 (__v4sf)_mm_cvtepi32_ps(__A),
1955 (__v4sf)_mm_setzero_ps());
/* Masked int32 -> float conversion: __builtin_ia32_selectps_256 picks, under
 * mask __U, the result of _mm256_cvtepi32_ps(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1958 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1959 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1960 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1961 (__v8sf)_mm256_cvtepi32_ps(__A),
/* Zero-masked int32 -> float conversion: __builtin_ia32_selectps_256 chooses,
 * under mask __U, between _mm256_cvtepi32_ps(__A) and _mm256_setzero_ps(). */
1965 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1966 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
1967 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1968 (__v8sf)_mm256_cvtepi32_ps(__A),
1969 (__v8sf)_mm256_setzero_ps());
/* Masked double -> int32 conversion of __m128d __A via
 * __builtin_ia32_cvtpd2dq128_mask, returned as __m128i.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
1972 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1973 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1974 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
/* Zero-masked double -> int32 conversion of __m128d __A via
 * __builtin_ia32_cvtpd2dq128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
1979 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1980 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
1981 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1983 _mm_setzero_si128 (),
/* Masked double -> int32 conversion from __m256d to __m128i:
 * __builtin_ia32_selectd_128 picks, under mask __U, the result of
 * _mm256_cvtpd_epi32(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
1987 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1988 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1990 (__v4si)_mm256_cvtpd_epi32(__A),
/* Zero-masked double -> int32 conversion from __m256d to __m128i:
 * __builtin_ia32_selectd_128 chooses, under mask __U, between
 * _mm256_cvtpd_epi32(__A) and _mm_setzero_si128(). */
1994 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1995 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
1996 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1997 (__v4si)_mm256_cvtpd_epi32(__A),
1998 (__v4si)_mm_setzero_si128());
/* Masked double -> float conversion of __m128d __A via
 * __builtin_ia32_cvtpd2ps_mask, returned as __m128.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
2001 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2002 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
2003 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
/* Zero-masked double -> float conversion of __m128d __A via
 * __builtin_ia32_cvtpd2ps_mask, returned as __m128.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably a zero vector and the mask __U -- confirm. */
2008 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2009 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2010 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
/* Masked double -> float conversion from __m256d to __m128:
 * __builtin_ia32_selectps_128 picks, under mask __U, the result of
 * _mm256_cvtpd_ps(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
2016 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2017 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2018 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2019 (__v4sf)_mm256_cvtpd_ps(__A),
/* Zero-masked double -> float conversion from __m256d to __m128:
 * __builtin_ia32_selectps_128 chooses, under mask __U, between
 * _mm256_cvtpd_ps(__A) and _mm_setzero_ps(). */
2023 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2024 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2025 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2026 (__v4sf)_mm256_cvtpd_ps(__A),
2027 (__v4sf)_mm_setzero_ps());
/* Unmasked double -> unsigned int32 conversion of __m128d __A via
 * __builtin_ia32_cvtpd2udq128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably an all-ones mask -- confirm against the full header. */
2030 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2031 _mm_cvtpd_epu32 (__m128d __A) {
2032 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2034 _mm_setzero_si128 (),
/* Masked double -> unsigned int32 conversion of __m128d __A via
 * __builtin_ia32_cvtpd2udq128_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
2038 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2039 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2040 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
/* Zero-masked double -> unsigned int32 conversion of __m128d __A via
 * __builtin_ia32_cvtpd2udq128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
2045 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2046 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2047 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2049 _mm_setzero_si128 (),
/* Unmasked double -> unsigned int32 conversion from __m256d to __m128i via
 * __builtin_ia32_cvtpd2udq256_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably an all-ones mask -- confirm against the full header. */
2053 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2054 _mm256_cvtpd_epu32 (__m256d __A) {
2055 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2057 _mm_setzero_si128 (),
/* Masked double -> unsigned int32 conversion from __m256d to __m128i via
 * __builtin_ia32_cvtpd2udq256_mask.
 * NOTE(review): the builtin's remaining arguments are elided in this listing;
 * presumably the pass-through vector __W and the mask __U -- confirm. */
2061 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2062 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2063 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
/* Zero-masked double -> unsigned int32 conversion from __m256d to __m128i via
 * __builtin_ia32_cvtpd2udq256_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably __U -- confirm. */
2068 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2069 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2070 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2072 _mm_setzero_si128 (),
/* Masked float -> int32 conversion: __builtin_ia32_selectd_128 picks, under
 * mask __U, the result of _mm_cvtps_epi32(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
2076 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2078 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2079 (__v4si)_mm_cvtps_epi32(__A),
/* Zero-masked float -> int32 conversion: __builtin_ia32_selectd_128 chooses,
 * under mask __U, between _mm_cvtps_epi32(__A) and _mm_setzero_si128(). */
2083 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2084 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2086 (__v4si)_mm_cvtps_epi32(__A),
2087 (__v4si)_mm_setzero_si128());
/* Masked float -> int32 conversion: __builtin_ia32_selectd_256 picks, under
 * mask __U, the result of _mm256_cvtps_epi32(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
2090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2091 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2092 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2093 (__v8si)_mm256_cvtps_epi32(__A),
/* Zero-masked float -> int32 conversion: __builtin_ia32_selectd_256 chooses,
 * under mask __U, between _mm256_cvtps_epi32(__A) and
 * _mm256_setzero_si256(). */
2097 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2098 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2099 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2100 (__v8si)_mm256_cvtps_epi32(__A),
2101 (__v8si)_mm256_setzero_si256());
/* Masked float -> double conversion: __builtin_ia32_selectpd_128 picks, under
 * mask __U, the result of _mm_cvtps_pd(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
2104 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2105 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2106 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2107 (__v2df)_mm_cvtps_pd(__A),
/* Zero-masked float -> double conversion: __builtin_ia32_selectpd_128
 * chooses, under mask __U, between _mm_cvtps_pd(__A) and _mm_setzero_pd(). */
2111 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2112 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2113 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2114 (__v2df)_mm_cvtps_pd(__A),
2115 (__v2df)_mm_setzero_pd());
/* Masked float -> double conversion from __m128 to __m256d:
 * __builtin_ia32_selectpd_256 picks, under mask __U, the result of
 * _mm256_cvtps_pd(__A).
 * NOTE(review): the select fallback is elided in this listing; presumably
 * the pass-through vector __W -- confirm against the full header. */
2118 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2119 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2120 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2121 (__v4df)_mm256_cvtps_pd(__A),
/* Zero-masked float -> double conversion from __m128 to __m256d:
 * __builtin_ia32_selectpd_256 chooses, under mask __U, between
 * _mm256_cvtps_pd(__A) and _mm256_setzero_pd(). */
2125 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2126 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2127 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2128 (__v4df)_mm256_cvtps_pd(__A),
2129 (__v4df)_mm256_setzero_pd());
/* Unmasked float -> unsigned int32 conversion of __m128 __A via
 * __builtin_ia32_cvtps2udq128_mask, with _mm_setzero_si128() among the
 * builtin's arguments.
 * NOTE(review): the final (mask) argument is elided in this listing;
 * presumably an all-ones mask -- confirm against the full header. */
2132 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2133 _mm_cvtps_epu32 (__m128 __A) {
2134 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2136 _mm_setzero_si128 (),
2140 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2141 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2142 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2148 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2149 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2151 _mm_setzero_si128 (),
2155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2156 _mm256_cvtps_epu32 (__m256 __A) {
2157 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2159 _mm256_setzero_si256 (),
2163 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2164 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2165 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2170 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2171 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2172 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2174 _mm256_setzero_si256 (),
/*
 * Masked forms of the cvttpd_epi32 intrinsics.
 * The 128-bit forms call __builtin_ia32_cvttpd2dq128_mask with __A first;
 * the _maskz_ form passes an all-zero vector next.
 * The 256-bit forms take an __m256d but return an __m128i: they call
 * _mm256_cvttpd_epi32(__A) and pass the result, with mask __U, to
 * __builtin_ia32_selectd_128; the _maskz_ form selects against zero.
 * NOTE(review): the trailing arguments of the _mask_ forms (presumably __W,
 * plus the mask for the 128-bit builtin) and every closing brace are absent
 * from this text -- confirm against the upstream header.
 */
2178 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2179 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2180 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2186 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2187 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2189 _mm_setzero_si128 (),
2193 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2194 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2195 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2196 (__v4si)_mm256_cvttpd_epi32(__A),
2200 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2201 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2202 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2203 (__v4si)_mm256_cvttpd_epi32(__A),
2204 (__v4si)_mm_setzero_si128());
/*
 * _mm_cvttpd_epu32 / _mm256_cvttpd_epu32 and their masked forms.
 * All six functions return an __m128i and call
 * __builtin_ia32_cvttpd2udq128_mask / 256_mask with __A as the first
 * argument. The unmasked and _maskz_ forms pass an all-zero vector next.
 * NOTE(review): the remaining call arguments (presumably __W for the _mask_
 * forms, then the mask value) and every closing brace are absent from this
 * text -- confirm against the upstream header.
 */
2207 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2208 _mm_cvttpd_epu32 (__m128d __A) {
2209 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2211 _mm_setzero_si128 (),
2215 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2216 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2217 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2222 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2223 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2224 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2226 _mm_setzero_si128 (),
2230 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2231 _mm256_cvttpd_epu32 (__m256d __A) {
2232 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2234 _mm_setzero_si128 (),
2238 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2239 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2240 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2245 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2246 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2247 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2249 _mm_setzero_si128 (),
/*
 * Masked forms of _mm_cvttps_epi32 / _mm256_cvttps_epi32.
 * Each function calls the unmasked intrinsic on __A and passes the result,
 * with mask __U, to __builtin_ia32_selectd_128 / _256. The _maskz_ forms give
 * an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2254 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2255 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2256 (__v4si)_mm_cvttps_epi32(__A),
2260 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2261 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2262 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2263 (__v4si)_mm_cvttps_epi32(__A),
2264 (__v4si)_mm_setzero_si128());
2267 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2268 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2270 (__v8si)_mm256_cvttps_epi32(__A),
2274 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2275 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2276 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2277 (__v8si)_mm256_cvttps_epi32(__A),
2278 (__v8si)_mm256_setzero_si256());
/*
 * _mm_cvttps_epu32 / _mm256_cvttps_epu32 and their masked forms.
 * All six functions call __builtin_ia32_cvttps2udq128_mask / 256_mask with
 * __A as the first argument. The unmasked and _maskz_ forms pass an all-zero
 * vector next.
 * NOTE(review): the remaining call arguments (presumably __W for the _mask_
 * forms, then the mask value) and every closing brace are absent from this
 * text -- confirm against the upstream header.
 */
2281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2282 _mm_cvttps_epu32 (__m128 __A) {
2283 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2285 _mm_setzero_si128 (),
2289 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2290 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2291 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2297 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2298 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2300 _mm_setzero_si128 (),
2304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2305 _mm256_cvttps_epu32 (__m256 __A) {
2306 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2308 _mm256_setzero_si256 (),
2312 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2313 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2314 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2319 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2320 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2321 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2323 _mm256_setzero_si256 (),
/*
 * _mm_cvtepu32_pd / _mm256_cvtepu32_pd and their masked forms.
 * The 128-bit unmasked form views __A as __v4su, keeps elements 0 and 1 via
 * __builtin_shufflevector, and converts that pair to __v2df with
 * __builtin_convertvector. The 256-bit unmasked form converts all four
 * __v4su elements of __A to __v4df.
 * The masked forms call the unmasked intrinsic and pass the result, with
 * mask __U, to __builtin_ia32_selectpd_128 / _256; the _maskz_ forms select
 * against an all-zero vector.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2327 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2328 _mm_cvtepu32_pd (__m128i __A) {
2329 return (__m128d) __builtin_convertvector(
2330 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2333 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2334 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2335 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2336 (__v2df)_mm_cvtepu32_pd(__A),
2340 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2341 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2342 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2343 (__v2df)_mm_cvtepu32_pd(__A),
2344 (__v2df)_mm_setzero_pd());
2347 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2348 _mm256_cvtepu32_pd (__m128i __A) {
2349 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2352 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2353 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2354 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2355 (__v4df)_mm256_cvtepu32_pd(__A),
2359 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2360 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2361 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2362 (__v4df)_mm256_cvtepu32_pd(__A),
2363 (__v4df)_mm256_setzero_pd());
/*
 * _mm_cvtepu32_ps / _mm256_cvtepu32_ps and their masked forms.
 * The unmasked forms view __A as unsigned 32-bit elements (__v4su / __v8su)
 * and convert them to __v4sf / __v8sf with __builtin_convertvector.
 * The masked forms call the unmasked intrinsic and pass the result, with
 * mask __U, to __builtin_ia32_selectps_128 / _256; the _maskz_ forms select
 * against an all-zero vector.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2366 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2367 _mm_cvtepu32_ps (__m128i __A) {
2368 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2371 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2372 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2373 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2374 (__v4sf)_mm_cvtepu32_ps(__A),
2378 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2379 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2380 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2381 (__v4sf)_mm_cvtepu32_ps(__A),
2382 (__v4sf)_mm_setzero_ps());
2385 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2386 _mm256_cvtepu32_ps (__m256i __A) {
2387 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2390 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2391 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2392 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2393 (__v8sf)_mm256_cvtepu32_ps(__A),
2397 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2398 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2399 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2400 (__v8sf)_mm256_cvtepu32_ps(__A),
2401 (__v8sf)_mm256_setzero_ps());
/*
 * Masked forms of _mm_div_pd / _mm256_div_pd.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectpd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2404 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2405 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2406 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2407 (__v2df)_mm_div_pd(__A, __B),
2411 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2412 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2413 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2414 (__v2df)_mm_div_pd(__A, __B),
2415 (__v2df)_mm_setzero_pd());
2418 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2419 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2420 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2421 (__v4df)_mm256_div_pd(__A, __B),
2425 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2426 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2427 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2428 (__v4df)_mm256_div_pd(__A, __B),
2429 (__v4df)_mm256_setzero_pd());
/*
 * Masked forms of _mm_div_ps / _mm256_div_ps.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectps_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2432 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2433 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2434 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2435 (__v4sf)_mm_div_ps(__A, __B),
2439 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2440 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2441 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2442 (__v4sf)_mm_div_ps(__A, __B),
2443 (__v4sf)_mm_setzero_ps());
2446 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2447 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2448 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2449 (__v8sf)_mm256_div_ps(__A, __B),
2453 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2454 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2455 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2456 (__v8sf)_mm256_div_ps(__A, __B),
2457 (__v8sf)_mm256_setzero_ps());
/*
 * Masked expand of double-precision vectors (128- and 256-bit).
 * All four functions call __builtin_ia32_expanddf128_mask / 256_mask with
 * __A as the first argument. Only the 256-bit _maskz_ form still shows its
 * second argument, an all-zero vector.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2460 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2461 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2462 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2467 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2468 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2469 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2475 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2476 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2477 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2482 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2483 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2484 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2486 _mm256_setzero_pd (),
/*
 * Masked expand of 64-bit integer vectors (128- and 256-bit).
 * All four functions call __builtin_ia32_expanddi128_mask / 256_mask with
 * __A as the first argument. The _maskz_ forms pass an all-zero vector next.
 * NOTE(review): the remaining call arguments (presumably __W for the _mask_
 * forms, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2491 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2492 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2498 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2499 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2501 _mm_setzero_si128 (),
2505 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2506 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2507 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2512 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2513 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2514 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2516 _mm256_setzero_si256 (),
/*
 * Masked expand-load of double-precision vectors from memory at __P.
 * All four functions cast __P to a vector pointer (dropping its const
 * qualifier) and pass it as the first argument of
 * __builtin_ia32_expandloaddf128_mask / 256_mask. Only the 256-bit _maskz_
 * form still shows its second argument, an all-zero vector.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2520 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2521 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2522 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2528 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2529 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2530 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2537 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2538 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2539 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2545 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2546 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2547 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2549 _mm256_setzero_pd (),
/*
 * Masked expand-load of 64-bit integer vectors from memory at __P.
 * All four functions cast __P to a vector pointer and pass it as the first
 * argument of __builtin_ia32_expandloaddi128_mask / 256_mask. The _maskz_
 * forms pass an all-zero vector next.
 * NOTE(review): the parameter list of _mm256_mask_expandloadu_epi64 is cut
 * off after __U (the body uses __P, so presumably `void const *__P` follows).
 * The remaining call arguments and every closing brace are also absent from
 * this text -- confirm against the upstream header.
 */
2554 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2555 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2556 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2562 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2563 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2564 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2566 _mm_setzero_si128 (),
2571 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2572 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2574 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2580 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2581 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2582 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2584 _mm256_setzero_si256 (),
/*
 * Masked expand-load of single-precision vectors from memory at __P.
 * All four functions cast __P to a vector pointer and pass it as the first
 * argument of __builtin_ia32_expandloadsf128_mask / 256_mask. Only the
 * 256-bit _maskz_ form still shows its second argument, an all-zero vector.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2589 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2590 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2591 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2596 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2597 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2598 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2605 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2606 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2607 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2612 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2613 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2614 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2616 _mm256_setzero_ps (),
/*
 * Masked expand-load of 32-bit integer vectors from memory at __P.
 * All four functions cast __P to a vector pointer and pass it as the first
 * argument of __builtin_ia32_expandloadsi128_mask / 256_mask. The _maskz_
 * forms pass an all-zero vector next.
 * NOTE(review): the parameter list of _mm256_mask_expandloadu_epi32 is cut
 * off after __U (the body uses __P, so presumably `void const *__P` follows).
 * The remaining call arguments and every closing brace are also absent from
 * this text -- confirm against the upstream header.
 */
2621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2623 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2631 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2633 _mm_setzero_si128 (),
2637 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2640 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2646 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2647 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2648 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2650 _mm256_setzero_si256 (),
/*
 * Masked expand of single-precision vectors (128- and 256-bit).
 * All four functions call __builtin_ia32_expandsf128_mask / 256_mask with
 * __A as the first argument. Only the 256-bit _maskz_ form still shows its
 * second argument, an all-zero vector.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2655 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2656 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2657 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2662 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2663 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2664 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2670 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2671 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2672 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2677 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2678 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2679 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2681 _mm256_setzero_ps (),
/*
 * Masked expand of 32-bit integer vectors (128- and 256-bit).
 * All four functions call __builtin_ia32_expandsi128_mask / 256_mask with
 * __A as the first argument. The _maskz_ forms pass an all-zero vector next.
 * NOTE(review): the remaining call arguments (presumably __W for the _mask_
 * forms, then __U) and every closing brace are absent from this text --
 * confirm against the upstream header.
 */
2685 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2686 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2687 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2693 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2694 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2696 _mm_setzero_si128 (),
2700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2701 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2702 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2707 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2708 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2709 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2711 _mm256_setzero_si256 (),
/*
 * _mm_getexp_pd / _mm256_getexp_pd and their masked forms.
 * All six functions call __builtin_ia32_getexppd128_mask / 256_mask with
 * __A as the first argument. The 256-bit unmasked and _maskz_ forms still
 * show an all-zero vector as the second argument; the 128-bit forms show
 * nothing past the first argument.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then the mask value) and every closing brace are absent from this
 * text -- confirm against the upstream header.
 */
2715 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2716 _mm_getexp_pd (__m128d __A) {
2717 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2723 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2724 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2725 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2730 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2731 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2732 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2738 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2739 _mm256_getexp_pd (__m256d __A) {
2740 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2742 _mm256_setzero_pd (),
2746 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2747 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2748 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2753 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2754 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
2755 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2757 _mm256_setzero_pd (),
/*
 * _mm_getexp_ps / _mm256_getexp_ps and their masked forms.
 * All six functions call __builtin_ia32_getexpps128_mask / 256_mask with
 * __A as the first argument. The 256-bit unmasked and _maskz_ forms still
 * show an all-zero vector as the second argument; the 128-bit forms show
 * nothing past the first argument.
 * NOTE(review): the remaining call arguments (presumably __W or a zero
 * vector, then the mask value) and every closing brace are absent from this
 * text -- confirm against the upstream header.
 */
2761 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2762 _mm_getexp_ps (__m128 __A) {
2763 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2769 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2770 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2771 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2776 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2777 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
2778 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2784 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2785 _mm256_getexp_ps (__m256 __A) {
2786 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2788 _mm256_setzero_ps (),
2792 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2793 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2794 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2799 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2800 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
2801 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2803 _mm256_setzero_ps (),
/*
 * Masked forms of _mm_max_pd / _mm256_max_pd.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectpd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2807 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2808 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2809 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2810 (__v2df)_mm_max_pd(__A, __B),
2814 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2815 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2816 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2817 (__v2df)_mm_max_pd(__A, __B),
2818 (__v2df)_mm_setzero_pd());
2821 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2822 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2823 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2824 (__v4df)_mm256_max_pd(__A, __B),
2828 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2829 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2831 (__v4df)_mm256_max_pd(__A, __B),
2832 (__v4df)_mm256_setzero_pd());
/*
 * Masked forms of _mm_max_ps / _mm256_max_ps.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectps_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2835 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2836 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2837 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2838 (__v4sf)_mm_max_ps(__A, __B),
2842 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2843 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2844 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2845 (__v4sf)_mm_max_ps(__A, __B),
2846 (__v4sf)_mm_setzero_ps());
2849 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2850 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2851 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2852 (__v8sf)_mm256_max_ps(__A, __B),
2856 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2857 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2858 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2859 (__v8sf)_mm256_max_ps(__A, __B),
2860 (__v8sf)_mm256_setzero_ps());
/*
 * Masked forms of _mm_min_pd / _mm256_min_pd.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectpd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2863 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2864 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2865 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2866 (__v2df)_mm_min_pd(__A, __B),
2870 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2871 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2872 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2873 (__v2df)_mm_min_pd(__A, __B),
2874 (__v2df)_mm_setzero_pd());
2877 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2878 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2879 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2880 (__v4df)_mm256_min_pd(__A, __B),
2884 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2885 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2887 (__v4df)_mm256_min_pd(__A, __B),
2888 (__v4df)_mm256_setzero_pd());
/*
 * Masked forms of _mm_min_ps / _mm256_min_ps.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectps_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2891 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2892 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2893 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2894 (__v4sf)_mm_min_ps(__A, __B),
2898 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2899 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2900 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2901 (__v4sf)_mm_min_ps(__A, __B),
2902 (__v4sf)_mm_setzero_ps());
2905 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2906 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2907 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2908 (__v8sf)_mm256_min_ps(__A, __B),
2912 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2913 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2914 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2915 (__v8sf)_mm256_min_ps(__A, __B),
2916 (__v8sf)_mm256_setzero_ps());
/*
 * Masked forms of _mm_mul_pd / _mm256_mul_pd.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectpd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2919 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2920 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2921 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2922 (__v2df)_mm_mul_pd(__A, __B),
2926 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2927 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2928 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2929 (__v2df)_mm_mul_pd(__A, __B),
2930 (__v2df)_mm_setzero_pd());
2933 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2934 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2935 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2936 (__v4df)_mm256_mul_pd(__A, __B),
2940 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2941 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2942 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2943 (__v4df)_mm256_mul_pd(__A, __B),
2944 (__v4df)_mm256_setzero_pd());
/*
 * Masked forms of _mm_mul_ps / _mm256_mul_ps.
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __U, to __builtin_ia32_selectps_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2947 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2948 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2949 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2950 (__v4sf)_mm_mul_ps(__A, __B),
2954 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2955 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2956 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2957 (__v4sf)_mm_mul_ps(__A, __B),
2958 (__v4sf)_mm_setzero_ps());
2961 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2962 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2963 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2964 (__v8sf)_mm256_mul_ps(__A, __B),
2968 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2969 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2970 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2971 (__v8sf)_mm256_mul_ps(__A, __B),
2972 (__v8sf)_mm256_setzero_ps());
/*
 * Masked forms of _mm_abs_epi32 / _mm256_abs_epi32.
 * Each function calls the unmasked intrinsic on __A and passes the result,
 * with mask __U, to __builtin_ia32_selectd_128 / _256. The _maskz_ forms give
 * an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
2975 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2976 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2977 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2978 (__v4si)_mm_abs_epi32(__A),
2982 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2983 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
2984 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2985 (__v4si)_mm_abs_epi32(__A),
2986 (__v4si)_mm_setzero_si128());
2989 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2990 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2992 (__v8si)_mm256_abs_epi32(__A),
2996 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2997 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
2998 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2999 (__v8si)_mm256_abs_epi32(__A),
3000 (__v8si)_mm256_setzero_si256());
/*
 * _mm_abs_epi64 / _mm256_abs_epi64 and their masked forms.
 * The unmasked forms pass __A, viewed as __v2di / __v4di, straight to
 * __builtin_ia32_pabsq128 / pabsq256.
 * The masked forms call the unmasked intrinsic and pass the result, with
 * mask __U, to __builtin_ia32_selectq_128 / _256; the _maskz_ forms select
 * against an all-zero vector.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3003 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3004 _mm_abs_epi64 (__m128i __A) {
3005 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
3008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3009 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3010 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3011 (__v2di)_mm_abs_epi64(__A),
3015 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3016 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3017 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3018 (__v2di)_mm_abs_epi64(__A),
3019 (__v2di)_mm_setzero_si128());
3022 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3023 _mm256_abs_epi64 (__m256i __A) {
3024 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3027 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3028 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3030 (__v4di)_mm256_abs_epi64(__A),
3034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3035 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3036 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3037 (__v4di)_mm256_abs_epi64(__A),
3038 (__v4di)_mm256_setzero_si256());
/*
 * Masked forms of _mm_max_epi32 / _mm256_max_epi32 (mask parameter is __M).
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __M, to __builtin_ia32_selectd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3041 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3042 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3043 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3044 (__v4si)_mm_max_epi32(__A, __B),
3045 (__v4si)_mm_setzero_si128());
3048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3049 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3050 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3051 (__v4si)_mm_max_epi32(__A, __B),
3055 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3056 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3057 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3058 (__v8si)_mm256_max_epi32(__A, __B),
3059 (__v8si)_mm256_setzero_si256());
3062 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3063 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3064 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3065 (__v8si)_mm256_max_epi32(__A, __B),
/*
 * _mm_max_epi64 / _mm256_max_epi64 and their masked forms.
 * The unmasked forms pass __A and __B, viewed as __v2di / __v4di, to
 * __builtin_ia32_pmaxsq128 / pmaxsq256.
 * The masked forms call the unmasked intrinsic and pass the result, with
 * mask __M, to __builtin_ia32_selectq_128 / _256; the _maskz_ forms select
 * against an all-zero vector.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3069 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3070 _mm_max_epi64 (__m128i __A, __m128i __B) {
3071 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3074 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3075 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3076 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3077 (__v2di)_mm_max_epi64(__A, __B),
3078 (__v2di)_mm_setzero_si128());
3081 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3082 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3083 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3084 (__v2di)_mm_max_epi64(__A, __B),
3088 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3089 _mm256_max_epi64 (__m256i __A, __m256i __B) {
3090 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3093 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3094 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3095 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3096 (__v4di)_mm256_max_epi64(__A, __B),
3097 (__v4di)_mm256_setzero_si256());
3100 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3101 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3102 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3103 (__v4di)_mm256_max_epi64(__A, __B),
/*
 * Masked forms of _mm_max_epu32 / _mm256_max_epu32 (mask parameter is __M).
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __M, to __builtin_ia32_selectd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3107 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3108 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3109 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3110 (__v4si)_mm_max_epu32(__A, __B),
3111 (__v4si)_mm_setzero_si128());
3114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3115 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3116 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3117 (__v4si)_mm_max_epu32(__A, __B),
3121 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3122 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3123 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3124 (__v8si)_mm256_max_epu32(__A, __B),
3125 (__v8si)_mm256_setzero_si256());
3128 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3129 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3130 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3131 (__v8si)_mm256_max_epu32(__A, __B),
/*
 * _mm_max_epu64 / _mm256_max_epu64 and their masked forms.
 * The unmasked forms pass __A and __B to __builtin_ia32_pmaxuq128 /
 * pmaxuq256; the operands are cast to the signed vector types __v2di /
 * __v4di at the call site.
 * The masked forms call the unmasked intrinsic and pass the result, with
 * mask __M, to __builtin_ia32_selectq_128 / _256; the _maskz_ forms select
 * against an all-zero vector.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3135 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3136 _mm_max_epu64 (__m128i __A, __m128i __B) {
3137 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3140 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3141 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3142 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3143 (__v2di)_mm_max_epu64(__A, __B),
3144 (__v2di)_mm_setzero_si128());
3147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3148 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3149 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3150 (__v2di)_mm_max_epu64(__A, __B),
3154 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3155 _mm256_max_epu64 (__m256i __A, __m256i __B) {
3156 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3159 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3160 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3161 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3162 (__v4di)_mm256_max_epu64(__A, __B),
3163 (__v4di)_mm256_setzero_si256());
3166 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3167 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3168 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3169 (__v4di)_mm256_max_epu64(__A, __B),
/*
 * Masked forms of _mm_min_epi32 / _mm256_min_epi32 (mask parameter is __M).
 * Each function calls the unmasked intrinsic on (__A, __B) and passes the
 * result, with mask __M, to __builtin_ia32_selectd_128 / _256. The _maskz_
 * forms give an all-zero vector as the select's last operand.
 * NOTE(review): the last select operand of the _mask_ forms (presumably __W)
 * and every closing brace are absent from this text -- confirm against the
 * upstream header.
 */
3173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3174 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3175 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3176 (__v4si)_mm_min_epi32(__A, __B),
3177 (__v4si)_mm_setzero_si128());
3180 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3181 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3182 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3183 (__v4si)_mm_min_epi32(__A, __B),
3187 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3188 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3189 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3190 (__v8si)_mm256_min_epi32(__A, __B),
3191 (__v8si)_mm256_setzero_si256());
3194 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3195 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3196 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3197 (__v8si)_mm256_min_epi32(__A, __B),
/* Returns __builtin_ia32_pminsq128 of __A and __B, both viewed as __v2di,
 * cast back to __m128i (per the name: signed 64-bit minimum). */
3201 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3202 _mm_min_epi64 (__m128i __A, __m128i __B) {
3203 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
/* Masked variant of _mm_min_epi64: hands mask __M and
 * _mm_min_epi64(__A, __B) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3206 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3207 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3208 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3209 (__v2di)_mm_min_epi64(__A, __B),
/* Zero-masking variant of _mm_min_epi64: __builtin_ia32_selectq_128
 * chooses, under mask __M, between _mm_min_epi64(__A, __B) and an
 * all-zero vector. */
3213 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3214 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3215 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3216 (__v2di)_mm_min_epi64(__A, __B),
3217 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_pminsq256 of __A and __B, both viewed as __v4di,
 * cast back to __m256i (per the name: signed 64-bit minimum). */
3220 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3221 _mm256_min_epi64 (__m256i __A, __m256i __B) {
3222 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
/* Masked variant of _mm256_min_epi64: hands mask __M and
 * _mm256_min_epi64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3225 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3226 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3227 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3228 (__v4di)_mm256_min_epi64(__A, __B),
/* Zero-masking variant of _mm256_min_epi64: __builtin_ia32_selectq_256
 * chooses, under mask __M, between _mm256_min_epi64(__A, __B) and an
 * all-zero vector. */
3232 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3233 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3234 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3235 (__v4di)_mm256_min_epi64(__A, __B),
3236 (__v4di)_mm256_setzero_si256());
/* Zero-masking variant of _mm_min_epu32: __builtin_ia32_selectd_128
 * chooses, under mask __M, between _mm_min_epu32(__A, __B) and an
 * all-zero vector. */
3239 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3240 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3242 (__v4si)_mm_min_epu32(__A, __B),
3243 (__v4si)_mm_setzero_si128());
/* Masked variant of _mm_min_epu32: hands mask __M and
 * _mm_min_epu32(__A, __B) to __builtin_ia32_selectd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3246 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3247 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3249 (__v4si)_mm_min_epu32(__A, __B),
/* Zero-masking variant of _mm256_min_epu32: __builtin_ia32_selectd_256
 * chooses, under mask __M, between _mm256_min_epu32(__A, __B) and an
 * all-zero vector. */
3253 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3254 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3256 (__v8si)_mm256_min_epu32(__A, __B),
3257 (__v8si)_mm256_setzero_si256());
/* Masked variant of _mm256_min_epu32: hands mask __M and
 * _mm256_min_epu32(__A, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3261 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3263 (__v8si)_mm256_min_epu32(__A, __B),
/* Returns __builtin_ia32_pminuq128 of __A and __B, both viewed as __v2di,
 * cast back to __m128i (per the name: unsigned 64-bit minimum). */
3267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3268 _mm_min_epu64 (__m128i __A, __m128i __B) {
3269 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
/* Masked variant of _mm_min_epu64: hands mask __M and
 * _mm_min_epu64(__A, __B) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3272 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3273 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3274 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3275 (__v2di)_mm_min_epu64(__A, __B),
/* Zero-masking variant of _mm_min_epu64: __builtin_ia32_selectq_128
 * chooses, under mask __M, between _mm_min_epu64(__A, __B) and an
 * all-zero vector. */
3279 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3280 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3281 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3282 (__v2di)_mm_min_epu64(__A, __B),
3283 (__v2di)_mm_setzero_si128());
/* Returns __builtin_ia32_pminuq256 of __A and __B, both viewed as __v4di,
 * cast back to __m256i (per the name: unsigned 64-bit minimum). */
3286 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3287 _mm256_min_epu64 (__m256i __A, __m256i __B) {
3288 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
/* Masked variant of _mm256_min_epu64: hands mask __M and
 * _mm256_min_epu64(__A, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3292 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3293 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3294 (__v4di)_mm256_min_epu64(__A, __B),
/* Zero-masking variant of _mm256_min_epu64: __builtin_ia32_selectq_256
 * chooses, under mask __M, between _mm256_min_epu64(__A, __B) and an
 * all-zero vector. */
3298 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3299 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3301 (__v4di)_mm256_min_epu64(__A, __B),
3302 (__v4di)_mm256_setzero_si256());
/* Expands to __builtin_ia32_rndscalepd_128_mask on A (as __v2df) with a
 * zeroed __v2df operand; result cast to __m128d.
 * NOTE(review): the imm and mask argument lines are not visible in this
 * extract. The expansion is a bare cast expression with no enclosing
 * parentheses, so an adjacent postfix operator would bind to the call. */
3305 #define _mm_roundscale_pd(A, imm) \
3306 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3308 (__v2df)_mm_setzero_pd(), \
/* Expands to __builtin_ia32_rndscalepd_128_mask on A (as __v2df) with W as
 * the __v2df pass-through operand; result cast to __m128d.
 * NOTE(review): the imm and mask (presumably U) argument lines are not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3312 #define _mm_mask_roundscale_pd(W, U, A, imm) \
3313 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3315 (__v2df)(__m128d)(W), \
/* Expands to __builtin_ia32_rndscalepd_128_mask on A (as __v2df) with a
 * zeroed __v2df operand; result cast to __m128d.
 * NOTE(review): the imm and mask (presumably U) argument lines are not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3319 #define _mm_maskz_roundscale_pd(U, A, imm) \
3320 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3322 (__v2df)_mm_setzero_pd(), \
/* Expands to __builtin_ia32_rndscalepd_256_mask on A (as __v4df) with a
 * zeroed __v4df operand; result cast to __m256d.
 * NOTE(review): the imm and mask argument lines are not visible in this
 * extract. The expansion is a bare cast expression with no enclosing
 * parentheses. */
3326 #define _mm256_roundscale_pd(A, imm) \
3327 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3329 (__v4df)_mm256_setzero_pd(), \
/* Expands to __builtin_ia32_rndscalepd_256_mask on A (as __v4df) with W as
 * the __v4df pass-through operand; result cast to __m256d.
 * NOTE(review): the imm and mask (presumably U) argument lines are not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3333 #define _mm256_mask_roundscale_pd(W, U, A, imm) \
3334 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3336 (__v4df)(__m256d)(W), \
/* Expands to __builtin_ia32_rndscalepd_256_mask on A (as __v4df) with a
 * zeroed __v4df operand; result cast to __m256d.
 * NOTE(review): the imm and mask (presumably U) argument lines are not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3340 #define _mm256_maskz_roundscale_pd(U, A, imm) \
3341 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3343 (__v4df)_mm256_setzero_pd(), \
/* Expands to __builtin_ia32_rndscaleps_128_mask on A (as __v4sf) with imm
 * as int and a zeroed __v4sf operand; result cast to __m128.
 * NOTE(review): the trailing mask argument line is not visible in this
 * extract. The expansion is a bare cast expression with no enclosing
 * parentheses. */
3346 #define _mm_roundscale_ps(A, imm) \
3347 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3348 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_rndscaleps_128_mask on A (as __v4sf) with imm
 * as int and W as the __v4sf pass-through operand; result cast to __m128.
 * NOTE(review): the trailing mask (presumably U) argument line is not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3352 #define _mm_mask_roundscale_ps(W, U, A, imm) \
3353 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3354 (__v4sf)(__m128)(W), \
/* Expands to __builtin_ia32_rndscaleps_128_mask on A (as __v4sf) with imm
 * as int and a zeroed __v4sf operand; result cast to __m128.
 * NOTE(review): the trailing mask (presumably U) argument line is not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3358 #define _mm_maskz_roundscale_ps(U, A, imm) \
3359 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3360 (__v4sf)_mm_setzero_ps(), \
/* Expands to __builtin_ia32_rndscaleps_256_mask on A (as __v8sf) with imm
 * as int and a zeroed __v8sf operand; result cast to __m256.
 * NOTE(review): the trailing mask argument line is not visible in this
 * extract. The expansion is a bare cast expression with no enclosing
 * parentheses. */
3363 #define _mm256_roundscale_ps(A, imm) \
3364 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3365 (__v8sf)_mm256_setzero_ps(), \
/* Expands to __builtin_ia32_rndscaleps_256_mask on A (as __v8sf) with imm
 * as int and W as the __v8sf pass-through operand; result cast to __m256.
 * NOTE(review): the trailing mask (presumably U) argument line is not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3368 #define _mm256_mask_roundscale_ps(W, U, A, imm) \
3369 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3370 (__v8sf)(__m256)(W), \
/* Expands to __builtin_ia32_rndscaleps_256_mask on A (as __v8sf) with imm
 * as int and a zeroed __v8sf operand; result cast to __m256.
 * NOTE(review): the trailing mask (presumably U) argument line is not
 * visible in this extract. The expansion is a bare cast expression with no
 * enclosing parentheses. */
3374 #define _mm256_maskz_roundscale_ps(U, A, imm) \
3375 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3376 (__v8sf)_mm256_setzero_ps(), \
/* Forwards __A (as __v2df) to __builtin_ia32_scalefpd128_mask and returns
 * the result as __m128d.
 * NOTE(review): the remaining builtin arguments are not visible in this
 * extract. */
3379 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3380 _mm_scalef_pd (__m128d __A, __m128d __B) {
3381 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Masked form of _mm_scalef_pd: forwards __A (as __v2df) to
 * __builtin_ia32_scalefpd128_mask and returns the result as __m128d.
 * NOTE(review): the rest of the parameter list and the remaining builtin
 * arguments are not visible in this extract. */
3388 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3389 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3391 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Masked form of _mm_scalef_pd taking mask __U: forwards __A (as __v2df)
 * to __builtin_ia32_scalefpd128_mask and returns the result as __m128d.
 * NOTE(review): the remaining builtin arguments are not visible in this
 * extract. */
3397 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3398 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3399 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
/* Forwards __A (as __v4df) to __builtin_ia32_scalefpd256_mask with a zero
 * vector from _mm256_setzero_pd() among the arguments; result returned as
 * __m256d.
 * NOTE(review): the other builtin arguments are not visible in this
 * extract. */
3406 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3407 _mm256_scalef_pd (__m256d __A, __m256d __B) {
3408 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3411 _mm256_setzero_pd (),
/* Masked form of _mm256_scalef_pd: forwards __A (as __v4df) to
 * __builtin_ia32_scalefpd256_mask and returns the result as __m256d.
 * NOTE(review): the rest of the parameter list and the remaining builtin
 * arguments are not visible in this extract. */
3415 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3416 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3418 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
/* Masked form of _mm256_scalef_pd taking mask __U: forwards __A (as
 * __v4df) to __builtin_ia32_scalefpd256_mask with a zero vector from
 * _mm256_setzero_pd() among the arguments; result returned as __m256d.
 * NOTE(review): the other builtin arguments are not visible in this
 * extract. */
3424 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3425 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3426 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3429 _mm256_setzero_pd (),
/* Forwards __A (as __v4sf) to __builtin_ia32_scalefps128_mask and returns
 * the result as __m128.
 * NOTE(review): the remaining builtin arguments are not visible in this
 * extract. */
3433 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3434 _mm_scalef_ps (__m128 __A, __m128 __B) {
3435 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Masked form of _mm_scalef_ps: forwards __A (as __v4sf) to
 * __builtin_ia32_scalefps128_mask and returns the result as __m128.
 * NOTE(review): the remaining builtin arguments (presumably __B, __W and
 * __U) are not visible in this extract -- confirm. */
3442 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3443 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3444 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Masked form of _mm_scalef_ps taking mask __U: forwards __A (as __v4sf)
 * to __builtin_ia32_scalefps128_mask and returns the result as __m128.
 * NOTE(review): the remaining builtin arguments are not visible in this
 * extract. */
3450 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3451 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3452 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
/* Forwards __A (as __v8sf) to __builtin_ia32_scalefps256_mask with a zero
 * vector from _mm256_setzero_ps() among the arguments; result returned as
 * __m256.
 * NOTE(review): the other builtin arguments are not visible in this
 * extract. */
3459 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3460 _mm256_scalef_ps (__m256 __A, __m256 __B) {
3461 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3464 _mm256_setzero_ps (),
/* Masked form of _mm256_scalef_ps: forwards __A (as __v8sf) to
 * __builtin_ia32_scalefps256_mask and returns the result as __m256.
 * NOTE(review): the rest of the parameter list and the remaining builtin
 * arguments are not visible in this extract. */
3468 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3469 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3471 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* Masked form of _mm256_scalef_ps taking mask __U: forwards __A (as
 * __v8sf) to __builtin_ia32_scalefps256_mask with a zero vector from
 * _mm256_setzero_ps() among the arguments; result returned as __m256.
 * NOTE(review): the other builtin arguments are not visible in this
 * extract. */
3477 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3478 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3479 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3482 _mm256_setzero_ps (),
/* Expands to __builtin_ia32_scatterdiv2df with addr as void *, an all-ones
 * __mmask8, index as __v2di, v1 as __v2df and scale as int. */
3486 #define _mm_i64scatter_pd(addr, index, v1, scale) \
3487 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
3488 (__v2di)(__m128i)(index), \
3489 (__v2df)(__m128d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv2df with addr as void *, the
 * caller's mask as __mmask8, index as __v2di, v1 as __v2df and scale as
 * int. */
3491 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3492 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
3493 (__v2di)(__m128i)(index), \
3494 (__v2df)(__m128d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv2di with addr as void *, an all-ones
 * __mmask8, index as __v2di, v1 as __v2di and scale as int. */
3496 #define _mm_i64scatter_epi64(addr, index, v1, scale) \
3497 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
3498 (__v2di)(__m128i)(index), \
3499 (__v2di)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv2di with addr as void *, the
 * caller's mask as __mmask8, index as __v2di, v1 as __v2di and scale as
 * int. */
3501 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3502 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
3503 (__v2di)(__m128i)(index), \
3504 (__v2di)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4df with addr as void *, an all-ones
 * __mmask8, index as __v4di, v1 as __v4df and scale as int. */
3506 #define _mm256_i64scatter_pd(addr, index, v1, scale) \
3507 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
3508 (__v4di)(__m256i)(index), \
3509 (__v4df)(__m256d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4df with addr as void *, the
 * caller's mask as __mmask8, index as __v4di, v1 as __v4df and scale as
 * int. */
3511 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3512 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
3513 (__v4di)(__m256i)(index), \
3514 (__v4df)(__m256d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4di with addr as void *, an all-ones
 * __mmask8, index as __v4di, v1 as __v4di and scale as int. */
3516 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \
3517 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
3518 (__v4di)(__m256i)(index), \
3519 (__v4di)(__m256i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4di with addr as void *, the
 * caller's mask as __mmask8, index as __v4di, v1 as __v4di and scale as
 * int. */
3521 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3522 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
3523 (__v4di)(__m128i)(index), \
3524 (__v4di)(__m256i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4sf with addr as void *, an all-ones
 * __mmask8, index as __v2di and v1 as __v4sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3526 #define _mm_i64scatter_ps(addr, index, v1, scale) \
3527 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
3528 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scatterdiv4sf with addr as void *, the
 * caller's mask as __mmask8, index as __v2di and v1 as __v4sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3531 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3532 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
3533 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scatterdiv4si with addr as void *, an all-ones
 * __mmask8, index as __v2di, v1 as __v4si and scale as int. */
3536 #define _mm_i64scatter_epi32(addr, index, v1, scale) \
3537 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
3538 (__v2di)(__m128i)(index), \
3539 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv4si with addr as void *, the
 * caller's mask as __mmask8, index as __v2di, v1 as __v4si and scale as
 * int. */
3541 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3542 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
3543 (__v2di)(__m128i)(index), \
3544 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv8sf with addr as void *, an all-ones
 * __mmask8, index as a 256-bit __v4di and v1 as a 128-bit __v4sf (four
 * 64-bit indices, four floats).
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3546 #define _mm256_i64scatter_ps(addr, index, v1, scale) \
3547 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
3548 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scatterdiv8sf with addr as void *, the
 * caller's mask as __mmask8, index as a 256-bit __v4di and v1 as a 128-bit
 * __v4sf (four 64-bit indices, four floats).
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3551 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3552 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
3553 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scatterdiv8si with addr as void *, an all-ones
 * __mmask8, index as a 256-bit __v4di, v1 as a 128-bit __v4si (four 64-bit
 * indices, four 32-bit values) and scale as int. */
3556 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \
3557 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
3558 (__v4di)(__m256i)(index), \
3559 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scatterdiv8si with addr as void *, the
 * caller's mask as __mmask8, index as a 256-bit __v4di, v1 as a 128-bit
 * __v4si (four 64-bit indices, four 32-bit values) and scale as int. */
3561 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3562 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
3563 (__v4di)(__m256i)(index), \
3564 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv2df with addr as void *, an all-ones
 * __mmask8, index as __v4si, v1 as __v2df and scale as int. */
3566 #define _mm_i32scatter_pd(addr, index, v1, scale) \
3567 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
3568 (__v4si)(__m128i)(index), \
3569 (__v2df)(__m128d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv2df with addr as void *, the
 * caller's mask as __mmask8, index as __v4si, v1 as __v2df and scale as
 * int. */
3571 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3572 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
3573 (__v4si)(__m128i)(index), \
3574 (__v2df)(__m128d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv2di with addr as void *, an all-ones
 * __mmask8, index as __v4si, v1 as __v2di and scale as int. */
3576 #define _mm_i32scatter_epi64(addr, index, v1, scale) \
3577 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
3578 (__v4si)(__m128i)(index), \
3579 (__v2di)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv2di with addr as void *, the
 * caller's mask as __mmask8, index as __v4si, v1 as __v2di and scale as
 * int. */
3581 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3582 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
3583 (__v4si)(__m128i)(index), \
3584 (__v2di)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4df with addr as void *, an all-ones
 * __mmask8, index as a 128-bit __v4si, v1 as a 256-bit __v4df and scale as
 * int. */
3586 #define _mm256_i32scatter_pd(addr, index, v1, scale) \
3587 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
3588 (__v4si)(__m128i)(index), \
3589 (__v4df)(__m256d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4df with addr as void *, the
 * caller's mask as __mmask8, index as a 128-bit __v4si, v1 as a 256-bit
 * __v4df and scale as int. */
3591 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3592 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
3593 (__v4si)(__m128i)(index), \
3594 (__v4df)(__m256d)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4di with addr as void *, an all-ones
 * __mmask8, index as a 128-bit __v4si, v1 as a 256-bit __v4di and scale as
 * int. */
3596 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
3597 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
3598 (__v4si)(__m128i)(index), \
3599 (__v4di)(__m256i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4di with addr as void *, the
 * caller's mask as __mmask8, index as a 128-bit __v4si, v1 as a 256-bit
 * __v4di and scale as int. */
3601 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3602 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
3603 (__v4si)(__m128i)(index), \
3604 (__v4di)(__m256i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4sf with addr as void *, an all-ones
 * __mmask8, index as __v4si and v1 as __v4sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3606 #define _mm_i32scatter_ps(addr, index, v1, scale) \
3607 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
3608 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scattersiv4sf with addr as void *, the
 * caller's mask as __mmask8, index as __v4si and v1 as __v4sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3611 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3612 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
3613 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Expands to __builtin_ia32_scattersiv4si with addr as void *, an all-ones
 * __mmask8, index as __v4si, v1 as __v4si and scale as int. */
3616 #define _mm_i32scatter_epi32(addr, index, v1, scale) \
3617 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
3618 (__v4si)(__m128i)(index), \
3619 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv4si with addr as void *, the
 * caller's mask as __mmask8, index as __v4si, v1 as __v4si and scale as
 * int. */
3621 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3622 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
3623 (__v4si)(__m128i)(index), \
3624 (__v4si)(__m128i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv8sf with addr as void *, an all-ones
 * __mmask8, index as __v8si and v1 as __v8sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3626 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
3627 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
3628 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Expands to __builtin_ia32_scattersiv8sf with addr as void *, the
 * caller's mask as __mmask8, index as __v8si and v1 as __v8sf.
 * NOTE(review): the trailing argument line (presumably the int scale) is
 * not visible in this extract. */
3631 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3632 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
3633 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Expands to __builtin_ia32_scattersiv8si with addr as void *, an all-ones
 * __mmask8, index as __v8si, v1 as __v8si and scale as int. */
3636 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \
3637 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
3638 (__v8si)(__m256i)(index), \
3639 (__v8si)(__m256i)(v1), (int)(scale))
/* Expands to __builtin_ia32_scattersiv8si with addr as void *, the
 * caller's mask as __mmask8, index as __v8si, v1 as __v8si and scale as
 * int. */
3641 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3642 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
3643 (__v8si)(__m256i)(index), \
3644 (__v8si)(__m256i)(v1), (int)(scale))
/* Masked variant of _mm_sqrt_pd: hands mask __U and _mm_sqrt_pd(__A) to
 * __builtin_ia32_selectpd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3646 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3647 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3648 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3649 (__v2df)_mm_sqrt_pd(__A),
/* Zero-masking variant of _mm_sqrt_pd: __builtin_ia32_selectpd_128
 * chooses, under mask __U, between _mm_sqrt_pd(__A) and an all-zero
 * vector. */
3653 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3654 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3655 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3656 (__v2df)_mm_sqrt_pd(__A),
3657 (__v2df)_mm_setzero_pd());
/* Masked variant of _mm256_sqrt_pd: hands mask __U and
 * _mm256_sqrt_pd(__A) to __builtin_ia32_selectpd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3660 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3661 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3662 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3663 (__v4df)_mm256_sqrt_pd(__A),
/* Zero-masking variant of _mm256_sqrt_pd: __builtin_ia32_selectpd_256
 * chooses, under mask __U, between _mm256_sqrt_pd(__A) and an all-zero
 * vector. */
3667 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3668 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3669 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3670 (__v4df)_mm256_sqrt_pd(__A),
3671 (__v4df)_mm256_setzero_pd());
/* Masked variant of _mm_sqrt_ps: hands mask __U and _mm_sqrt_ps(__A) to
 * __builtin_ia32_selectps_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3674 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3675 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3676 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3677 (__v4sf)_mm_sqrt_ps(__A),
/* Zero-masking variant of _mm_sqrt_ps: __builtin_ia32_selectps_128
 * chooses, under mask __U, between _mm_sqrt_ps(__A) and an all-zero
 * vector. */
3681 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3682 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3683 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3684 (__v4sf)_mm_sqrt_ps(__A),
3685 (__v4sf)_mm_setzero_ps());
/* Masked variant of _mm256_sqrt_ps: hands mask __U and
 * _mm256_sqrt_ps(__A) to __builtin_ia32_selectps_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3688 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3689 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3690 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3691 (__v8sf)_mm256_sqrt_ps(__A),
/* Zero-masking variant of _mm256_sqrt_ps: __builtin_ia32_selectps_256
 * chooses, under mask __U, between _mm256_sqrt_ps(__A) and an all-zero
 * vector. */
3695 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3696 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
3697 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3698 (__v8sf)_mm256_sqrt_ps(__A),
3699 (__v8sf)_mm256_setzero_ps());
/* Masked variant of _mm_sub_pd: hands mask __U and _mm_sub_pd(__A, __B)
 * to __builtin_ia32_selectpd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3702 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3703 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3704 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3705 (__v2df)_mm_sub_pd(__A, __B),
/* Zero-masking variant of _mm_sub_pd: __builtin_ia32_selectpd_128
 * chooses, under mask __U, between _mm_sub_pd(__A, __B) and an all-zero
 * vector. */
3709 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3710 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3711 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3712 (__v2df)_mm_sub_pd(__A, __B),
3713 (__v2df)_mm_setzero_pd());
/* Masked variant of _mm256_sub_pd: hands mask __U and
 * _mm256_sub_pd(__A, __B) to __builtin_ia32_selectpd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3716 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3717 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3718 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3719 (__v4df)_mm256_sub_pd(__A, __B),
/* Zero-masking variant of _mm256_sub_pd: __builtin_ia32_selectpd_256
 * chooses, under mask __U, between _mm256_sub_pd(__A, __B) and an
 * all-zero vector. */
3723 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3724 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3725 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3726 (__v4df)_mm256_sub_pd(__A, __B),
3727 (__v4df)_mm256_setzero_pd());
/* Masked variant of _mm_sub_ps: hands mask __U and _mm_sub_ps(__A, __B)
 * to __builtin_ia32_selectps_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3730 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3731 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3732 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3733 (__v4sf)_mm_sub_ps(__A, __B),
/* Zero-masking variant of _mm_sub_ps: __builtin_ia32_selectps_128
 * chooses, under mask __U, between _mm_sub_ps(__A, __B) and an all-zero
 * vector. */
3737 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3738 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3739 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3740 (__v4sf)_mm_sub_ps(__A, __B),
3741 (__v4sf)_mm_setzero_ps());
/* Masked variant of _mm256_sub_ps: hands mask __U and
 * _mm256_sub_ps(__A, __B) to __builtin_ia32_selectps_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3744 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3745 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3746 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3747 (__v8sf)_mm256_sub_ps(__A, __B),
/* Zero-masking variant of _mm256_sub_ps: __builtin_ia32_selectps_256
 * chooses, under mask __U, between _mm256_sub_ps(__A, __B) and an
 * all-zero vector. */
3751 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3752 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3753 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3754 (__v8sf)_mm256_sub_ps(__A, __B),
3755 (__v8sf)_mm256_setzero_ps());
/* Forwards __A and __I (both as __v4si) to __builtin_ia32_vpermi2vard128
 * and returns the result as __m128i.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3760 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
/* Masked variant of _mm_permutex2var_epi32: hands mask __U and
 * _mm_permutex2var_epi32(__A, __I, __B) to __builtin_ia32_selectd_128.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __A -- confirm. */
3764 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3765 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3767 return (__m128i)__builtin_ia32_selectd_128(__U,
3768 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
/* Masked variant of _mm_permutex2var_epi32 with the mask as third
 * parameter: hands mask __U and _mm_permutex2var_epi32(__A, __I, __B) to
 * __builtin_ia32_selectd_128.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __I -- confirm. */
3772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3773 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3775 return (__m128i)__builtin_ia32_selectd_128(__U,
3776 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
/* Zero-masking variant of _mm_permutex2var_epi32:
 * __builtin_ia32_selectd_128 chooses, under mask __U, between
 * _mm_permutex2var_epi32(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3780 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3781 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3783 return (__m128i)__builtin_ia32_selectd_128(__U,
3784 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3785 (__v4si)_mm_setzero_si128());
/* Forwards __A and __I (both as __v8si) to __builtin_ia32_vpermi2vard256
 * and returns the result as __m256i.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3790 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
/* Masked variant of _mm256_permutex2var_epi32: hands mask __U and
 * _mm256_permutex2var_epi32(__A, __I, __B) to __builtin_ia32_selectd_256.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __A -- confirm. */
3794 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3795 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3797 return (__m256i)__builtin_ia32_selectd_256(__U,
3798 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
/* Masked variant of _mm256_permutex2var_epi32 with the mask as third
 * parameter: hands mask __U and _mm256_permutex2var_epi32(__A, __I, __B)
 * to __builtin_ia32_selectd_256.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __I -- confirm. */
3802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3803 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3805 return (__m256i)__builtin_ia32_selectd_256(__U,
3806 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
/* Zero-masking variant of _mm256_permutex2var_epi32:
 * __builtin_ia32_selectd_256 chooses, under mask __U, between
 * _mm256_permutex2var_epi32(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3810 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3811 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3813 return (__m256i)__builtin_ia32_selectd_256(__U,
3814 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3815 (__v8si)_mm256_setzero_si256());
/* Forwards __A (as __v2df) and __I (as __v2di) to
 * __builtin_ia32_vpermi2varpd128 and returns the result as __m128d.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3818 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3819 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3820 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
/* Masked variant of _mm_permutex2var_pd: hands mask __U and
 * _mm_permutex2var_pd(__A, __I, __B) to __builtin_ia32_selectpd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __A -- confirm. */
3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3826 return (__m128d)__builtin_ia32_selectpd_128(__U,
3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
/* Masked variant of _mm_permutex2var_pd with the mask as third parameter:
 * __builtin_ia32_selectpd_128 chooses, under mask __U, between
 * _mm_permutex2var_pd(__A, __I, __B) and the index vector __I itself,
 * cast from __m128i to __m128d/__v2df. */
3831 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3832 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3833 return (__m128d)__builtin_ia32_selectpd_128(__U,
3834 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3835 (__v2df)(__m128d)__I);
/* Zero-masking variant of _mm_permutex2var_pd:
 * __builtin_ia32_selectpd_128 chooses, under mask __U, between
 * _mm_permutex2var_pd(__A, __I, __B) and an all-zero vector. */
3838 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3839 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3840 return (__m128d)__builtin_ia32_selectpd_128(__U,
3841 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3842 (__v2df)_mm_setzero_pd());
/* Forwards __A (as __v4df) and __I (as __v4di) to
 * __builtin_ia32_vpermi2varpd256 and returns the result as __m256d.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3847 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
/* Masked variant of _mm256_permutex2var_pd: hands mask __U and
 * _mm256_permutex2var_pd(__A, __I, __B) to __builtin_ia32_selectpd_256.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __A -- confirm. */
3851 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3852 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3854 return (__m256d)__builtin_ia32_selectpd_256(__U,
3855 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
/* Masked variant of _mm256_permutex2var_pd with the mask as third
 * parameter: __builtin_ia32_selectpd_256 chooses, under mask __U, between
 * _mm256_permutex2var_pd(__A, __I, __B) and the index vector __I itself,
 * cast from __m256i to __m256d/__v4df.
 * NOTE(review): the end of the parameter list is not visible here. */
3859 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3860 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3862 return (__m256d)__builtin_ia32_selectpd_256(__U,
3863 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3864 (__v4df)(__m256d)__I);
/* Zero-masking variant of _mm256_permutex2var_pd:
 * __builtin_ia32_selectpd_256 chooses, under mask __U, between
 * _mm256_permutex2var_pd(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3867 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3868 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3870 return (__m256d)__builtin_ia32_selectpd_256(__U,
3871 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3872 (__v4df)_mm256_setzero_pd());
/* Forwards __A (as __v4sf) and __I (as __v4si) to
 * __builtin_ia32_vpermi2varps128 and returns the result as __m128.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3875 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3876 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3877 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
/* Masked variant of _mm_permutex2var_ps: hands mask __U and
 * _mm_permutex2var_ps(__A, __I, __B) to __builtin_ia32_selectps_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __A -- confirm. */
3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3883 return (__m128)__builtin_ia32_selectps_128(__U,
3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
/* Masked variant of _mm_permutex2var_ps with the mask as third parameter:
 * __builtin_ia32_selectps_128 chooses, under mask __U, between
 * _mm_permutex2var_ps(__A, __I, __B) and the index vector __I itself,
 * cast from __m128i to __m128/__v4sf. */
3888 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3889 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3890 return (__m128)__builtin_ia32_selectps_128(__U,
3891 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3892 (__v4sf)(__m128)__I);
/* Zero-masking variant of _mm_permutex2var_ps:
 * __builtin_ia32_selectps_128 chooses, under mask __U, between
 * _mm_permutex2var_ps(__A, __I, __B) and an all-zero vector. */
3895 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3896 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3897 return (__m128)__builtin_ia32_selectps_128(__U,
3898 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3899 (__v4sf)_mm_setzero_ps());
/* Forwards __A (as __v8sf) and __I (as __v8si) to
 * __builtin_ia32_vpermi2varps256 and returns the result as __m256.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3902 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3903 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3904 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
/* Masked variant of _mm256_permutex2var_ps: hands mask __U and
 * _mm256_permutex2var_ps(__A, __I, __B) to __builtin_ia32_selectps_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __A -- confirm. */
3908 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3909 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3910 return (__m256)__builtin_ia32_selectps_256(__U,
3911 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
/* Masked variant of _mm256_permutex2var_ps with the mask as third
 * parameter: __builtin_ia32_selectps_256 chooses, under mask __U, between
 * _mm256_permutex2var_ps(__A, __I, __B) and the index vector __I itself,
 * cast from __m256i to __m256/__v8sf.
 * NOTE(review): the end of the parameter list is not visible here. */
3915 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3916 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3918 return (__m256)__builtin_ia32_selectps_256(__U,
3919 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3920 (__v8sf)(__m256)__I);
/* Zero-masking variant of _mm256_permutex2var_ps:
 * __builtin_ia32_selectps_256 chooses, under mask __U, between
 * _mm256_permutex2var_ps(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3923 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3924 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3926 return (__m256)__builtin_ia32_selectps_256(__U,
3927 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3928 (__v8sf)_mm256_setzero_ps());
/* Forwards __A and __I (both as __v2di) to __builtin_ia32_vpermi2varq128
 * and returns the result as __m128i.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3933 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
/* Masked variant of _mm_permutex2var_epi64: hands mask __U and
 * _mm_permutex2var_epi64(__A, __I, __B) to __builtin_ia32_selectq_128.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __A -- confirm. */
3937 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3938 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3940 return (__m128i)__builtin_ia32_selectq_128(__U,
3941 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
/* Masked variant of _mm_permutex2var_epi64 with the mask as third
 * parameter: hands mask __U and _mm_permutex2var_epi64(__A, __I, __B) to
 * __builtin_ia32_selectq_128.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __I -- confirm. */
3945 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3946 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3948 return (__m128i)__builtin_ia32_selectq_128(__U,
3949 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
/* Zero-masking variant of _mm_permutex2var_epi64:
 * __builtin_ia32_selectq_128 chooses, under mask __U, between
 * _mm_permutex2var_epi64(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3953 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3954 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3956 return (__m128i)__builtin_ia32_selectq_128(__U,
3957 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3958 (__v2di)_mm_setzero_si128());
/* Forwards __A and __I (both as __v4di) to __builtin_ia32_vpermi2varq256
 * and returns the result as __m256i.
 * NOTE(review): the last builtin argument is not visible in this extract;
 * presumably __B -- confirm. */
3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3964 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
/* Masked variant of _mm256_permutex2var_epi64: hands mask __U and
 * _mm256_permutex2var_epi64(__A, __I, __B) to __builtin_ia32_selectq_256.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __A -- confirm. */
3968 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3969 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3971 return (__m256i)__builtin_ia32_selectq_256(__U,
3972 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
/* Masked variant of _mm256_permutex2var_epi64 with the mask as third
 * parameter: hands mask __U and _mm256_permutex2var_epi64(__A, __I, __B)
 * to __builtin_ia32_selectq_256.
 * NOTE(review): the end of the parameter list and the final select operand
 * are not visible here; the fallback is presumably __I -- confirm. */
3976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3977 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3979 return (__m256i)__builtin_ia32_selectq_256(__U,
3980 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
/* Zero-masking variant of _mm256_permutex2var_epi64:
 * __builtin_ia32_selectq_256 chooses, under mask __U, between
 * _mm256_permutex2var_epi64(__A, __I, __B) and an all-zero vector.
 * NOTE(review): the end of the parameter list is not visible here. */
3984 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3985 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3987 return (__m256i)__builtin_ia32_selectq_256(__U,
3988 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3989 (__v4di)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepi8_epi32: hands mask __U and
 * _mm_cvtepi8_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
3992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3993 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3996 (__v4si)_mm_cvtepi8_epi32(__A),
/* Zero-masking variant of _mm_cvtepi8_epi32: __builtin_ia32_selectd_128
 * chooses, under mask __U, between _mm_cvtepi8_epi32(__A) and an all-zero
 * vector. */
4000 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4001 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4003 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4004 (__v4si)_mm_cvtepi8_epi32(__A),
4005 (__v4si)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepi8_epi32 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepi8_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4008 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4009 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4011 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4012 (__v8si)_mm256_cvtepi8_epi32(__A),
/* Zero-masking variant of _mm256_cvtepi8_epi32 (128-bit source __A,
 * 256-bit result): __builtin_ia32_selectd_256 chooses, under mask __U,
 * between _mm256_cvtepi8_epi32(__A) and an all-zero vector. */
4016 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4017 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4019 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4020 (__v8si)_mm256_cvtepi8_epi32(__A),
4021 (__v8si)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepi8_epi64: hands mask __U and
 * _mm_cvtepi8_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4025 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4027 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4028 (__v2di)_mm_cvtepi8_epi64(__A),
/* Zero-masking variant of _mm_cvtepi8_epi64: __builtin_ia32_selectq_128
 * chooses, under mask __U, between _mm_cvtepi8_epi64(__A) and an all-zero
 * vector. */
4032 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4033 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4035 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4036 (__v2di)_mm_cvtepi8_epi64(__A),
4037 (__v2di)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepi8_epi64 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepi8_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4040 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4041 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4043 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4044 (__v4di)_mm256_cvtepi8_epi64(__A),
/* Zero-masking variant of _mm256_cvtepi8_epi64 (128-bit source __A,
 * 256-bit result): __builtin_ia32_selectq_256 chooses, under mask __U,
 * between _mm256_cvtepi8_epi64(__A) and an all-zero vector. */
4048 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4049 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4051 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4052 (__v4di)_mm256_cvtepi8_epi64(__A),
4053 (__v4di)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepi32_epi64: hands mask __U and
 * _mm_cvtepi32_epi64(__X) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4056 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4057 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4060 (__v2di)_mm_cvtepi32_epi64(__X),
/* Zero-masking variant of _mm_cvtepi32_epi64: __builtin_ia32_selectq_128
 * chooses, under mask __U, between _mm_cvtepi32_epi64(__X) and an
 * all-zero vector. */
4064 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4065 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4067 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4068 (__v2di)_mm_cvtepi32_epi64(__X),
4069 (__v2di)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepi32_epi64 (128-bit source __X, 256-bit
 * result): hands mask __U and _mm256_cvtepi32_epi64(__X) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4072 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4073 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4075 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4076 (__v4di)_mm256_cvtepi32_epi64(__X),
/* Zero-masking variant of _mm256_cvtepi32_epi64 (128-bit source __X,
 * 256-bit result): __builtin_ia32_selectq_256 chooses, under mask __U,
 * between _mm256_cvtepi32_epi64(__X) and an all-zero vector. */
4080 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4081 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4083 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4084 (__v4di)_mm256_cvtepi32_epi64(__X),
4085 (__v4di)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepi16_epi32: hands mask __U and
 * _mm_cvtepi16_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4088 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4089 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4091 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4092 (__v4si)_mm_cvtepi16_epi32(__A),
/* Zero-masking variant of _mm_cvtepi16_epi32: __builtin_ia32_selectd_128
 * chooses, under mask __U, between _mm_cvtepi16_epi32(__A) and an
 * all-zero vector. */
4096 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4097 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4099 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4100 (__v4si)_mm_cvtepi16_epi32(__A),
4101 (__v4si)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepi16_epi32 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepi16_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4104 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4105 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4107 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4108 (__v8si)_mm256_cvtepi16_epi32(__A),
/* Zero-masking variant of _mm256_cvtepi16_epi32 (128-bit source __A,
 * 256-bit result): __builtin_ia32_selectd_256 chooses, under mask __U,
 * between _mm256_cvtepi16_epi32(__A) and an all-zero vector. */
4112 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4113 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4115 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4116 (__v8si)_mm256_cvtepi16_epi32(__A),
4117 (__v8si)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepi16_epi64: hands mask __U and
 * _mm_cvtepi16_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4121 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4123 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4124 (__v2di)_mm_cvtepi16_epi64(__A),
/* Zero-masking variant of _mm_cvtepi16_epi64: __builtin_ia32_selectq_128
 * chooses, under mask __U, between _mm_cvtepi16_epi64(__A) and an
 * all-zero vector. */
4128 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4129 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4131 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4132 (__v2di)_mm_cvtepi16_epi64(__A),
4133 (__v2di)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepi16_epi64 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepi16_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4137 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4140 (__v4di)_mm256_cvtepi16_epi64(__A),
/* Zero-masking variant of _mm256_cvtepi16_epi64 (128-bit source __A,
 * 256-bit result): __builtin_ia32_selectq_256 chooses, under mask __U,
 * between _mm256_cvtepi16_epi64(__A) and an all-zero vector. */
4144 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4145 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4148 (__v4di)_mm256_cvtepi16_epi64(__A),
4149 (__v4di)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepu8_epi32: hands mask __U and
 * _mm_cvtepu8_epi32(__A) to __builtin_ia32_selectd_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4153 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4154 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4156 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4157 (__v4si)_mm_cvtepu8_epi32(__A),
/* Zero-masking variant of _mm_cvtepu8_epi32: __builtin_ia32_selectd_128
 * chooses, under mask __U, between _mm_cvtepu8_epi32(__A) and an all-zero
 * vector. */
4161 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4162 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4164 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4165 (__v4si)_mm_cvtepu8_epi32(__A),
4166 (__v4si)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepu8_epi32 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepu8_epi32(__A) to
 * __builtin_ia32_selectd_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4169 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4170 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4173 (__v8si)_mm256_cvtepu8_epi32(__A),
/* Zero-masking variant of _mm256_cvtepu8_epi32 (128-bit source __A,
 * 256-bit result): __builtin_ia32_selectd_256 chooses, under mask __U,
 * between _mm256_cvtepu8_epi32(__A) and an all-zero vector. */
4177 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4178 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4180 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4181 (__v8si)_mm256_cvtepu8_epi32(__A),
4182 (__v8si)_mm256_setzero_si256());
/* Masked variant of _mm_cvtepu8_epi64: hands mask __U and
 * _mm_cvtepu8_epi64(__A) to __builtin_ia32_selectq_128.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4185 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4186 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4188 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4189 (__v2di)_mm_cvtepu8_epi64(__A),
/* Zero-masking variant of _mm_cvtepu8_epi64: __builtin_ia32_selectq_128
 * chooses, under mask __U, between _mm_cvtepu8_epi64(__A) and an all-zero
 * vector. */
4193 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4194 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4196 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4197 (__v2di)_mm_cvtepu8_epi64(__A),
4198 (__v2di)_mm_setzero_si128());
/* Masked variant of _mm256_cvtepu8_epi64 (128-bit source __A, 256-bit
 * result): hands mask __U and _mm256_cvtepu8_epi64(__A) to
 * __builtin_ia32_selectq_256.
 * NOTE(review): the final select operand is not visible here; presumably
 * __W -- confirm. */
4201 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4202 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4204 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4205 (__v4di)_mm256_cvtepu8_epi64(__A),
4209 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4210 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4212 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4213 (__v4di)_mm256_cvtepu8_epi64(__A),
4214 (__v4di)_mm256_setzero_si256());
4217 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4218 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4220 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4221 (__v2di)_mm_cvtepu32_epi64(__X),
4225 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4226 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4228 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4229 (__v2di)_mm_cvtepu32_epi64(__X),
4230 (__v2di)_mm_setzero_si128());
4233 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4234 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4236 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4237 (__v4di)_mm256_cvtepu32_epi64(__X),
4241 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4242 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4244 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4245 (__v4di)_mm256_cvtepu32_epi64(__X),
4246 (__v4di)_mm256_setzero_si256());
4249 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4250 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4252 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4253 (__v4si)_mm_cvtepu16_epi32(__A),
4257 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4258 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4260 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4261 (__v4si)_mm_cvtepu16_epi32(__A),
4262 (__v4si)_mm_setzero_si128());
4265 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4266 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4269 (__v8si)_mm256_cvtepu16_epi32(__A),
4273 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4274 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4276 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4277 (__v8si)_mm256_cvtepu16_epi32(__A),
4278 (__v8si)_mm256_setzero_si256());
4281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4282 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4284 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4285 (__v2di)_mm_cvtepu16_epi64(__A),
4289 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4290 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4292 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4293 (__v2di)_mm_cvtepu16_epi64(__A),
4294 (__v2di)_mm_setzero_si128());
4297 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4298 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4301 (__v4di)_mm256_cvtepu16_epi64(__A),
4305 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4306 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4308 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4309 (__v4di)_mm256_cvtepu16_epi64(__A),
4310 (__v4di)_mm256_setzero_si256());
/*
 * Rotate each 32-bit element of the 128-bit integer vector a left by b bits.
 * This is a macro rather than an inline function because b is forwarded to
 * the builtin as its immediate operand. The expansion is fully parenthesized
 * so that operators at the use site cannot bind inside it (for example a
 * trailing subscript would otherwise apply before the result cast).
 */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
/*
 * Merge-masking form of _mm_rol_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
/*
 * Zero-masking form of _mm_rol_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
/*
 * Rotate each 32-bit element of the 256-bit integer vector a left by b bits.
 * This is a macro rather than an inline function because b is forwarded to
 * the builtin as its immediate operand. The expansion is fully parenthesized
 * so that operators at the use site cannot bind inside it.
 */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
/*
 * Merge-masking form of _mm256_rol_epi32: for every 32-bit lane the result
 * is the rotated element of a when the corresponding bit of mask u is set,
 * and the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
/*
 * Zero-masking form of _mm256_rol_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
/*
 * Rotate each 64-bit element of the 128-bit integer vector a left by b bits.
 * This is a macro rather than an inline function because b is forwarded to
 * the builtin as its immediate operand. The expansion is fully parenthesized
 * so that operators at the use site cannot bind inside it.
 */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
/*
 * Merge-masking form of _mm_rol_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
/*
 * Zero-masking form of _mm_rol_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
/*
 * Rotate each 64-bit element of the 256-bit integer vector a left by b bits.
 * This is a macro rather than an inline function because b is forwarded to
 * the builtin as its immediate operand. The expansion is fully parenthesized
 * so that operators at the use site cannot bind inside it.
 */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
/*
 * Merge-masking form of _mm256_rol_epi64: for every 64-bit lane the result
 * is the rotated element of a when the corresponding bit of mask u is set,
 * and the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
/*
 * Zero-masking form of _mm256_rol_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367 _mm_rolv_epi32 (__m128i __A, __m128i __B)
4369 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4372 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4373 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4375 return (__m128i)__builtin_ia32_selectd_128(__U,
4376 (__v4si)_mm_rolv_epi32(__A, __B),
4380 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4381 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4383 return (__m128i)__builtin_ia32_selectd_128(__U,
4384 (__v4si)_mm_rolv_epi32(__A, __B),
4385 (__v4si)_mm_setzero_si128());
4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
4391 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4395 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4397 return (__m256i)__builtin_ia32_selectd_256(__U,
4398 (__v8si)_mm256_rolv_epi32(__A, __B),
4402 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4403 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4405 return (__m256i)__builtin_ia32_selectd_256(__U,
4406 (__v8si)_mm256_rolv_epi32(__A, __B),
4407 (__v8si)_mm256_setzero_si256());
4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411 _mm_rolv_epi64 (__m128i __A, __m128i __B)
4413 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4417 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4419 return (__m128i)__builtin_ia32_selectq_128(__U,
4420 (__v2di)_mm_rolv_epi64(__A, __B),
4424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4425 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4427 return (__m128i)__builtin_ia32_selectq_128(__U,
4428 (__v2di)_mm_rolv_epi64(__A, __B),
4429 (__v2di)_mm_setzero_si128());
4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
4435 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4438 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4439 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4441 return (__m256i)__builtin_ia32_selectq_256(__U,
4442 (__v4di)_mm256_rolv_epi64(__A, __B),
4446 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4447 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4449 return (__m256i)__builtin_ia32_selectq_256(__U,
4450 (__v4di)_mm256_rolv_epi64(__A, __B),
4451 (__v4di)_mm256_setzero_si256());
/*
 * Rotate each 32-bit element of the 128-bit integer vector a right by b
 * bits. This is a macro rather than an inline function because b is
 * forwarded to the builtin as its immediate operand. The expansion is fully
 * parenthesized so that operators at the use site cannot bind inside it.
 */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
/*
 * Merge-masking form of _mm_ror_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
/*
 * Zero-masking form of _mm_ror_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
/*
 * Rotate each 32-bit element of the 256-bit integer vector a right by b
 * bits. This is a macro rather than an inline function because b is
 * forwarded to the builtin as its immediate operand. The expansion is fully
 * parenthesized so that operators at the use site cannot bind inside it.
 */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
/*
 * Merge-masking form of _mm256_ror_epi32: for every 32-bit lane the result
 * is the rotated element of a when the corresponding bit of mask u is set,
 * and the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
/*
 * Zero-masking form of _mm256_ror_epi32: for every 32-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
/*
 * Rotate each 64-bit element of the 128-bit integer vector a right by b
 * bits. This is a macro rather than an inline function because b is
 * forwarded to the builtin as its immediate operand. The expansion is fully
 * parenthesized so that operators at the use site cannot bind inside it.
 */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
/*
 * Merge-masking form of _mm_ror_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
/*
 * Zero-masking form of _mm_ror_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
/*
 * Rotate each 64-bit element of the 256-bit integer vector a right by b
 * bits. This is a macro rather than an inline function because b is
 * forwarded to the builtin as its immediate operand. The expansion is fully
 * parenthesized so that operators at the use site cannot bind inside it.
 */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
/*
 * Merge-masking form of _mm256_ror_epi64: for every 64-bit lane the result
 * is the rotated element of a when the corresponding bit of mask u is set,
 * and the element of w otherwise. The expansion is fully parenthesized so it
 * behaves as a single primary expression at the use site.
 */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
/*
 * Zero-masking form of _mm256_ror_epi64: for every 64-bit lane the result is
 * the rotated element of a when the corresponding bit of mask u is set, and
 * zero otherwise. The expansion is fully parenthesized so it behaves as a
 * single primary expression at the use site.
 */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4506 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4507 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4509 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4510 (__v4si)_mm_sll_epi32(__A, __B),
4514 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4515 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4517 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4518 (__v4si)_mm_sll_epi32(__A, __B),
4519 (__v4si)_mm_setzero_si128());
4522 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4523 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4525 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4526 (__v8si)_mm256_sll_epi32(__A, __B),
4530 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4531 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4533 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4534 (__v8si)_mm256_sll_epi32(__A, __B),
4535 (__v8si)_mm256_setzero_si256());
4538 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4539 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4541 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4542 (__v4si)_mm_slli_epi32(__A, __B),
4546 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4547 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4549 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4550 (__v4si)_mm_slli_epi32(__A, __B),
4551 (__v4si)_mm_setzero_si128());
4554 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4555 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4557 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4558 (__v8si)_mm256_slli_epi32(__A, __B),
4562 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4563 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
4565 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4566 (__v8si)_mm256_slli_epi32(__A, __B),
4567 (__v8si)_mm256_setzero_si256());
4570 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4571 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4573 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4574 (__v2di)_mm_sll_epi64(__A, __B),
4578 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4579 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4581 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4582 (__v2di)_mm_sll_epi64(__A, __B),
4583 (__v2di)_mm_setzero_si128());
4586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4587 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4589 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4590 (__v4di)_mm256_sll_epi64(__A, __B),
4594 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4595 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4597 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4598 (__v4di)_mm256_sll_epi64(__A, __B),
4599 (__v4di)_mm256_setzero_si256());
4602 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4603 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4605 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4606 (__v2di)_mm_slli_epi64(__A, __B),
4610 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4611 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
4613 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4614 (__v2di)_mm_slli_epi64(__A, __B),
4615 (__v2di)_mm_setzero_si128());
4618 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4619 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4621 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4622 (__v4di)_mm256_slli_epi64(__A, __B),
4626 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4627 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
4629 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4630 (__v4di)_mm256_slli_epi64(__A, __B),
4631 (__v4di)_mm256_setzero_si256());
4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635 _mm_rorv_epi32 (__m128i __A, __m128i __B)
4637 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4640 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4641 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4643 return (__m128i)__builtin_ia32_selectd_128(__U,
4644 (__v4si)_mm_rorv_epi32(__A, __B),
4648 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4649 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4651 return (__m128i)__builtin_ia32_selectd_128(__U,
4652 (__v4si)_mm_rorv_epi32(__A, __B),
4653 (__v4si)_mm_setzero_si128());
4656 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
4659 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4662 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4663 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4665 return (__m256i)__builtin_ia32_selectd_256(__U,
4666 (__v8si)_mm256_rorv_epi32(__A, __B),
4670 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4671 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4673 return (__m256i)__builtin_ia32_selectd_256(__U,
4674 (__v8si)_mm256_rorv_epi32(__A, __B),
4675 (__v8si)_mm256_setzero_si256());
4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679 _mm_rorv_epi64 (__m128i __A, __m128i __B)
4681 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4685 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4687 return (__m128i)__builtin_ia32_selectq_128(__U,
4688 (__v2di)_mm_rorv_epi64(__A, __B),
4692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4693 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4695 return (__m128i)__builtin_ia32_selectq_128(__U,
4696 (__v2di)_mm_rorv_epi64(__A, __B),
4697 (__v2di)_mm_setzero_si128());
4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
4703 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4707 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4709 return (__m256i)__builtin_ia32_selectq_256(__U,
4710 (__v4di)_mm256_rorv_epi64(__A, __B),
4714 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4715 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4717 return (__m256i)__builtin_ia32_selectq_256(__U,
4718 (__v4di)_mm256_rorv_epi64(__A, __B),
4719 (__v4di)_mm256_setzero_si256());
4722 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4723 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4725 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4726 (__v2di)_mm_sllv_epi64(__X, __Y),
4730 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4731 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4733 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4734 (__v2di)_mm_sllv_epi64(__X, __Y),
4735 (__v2di)_mm_setzero_si128());
4738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4739 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4742 (__v4di)_mm256_sllv_epi64(__X, __Y),
4746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4747 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4750 (__v4di)_mm256_sllv_epi64(__X, __Y),
4751 (__v4di)_mm256_setzero_si256());
4754 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4755 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4758 (__v4si)_mm_sllv_epi32(__X, __Y),
4762 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4763 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4765 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4766 (__v4si)_mm_sllv_epi32(__X, __Y),
4767 (__v4si)_mm_setzero_si128());
4770 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4771 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4773 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4774 (__v8si)_mm256_sllv_epi32(__X, __Y),
4778 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4779 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4781 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4782 (__v8si)_mm256_sllv_epi32(__X, __Y),
4783 (__v8si)_mm256_setzero_si256());
4786 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4787 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4789 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4790 (__v2di)_mm_srlv_epi64(__X, __Y),
4794 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4795 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4797 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4798 (__v2di)_mm_srlv_epi64(__X, __Y),
4799 (__v2di)_mm_setzero_si128());
4802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4803 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4805 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4806 (__v4di)_mm256_srlv_epi64(__X, __Y),
4810 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4811 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4813 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4814 (__v4di)_mm256_srlv_epi64(__X, __Y),
4815 (__v4di)_mm256_setzero_si256());
4818 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4819 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4821 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4822 (__v4si)_mm_srlv_epi32(__X, __Y),
4826 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4827 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4829 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4830 (__v4si)_mm_srlv_epi32(__X, __Y),
4831 (__v4si)_mm_setzero_si128());
4834 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4835 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4837 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4838 (__v8si)_mm256_srlv_epi32(__X, __Y),
4842 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4843 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4845 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4846 (__v8si)_mm256_srlv_epi32(__X, __Y),
4847 (__v8si)_mm256_setzero_si256());
4850 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4851 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4853 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4854 (__v4si)_mm_srl_epi32(__A, __B),
4858 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4859 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4861 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4862 (__v4si)_mm_srl_epi32(__A, __B),
4863 (__v4si)_mm_setzero_si128());
4866 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4867 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4869 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4870 (__v8si)_mm256_srl_epi32(__A, __B),
4874 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4875 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4877 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4878 (__v8si)_mm256_srl_epi32(__A, __B),
4879 (__v8si)_mm256_setzero_si256());
4882 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4883 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4885 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4886 (__v4si)_mm_srli_epi32(__A, __B),
4890 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4891 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
4893 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4894 (__v4si)_mm_srli_epi32(__A, __B),
4895 (__v4si)_mm_setzero_si128());
4898 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4899 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4901 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4902 (__v8si)_mm256_srli_epi32(__A, __B),
4906 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4907 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
4909 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4910 (__v8si)_mm256_srli_epi32(__A, __B),
4911 (__v8si)_mm256_setzero_si256());
4914 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4915 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4917 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4918 (__v2di)_mm_srl_epi64(__A, __B),
4922 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4923 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4925 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4926 (__v2di)_mm_srl_epi64(__A, __B),
4927 (__v2di)_mm_setzero_si128());
4930 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4931 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4933 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4934 (__v4di)_mm256_srl_epi64(__A, __B),
4938 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4939 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4941 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4942 (__v4di)_mm256_srl_epi64(__A, __B),
4943 (__v4di)_mm256_setzero_si256());
4946 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4947 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4949 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4950 (__v2di)_mm_srli_epi64(__A, __B),
4954 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4955 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
4957 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4958 (__v2di)_mm_srli_epi64(__A, __B),
4959 (__v2di)_mm_setzero_si128());
4962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4963 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
4965 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4966 (__v4di)_mm256_srli_epi64(__A, __B),
4970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4971 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
4973 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4974 (__v4di)_mm256_srli_epi64(__A, __B),
4975 (__v4di)_mm256_setzero_si256());
4978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4979 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4982 (__v4si)_mm_srav_epi32(__X, __Y),
4986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4987 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4990 (__v4si)_mm_srav_epi32(__X, __Y),
4991 (__v4si)_mm_setzero_si128());
4994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4995 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4998 (__v8si)_mm256_srav_epi32(__X, __Y),
5002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5003 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5006 (__v8si)_mm256_srav_epi32(__X, __Y),
5007 (__v8si)_mm256_setzero_si256());
5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011 _mm_srav_epi64(__m128i __X, __m128i __Y)
5013 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5017 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5020 (__v2di)_mm_srav_epi64(__X, __Y),
5024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5025 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5027 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5028 (__v2di)_mm_srav_epi64(__X, __Y),
5029 (__v2di)_mm_setzero_si128());
5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033 _mm256_srav_epi64(__m256i __X, __m256i __Y)
5035 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5038 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5039 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5041 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042 (__v4di)_mm256_srav_epi64(__X, __Y),
5046 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5047 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5049 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5050 (__v4di)_mm256_srav_epi64(__X, __Y),
5051 (__v4di)_mm256_setzero_si256());
5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5055 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5057 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5062 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5063 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5065 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5067 (__v4si) _mm_setzero_si128 ());
5071 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5072 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5074 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5079 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5080 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5082 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5084 (__v8si) _mm256_setzero_si256 ());
5087 static __inline __m128i __DEFAULT_FN_ATTRS128
5088 _mm_load_epi32 (void const *__P)
5090 return *(__m128i *) __P;
5093 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5094 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5096 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5102 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5103 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5105 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5107 _mm_setzero_si128 (),
5112 static __inline __m256i __DEFAULT_FN_ATTRS256
5113 _mm256_load_epi32 (void const *__P)
5115 return *(__m256i *) __P;
5118 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5119 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5121 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5127 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5128 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5130 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5132 _mm256_setzero_si256 (),
5137 static __inline void __DEFAULT_FN_ATTRS128
5138 _mm_store_epi32 (void *__P, __m128i __A)
5140 *(__m128i *) __P = __A;
5143 static __inline__ void __DEFAULT_FN_ATTRS128
5144 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5146 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5151 static __inline void __DEFAULT_FN_ATTRS256
5152 _mm256_store_epi32 (void *__P, __m256i __A)
5154 *(__m256i *) __P = __A;
5157 static __inline__ void __DEFAULT_FN_ATTRS256
5158 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5160 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5166 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5168 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5174 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5176 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5178 (__v2di) _mm_setzero_si128 ());
5181 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5182 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5184 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5189 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5190 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5192 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5194 (__v4di) _mm256_setzero_si256 ());
5197 static __inline __m128i __DEFAULT_FN_ATTRS128
5198 _mm_load_epi64 (void const *__P)
5200 return *(__m128i *) __P;
5203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5204 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5206 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5212 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5213 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5215 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5217 _mm_setzero_si128 (),
5222 static __inline __m256i __DEFAULT_FN_ATTRS256
5223 _mm256_load_epi64 (void const *__P)
5225 return *(__m256i *) __P;
5228 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5229 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5231 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5237 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5238 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5240 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5242 _mm256_setzero_si256 (),
5247 static __inline void __DEFAULT_FN_ATTRS128
5248 _mm_store_epi64 (void *__P, __m128i __A)
5250 *(__m128i *) __P = __A;
5253 static __inline__ void __DEFAULT_FN_ATTRS128
5254 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5256 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5261 static __inline void __DEFAULT_FN_ATTRS256
5262 _mm256_store_epi64 (void *__P, __m256i __A)
5264 *(__m256i *) __P = __A;
5267 static __inline__ void __DEFAULT_FN_ATTRS256
5268 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5270 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5275 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5276 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5278 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5279 (__v2df)_mm_movedup_pd(__A),
5283 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5284 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5286 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5287 (__v2df)_mm_movedup_pd(__A),
5288 (__v2df)_mm_setzero_pd());
5291 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5292 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5294 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5295 (__v4df)_mm256_movedup_pd(__A),
5299 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5300 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5302 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5303 (__v4df)_mm256_movedup_pd(__A),
5304 (__v4df)_mm256_setzero_pd());
5307 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5308 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5310 return (__m128i)__builtin_ia32_selectd_128(__M,
5311 (__v4si) _mm_set1_epi32(__A),
5315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5316 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
5318 return (__m128i)__builtin_ia32_selectd_128(__M,
5319 (__v4si) _mm_set1_epi32(__A),
5320 (__v4si)_mm_setzero_si128());
5323 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5324 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5326 return (__m256i)__builtin_ia32_selectd_256(__M,
5327 (__v8si) _mm256_set1_epi32(__A),
5331 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5332 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
5334 return (__m256i)__builtin_ia32_selectd_256(__M,
5335 (__v8si) _mm256_set1_epi32(__A),
5336 (__v8si)_mm256_setzero_si256());
5340 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5341 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5343 return (__m128i) __builtin_ia32_selectq_128(__M,
5344 (__v2di) _mm_set1_epi64x(__A),
5348 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5349 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5351 return (__m128i) __builtin_ia32_selectq_128(__M,
5352 (__v2di) _mm_set1_epi64x(__A),
5353 (__v2di) _mm_setzero_si128());
5356 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5357 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5359 return (__m256i) __builtin_ia32_selectq_256(__M,
5360 (__v4di) _mm256_set1_epi64x(__A),
5364 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5365 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5367 return (__m256i) __builtin_ia32_selectq_256(__M,
5368 (__v4di) _mm256_set1_epi64x(__A),
5369 (__v4di) _mm256_setzero_si256());
5372 #define _mm_fixupimm_pd(A, B, C, imm) \
5373 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5374 (__v2df)(__m128d)(B), \
5375 (__v2di)(__m128i)(C), (int)(imm), \
5378 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
5379 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5380 (__v2df)(__m128d)(B), \
5381 (__v2di)(__m128i)(C), (int)(imm), \
/* Zero-masking fixupimm on 128-bit double vectors: forwards A and B (as
 * __v2df), C (as __v2di), the immediate imm and the mask U to
 * __builtin_ia32_fixupimmpd128_maskz.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5390 #define _mm256_fixupimm_pd(A, B, C, imm) \
5391 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5392 (__v4df)(__m256d)(B), \
5393 (__v4di)(__m256i)(C), (int)(imm), \
5396 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
5397 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5398 (__v4df)(__m256d)(B), \
5399 (__v4di)(__m256i)(C), (int)(imm), \
/* Zero-masking fixupimm on 256-bit double vectors: forwards A and B (as
 * __v4df), C (as __v4di), the immediate imm and the mask U to
 * __builtin_ia32_fixupimmpd256_maskz.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5408 #define _mm_fixupimm_ps(A, B, C, imm) \
5409 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5410 (__v4sf)(__m128)(B), \
5411 (__v4si)(__m128i)(C), (int)(imm), \
5414 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
5415 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5416 (__v4sf)(__m128)(B), \
5417 (__v4si)(__m128i)(C), (int)(imm), \
5420 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
5421 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5422 (__v4sf)(__m128)(B), \
5423 (__v4si)(__m128i)(C), (int)(imm), \
5426 #define _mm256_fixupimm_ps(A, B, C, imm) \
5427 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5428 (__v8sf)(__m256)(B), \
5429 (__v8si)(__m256i)(C), (int)(imm), \
5432 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
5433 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5434 (__v8sf)(__m256)(B), \
5435 (__v8si)(__m256i)(C), (int)(imm), \
5438 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
5439 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5440 (__v8sf)(__m256)(B), \
5441 (__v8si)(__m256i)(C), (int)(imm), \
5444 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5445 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5447 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5452 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5453 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
5455 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5461 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5462 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5464 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5469 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5470 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5472 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5474 _mm256_setzero_pd (),
5478 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5479 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5481 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5486 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5487 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
5489 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5495 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5496 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5498 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5503 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5504 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5506 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5508 _mm256_setzero_ps (),
5512 static __inline __m128i __DEFAULT_FN_ATTRS128
5513 _mm_loadu_epi64 (void const *__P)
5515 struct __loadu_epi64 {
5517 } __attribute__((__packed__, __may_alias__));
5518 return ((struct __loadu_epi64*)__P)->__v;
5521 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5522 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5524 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5529 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5530 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5532 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5534 _mm_setzero_si128 (),
5538 static __inline __m256i __DEFAULT_FN_ATTRS256
5539 _mm256_loadu_epi64 (void const *__P)
5541 struct __loadu_epi64 {
5543 } __attribute__((__packed__, __may_alias__));
5544 return ((struct __loadu_epi64*)__P)->__v;
5547 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5548 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5550 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5555 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5556 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5558 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5560 _mm256_setzero_si256 (),
5564 static __inline __m128i __DEFAULT_FN_ATTRS128
5565 _mm_loadu_epi32 (void const *__P)
5567 struct __loadu_epi32 {
5569 } __attribute__((__packed__, __may_alias__));
5570 return ((struct __loadu_epi32*)__P)->__v;
5573 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5574 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5576 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5581 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5582 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5584 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5586 _mm_setzero_si128 (),
5590 static __inline __m256i __DEFAULT_FN_ATTRS256
5591 _mm256_loadu_epi32 (void const *__P)
5593 struct __loadu_epi32 {
5595 } __attribute__((__packed__, __may_alias__));
5596 return ((struct __loadu_epi32*)__P)->__v;
5599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5600 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5602 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5607 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5608 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5610 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
5612 _mm256_setzero_si256 (),
5616 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5617 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5619 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5624 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5625 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5627 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
5633 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5634 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5636 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5641 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5642 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5644 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
5646 _mm256_setzero_pd (),
5650 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5651 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5653 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5658 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5659 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5661 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
5667 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5668 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5670 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5675 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5676 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5678 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
5680 _mm256_setzero_ps (),
5684 static __inline__ void __DEFAULT_FN_ATTRS128
5685 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5687 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5692 static __inline__ void __DEFAULT_FN_ATTRS256
5693 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5695 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5700 static __inline__ void __DEFAULT_FN_ATTRS128
5701 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5703 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5708 static __inline__ void __DEFAULT_FN_ATTRS256
5709 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5711 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5716 static __inline void __DEFAULT_FN_ATTRS128
5717 _mm_storeu_epi64 (void *__P, __m128i __A)
5719 struct __storeu_epi64 {
5721 } __attribute__((__packed__, __may_alias__));
5722 ((struct __storeu_epi64*)__P)->__v = __A;
5725 static __inline__ void __DEFAULT_FN_ATTRS128
5726 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5728 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5733 static __inline void __DEFAULT_FN_ATTRS256
5734 _mm256_storeu_epi64 (void *__P, __m256i __A)
5736 struct __storeu_epi64 {
5738 } __attribute__((__packed__, __may_alias__));
5739 ((struct __storeu_epi64*)__P)->__v = __A;
5742 static __inline__ void __DEFAULT_FN_ATTRS256
5743 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5745 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5750 static __inline void __DEFAULT_FN_ATTRS128
5751 _mm_storeu_epi32 (void *__P, __m128i __A)
5753 struct __storeu_epi32 {
5755 } __attribute__((__packed__, __may_alias__));
5756 ((struct __storeu_epi32*)__P)->__v = __A;
5759 static __inline__ void __DEFAULT_FN_ATTRS128
5760 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5762 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5767 static __inline void __DEFAULT_FN_ATTRS256
5768 _mm256_storeu_epi32 (void *__P, __m256i __A)
5770 struct __storeu_epi32 {
5772 } __attribute__((__packed__, __may_alias__));
5773 ((struct __storeu_epi32*)__P)->__v = __A;
5776 static __inline__ void __DEFAULT_FN_ATTRS256
5777 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5779 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5784 static __inline__ void __DEFAULT_FN_ATTRS128
5785 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5787 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5792 static __inline__ void __DEFAULT_FN_ATTRS256
5793 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5795 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5800 static __inline__ void __DEFAULT_FN_ATTRS128
5801 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5803 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5808 static __inline__ void __DEFAULT_FN_ATTRS256
5809 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5811 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5818 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5820 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5821 (__v2df)_mm_unpackhi_pd(__A, __B),
5825 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5826 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5828 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5829 (__v2df)_mm_unpackhi_pd(__A, __B),
5830 (__v2df)_mm_setzero_pd());
5833 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5834 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5836 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5837 (__v4df)_mm256_unpackhi_pd(__A, __B),
5841 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5842 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5844 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5845 (__v4df)_mm256_unpackhi_pd(__A, __B),
5846 (__v4df)_mm256_setzero_pd());
5849 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5850 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5852 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5853 (__v4sf)_mm_unpackhi_ps(__A, __B),
5857 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5858 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5860 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5861 (__v4sf)_mm_unpackhi_ps(__A, __B),
5862 (__v4sf)_mm_setzero_ps());
5865 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5866 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5868 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5869 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5873 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5874 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5876 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5877 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5878 (__v8sf)_mm256_setzero_ps());
5881 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5882 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5884 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5885 (__v2df)_mm_unpacklo_pd(__A, __B),
5889 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5890 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5892 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5893 (__v2df)_mm_unpacklo_pd(__A, __B),
5894 (__v2df)_mm_setzero_pd());
5897 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5898 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5900 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5901 (__v4df)_mm256_unpacklo_pd(__A, __B),
5905 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5906 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5908 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5909 (__v4df)_mm256_unpacklo_pd(__A, __B),
5910 (__v4df)_mm256_setzero_pd());
5913 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5914 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5916 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5917 (__v4sf)_mm_unpacklo_ps(__A, __B),
5921 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5922 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5924 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5925 (__v4sf)_mm_unpacklo_ps(__A, __B),
5926 (__v4sf)_mm_setzero_ps());
5929 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5930 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5932 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5933 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5937 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5938 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5940 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5941 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5942 (__v8sf)_mm256_setzero_ps());
5945 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5946 _mm_rcp14_pd (__m128d __A)
5948 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5954 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5955 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5957 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5962 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5963 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
5965 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5971 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5972 _mm256_rcp14_pd (__m256d __A)
5974 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5976 _mm256_setzero_pd (),
5980 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5981 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5983 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5988 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5989 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
5991 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5993 _mm256_setzero_pd (),
5997 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5998 _mm_rcp14_ps (__m128 __A)
6000 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6006 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6007 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6009 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6014 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6015 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6017 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6023 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6024 _mm256_rcp14_ps (__m256 __A)
6026 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6028 _mm256_setzero_ps (),
6032 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6033 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6035 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6040 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6041 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6043 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6045 _mm256_setzero_ps (),
/* Merge-masking form of _mm_permute_pd: the select builtin combines the
 * permute result with W under mask U.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
/* Zero-masking form of _mm_permute_pd: the select builtin combines the
 * permute result with an all-zero vector under mask U.  The whole expansion
 * is parenthesized so the leading cast cannot capture a postfix operator at
 * the use site. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
/* Merge-masking form of _mm256_permute_pd: the select builtin combines the
 * permute result with W under mask U.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
/* Zero-masking form of _mm256_permute_pd: the select builtin combines the
 * permute result with an all-zero vector under mask U.  The whole expansion
 * is parenthesized so the leading cast cannot capture a postfix operator at
 * the use site. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
/* Merge-masking form of _mm_permute_ps: the select builtin combines the
 * permute result with W under mask U.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
/* Zero-masking form of _mm_permute_ps: the select builtin combines the
 * permute result with an all-zero vector under mask U.  The whole expansion
 * is parenthesized so the leading cast cannot capture a postfix operator at
 * the use site. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
/* Merge-masking form of _mm256_permute_ps: the select builtin combines the
 * permute result with W under mask U.  The whole expansion is parenthesized
 * so the leading cast cannot capture a postfix operator at the use site. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
/* Zero-masking form of _mm256_permute_ps: the select builtin combines the
 * permute result with an all-zero vector under mask U.  The whole expansion
 * is parenthesized so the leading cast cannot capture a postfix operator at
 * the use site. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6089 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6090 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6093 (__v2df)_mm_permutevar_pd(__A, __C),
6097 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6098 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6100 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6101 (__v2df)_mm_permutevar_pd(__A, __C),
6102 (__v2df)_mm_setzero_pd());
6105 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6106 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6108 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6109 (__v4df)_mm256_permutevar_pd(__A, __C),
6113 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6114 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6116 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6117 (__v4df)_mm256_permutevar_pd(__A, __C),
6118 (__v4df)_mm256_setzero_pd());
6121 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6122 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6124 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6125 (__v4sf)_mm_permutevar_ps(__A, __C),
6129 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6130 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6132 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6133 (__v4sf)_mm_permutevar_ps(__A, __C),
6134 (__v4sf)_mm_setzero_ps());
6137 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6138 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6140 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6141 (__v8sf)_mm256_permutevar_ps(__A, __C),
6145 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6146 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6148 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6149 (__v8sf)_mm256_permutevar_ps(__A, __C),
6150 (__v8sf)_mm256_setzero_ps());
6153 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6154 _mm_test_epi32_mask (__m128i __A, __m128i __B)
6156 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6160 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6162 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6163 _mm_setzero_si128());
6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6167 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
6169 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6170 _mm256_setzero_si256());
6173 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6174 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6176 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6177 _mm256_setzero_si256());
6180 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6181 _mm_test_epi64_mask (__m128i __A, __m128i __B)
6183 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6187 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6189 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6190 _mm_setzero_si128());
6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6194 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
6196 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6197 _mm256_setzero_si256());
6200 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6201 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6203 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6204 _mm256_setzero_si256());
6207 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6208 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
6210 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6214 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6216 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6217 _mm_setzero_si128());
6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6221 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6223 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6224 _mm256_setzero_si256());
6227 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6228 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6230 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6231 _mm256_setzero_si256());
6234 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6235 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
6237 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6241 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6243 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6244 _mm_setzero_si128());
6247 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6248 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6250 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6251 _mm256_setzero_si256());
6254 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6255 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6257 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6258 _mm256_setzero_si256());
6261 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6262 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6264 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6265 (__v4si)_mm_unpackhi_epi32(__A, __B),
6269 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6270 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6272 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6273 (__v4si)_mm_unpackhi_epi32(__A, __B),
6274 (__v4si)_mm_setzero_si128());
6277 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6278 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6280 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6281 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6285 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6286 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6288 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6289 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6290 (__v8si)_mm256_setzero_si256());
6293 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6294 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6296 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6297 (__v2di)_mm_unpackhi_epi64(__A, __B),
6301 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6302 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6304 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6305 (__v2di)_mm_unpackhi_epi64(__A, __B),
6306 (__v2di)_mm_setzero_si128());
6309 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6310 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6312 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6313 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6317 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6318 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6320 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6321 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6322 (__v4di)_mm256_setzero_si256());
6325 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6326 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6329 (__v4si)_mm_unpacklo_epi32(__A, __B),
6333 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6334 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6336 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6337 (__v4si)_mm_unpacklo_epi32(__A, __B),
6338 (__v4si)_mm_setzero_si128());
6341 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6342 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6345 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6349 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6350 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6352 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6353 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6354 (__v8si)_mm256_setzero_si256());
6357 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6358 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6360 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6361 (__v2di)_mm_unpacklo_epi64(__A, __B),
6365 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6366 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6368 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6369 (__v2di)_mm_unpacklo_epi64(__A, __B),
6370 (__v2di)_mm_setzero_si128());
6373 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6374 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6376 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6377 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6381 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6382 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6384 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6385 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6386 (__v4di)_mm256_setzero_si256());
6389 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6390 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6392 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6393 (__v4si)_mm_sra_epi32(__A, __B),
6397 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6398 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6400 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6401 (__v4si)_mm_sra_epi32(__A, __B),
6402 (__v4si)_mm_setzero_si128());
6405 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6406 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6408 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6409 (__v8si)_mm256_sra_epi32(__A, __B),
6413 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6414 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6416 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6417 (__v8si)_mm256_sra_epi32(__A, __B),
6418 (__v8si)_mm256_setzero_si256());
6421 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6422 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6424 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6425 (__v4si)_mm_srai_epi32(__A, __B),
6429 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6430 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6432 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6433 (__v4si)_mm_srai_epi32(__A, __B),
6434 (__v4si)_mm_setzero_si128());
6437 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6438 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6440 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6441 (__v8si)_mm256_srai_epi32(__A, __B),
6445 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6446 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6448 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6449 (__v8si)_mm256_srai_epi32(__A, __B),
6450 (__v8si)_mm256_setzero_si256());
6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454 _mm_sra_epi64(__m128i __A, __m128i __B)
6456 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6460 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6463 (__v2di)_mm_sra_epi64(__A, __B), \
/* Zero-masking form of _mm_sra_epi64(__A, __B): __builtin_ia32_selectq_128
 * keeps a shifted 64-bit lane where the matching bit of __U is set and takes
 * the lane of the all-zero vector otherwise.
 * The line-continuation backslashes previously left in this function body
 * served no purpose outside a macro and have been dropped. */
6467 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6468 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6470 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6471 (__v2di)_mm_sra_epi64(__A, __B),
6472 (__v2di)_mm_setzero_si128());
/* 64-bit arithmetic right shift, 256-bit form: forwards __A (four 64-bit
 * lanes) and the 128-bit count vector __B (two 64-bit lanes) to
 * __builtin_ia32_psraq256. */
6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476 _mm256_sra_epi64(__m256i __A, __m128i __B)
6478 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6481 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6482 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6484 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6485 (__v4di)_mm256_sra_epi64(__A, __B), \
/* Zero-masking form of _mm256_sra_epi64(__A, __B): __builtin_ia32_selectq_256
 * keeps a shifted 64-bit lane where the matching bit of __U is set and takes
 * the lane of the all-zero vector otherwise.
 * The line-continuation backslashes previously left in this function body
 * served no purpose outside a macro and have been dropped. */
6489 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6490 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6492 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6493 (__v4di)_mm256_sra_epi64(__A, __B),
6494 (__v4di)_mm256_setzero_si256());
/* 64-bit arithmetic right shift by an integer count: forwards __A (two 64-bit
 * lanes) and __imm to __builtin_ia32_psraqi128. */
6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498 _mm_srai_epi64(__m128i __A, int __imm)
6500 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6503 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6504 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
6506 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6507 (__v2di)_mm_srai_epi64(__A, __imm), \
/* Zero-masking form of _mm_srai_epi64(__A, __imm): lanes selected by __U hold
 * the shifted 64-bit value, the remaining lanes come from the all-zero vector.
 * The line-continuation backslashes previously left in this function body
 * served no purpose outside a macro and have been dropped. */
6511 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6512 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
6514 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6515 (__v2di)_mm_srai_epi64(__A, __imm),
6516 (__v2di)_mm_setzero_si128());
/* 64-bit arithmetic right shift by an integer count, 256-bit form: forwards
 * __A (four 64-bit lanes) and __imm to __builtin_ia32_psraqi256. */
6519 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6520 _mm256_srai_epi64(__m256i __A, int __imm)
6522 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6525 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6526 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
6528 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6529 (__v4di)_mm256_srai_epi64(__A, __imm), \
/* Zero-masking form of _mm256_srai_epi64(__A, __imm): lanes selected by __U
 * hold the shifted 64-bit value, the remaining lanes come from the all-zero
 * vector.
 * The line-continuation backslashes previously left in this function body
 * served no purpose outside a macro and have been dropped. */
6533 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6534 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
6536 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6537 (__v4di)_mm256_srai_epi64(__A, __imm),
6538 (__v4di)_mm256_setzero_si256());
6541 #define _mm_ternarylogic_epi32(A, B, C, imm) \
6542 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6543 (__v4si)(__m128i)(B), \
6544 (__v4si)(__m128i)(C), (int)(imm), \
6547 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
6548 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6549 (__v4si)(__m128i)(B), \
6550 (__v4si)(__m128i)(C), (int)(imm), \
6553 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6554 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
6555 (__v4si)(__m128i)(B), \
6556 (__v4si)(__m128i)(C), (int)(imm), \
6559 #define _mm256_ternarylogic_epi32(A, B, C, imm) \
6560 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6561 (__v8si)(__m256i)(B), \
6562 (__v8si)(__m256i)(C), (int)(imm), \
6565 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
6566 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6567 (__v8si)(__m256i)(B), \
6568 (__v8si)(__m256i)(C), (int)(imm), \
6571 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6572 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
6573 (__v8si)(__m256i)(B), \
6574 (__v8si)(__m256i)(C), (int)(imm), \
6577 #define _mm_ternarylogic_epi64(A, B, C, imm) \
6578 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6579 (__v2di)(__m128i)(B), \
6580 (__v2di)(__m128i)(C), (int)(imm), \
6583 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
6584 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6585 (__v2di)(__m128i)(B), \
6586 (__v2di)(__m128i)(C), (int)(imm), \
6589 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6590 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
6591 (__v2di)(__m128i)(B), \
6592 (__v2di)(__m128i)(C), (int)(imm), \
6595 #define _mm256_ternarylogic_epi64(A, B, C, imm) \
6596 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6597 (__v4di)(__m256i)(B), \
6598 (__v4di)(__m256i)(C), (int)(imm), \
6601 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
6602 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6603 (__v4di)(__m256i)(B), \
6604 (__v4di)(__m256i)(C), (int)(imm), \
6607 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6608 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
6609 (__v4di)(__m256i)(B), \
6610 (__v4di)(__m256i)(C), (int)(imm), \
/* Shuffle of 128-bit groups of floats: passes A and B (each viewed as eight
 * floats) and the int selector imm to __builtin_ia32_shuf_f32x4_256.
 * The whole expansion is parenthesized so that operators applied at the use
 * site (subscript, sizeof, ...) bind to the complete result rather than to
 * part of the cast expression. */
6615 #define _mm256_shuffle_f32x4(A, B, imm) \
6616 ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
6617 (__v8sf)(__m256)(B), (int)(imm)))
/* Merge-masking form of _mm256_shuffle_f32x4(A, B, imm): float lanes whose
 * bit in U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6619 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
6620 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6621 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6622 (__v8sf)(__m256)(W)))
/* Zero-masking form of _mm256_shuffle_f32x4(A, B, imm): float lanes whose bit
 * in U is set come from the shuffle result, the others from
 * _mm256_setzero_ps().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6624 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
6625 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6626 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
6627 (__v8sf)_mm256_setzero_ps()))
/* Shuffle of 128-bit groups of doubles: passes A and B (each viewed as four
 * doubles) and the int selector imm to __builtin_ia32_shuf_f64x2_256.
 * The whole expansion is parenthesized so that operators applied at the use
 * site bind to the complete result rather than to part of the cast
 * expression. */
6629 #define _mm256_shuffle_f64x2(A, B, imm) \
6630 ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
6631 (__v4df)(__m256d)(B), (int)(imm)))
/* Merge-masking form of _mm256_shuffle_f64x2(A, B, imm): double lanes whose
 * bit in U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6633 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
6634 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6635 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6636 (__v4df)(__m256d)(W)))
/* Zero-masking form of _mm256_shuffle_f64x2(A, B, imm): double lanes whose
 * bit in U is set come from the shuffle result, the others from
 * _mm256_setzero_pd().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6638 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
6639 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6640 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
6641 (__v4df)_mm256_setzero_pd()))
/* Shuffle of 128-bit groups of 32-bit integers: passes A and B (each viewed
 * as eight 32-bit lanes) and the int selector imm to
 * __builtin_ia32_shuf_i32x4_256.
 * The whole expansion is parenthesized so that operators applied at the use
 * site bind to the complete result rather than to part of the cast
 * expression. */
6643 #define _mm256_shuffle_i32x4(A, B, imm) \
6644 ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
6645 (__v8si)(__m256i)(B), (int)(imm)))
/* Merge-masking form of _mm256_shuffle_i32x4(A, B, imm): 32-bit lanes whose
 * bit in U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6647 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
6648 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6649 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6650 (__v8si)(__m256i)(W)))
/* Zero-masking form of _mm256_shuffle_i32x4(A, B, imm): 32-bit lanes whose
 * bit in U is set come from the shuffle result, the others from
 * _mm256_setzero_si256().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6652 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
6653 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
6654 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
6655 (__v8si)_mm256_setzero_si256()))
/* Shuffle of 128-bit groups of 64-bit integers: passes A and B (each viewed
 * as four 64-bit lanes) and the int selector imm to
 * __builtin_ia32_shuf_i64x2_256.
 * The whole expansion is parenthesized so that operators applied at the use
 * site bind to the complete result rather than to part of the cast
 * expression. */
6657 #define _mm256_shuffle_i64x2(A, B, imm) \
6658 ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
6659 (__v4di)(__m256i)(B), (int)(imm)))
/* Merge-masking form of _mm256_shuffle_i64x2(A, B, imm): 64-bit lanes whose
 * bit in U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6661 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
6662 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6663 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6664 (__v4di)(__m256i)(W)))
/* Zero-masking form of _mm256_shuffle_i64x2(A, B, imm): 64-bit lanes whose
 * bit in U is set come from the shuffle result, the others from
 * _mm256_setzero_si256().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6667 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
6668 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
6669 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
6670 (__v4di)_mm256_setzero_si256()))
/* Merge-masking form of _mm_shuffle_pd(A, B, M): double lanes whose bit in U
 * is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6672 #define _mm_mask_shuffle_pd(W, U, A, B, M) \
6673 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6674 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6675 (__v2df)(__m128d)(W)))
/* Zero-masking form of _mm_shuffle_pd(A, B, M): double lanes whose bit in U
 * is set come from the shuffle result, the others from _mm_setzero_pd().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6677 #define _mm_maskz_shuffle_pd(U, A, B, M) \
6678 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
6679 (__v2df)_mm_shuffle_pd((A), (B), (M)), \
6680 (__v2df)_mm_setzero_pd()))
/* Merge-masking form of _mm256_shuffle_pd(A, B, M): double lanes whose bit in
 * U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6682 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \
6683 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6684 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6685 (__v4df)(__m256d)(W)))
/* Zero-masking form of _mm256_shuffle_pd(A, B, M): double lanes whose bit in
 * U is set come from the shuffle result, the others from _mm256_setzero_pd().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6687 #define _mm256_maskz_shuffle_pd(U, A, B, M) \
6688 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
6689 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
6690 (__v4df)_mm256_setzero_pd()))
/* Merge-masking form of _mm_shuffle_ps(A, B, M): float lanes whose bit in U
 * is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6692 #define _mm_mask_shuffle_ps(W, U, A, B, M) \
6693 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6694 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6695 (__v4sf)(__m128)(W)))
/* Zero-masking form of _mm_shuffle_ps(A, B, M): float lanes whose bit in U is
 * set come from the shuffle result, the others from _mm_setzero_ps().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6697 #define _mm_maskz_shuffle_ps(U, A, B, M) \
6698 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
6699 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
6700 (__v4sf)_mm_setzero_ps()))
/* Merge-masking form of _mm256_shuffle_ps(A, B, M): float lanes whose bit in
 * U is set come from the shuffle result, the others from W.
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6702 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \
6703 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6704 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6705 (__v8sf)(__m256)(W)))
/* Zero-masking form of _mm256_shuffle_ps(A, B, M): float lanes whose bit in U
 * is set come from the shuffle result, the others from _mm256_setzero_ps().
 * The whole expansion is parenthesized so it behaves as a single operand at
 * the use site. */
6707 #define _mm256_maskz_shuffle_ps(U, A, B, M) \
6708 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
6709 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
6710 (__v8sf)_mm256_setzero_ps()))
6712 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6713 _mm_rsqrt14_pd (__m128d __A)
6715 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6721 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6722 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6724 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6729 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6730 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
6732 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6738 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6739 _mm256_rsqrt14_pd (__m256d __A)
6741 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6743 _mm256_setzero_pd (),
6747 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6748 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6750 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6755 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6756 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
6758 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6760 _mm256_setzero_pd (),
6764 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6765 _mm_rsqrt14_ps (__m128 __A)
6767 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6773 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6774 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6776 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6781 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6782 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
6784 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6790 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6791 _mm256_rsqrt14_ps (__m256 __A)
6793 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6795 _mm256_setzero_ps (),
6799 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6800 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6802 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6807 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6808 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
6810 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6812 _mm256_setzero_ps (),
/* Duplicates the four floats of __A into both halves of a 256-bit vector:
 * the shuffle indices 0..3 are listed twice, so result lanes 4..7 repeat
 * lanes 0..3. */
6816 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6817 _mm256_broadcast_f32x4(__m128 __A)
6819 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6820 0, 1, 2, 3, 0, 1, 2, 3);
6823 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6824 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6826 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6827 (__v8sf)_mm256_broadcast_f32x4(__A),
/* Zero-masking form of _mm256_broadcast_f32x4(__A): float lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm256_setzero_ps(). */
6831 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6832 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
6834 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6835 (__v8sf)_mm256_broadcast_f32x4(__A),
6836 (__v8sf)_mm256_setzero_ps());
/* Duplicates the four 32-bit integers of __A into both halves of a 256-bit
 * vector: the shuffle indices 0..3 are listed twice, so result lanes 4..7
 * repeat lanes 0..3. */
6839 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6840 _mm256_broadcast_i32x4(__m128i __A)
6842 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6843 0, 1, 2, 3, 0, 1, 2, 3);
6846 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6847 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6849 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6850 (__v8si)_mm256_broadcast_i32x4(__A),
/* Zero-masking form of _mm256_broadcast_i32x4(__A): 32-bit lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm256_setzero_si256(). */
6854 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6855 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
6857 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6858 (__v8si)_mm256_broadcast_i32x4(__A),
6859 (__v8si)_mm256_setzero_si256());
6862 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6863 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6865 return (__m256d)__builtin_ia32_selectpd_256(__M,
6866 (__v4df) _mm256_broadcastsd_pd(__A),
/* Zero-masking form of _mm256_broadcastsd_pd(__A): double lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm256_setzero_pd(). */
6870 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6871 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
6873 return (__m256d)__builtin_ia32_selectpd_256(__M,
6874 (__v4df) _mm256_broadcastsd_pd(__A),
6875 (__v4df) _mm256_setzero_pd());
6878 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6879 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6881 return (__m128)__builtin_ia32_selectps_128(__M,
6882 (__v4sf) _mm_broadcastss_ps(__A),
/* Zero-masking form of _mm_broadcastss_ps(__A): float lanes whose bit in __M
 * is set hold the broadcast value, the others come from _mm_setzero_ps(). */
6886 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6887 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6889 return (__m128)__builtin_ia32_selectps_128(__M,
6890 (__v4sf) _mm_broadcastss_ps(__A),
6891 (__v4sf) _mm_setzero_ps());
6894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6895 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6897 return (__m256)__builtin_ia32_selectps_256(__M,
6898 (__v8sf) _mm256_broadcastss_ps(__A),
/* Zero-masking form of _mm256_broadcastss_ps(__A): float lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm256_setzero_ps(). */
6902 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6903 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
6905 return (__m256)__builtin_ia32_selectps_256(__M,
6906 (__v8sf) _mm256_broadcastss_ps(__A),
6907 (__v8sf) _mm256_setzero_ps());
6910 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6911 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6913 return (__m128i)__builtin_ia32_selectd_128(__M,
6914 (__v4si) _mm_broadcastd_epi32(__A),
/* Zero-masking form of _mm_broadcastd_epi32(__A): 32-bit lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm_setzero_si128(). */
6918 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6919 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6921 return (__m128i)__builtin_ia32_selectd_128(__M,
6922 (__v4si) _mm_broadcastd_epi32(__A),
6923 (__v4si) _mm_setzero_si128());
6926 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6927 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6929 return (__m256i)__builtin_ia32_selectd_256(__M,
6930 (__v8si) _mm256_broadcastd_epi32(__A),
/* Zero-masking form of _mm256_broadcastd_epi32(__A): 32-bit lanes whose bit
 * in __M is set hold the broadcast value, the others come from
 * _mm256_setzero_si256(). */
6934 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6935 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
6937 return (__m256i)__builtin_ia32_selectd_256(__M,
6938 (__v8si) _mm256_broadcastd_epi32(__A),
6939 (__v8si) _mm256_setzero_si256());
6942 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6943 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6945 return (__m128i)__builtin_ia32_selectq_128(__M,
6946 (__v2di) _mm_broadcastq_epi64(__A),
/* Zero-masking form of _mm_broadcastq_epi64(__A): 64-bit lanes whose bit in
 * __M is set hold the broadcast value, the others come from
 * _mm_setzero_si128(). */
6950 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6951 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6953 return (__m128i)__builtin_ia32_selectq_128(__M,
6954 (__v2di) _mm_broadcastq_epi64(__A),
6955 (__v2di) _mm_setzero_si128());
6958 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6959 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6961 return (__m256i)__builtin_ia32_selectq_256(__M,
6962 (__v4di) _mm256_broadcastq_epi64(__A),
/* Zero-masking form of _mm256_broadcastq_epi64(__A): 64-bit lanes whose bit
 * in __M is set hold the broadcast value, the others come from
 * _mm256_setzero_si256(). */
6966 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6967 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
6969 return (__m256i)__builtin_ia32_selectq_256(__M,
6970 (__v4di) _mm256_broadcastq_epi64(__A),
6971 (__v4di) _mm256_setzero_si256());
6974 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6975 _mm_cvtsepi32_epi8 (__m128i __A)
6977 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6978 (__v16qi)_mm_undefined_si128(),
/* Merge-masked 32-bit to 8-bit narrowing with signed saturation: passes __A
 * (four 32-bit lanes), the pass-through vector __O (as 16 bytes) and the
 * write mask __M to __builtin_ia32_pmovsdb128_mask. */
6982 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6983 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6985 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6986 (__v16qi) __O, __M);
6989 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6990 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
6992 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6993 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 8-bit, signed saturation): hands the
 * destination __P, the source __A (four 32-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovsdb128mem_mask. Returns nothing. */
6997 static __inline__ void __DEFAULT_FN_ATTRS128
6998 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7000 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
/* 32-bit to 8-bit narrowing with signed saturation of a 256-bit source: calls
 * __builtin_ia32_pmovsdb256_mask on __A (eight 32-bit lanes) with an
 * undefined pass-through vector.
 * Declared with __DEFAULT_FN_ATTRS256 because the operand is a 256-bit vector;
 * the 128-bit attribute set advertised too small a minimum vector width and
 * disagreed with the masked 256-bit siblings of this intrinsic. */
7003 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7004 _mm256_cvtsepi32_epi8 (__m256i __A)
7006 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7007 (__v16qi)_mm_undefined_si128(),
/* Merge-masked 32-bit to 8-bit narrowing with signed saturation of a 256-bit
 * source: passes __A (eight 32-bit lanes), the pass-through vector __O (as 16
 * bytes) and the write mask __M to __builtin_ia32_pmovsdb256_mask. */
7011 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7012 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7014 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7015 (__v16qi) __O, __M);
7018 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7019 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7021 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7022 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 8-bit, signed saturation) of a 256-bit
 * source: hands the destination __P, the source __A (eight 32-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovsdb256mem_mask. Returns nothing.
 * Declared with __DEFAULT_FN_ATTRS256 because __A is a 256-bit vector; the
 * 128-bit attribute set advertised too small a minimum vector width and
 * disagreed with the other 256-bit narrowing stores. */
7026 static __inline__ void __DEFAULT_FN_ATTRS256
7027 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7029 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7032 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7033 _mm_cvtsepi32_epi16 (__m128i __A)
7035 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7036 (__v8hi)_mm_setzero_si128 (),
7040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7041 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7043 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7049 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7051 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7052 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 16-bit, signed saturation): hands the
 * destination __P, the source __A (four 32-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovsdw128mem_mask. Returns nothing. */
7056 static __inline__ void __DEFAULT_FN_ATTRS128
7057 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7059 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7062 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7063 _mm256_cvtsepi32_epi16 (__m256i __A)
7065 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7066 (__v8hi)_mm_undefined_si128(),
7070 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7071 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7073 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7077 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7078 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7080 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7081 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 16-bit, signed saturation) of a 256-bit
 * source: hands the destination __P, the source __A (eight 32-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovsdw256mem_mask. Returns nothing. */
7085 static __inline__ void __DEFAULT_FN_ATTRS256
7086 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7088 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7091 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7092 _mm_cvtsepi64_epi8 (__m128i __A)
7094 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7095 (__v16qi)_mm_undefined_si128(),
/* Merge-masked 64-bit to 8-bit narrowing with signed saturation: passes __A
 * (two 64-bit lanes), the pass-through vector __O (as 16 bytes) and the write
 * mask __M to __builtin_ia32_pmovsqb128_mask. */
7099 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7100 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7102 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7103 (__v16qi) __O, __M);
7106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7107 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7109 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7110 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 8-bit, signed saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovsqb128mem_mask. Returns nothing. */
7114 static __inline__ void __DEFAULT_FN_ATTRS128
7115 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7117 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7120 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7121 _mm256_cvtsepi64_epi8 (__m256i __A)
7123 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7124 (__v16qi)_mm_undefined_si128(),
/* Merge-masked 64-bit to 8-bit narrowing with signed saturation of a 256-bit
 * source: passes __A (four 64-bit lanes), the pass-through vector __O (as 16
 * bytes) and the write mask __M to __builtin_ia32_pmovsqb256_mask. */
7128 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7129 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7131 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7132 (__v16qi) __O, __M);
7135 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7136 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7138 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7139 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 8-bit, signed saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovsqb256mem_mask. Returns nothing. */
7143 static __inline__ void __DEFAULT_FN_ATTRS256
7144 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7146 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7149 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7150 _mm_cvtsepi64_epi32 (__m128i __A)
7152 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7153 (__v4si)_mm_undefined_si128(),
7157 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7158 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7160 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7165 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7167 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7168 (__v4si) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 32-bit, signed saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovsqd128mem_mask. Returns nothing. */
7172 static __inline__ void __DEFAULT_FN_ATTRS128
7173 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7175 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7178 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7179 _mm256_cvtsepi64_epi32 (__m256i __A)
7181 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7182 (__v4si)_mm_undefined_si128(),
7186 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7187 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7189 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7194 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7195 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7197 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7198 (__v4si) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 32-bit, signed saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovsqd256mem_mask. Returns nothing. */
7202 static __inline__ void __DEFAULT_FN_ATTRS256
7203 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7205 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7208 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7209 _mm_cvtsepi64_epi16 (__m128i __A)
7211 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7212 (__v8hi)_mm_undefined_si128(),
7216 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7217 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7219 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7223 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7224 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7226 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7227 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 16-bit, signed saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovsqw128mem_mask. Returns nothing. */
7231 static __inline__ void __DEFAULT_FN_ATTRS128
7232 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7234 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7237 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7238 _mm256_cvtsepi64_epi16 (__m256i __A)
7240 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7241 (__v8hi)_mm_undefined_si128(),
7245 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7246 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7248 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7252 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7253 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7255 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7256 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 16-bit, signed saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovsqw256mem_mask. Returns nothing. */
7260 static __inline__ void __DEFAULT_FN_ATTRS256
7261 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7263 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7266 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7267 _mm_cvtusepi32_epi8 (__m128i __A)
7269 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7270 (__v16qi)_mm_undefined_si128(),
7274 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7275 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7277 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7283 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7285 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7286 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 8-bit, unsigned saturation): hands the
 * destination __P, the source __A (four 32-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovusdb128mem_mask. Returns nothing. */
7290 static __inline__ void __DEFAULT_FN_ATTRS128
7291 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7293 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7296 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7297 _mm256_cvtusepi32_epi8 (__m256i __A)
7299 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7300 (__v16qi)_mm_undefined_si128(),
7304 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7305 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7307 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7312 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7313 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7315 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7316 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 8-bit, unsigned saturation) of a 256-bit
 * source: hands the destination __P, the source __A (eight 32-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovusdb256mem_mask. Returns nothing. */
7320 static __inline__ void __DEFAULT_FN_ATTRS256
7321 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7323 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7326 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7327 _mm_cvtusepi32_epi16 (__m128i __A)
7329 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7330 (__v8hi)_mm_undefined_si128(),
7334 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7335 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7337 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7341 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7342 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7344 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7345 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 16-bit, unsigned saturation): hands the
 * destination __P, the source __A (four 32-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovusdw128mem_mask. Returns nothing. */
7349 static __inline__ void __DEFAULT_FN_ATTRS128
7350 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7352 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7355 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7356 _mm256_cvtusepi32_epi16 (__m256i __A)
7358 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7359 (__v8hi) _mm_undefined_si128(),
7363 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7364 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7366 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7370 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7371 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7373 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7374 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (32-bit to 16-bit, unsigned saturation) of a 256-bit
 * source: hands the destination __P, the source __A (eight 32-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovusdw256mem_mask. Returns nothing. */
7378 static __inline__ void __DEFAULT_FN_ATTRS256
7379 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7381 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7385 _mm_cvtusepi64_epi8 (__m128i __A)
7387 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7388 (__v16qi)_mm_undefined_si128(),
7392 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7393 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7395 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7400 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7401 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7403 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7404 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 8-bit, unsigned saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovusqb128mem_mask. Returns nothing. */
7408 static __inline__ void __DEFAULT_FN_ATTRS128
7409 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7411 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7414 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7415 _mm256_cvtusepi64_epi8 (__m256i __A)
7417 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7418 (__v16qi)_mm_undefined_si128(),
7422 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7423 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7425 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7430 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7431 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7433 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7434 (__v16qi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 8-bit, unsigned saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovusqb256mem_mask. Returns nothing. */
7438 static __inline__ void __DEFAULT_FN_ATTRS256
7439 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7441 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7444 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7445 _mm_cvtusepi64_epi32 (__m128i __A)
7447 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7448 (__v4si)_mm_undefined_si128(),
7452 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7453 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7455 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7460 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7462 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7463 (__v4si) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 32-bit, unsigned saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovusqd128mem_mask. Returns nothing. */
7467 static __inline__ void __DEFAULT_FN_ATTRS128
7468 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7470 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7473 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7474 _mm256_cvtusepi64_epi32 (__m256i __A)
7476 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7477 (__v4si)_mm_undefined_si128(),
7481 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7482 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7484 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7488 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7489 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7491 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7492 (__v4si) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 32-bit, unsigned saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovusqd256mem_mask. Returns nothing. */
7496 static __inline__ void __DEFAULT_FN_ATTRS256
7497 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7499 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7502 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7503 _mm_cvtusepi64_epi16 (__m128i __A)
7505 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7506 (__v8hi)_mm_undefined_si128(),
7510 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7511 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7513 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7517 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7518 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7520 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7521 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 16-bit, unsigned saturation): hands the
 * destination __P, the source __A (two 64-bit lanes) and the write mask __M
 * to __builtin_ia32_pmovusqw128mem_mask. Returns nothing. */
7525 static __inline__ void __DEFAULT_FN_ATTRS128
7526 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7528 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7531 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7532 _mm256_cvtusepi64_epi16 (__m256i __A)
7534 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7535 (__v8hi)_mm_undefined_si128(),
7539 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7540 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7542 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7546 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7547 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7549 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7550 (__v8hi) _mm_setzero_si128 (),
/* Masked narrowing store (64-bit to 16-bit, unsigned saturation) of a 256-bit
 * source: hands the destination __P, the source __A (four 64-bit lanes) and
 * the write mask __M to __builtin_ia32_pmovusqw256mem_mask. Returns nothing. */
7554 static __inline__ void __DEFAULT_FN_ATTRS256
7555 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7557 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
/* Truncating 32-bit to 8-bit conversion. __builtin_convertvector narrows the
 * four 32-bit lanes of __A to a 4-byte vector; the shuffle then builds the
 * 16-byte result from that vector (indices 0..3) and a 4-byte zero vector
 * (indices 4..7). Result bytes 0..3 are the converted values and every other
 * byte is zero, since indices 4..7 all refer to zero elements. */
7560 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7561 _mm_cvtepi32_epi8 (__m128i __A)
7563 return (__m128i)__builtin_shufflevector(
7564 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7565 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7568 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7569 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7571 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7572 (__v16qi) __O, __M);
7575 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7576 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
7578 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7580 _mm_setzero_si128 (),
7584 static __inline__ void __DEFAULT_FN_ATTRS256
7585 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7587 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7590 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7591 _mm256_cvtepi32_epi8 (__m256i __A)
7593 return (__m128i)__builtin_shufflevector(
7594 __builtin_convertvector((__v8si)__A, __v8qi),
7595 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7599 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7600 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7602 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7603 (__v16qi) __O, __M);
7606 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7607 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
7609 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7610 (__v16qi) _mm_setzero_si128 (),
7614 static __inline__ void __DEFAULT_FN_ATTRS256
7615 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7617 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7620 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7621 _mm_cvtepi32_epi16 (__m128i __A)
7623 return (__m128i)__builtin_shufflevector(
7624 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7628 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7629 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7631 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7635 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7636 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
7638 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7639 (__v8hi) _mm_setzero_si128 (),
7643 static __inline__ void __DEFAULT_FN_ATTRS128
7644 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7646 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7650 _mm256_cvtepi32_epi16 (__m256i __A)
7652 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7655 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7656 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7658 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7662 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7663 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
7665 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7666 (__v8hi) _mm_setzero_si128 (),
7670 static __inline__ void __DEFAULT_FN_ATTRS256
7671 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7673 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7676 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7677 _mm_cvtepi64_epi8 (__m128i __A)
7679 return (__m128i)__builtin_shufflevector(
7680 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7681 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7685 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7687 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7688 (__v16qi) __O, __M);
7691 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7692 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
7694 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7695 (__v16qi) _mm_setzero_si128 (),
7699 static __inline__ void __DEFAULT_FN_ATTRS128
7700 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7702 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7705 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7706 _mm256_cvtepi64_epi8 (__m256i __A)
7708 return (__m128i)__builtin_shufflevector(
7709 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7710 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7713 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7714 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7716 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7717 (__v16qi) __O, __M);
7720 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7721 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
7723 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7724 (__v16qi) _mm_setzero_si128 (),
7728 static __inline__ void __DEFAULT_FN_ATTRS256
7729 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7731 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7734 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7735 _mm_cvtepi64_epi32 (__m128i __A)
7737 return (__m128i)__builtin_shufflevector(
7738 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7741 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7742 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7744 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7748 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7749 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
7751 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7752 (__v4si) _mm_setzero_si128 (),
7756 static __inline__ void __DEFAULT_FN_ATTRS128
7757 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7759 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7762 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7763 _mm256_cvtepi64_epi32 (__m256i __A)
7765 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7768 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7769 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7771 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7772 (__v4si)_mm256_cvtepi64_epi32(__A),
7776 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7777 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
7779 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7780 (__v4si)_mm256_cvtepi64_epi32(__A),
7781 (__v4si)_mm_setzero_si128());
7784 static __inline__ void __DEFAULT_FN_ATTRS256
7785 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7787 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7790 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7791 _mm_cvtepi64_epi16 (__m128i __A)
7793 return (__m128i)__builtin_shufflevector(
7794 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7798 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7799 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7801 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7806 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7807 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
7809 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7810 (__v8hi) _mm_setzero_si128 (),
7814 static __inline__ void __DEFAULT_FN_ATTRS128
7815 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7817 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7820 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7821 _mm256_cvtepi64_epi16 (__m256i __A)
7823 return (__m128i)__builtin_shufflevector(
7824 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7828 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7829 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7831 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7835 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7836 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
7838 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7839 (__v8hi) _mm_setzero_si128 (),
7843 static __inline__ void __DEFAULT_FN_ATTRS256
7844 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7846 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7849 #define _mm256_extractf32x4_ps(A, imm) \
7850 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7852 (__v4sf)_mm_undefined_ps(), \
7855 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7856 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7858 (__v4sf)(__m128)(W), \
7861 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7862 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7864 (__v4sf)_mm_setzero_ps(), \
7867 #define _mm256_extracti32x4_epi32(A, imm) \
7868 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7870 (__v4si)_mm_undefined_si128(), \
7873 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7874 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7876 (__v4si)(__m128i)(W), \
7879 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7880 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7882 (__v4si)_mm_setzero_si128(), \
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation (e.g. a subscript). */

/* Passes A (as __v8sf), B (as __v4sf) and the immediate imm to the
 * insertf32x4_256 builtin; the result is typed __m256.
 * NOTE(review): presumably imm selects which 128-bit half of A is replaced
 * by B -- confirm against the ISA reference. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))

/* Masked form: mask U, the insert result and W go to the 256-bit float
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation (e.g. a subscript). */

/* Passes A (as __v8si), B (as __v4si) and the immediate imm to the
 * inserti32x4_256 builtin; the result is typed __m256i.
 * NOTE(review): presumably imm selects which 128-bit half of A is replaced
 * by B -- confirm against the ISA reference. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))

/* Masked form: mask U, the insert result and W go to the 256-bit dword
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7913 #define _mm_getmant_pd(A, B, C) \
7914 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7915 (int)(((C)<<2) | (B)), \
7916 (__v2df)_mm_setzero_pd(), \
7919 #define _mm_mask_getmant_pd(W, U, A, B, C) \
7920 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7921 (int)(((C)<<2) | (B)), \
7922 (__v2df)(__m128d)(W), \
7925 #define _mm_maskz_getmant_pd(U, A, B, C) \
7926 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7927 (int)(((C)<<2) | (B)), \
7928 (__v2df)_mm_setzero_pd(), \
7931 #define _mm256_getmant_pd(A, B, C) \
7932 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7933 (int)(((C)<<2) | (B)), \
7934 (__v4df)_mm256_setzero_pd(), \
7937 #define _mm256_mask_getmant_pd(W, U, A, B, C) \
7938 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7939 (int)(((C)<<2) | (B)), \
7940 (__v4df)(__m256d)(W), \
7943 #define _mm256_maskz_getmant_pd(U, A, B, C) \
7944 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7945 (int)(((C)<<2) | (B)), \
7946 (__v4df)_mm256_setzero_pd(), \
7949 #define _mm_getmant_ps(A, B, C) \
7950 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7951 (int)(((C)<<2) | (B)), \
7952 (__v4sf)_mm_setzero_ps(), \
7955 #define _mm_mask_getmant_ps(W, U, A, B, C) \
7956 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7957 (int)(((C)<<2) | (B)), \
7958 (__v4sf)(__m128)(W), \
7961 #define _mm_maskz_getmant_ps(U, A, B, C) \
7962 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7963 (int)(((C)<<2) | (B)), \
7964 (__v4sf)_mm_setzero_ps(), \
7967 #define _mm256_getmant_ps(A, B, C) \
7968 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7969 (int)(((C)<<2) | (B)), \
7970 (__v8sf)_mm256_setzero_ps(), \
7973 #define _mm256_mask_getmant_ps(W, U, A, B, C) \
7974 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7975 (int)(((C)<<2) | (B)), \
7976 (__v8sf)(__m256)(W), \
7979 #define _mm256_maskz_getmant_ps(U, A, B, C) \
7980 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7981 (int)(((C)<<2) | (B)), \
7982 (__v8sf)_mm256_setzero_ps(), \
/* Masked gathers with 64-bit indices and 64-bit elements.
 *
 * Every macro forwards, in this order, the pass-through vector v1_old, the
 * base address addr (as void const *), the index vector, the mask (as
 * __mmask8) and the scale (as int) to the matching gather3div* builtin.
 * The whole expansion is parenthesized so the result cast stays attached to
 * the builtin call when the macro is used inside a larger expression. */

/* 2 x double, indices in a __m128i viewed as __v2di. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 2 x 64-bit integer, indices in a __m128i viewed as __v2di. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 4 x double, indices in a __m256i viewed as __v4di. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 4 x 64-bit integer, indices in a __m256i viewed as __v4di. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 64-bit indices and 32-bit elements.
 *
 * Every macro forwards, in this order, the pass-through vector v1_old, the
 * base address addr (as void const *), the index vector, the mask (as
 * __mmask8) and the scale (as int) to the matching gather3div* builtin.
 * Both the 128-bit and the 256-bit index forms take and produce a 128-bit
 * data vector.  The whole expansion is parenthesized so the result cast
 * stays attached to the builtin call inside a larger expression. */

/* float data, indices in a __m128i viewed as __v2di. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 32-bit integer data, indices in a __m128i viewed as __v2di. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* float data, indices in a __m256i viewed as __v4di. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 32-bit integer data, indices in a __m256i viewed as __v4di. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 32-bit indices and 64-bit elements.
 *
 * Every macro forwards, in this order, the pass-through vector v1_old, the
 * base address addr (as void const *), the index vector (always a __m128i
 * viewed as __v4si), the mask (as __mmask8) and the scale (as int) to the
 * matching gather3siv* builtin.  The whole expansion is parenthesized so the
 * result cast stays attached to the builtin call inside a larger
 * expression. */

/* 2 x double. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 2 x 64-bit integer. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 4 x double. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 4 x 64-bit integer. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 32-bit indices and 32-bit elements.
 *
 * Every macro forwards, in this order, the pass-through vector v1_old, the
 * base address addr (as void const *), the index vector, the mask (as
 * __mmask8) and the scale (as int) to the matching gather3siv* builtin.
 * The whole expansion is parenthesized so the result cast stays attached to
 * the builtin call inside a larger expression. */

/* 4 x float, indices in a __m128i viewed as __v4si. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 4 x 32-bit integer, indices in a __m128i viewed as __v4si. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

/* 8 x float, indices in a __m256i viewed as __v8si. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* 8 x 32-bit integer, indices in a __m256i viewed as __v8si. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes X (as __v4df) and the immediate C to the permdf256 builtin; the
 * result is typed __m256d. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

/* Masked form: mask U, the permute result and W go to the 256-bit double
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes X (as __v4di) and the immediate C to the permdi256 builtin; the
 * result is typed __m256i. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

/* Masked form: mask U, the permute result and W go to the 256-bit qword
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)(__m256i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)_mm256_setzero_si256()))
8107 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8108 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8110 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8113 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8114 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8117 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8118 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8122 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8123 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8125 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8126 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8127 (__v4df)_mm256_setzero_pd());
8130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8131 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8133 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8137 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8140 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8141 (__v4di)_mm256_setzero_si256());
8144 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8145 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8148 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8149 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped: B is
 * passed first and A second. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8155 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8156 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8158 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8159 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8163 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8164 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8166 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8167 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8168 (__v8sf)_mm256_setzero_ps());
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped: B is
 * passed first and A second. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8173 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8174 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8177 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8178 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8182 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8183 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8185 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8186 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8187 (__v8si)_mm256_setzero_si256());
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes A and B (both as __v4si) and the immediate imm to the alignd128
 * builtin; the result is typed __m128i. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))

/* Masked form: mask U, the align result and W go to the 128-bit dword
 * select builtin (W is the third, fall-back operand). */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)(__m128i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                    (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                    (__v4si)_mm_setzero_si128()))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes A and B (both as __v8si) and the immediate imm to the alignd256
 * builtin; the result is typed __m256i. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))

/* Masked form: mask U, the align result and W go to the 256-bit dword
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes A and B (both as __v2di) and the immediate imm to the alignq128
 * builtin; the result is typed __m128i. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))

/* Masked form: mask U, the align result and W go to the 128-bit qword
 * select builtin (W is the third, fall-back operand). */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)(__m128i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                    (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                    (__v2di)_mm_setzero_si128()))
/* Each expansion below is wrapped in one outer pair of parentheses so the
 * leading cast cannot be split from its operand by a postfix operator
 * written after the macro invocation. */

/* Passes A and B (both as __v4di) and the immediate imm to the alignq256
 * builtin; the result is typed __m256i. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))

/* Masked form: mask U, the align result and W go to the 256-bit qword
 * select builtin (W is the third, fall-back operand). */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))

/* Zero-masked form: same as above with an all-zero vector as the third
 * select operand. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8246 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8247 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8249 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8250 (__v4sf)_mm_movehdup_ps(__A),
8254 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8255 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8257 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8258 (__v4sf)_mm_movehdup_ps(__A),
8259 (__v4sf)_mm_setzero_ps());
8262 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8263 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8265 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8266 (__v8sf)_mm256_movehdup_ps(__A),
8270 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8271 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8273 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8274 (__v8sf)_mm256_movehdup_ps(__A),
8275 (__v8sf)_mm256_setzero_ps());
8278 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8279 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8281 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8282 (__v4sf)_mm_moveldup_ps(__A),
8286 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8287 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8289 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8290 (__v4sf)_mm_moveldup_ps(__A),
8291 (__v4sf)_mm_setzero_ps());
8294 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8295 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8297 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8298 (__v8sf)_mm256_moveldup_ps(__A),
8302 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8303 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8305 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8306 (__v8sf)_mm256_moveldup_ps(__A),
8307 (__v8sf)_mm256_setzero_ps());
/* Masked wrappers around _mm256_shuffle_epi32(A, I).  Each expansion is
 * wrapped in one outer pair of parentheses so the leading cast cannot be
 * split from its operand by a postfix operator written after the macro
 * invocation. */

/* Mask U, the shuffle result and W go to the 256-bit dword select builtin
 * (W is the third, fall-back operand). */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))

/* Same as above with an all-zero vector as the third select operand. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
/* Masked wrappers around _mm_shuffle_epi32(A, I).  Each expansion is wrapped
 * in one outer pair of parentheses so the leading cast cannot be split from
 * its operand by a postfix operator written after the macro invocation. */

/* Mask U, the shuffle result and W go to the 128-bit dword select builtin
 * (W is the third, fall-back operand). */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))

/* Same as above with an all-zero vector as the third select operand. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8330 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8331 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8333 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8338 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8339 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8341 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8343 (__v2df) _mm_setzero_pd ());
8346 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8347 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8349 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8354 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8355 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8357 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8359 (__v4df) _mm256_setzero_pd ());
8362 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8363 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8365 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8370 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8371 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8373 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8375 (__v4sf) _mm_setzero_ps ());
8378 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8379 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8381 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8386 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8387 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8389 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8391 (__v8sf) _mm256_setzero_ps ());
8394 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8395 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8397 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8402 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8403 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8405 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8411 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8412 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8414 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8419 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8420 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8422 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8424 _mm256_setzero_ps (),
8428 static __inline __m128i __DEFAULT_FN_ATTRS128
8429 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8431 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8436 static __inline __m128i __DEFAULT_FN_ATTRS128
8437 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8439 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8440 (__v8hi) _mm_setzero_si128 (),
8444 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
8445 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8446 (__v8hi)(__m128i)(W), \
8449 #define _mm_maskz_cvt_roundps_ph(U, A, I) \
8450 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8451 (__v8hi)_mm_setzero_si128(), \
8454 static __inline __m128i __DEFAULT_FN_ATTRS256
8455 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8457 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8462 static __inline __m128i __DEFAULT_FN_ATTRS256
8463 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8465 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8466 (__v8hi) _mm_setzero_si128(),
8469 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
8470 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8471 (__v8hi)(__m128i)(W), \
8474 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \
8475 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8476 (__v8hi)_mm_setzero_si128(), \
8480 #undef __DEFAULT_FN_ATTRS128
8481 #undef __DEFAULT_FN_ATTRS256
8483 #endif /* __AVX512VLINTRIN_H */