1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512BWINTRIN_H
29 #define __AVX512BWINTRIN_H
/* Mask register value types: one bit per 16-bit lane (32) or per 8-bit lane
 * (64) of a 512-bit vector. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
38 static __inline __mmask32 __DEFAULT_FN_ATTRS
39 _knot_mask32(__mmask32 __M)
41 return __builtin_ia32_knotsi(__M);
44 static __inline __mmask64 __DEFAULT_FN_ATTRS
45 _knot_mask64(__mmask64 __M)
47 return __builtin_ia32_knotdi(__M);
50 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
51 _kand_mask32(__mmask32 __A, __mmask32 __B)
53 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
56 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
57 _kand_mask64(__mmask64 __A, __mmask64 __B)
59 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
62 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
63 _kandn_mask32(__mmask32 __A, __mmask32 __B)
65 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
68 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
69 _kandn_mask64(__mmask64 __A, __mmask64 __B)
71 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
74 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
75 _kor_mask32(__mmask32 __A, __mmask32 __B)
77 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
80 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
81 _kor_mask64(__mmask64 __A, __mmask64 __B)
83 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
86 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
87 _kxnor_mask32(__mmask32 __A, __mmask32 __B)
89 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
92 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
93 _kxnor_mask64(__mmask64 __A, __mmask64 __B)
95 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
98 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
99 _kxor_mask32(__mmask32 __A, __mmask32 __B)
101 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
104 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
105 _kxor_mask64(__mmask64 __A, __mmask64 __B)
107 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
110 static __inline__ unsigned char __DEFAULT_FN_ATTRS
111 _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
113 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
116 static __inline__ unsigned char __DEFAULT_FN_ATTRS
117 _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
119 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
122 static __inline__ unsigned char __DEFAULT_FN_ATTRS
123 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
124 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
125 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
128 static __inline__ unsigned char __DEFAULT_FN_ATTRS
129 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
131 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
134 static __inline__ unsigned char __DEFAULT_FN_ATTRS
135 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
137 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
140 static __inline__ unsigned char __DEFAULT_FN_ATTRS
141 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
142 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
143 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
146 static __inline__ unsigned char __DEFAULT_FN_ATTRS
147 _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
149 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
152 static __inline__ unsigned char __DEFAULT_FN_ATTRS
153 _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
155 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
158 static __inline__ unsigned char __DEFAULT_FN_ATTRS
159 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
160 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
161 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
164 static __inline__ unsigned char __DEFAULT_FN_ATTRS
165 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
167 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
170 static __inline__ unsigned char __DEFAULT_FN_ATTRS
171 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
173 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
176 static __inline__ unsigned char __DEFAULT_FN_ATTRS
177 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
178 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
179 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
182 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
183 _kadd_mask32(__mmask32 __A, __mmask32 __B)
185 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
188 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
189 _kadd_mask64(__mmask64 __A, __mmask64 __B)
191 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
/* Mask shifts by an immediate count.  These are macros rather than functions
 * because the builtin takes the count I as a compile-time immediate
 * (NOTE(review): constant-expression requirement comes from the builtin, not
 * visible here -- confirm).  Each expansion is wrapped in parentheses so it
 * behaves as a single operand at the use site. */
#define _kshiftli_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftri_mask32(A, I) \
  ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

#define _kshiftli_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

#define _kshiftri_mask64(A, I) \
  ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
206 static __inline__ unsigned int __DEFAULT_FN_ATTRS
207 _cvtmask32_u32(__mmask32 __A) {
208 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
211 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
212 _cvtmask64_u64(__mmask64 __A) {
213 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
216 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
217 _cvtu32_mask32(unsigned int __A) {
218 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
221 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
222 _cvtu64_mask64(unsigned long long __A) {
223 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
226 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
227 _load_mask32(__mmask32 *__A) {
228 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
231 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
232 _load_mask64(__mmask64 *__A) {
233 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
236 static __inline__ void __DEFAULT_FN_ATTRS
237 _store_mask32(__mmask32 *__A, __mmask32 __B) {
238 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
241 static __inline__ void __DEFAULT_FN_ATTRS
242 _store_mask64(__mmask64 *__A, __mmask64 __B) {
243 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
/* Integer compare */

/* Generic 8-bit lane compares.  `p` selects the predicate (the named wrappers
 * in this file pass _MM_CMPINT_* constants).  The unmasked forms pass an
 * all-ones mask so every lane takes part; the `mask_` forms pass `m` as the
 * builtin's mask operand.  Per the Intel Intrinsics Guide, result bits for
 * lanes disabled in `m` are zero. */
#define _mm512_cmp_epi8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)-1))

#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), (int)(p), \
                                          (__mmask64)(m)))

#define _mm512_cmp_epu8_mask(a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)-1))

#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                           (__v64qi)(__m512i)(b), (int)(p), \
                                           (__mmask64)(m)))
/* Generic 16-bit lane compares.  `p` selects the predicate (the named
 * wrappers in this file pass _MM_CMPINT_* constants).  The unmasked forms
 * pass an all-ones mask so every lane takes part; the `mask_` forms pass `m`
 * as the builtin's mask operand.  Per the Intel Intrinsics Guide, result bits
 * for lanes disabled in `m` are zero. */
#define _mm512_cmp_epi16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm512_cmp_epu16_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                           (__v32hi)(__m512i)(b), (int)(p), \
                                           (__mmask32)(m)))
/* Named-predicate wrappers over _mm512_cmp_epi8_mask /
 * _mm512_mask_cmp_epi8_mask: each fixes the predicate argument to the
 * matching _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi8_mask(A, B) \
  (_mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \
  (_mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE))
/* Named-predicate wrappers over _mm512_cmp_epu8_mask /
 * _mm512_mask_cmp_epu8_mask: each fixes the predicate argument to the
 * matching _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu8_mask(A, B) \
  (_mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \
  (_mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE))
/* Named-predicate wrappers over _mm512_cmp_epi16_mask /
 * _mm512_mask_cmp_epi16_mask: each fixes the predicate argument to the
 * matching _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epi16_mask(A, B) \
  (_mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \
  (_mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE))
/* Named-predicate wrappers over _mm512_cmp_epu16_mask /
 * _mm512_mask_cmp_epu16_mask: each fixes the predicate argument to the
 * matching _MM_CMPINT_* constant. */
#define _mm512_cmpeq_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ))
#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ))
#define _mm512_cmpge_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE))
#define _mm512_mask_cmpge_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE))
#define _mm512_cmpgt_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT))
#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT))
#define _mm512_cmple_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE))
#define _mm512_mask_cmple_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE))
#define _mm512_cmplt_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT))
#define _mm512_mask_cmplt_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT))
#define _mm512_cmpneq_epu16_mask(A, B) \
  (_mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE))
#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \
  (_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE))
388 static __inline__ __m512i __DEFAULT_FN_ATTRS512
389 _mm512_add_epi8 (__m512i __A, __m512i __B) {
390 return (__m512i) ((__v64qu) __A + (__v64qu) __B);
393 static __inline__ __m512i __DEFAULT_FN_ATTRS512
394 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
395 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
396 (__v64qi)_mm512_add_epi8(__A, __B),
400 static __inline__ __m512i __DEFAULT_FN_ATTRS512
401 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
402 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
403 (__v64qi)_mm512_add_epi8(__A, __B),
404 (__v64qi)_mm512_setzero_si512());
407 static __inline__ __m512i __DEFAULT_FN_ATTRS512
408 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
409 return (__m512i) ((__v64qu) __A - (__v64qu) __B);
412 static __inline__ __m512i __DEFAULT_FN_ATTRS512
413 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
414 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
415 (__v64qi)_mm512_sub_epi8(__A, __B),
419 static __inline__ __m512i __DEFAULT_FN_ATTRS512
420 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
421 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
422 (__v64qi)_mm512_sub_epi8(__A, __B),
423 (__v64qi)_mm512_setzero_si512());
426 static __inline__ __m512i __DEFAULT_FN_ATTRS512
427 _mm512_add_epi16 (__m512i __A, __m512i __B) {
428 return (__m512i) ((__v32hu) __A + (__v32hu) __B);
431 static __inline__ __m512i __DEFAULT_FN_ATTRS512
432 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
433 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
434 (__v32hi)_mm512_add_epi16(__A, __B),
438 static __inline__ __m512i __DEFAULT_FN_ATTRS512
439 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
440 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
441 (__v32hi)_mm512_add_epi16(__A, __B),
442 (__v32hi)_mm512_setzero_si512());
445 static __inline__ __m512i __DEFAULT_FN_ATTRS512
446 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
447 return (__m512i) ((__v32hu) __A - (__v32hu) __B);
450 static __inline__ __m512i __DEFAULT_FN_ATTRS512
451 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
452 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
453 (__v32hi)_mm512_sub_epi16(__A, __B),
457 static __inline__ __m512i __DEFAULT_FN_ATTRS512
458 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
459 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
460 (__v32hi)_mm512_sub_epi16(__A, __B),
461 (__v32hi)_mm512_setzero_si512());
464 static __inline__ __m512i __DEFAULT_FN_ATTRS512
465 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
466 return (__m512i) ((__v32hu) __A * (__v32hu) __B);
469 static __inline__ __m512i __DEFAULT_FN_ATTRS512
470 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
472 (__v32hi)_mm512_mullo_epi16(__A, __B),
476 static __inline__ __m512i __DEFAULT_FN_ATTRS512
477 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
478 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
479 (__v32hi)_mm512_mullo_epi16(__A, __B),
480 (__v32hi)_mm512_setzero_si512());
483 static __inline__ __m512i __DEFAULT_FN_ATTRS512
484 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
486 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
491 static __inline__ __m512i __DEFAULT_FN_ATTRS512
492 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
494 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
499 static __inline__ __m512i __DEFAULT_FN_ATTRS512
500 _mm512_abs_epi8 (__m512i __A)
502 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
505 static __inline__ __m512i __DEFAULT_FN_ATTRS512
506 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
508 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
509 (__v64qi)_mm512_abs_epi8(__A),
513 static __inline__ __m512i __DEFAULT_FN_ATTRS512
514 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
516 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
517 (__v64qi)_mm512_abs_epi8(__A),
518 (__v64qi)_mm512_setzero_si512());
521 static __inline__ __m512i __DEFAULT_FN_ATTRS512
522 _mm512_abs_epi16 (__m512i __A)
524 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
527 static __inline__ __m512i __DEFAULT_FN_ATTRS512
528 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
530 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
531 (__v32hi)_mm512_abs_epi16(__A),
535 static __inline__ __m512i __DEFAULT_FN_ATTRS512
536 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
539 (__v32hi)_mm512_abs_epi16(__A),
540 (__v32hi)_mm512_setzero_si512());
543 static __inline__ __m512i __DEFAULT_FN_ATTRS512
544 _mm512_packs_epi32(__m512i __A, __m512i __B)
546 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
549 static __inline__ __m512i __DEFAULT_FN_ATTRS512
550 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
552 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
553 (__v32hi)_mm512_packs_epi32(__A, __B),
554 (__v32hi)_mm512_setzero_si512());
557 static __inline__ __m512i __DEFAULT_FN_ATTRS512
558 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
560 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
561 (__v32hi)_mm512_packs_epi32(__A, __B),
565 static __inline__ __m512i __DEFAULT_FN_ATTRS512
566 _mm512_packs_epi16(__m512i __A, __m512i __B)
568 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
571 static __inline__ __m512i __DEFAULT_FN_ATTRS512
572 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
574 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
575 (__v64qi)_mm512_packs_epi16(__A, __B),
579 static __inline__ __m512i __DEFAULT_FN_ATTRS512
580 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
582 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
583 (__v64qi)_mm512_packs_epi16(__A, __B),
584 (__v64qi)_mm512_setzero_si512());
587 static __inline__ __m512i __DEFAULT_FN_ATTRS512
588 _mm512_packus_epi32(__m512i __A, __m512i __B)
590 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
593 static __inline__ __m512i __DEFAULT_FN_ATTRS512
594 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
596 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
597 (__v32hi)_mm512_packus_epi32(__A, __B),
598 (__v32hi)_mm512_setzero_si512());
601 static __inline__ __m512i __DEFAULT_FN_ATTRS512
602 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
604 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
605 (__v32hi)_mm512_packus_epi32(__A, __B),
609 static __inline__ __m512i __DEFAULT_FN_ATTRS512
610 _mm512_packus_epi16(__m512i __A, __m512i __B)
612 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
615 static __inline__ __m512i __DEFAULT_FN_ATTRS512
616 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
618 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
619 (__v64qi)_mm512_packus_epi16(__A, __B),
623 static __inline__ __m512i __DEFAULT_FN_ATTRS512
624 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
627 (__v64qi)_mm512_packus_epi16(__A, __B),
628 (__v64qi)_mm512_setzero_si512());
631 static __inline__ __m512i __DEFAULT_FN_ATTRS512
632 _mm512_adds_epi8 (__m512i __A, __m512i __B)
634 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B);
637 static __inline__ __m512i __DEFAULT_FN_ATTRS512
638 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
640 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
641 (__v64qi)_mm512_adds_epi8(__A, __B),
645 static __inline__ __m512i __DEFAULT_FN_ATTRS512
646 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
648 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
649 (__v64qi)_mm512_adds_epi8(__A, __B),
650 (__v64qi)_mm512_setzero_si512());
653 static __inline__ __m512i __DEFAULT_FN_ATTRS512
654 _mm512_adds_epi16 (__m512i __A, __m512i __B)
656 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B);
659 static __inline__ __m512i __DEFAULT_FN_ATTRS512
660 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
662 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
663 (__v32hi)_mm512_adds_epi16(__A, __B),
667 static __inline__ __m512i __DEFAULT_FN_ATTRS512
668 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
670 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
671 (__v32hi)_mm512_adds_epi16(__A, __B),
672 (__v32hi)_mm512_setzero_si512());
675 static __inline__ __m512i __DEFAULT_FN_ATTRS512
676 _mm512_adds_epu8 (__m512i __A, __m512i __B)
678 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B);
681 static __inline__ __m512i __DEFAULT_FN_ATTRS512
682 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
684 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
685 (__v64qi)_mm512_adds_epu8(__A, __B),
689 static __inline__ __m512i __DEFAULT_FN_ATTRS512
690 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
692 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
693 (__v64qi)_mm512_adds_epu8(__A, __B),
694 (__v64qi)_mm512_setzero_si512());
697 static __inline__ __m512i __DEFAULT_FN_ATTRS512
698 _mm512_adds_epu16 (__m512i __A, __m512i __B)
700 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B);
703 static __inline__ __m512i __DEFAULT_FN_ATTRS512
704 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
706 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
707 (__v32hi)_mm512_adds_epu16(__A, __B),
711 static __inline__ __m512i __DEFAULT_FN_ATTRS512
712 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
714 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
715 (__v32hi)_mm512_adds_epu16(__A, __B),
716 (__v32hi)_mm512_setzero_si512());
719 static __inline__ __m512i __DEFAULT_FN_ATTRS512
720 _mm512_avg_epu8 (__m512i __A, __m512i __B)
722 typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
723 return (__m512i)__builtin_convertvector(
724 ((__builtin_convertvector((__v64qu) __A, __v64hu) +
725 __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
729 static __inline__ __m512i __DEFAULT_FN_ATTRS512
730 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
733 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
734 (__v64qi)_mm512_avg_epu8(__A, __B),
738 static __inline__ __m512i __DEFAULT_FN_ATTRS512
739 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
741 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
742 (__v64qi)_mm512_avg_epu8(__A, __B),
743 (__v64qi)_mm512_setzero_si512());
746 static __inline__ __m512i __DEFAULT_FN_ATTRS512
747 _mm512_avg_epu16 (__m512i __A, __m512i __B)
749 typedef unsigned int __v32su __attribute__((__vector_size__(128)));
750 return (__m512i)__builtin_convertvector(
751 ((__builtin_convertvector((__v32hu) __A, __v32su) +
752 __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
756 static __inline__ __m512i __DEFAULT_FN_ATTRS512
757 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
760 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
761 (__v32hi)_mm512_avg_epu16(__A, __B),
765 static __inline__ __m512i __DEFAULT_FN_ATTRS512
766 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
768 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
769 (__v32hi)_mm512_avg_epu16(__A, __B),
770 (__v32hi) _mm512_setzero_si512());
773 static __inline__ __m512i __DEFAULT_FN_ATTRS512
774 _mm512_max_epi8 (__m512i __A, __m512i __B)
776 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
779 static __inline__ __m512i __DEFAULT_FN_ATTRS512
780 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
782 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
783 (__v64qi)_mm512_max_epi8(__A, __B),
784 (__v64qi)_mm512_setzero_si512());
787 static __inline__ __m512i __DEFAULT_FN_ATTRS512
788 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
790 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
791 (__v64qi)_mm512_max_epi8(__A, __B),
795 static __inline__ __m512i __DEFAULT_FN_ATTRS512
796 _mm512_max_epi16 (__m512i __A, __m512i __B)
798 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
801 static __inline__ __m512i __DEFAULT_FN_ATTRS512
802 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
804 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
805 (__v32hi)_mm512_max_epi16(__A, __B),
806 (__v32hi)_mm512_setzero_si512());
809 static __inline__ __m512i __DEFAULT_FN_ATTRS512
810 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
813 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
814 (__v32hi)_mm512_max_epi16(__A, __B),
818 static __inline__ __m512i __DEFAULT_FN_ATTRS512
819 _mm512_max_epu8 (__m512i __A, __m512i __B)
821 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
824 static __inline__ __m512i __DEFAULT_FN_ATTRS512
825 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
827 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
828 (__v64qi)_mm512_max_epu8(__A, __B),
829 (__v64qi)_mm512_setzero_si512());
832 static __inline__ __m512i __DEFAULT_FN_ATTRS512
833 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
835 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
836 (__v64qi)_mm512_max_epu8(__A, __B),
840 static __inline__ __m512i __DEFAULT_FN_ATTRS512
841 _mm512_max_epu16 (__m512i __A, __m512i __B)
843 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
846 static __inline__ __m512i __DEFAULT_FN_ATTRS512
847 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
849 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
850 (__v32hi)_mm512_max_epu16(__A, __B),
851 (__v32hi)_mm512_setzero_si512());
854 static __inline__ __m512i __DEFAULT_FN_ATTRS512
855 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
857 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
858 (__v32hi)_mm512_max_epu16(__A, __B),
862 static __inline__ __m512i __DEFAULT_FN_ATTRS512
863 _mm512_min_epi8 (__m512i __A, __m512i __B)
865 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
868 static __inline__ __m512i __DEFAULT_FN_ATTRS512
869 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
871 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
872 (__v64qi)_mm512_min_epi8(__A, __B),
873 (__v64qi)_mm512_setzero_si512());
876 static __inline__ __m512i __DEFAULT_FN_ATTRS512
877 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
879 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
880 (__v64qi)_mm512_min_epi8(__A, __B),
884 static __inline__ __m512i __DEFAULT_FN_ATTRS512
885 _mm512_min_epi16 (__m512i __A, __m512i __B)
887 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
890 static __inline__ __m512i __DEFAULT_FN_ATTRS512
891 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
893 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
894 (__v32hi)_mm512_min_epi16(__A, __B),
895 (__v32hi)_mm512_setzero_si512());
898 static __inline__ __m512i __DEFAULT_FN_ATTRS512
899 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
901 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
902 (__v32hi)_mm512_min_epi16(__A, __B),
906 static __inline__ __m512i __DEFAULT_FN_ATTRS512
907 _mm512_min_epu8 (__m512i __A, __m512i __B)
909 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
912 static __inline__ __m512i __DEFAULT_FN_ATTRS512
913 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
915 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
916 (__v64qi)_mm512_min_epu8(__A, __B),
917 (__v64qi)_mm512_setzero_si512());
920 static __inline__ __m512i __DEFAULT_FN_ATTRS512
921 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
923 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
924 (__v64qi)_mm512_min_epu8(__A, __B),
928 static __inline__ __m512i __DEFAULT_FN_ATTRS512
929 _mm512_min_epu16 (__m512i __A, __m512i __B)
931 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
934 static __inline__ __m512i __DEFAULT_FN_ATTRS512
935 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
937 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
938 (__v32hi)_mm512_min_epu16(__A, __B),
939 (__v32hi)_mm512_setzero_si512());
942 static __inline__ __m512i __DEFAULT_FN_ATTRS512
943 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
945 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
946 (__v32hi)_mm512_min_epu16(__A, __B),
950 static __inline__ __m512i __DEFAULT_FN_ATTRS512
951 _mm512_shuffle_epi8(__m512i __A, __m512i __B)
953 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
/* Masked form of _mm512_shuffle_epi8: passes __U and the shuffle result to
   __builtin_ia32_selectb_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
956 static __inline__ __m512i __DEFAULT_FN_ATTRS512
957 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
960 (__v64qi)_mm512_shuffle_epi8(__A, __B),
/* Zero-masked form of _mm512_shuffle_epi8: __builtin_ia32_selectb_512
   chooses, under mask __U, between the shuffle result and
   _mm512_setzero_si512(). */
964 static __inline__ __m512i __DEFAULT_FN_ATTRS512
965 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
968 (__v64qi)_mm512_shuffle_epi8(__A, __B),
969 (__v64qi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psubsb512 applied to __A and __B viewed as __v64qi,
   cast back to __m512i. */
972 static __inline__ __m512i __DEFAULT_FN_ATTRS512
973 _mm512_subs_epi8 (__m512i __A, __m512i __B)
975 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B);
/* Masked form of _mm512_subs_epi8: passes __U and the subtraction result to
   __builtin_ia32_selectb_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
978 static __inline__ __m512i __DEFAULT_FN_ATTRS512
979 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
981 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
982 (__v64qi)_mm512_subs_epi8(__A, __B),
/* Zero-masked form of _mm512_subs_epi8: __builtin_ia32_selectb_512 chooses,
   under mask __U, between the subtraction result and
   _mm512_setzero_si512(). */
986 static __inline__ __m512i __DEFAULT_FN_ATTRS512
987 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
989 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
990 (__v64qi)_mm512_subs_epi8(__A, __B),
991 (__v64qi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psubsw512 applied to __A and __B viewed as __v32hi,
   cast back to __m512i. */
994 static __inline__ __m512i __DEFAULT_FN_ATTRS512
995 _mm512_subs_epi16 (__m512i __A, __m512i __B)
997 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B);
/* Masked form of _mm512_subs_epi16: passes __U and the subtraction result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1000 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1001 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1003 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1004 (__v32hi)_mm512_subs_epi16(__A, __B),
/* Zero-masked form of _mm512_subs_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the subtraction result and
   _mm512_setzero_si512(). */
1008 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1009 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
1011 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1012 (__v32hi)_mm512_subs_epi16(__A, __B),
1013 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psubusb512 applied to __A and __B viewed as
   __v64qi, cast back to __m512i. */
1016 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1017 _mm512_subs_epu8 (__m512i __A, __m512i __B)
1019 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B);
/* Masked form of _mm512_subs_epu8: passes __U and the subtraction result to
   __builtin_ia32_selectb_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1022 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1023 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
1025 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1026 (__v64qi)_mm512_subs_epu8(__A, __B),
/* Zero-masked form of _mm512_subs_epu8: __builtin_ia32_selectb_512 chooses,
   under mask __U, between the subtraction result and
   _mm512_setzero_si512(). */
1030 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1031 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
1033 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1034 (__v64qi)_mm512_subs_epu8(__A, __B),
1035 (__v64qi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psubusw512 applied to __A and __B viewed as
   __v32hi, cast back to __m512i. */
1038 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1039 _mm512_subs_epu16 (__m512i __A, __m512i __B)
1041 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B);
/* Masked form of _mm512_subs_epu16: passes __U and the subtraction result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1044 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1045 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1047 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1048 (__v32hi)_mm512_subs_epu16(__A, __B),
/* Zero-masked form of _mm512_subs_epu16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the subtraction result and
   _mm512_setzero_si512(). */
1052 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1053 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1055 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1056 (__v32hi)_mm512_subs_epu16(__A, __B),
1057 (__v32hi)_mm512_setzero_si512());
/* Forwards __A and __I (as __v32hi) to __builtin_ia32_vpermi2varhi512. The
   final argument is on a line missing from this excerpt (presumably __B --
   confirm). */
1060 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1061 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
1063 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
/* Masked form of _mm512_permutex2var_epi16: passes __U and the permute
   result to __builtin_ia32_selectw_512. The end of the parameter list and
   the third select operand are on lines missing from this excerpt
   (NOTE(review): fallback is presumably __A -- confirm). */
1067 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1068 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
1071 return (__m512i)__builtin_ia32_selectw_512(__U,
1072 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
/* Second masked form of _mm512_permutex2var_epi16 (mask follows the index
   operand in the parameter list): passes __U and the permute result to
   __builtin_ia32_selectw_512. The end of the parameter list and the third
   select operand are on lines missing from this excerpt
   (NOTE(review): fallback is presumably __I -- confirm). */
1076 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1077 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
1080 return (__m512i)__builtin_ia32_selectw_512(__U,
1081 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
/* Zero-masked form of _mm512_permutex2var_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the permute result and
   _mm512_setzero_si512(). The end of the parameter list is on a line missing
   from this excerpt. */
1085 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1086 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
1089 return (__m512i)__builtin_ia32_selectw_512(__U,
1090 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
1091 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_pmulhrsw512 applied to __A and __B viewed as
   __v32hi, cast back to __m512i. */
1094 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1095 _mm512_mulhrs_epi16(__m512i __A, __m512i __B)
1097 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
/* Masked form of _mm512_mulhrs_epi16: passes __U and the multiply result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1100 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1103 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1104 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
/* Zero-masked form of _mm512_mulhrs_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the multiply result and
   _mm512_setzero_si512(). */
1108 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1109 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1111 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1112 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
1113 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_pmulhw512 applied to __A and __B viewed as __v32hi,
   cast back to __m512i. */
1116 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1117 _mm512_mulhi_epi16(__m512i __A, __m512i __B)
1119 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
/* Masked form of _mm512_mulhi_epi16: passes __U and the multiply result to
   __builtin_ia32_selectw_512. The end of the parameter list and the third
   select operand are on lines missing from this excerpt (fallback is
   presumably __W -- confirm). */
1122 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
1126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1127 (__v32hi)_mm512_mulhi_epi16(__A, __B),
/* Zero-masked form of _mm512_mulhi_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the multiply result and
   _mm512_setzero_si512(). */
1131 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1132 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1135 (__v32hi)_mm512_mulhi_epi16(__A, __B),
1136 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_pmulhuw512 applied to __A and __B viewed as
   __v32hi, cast back to __m512i. */
1139 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1140 _mm512_mulhi_epu16(__m512i __A, __m512i __B)
1142 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
/* Masked form of _mm512_mulhi_epu16: passes __U and the multiply result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1145 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1146 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1148 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1149 (__v32hi)_mm512_mulhi_epu16(__A, __B),
/* Zero-masked form of _mm512_mulhi_epu16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the multiply result and
   _mm512_setzero_si512(). */
1153 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1154 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
1156 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1157 (__v32hi)_mm512_mulhi_epu16(__A, __B),
1158 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_pmaddubsw512 applied to __X and __Y viewed as
   __v64qi, cast back to __m512i. */
1161 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1162 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1163 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
/* Masked form of _mm512_maddubs_epi16: passes __U and the result (as
   __v32hi) to __builtin_ia32_selectw_512. The end of the parameter list and
   the third select operand are on lines missing from this excerpt (fallback
   is presumably __W -- confirm). */
1166 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1167 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1169 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1170 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
/* Zero-masked form of _mm512_maddubs_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the result (as __v32hi) and
   _mm512_setzero_si512(). */
1174 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1175 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1176 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1177 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1178 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_pmaddwd512 applied to __A and __B viewed as
   __v32hi, cast back to __m512i. */
1181 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1182 _mm512_madd_epi16(__m512i __A, __m512i __B) {
1183 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
/* Masked form of _mm512_madd_epi16. The result is treated as __v16si, so the
   mask is a __mmask16 and the select is __builtin_ia32_selectd_512. The
   third select operand is on a line missing from this excerpt (presumably
   __W -- confirm). */
1186 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1187 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1188 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1189 (__v16si)_mm512_madd_epi16(__A, __B),
/* Zero-masked form of _mm512_madd_epi16. The result is treated as __v16si,
   so __builtin_ia32_selectd_512 chooses, under the __mmask16 __U, between it
   and _mm512_setzero_si512(). */
1193 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1194 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1195 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1196 (__v16si)_mm512_madd_epi16(__A, __B),
1197 (__v16si)_mm512_setzero_si512());
/* Converts __A (as __v32hi) to a __m256i through
   __builtin_ia32_pmovswb512_mask, passing _mm256_setzero_si256() as the
   second operand. The mask argument is on a line missing from this
   excerpt. */
1200 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1201 _mm512_cvtsepi16_epi8 (__m512i __A) {
1202 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1203 (__v32qi)_mm256_setzero_si256(),
/* Masked form of _mm512_cvtsepi16_epi8: calls
   __builtin_ia32_pmovswb512_mask with __A as __v32hi. The remaining
   arguments are on lines missing from this excerpt (presumably __O and
   __M -- confirm). */
1207 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1208 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1209 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
/* Zero-masked form of _mm512_cvtsepi16_epi8: calls
   __builtin_ia32_pmovswb512_mask with __A and _mm256_setzero_si256(). The
   mask argument is on a line missing from this excerpt (presumably __M --
   confirm). */
1214 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1215 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1216 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1217 (__v32qi) _mm256_setzero_si256(),
/* Converts __A (as __v32hi) to a __m256i through
   __builtin_ia32_pmovuswb512_mask, passing _mm256_setzero_si256() as the
   second operand. The mask argument is on a line missing from this
   excerpt. */
1221 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1222 _mm512_cvtusepi16_epi8 (__m512i __A) {
1223 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1224 (__v32qi) _mm256_setzero_si256(),
/* Masked form of _mm512_cvtusepi16_epi8: calls
   __builtin_ia32_pmovuswb512_mask with __A as __v32hi. The remaining
   arguments are on lines missing from this excerpt (presumably __O and
   __M -- confirm). */
1228 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1229 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1230 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
/* Zero-masked form of _mm512_cvtusepi16_epi8: calls
   __builtin_ia32_pmovuswb512_mask with __A and _mm256_setzero_si256(). The
   mask argument is on a line missing from this excerpt (presumably __M --
   confirm). */
1235 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1236 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1237 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1238 (__v32qi) _mm256_setzero_si256(),
/* Converts __A (as __v32hi) to a __m256i through
   __builtin_ia32_pmovwb512_mask. Unlike the saturating siblings, the second
   operand here is _mm256_undefined_si256(). The mask argument is on a line
   missing from this excerpt. */
1242 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1243 _mm512_cvtepi16_epi8 (__m512i __A) {
1244 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1245 (__v32qi) _mm256_undefined_si256(),
/* Masked form of _mm512_cvtepi16_epi8: calls __builtin_ia32_pmovwb512_mask
   with __A as __v32hi. The remaining arguments are on lines missing from
   this excerpt (presumably __O and __M -- confirm). */
1249 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1250 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1251 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
/* Zero-masked form of _mm512_cvtepi16_epi8: calls
   __builtin_ia32_pmovwb512_mask with __A and _mm256_setzero_si256(). The
   mask argument is on a line missing from this excerpt (presumably __M --
   confirm). */
1256 static __inline__ __m256i __DEFAULT_FN_ATTRS512
1257 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1258 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1259 (__v32qi) _mm256_setzero_si256(),
/* Hands the destination __P (as __v32qi *), the source __A (as __v32hi) and
   the mask __M to __builtin_ia32_pmovwb512mem_mask. Returns nothing. */
1263 static __inline__ void __DEFAULT_FN_ATTRS512
1264 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1266 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Hands the destination __P (as __v32qi *), the source __A (as __v32hi) and
   the mask __M to __builtin_ia32_pmovswb512mem_mask. Returns nothing. */
1269 static __inline__ void __DEFAULT_FN_ATTRS512
1270 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1272 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Hands the destination __P (as __v32qi *), the source __A (as __v32hi) and
   the mask __M to __builtin_ia32_pmovuswb512mem_mask. Returns nothing. */
1275 static __inline__ void __DEFAULT_FN_ATTRS512
1276 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1278 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Interleaves bytes of __A and __B with __builtin_shufflevector. Each index
   k (taken from __A) is followed by 64+k (the same position in __B). The
   visible index rows cover k = 10-15, 24-31, 40-47 and 56-63; the first row
   of the list is on a line missing from this excerpt. */
1281 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1282 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1283 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1285 10, 64+10, 11, 64+11,
1286 12, 64+12, 13, 64+13,
1287 14, 64+14, 15, 64+15,
1288 24, 64+24, 25, 64+25,
1289 26, 64+26, 27, 64+27,
1290 28, 64+28, 29, 64+29,
1291 30, 64+30, 31, 64+31,
1292 40, 64+40, 41, 64+41,
1293 42, 64+42, 43, 64+43,
1294 44, 64+44, 45, 64+45,
1295 46, 64+46, 47, 64+47,
1296 56, 64+56, 57, 64+57,
1297 58, 64+58, 59, 64+59,
1298 60, 64+60, 61, 64+61,
1299 62, 64+62, 63, 64+63);
/* Masked form of _mm512_unpackhi_epi8: passes __U and the interleave result
   to __builtin_ia32_selectb_512. The third select operand is on a line
   missing from this excerpt (presumably __W -- confirm). */
1302 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1303 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1304 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1305 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
/* Zero-masked form of _mm512_unpackhi_epi8: __builtin_ia32_selectb_512
   chooses, under mask __U, between the interleave result and
   _mm512_setzero_si512(). */
1309 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1310 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1311 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1312 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1313 (__v64qi)_mm512_setzero_si512());
/* Interleaves 16-bit elements of __A and __B with __builtin_shufflevector.
   Each index k (taken from __A) is followed by 32+k (the same position in
   __B). The visible index rows cover k = 12-15, 20-23 and 28-31; the leading
   rows of the list are on lines missing from this excerpt. */
1316 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1317 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1318 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1321 12, 32+12, 13, 32+13,
1322 14, 32+14, 15, 32+15,
1323 20, 32+20, 21, 32+21,
1324 22, 32+22, 23, 32+23,
1325 28, 32+28, 29, 32+29,
1326 30, 32+30, 31, 32+31);
/* Masked form of _mm512_unpackhi_epi16: passes __U and the interleave result
   to __builtin_ia32_selectw_512. The third select operand is on a line
   missing from this excerpt (presumably __W -- confirm). */
1329 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1330 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1331 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1332 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
/* Zero-masked form of _mm512_unpackhi_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the interleave result and
   _mm512_setzero_si512(). */
1336 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1337 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1338 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1339 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1340 (__v32hi)_mm512_setzero_si512());
/* Interleaves bytes of __A and __B with __builtin_shufflevector. Each index
   k (taken from __A) is followed by 64+k (the same position in __B). The
   visible index rows cover k = 16-23, 32-39 and 48-55; the leading rows of
   the list are on lines missing from this excerpt. */
1343 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1344 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1345 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1350 16, 64+16, 17, 64+17,
1351 18, 64+18, 19, 64+19,
1352 20, 64+20, 21, 64+21,
1353 22, 64+22, 23, 64+23,
1354 32, 64+32, 33, 64+33,
1355 34, 64+34, 35, 64+35,
1356 36, 64+36, 37, 64+37,
1357 38, 64+38, 39, 64+39,
1358 48, 64+48, 49, 64+49,
1359 50, 64+50, 51, 64+51,
1360 52, 64+52, 53, 64+53,
1361 54, 64+54, 55, 64+55);
/* Masked form of _mm512_unpacklo_epi8: passes __U and the interleave result
   to __builtin_ia32_selectb_512. The third select operand is on a line
   missing from this excerpt (presumably __W -- confirm). */
1364 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1365 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1366 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1367 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
/* Zero-masked form of _mm512_unpacklo_epi8: __builtin_ia32_selectb_512
   chooses, under mask __U, between the interleave result and
   _mm512_setzero_si512(). */
1371 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1373 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1374 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1375 (__v64qi)_mm512_setzero_si512());
/* Interleaves 16-bit elements of __A and __B with __builtin_shufflevector.
   Each index k (taken from __A) is followed by 32+k (the same position in
   __B). The visible index rows cover k = 10-11, 16-19 and 24-27; the leading
   rows of the list are on lines missing from this excerpt. */
1378 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1379 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1380 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1384 10, 32+10, 11, 32+11,
1385 16, 32+16, 17, 32+17,
1386 18, 32+18, 19, 32+19,
1387 24, 32+24, 25, 32+25,
1388 26, 32+26, 27, 32+27);
/* Masked form of _mm512_unpacklo_epi16: passes __U and the interleave result
   to __builtin_ia32_selectw_512. The third select operand is on a line
   missing from this excerpt (presumably __W -- confirm). */
1391 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1392 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1393 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1394 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
/* Zero-masked form of _mm512_unpacklo_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the interleave result and
   _mm512_setzero_si512(). */
1398 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1399 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1400 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1401 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1402 (__v32hi)_mm512_setzero_si512());
/* Widens the 32 bytes of __A to a __v32hi vector with
   __builtin_convertvector, reading the source as __v32qs so the element type
   is explicitly signed (see the note inside the body). */
1405 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1406 _mm512_cvtepi8_epi16(__m256i __A)
1408 /* This function always performs a signed extension, but __v32qi is a char
1409 which may be signed or unsigned, so use __v32qs. */
1410 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
/* Masked form of _mm512_cvtepi8_epi16: passes __U and the widened vector to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1413 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1414 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1417 (__v32hi)_mm512_cvtepi8_epi16(__A),
/* Zero-masked form of _mm512_cvtepi8_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the widened vector and
   _mm512_setzero_si512(). */
1421 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1422 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1425 (__v32hi)_mm512_cvtepi8_epi16(__A),
1426 (__v32hi)_mm512_setzero_si512());
/* Widens the 32 bytes of __A to a __v32hi vector with
   __builtin_convertvector, reading the source as __v32qu. */
1429 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1430 _mm512_cvtepu8_epi16(__m256i __A)
1432 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
/* Masked form of _mm512_cvtepu8_epi16: passes __U and the widened vector to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1435 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1436 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1439 (__v32hi)_mm512_cvtepu8_epi16(__A),
/* Zero-masked form of _mm512_cvtepu8_epi16: __builtin_ia32_selectw_512
   chooses, under mask __U, between the widened vector and
   _mm512_setzero_si512(). */
1443 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1444 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1446 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1447 (__v32hi)_mm512_cvtepu8_epi16(__A),
1448 (__v32hi)_mm512_setzero_si512());
/* Forwards A (as __v32hi) and the immediate imm to
   __builtin_ia32_pshufhw512 and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses an expression such as
   _mm512_shufflehi_epi16(a, i)[0] would apply the subscript to the builtin
   call before the cast. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
/* Masked form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512 chooses,
   under mask U, between the shuffle of A and W. The continuation line that
   passes imm to the inner macro is missing from this excerpt. */
1455 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
1456 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1457 (__v32hi)_mm512_shufflehi_epi16((A), \
1459 (__v32hi)(__m512i)(W))
/* Zero-masked form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle of A and
   _mm512_setzero_si512(). The continuation line that passes imm to the inner
   macro is missing from this excerpt. */
1461 #define _mm512_maskz_shufflehi_epi16(U, A, imm) \
1462 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1463 (__v32hi)_mm512_shufflehi_epi16((A), \
1465 (__v32hi)_mm512_setzero_si512())
/* Forwards A (as __v32hi) and the immediate imm to
   __builtin_ia32_pshuflw512 and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses an expression such as
   _mm512_shufflelo_epi16(a, i)[0] would apply the subscript to the builtin
   call before the cast. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
/* Masked form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512 chooses,
   under mask U, between the shuffle of A and W. The continuation line that
   passes imm to the inner macro is missing from this excerpt. */
1471 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
1472 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1473 (__v32hi)_mm512_shufflelo_epi16((A), \
1475 (__v32hi)(__m512i)(W))
/* Zero-masked form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512
   chooses, under mask U, between the shuffle of A and
   _mm512_setzero_si512(). The continuation line that passes imm to the inner
   macro is missing from this excerpt. */
1478 #define _mm512_maskz_shufflelo_epi16(U, A, imm) \
1479 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1480 (__v32hi)_mm512_shufflelo_epi16((A), \
1482 (__v32hi)_mm512_setzero_si512())
/* Returns __builtin_ia32_psllv32hi applied to __A and __B viewed as __v32hi,
   cast back to __m512i. */
1484 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1485 _mm512_sllv_epi16(__m512i __A, __m512i __B)
1487 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
/* Masked form of _mm512_sllv_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1491 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1494 (__v32hi)_mm512_sllv_epi16(__A, __B),
/* Zero-masked form of _mm512_sllv_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1498 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1499 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1502 (__v32hi)_mm512_sllv_epi16(__A, __B),
1503 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psllw512 applied to __A (as __v32hi) and the
   128-bit operand __B (as __v8hi), cast back to __m512i. */
1506 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1507 _mm512_sll_epi16(__m512i __A, __m128i __B)
1509 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
/* Masked form of _mm512_sll_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1512 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1513 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1516 (__v32hi)_mm512_sll_epi16(__A, __B),
/* Zero-masked form of _mm512_sll_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1520 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1521 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1523 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1524 (__v32hi)_mm512_sll_epi16(__A, __B),
1525 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psllwi512 applied to __A (as __v32hi) and the
   integer count __B, cast back to __m512i. */
1528 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1529 _mm512_slli_epi16(__m512i __A, int __B)
1531 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
/* Masked form of _mm512_slli_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1534 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1535 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1537 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1538 (__v32hi)_mm512_slli_epi16(__A, __B),
/* Zero-masked form of _mm512_slli_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1542 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1543 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1545 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1546 (__v32hi)_mm512_slli_epi16(__A, __B),
1547 (__v32hi)_mm512_setzero_si512());
/* Forwards a (as __v8di) and the immediate imm to
   __builtin_ia32_pslldqi512_byteshift and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses a subscript applied to
   the macro result would bind to the builtin call before the cast. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
/* Returns __builtin_ia32_psrlv32hi applied to __A and __B viewed as __v32hi,
   cast back to __m512i. */
1553 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1554 _mm512_srlv_epi16(__m512i __A, __m512i __B)
1556 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
/* Masked form of _mm512_srlv_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1559 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1560 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1562 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1563 (__v32hi)_mm512_srlv_epi16(__A, __B),
/* Zero-masked form of _mm512_srlv_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1567 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1568 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1570 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1571 (__v32hi)_mm512_srlv_epi16(__A, __B),
1572 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psrav32hi applied to __A and __B viewed as __v32hi,
   cast back to __m512i. */
1575 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1576 _mm512_srav_epi16(__m512i __A, __m512i __B)
1578 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
/* Masked form of _mm512_srav_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1581 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1582 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1584 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1585 (__v32hi)_mm512_srav_epi16(__A, __B),
/* Zero-masked form of _mm512_srav_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1589 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1590 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1592 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1593 (__v32hi)_mm512_srav_epi16(__A, __B),
1594 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psraw512 applied to __A (as __v32hi) and the
   128-bit operand __B (as __v8hi), cast back to __m512i. */
1597 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1598 _mm512_sra_epi16(__m512i __A, __m128i __B)
1600 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
/* Masked form of _mm512_sra_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1603 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1604 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1606 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1607 (__v32hi)_mm512_sra_epi16(__A, __B),
/* Zero-masked form of _mm512_sra_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1611 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1612 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1614 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1615 (__v32hi)_mm512_sra_epi16(__A, __B),
1616 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psrawi512 applied to __A (as __v32hi) and the
   integer count __B, cast back to __m512i. */
1619 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1620 _mm512_srai_epi16(__m512i __A, int __B)
1622 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
/* Masked form of _mm512_srai_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1625 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1626 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1629 (__v32hi)_mm512_srai_epi16(__A, __B),
/* Zero-masked form of _mm512_srai_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1633 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1634 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1636 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1637 (__v32hi)_mm512_srai_epi16(__A, __B),
1638 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psrlw512 applied to __A (as __v32hi) and the
   128-bit operand __B (as __v8hi), cast back to __m512i. */
1641 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1642 _mm512_srl_epi16(__m512i __A, __m128i __B)
1644 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
/* Masked form of _mm512_srl_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1647 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1648 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1650 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1651 (__v32hi)_mm512_srl_epi16(__A, __B),
/* Zero-masked form of _mm512_srl_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1655 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1656 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1658 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1659 (__v32hi)_mm512_srl_epi16(__A, __B),
1660 (__v32hi)_mm512_setzero_si512());
/* Returns __builtin_ia32_psrlwi512 applied to __A (as __v32hi) and the
   integer count __B, cast back to __m512i. */
1663 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1664 _mm512_srli_epi16(__m512i __A, int __B)
1666 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
/* Masked form of _mm512_srli_epi16: passes __U and the shift result to
   __builtin_ia32_selectw_512. The third select operand is on a line missing
   from this excerpt (presumably __W -- confirm). */
1669 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1670 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1672 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1673 (__v32hi)_mm512_srli_epi16(__A, __B),
/* Zero-masked form of _mm512_srli_epi16: __builtin_ia32_selectw_512 chooses,
   under mask __U, between the shift result and _mm512_setzero_si512(). */
1677 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1678 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1680 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1681 (__v32hi)_mm512_srli_epi16(__A, __B),
1682 (__v32hi)_mm512_setzero_si512());
/* Forwards a (as __v8di) and the immediate imm to
   __builtin_ia32_psrldqi512_byteshift and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses a subscript applied to
   the macro result would bind to the builtin call before the cast. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
/* Calls __builtin_ia32_selectw_512 with mask __U. Both vector operands are
   on lines missing from this excerpt (presumably __A and __W -- confirm). */
1688 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1689 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1691 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
/* Calls __builtin_ia32_selectw_512 with mask __U and _mm512_setzero_si512()
   as the last operand. The middle operand is on a line missing from this
   excerpt (presumably __A -- confirm). */
1696 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1697 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1699 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1701 (__v32hi) _mm512_setzero_si512 ());
/* Calls __builtin_ia32_selectb_512 with mask __U. Both vector operands are
   on lines missing from this excerpt (presumably __A and __W -- confirm). */
1704 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1705 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1707 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
/* Calls __builtin_ia32_selectb_512 with mask __U and _mm512_setzero_si512()
   as the last operand. The middle operand is on a line missing from this
   excerpt (presumably __A -- confirm). */
1712 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1713 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1715 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1717 (__v64qi) _mm512_setzero_si512 ());
/* Masked broadcast of the scalar __A: passes __M and _mm512_set1_epi8(__A)
   to __builtin_ia32_selectb_512. The third select operand is on a line
   missing from this excerpt (presumably __O -- confirm). */
1720 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1721 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1723 return (__m512i) __builtin_ia32_selectb_512(__M,
1724 (__v64qi)_mm512_set1_epi8(__A),
/* Zero-masked broadcast of the scalar __A: __builtin_ia32_selectb_512
   chooses, under mask __M, between _mm512_set1_epi8(__A) and
   _mm512_setzero_si512(). */
1728 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1729 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1731 return (__m512i) __builtin_ia32_selectb_512(__M,
1732 (__v64qi) _mm512_set1_epi8(__A),
1733 (__v64qi) _mm512_setzero_si512());
/* Combines two mask values through __builtin_ia32_kunpckdi. The second
   argument is on a line missing from this excerpt (presumably __B --
   confirm). */
1736 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1737 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1739 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
/* Combines two mask values through __builtin_ia32_kunpcksi. The second
   argument is on a line missing from this excerpt (presumably __B --
   confirm). */
1743 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1744 _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1746 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
/* Loads a __m512i from __P by overlaying a local struct declared
   __packed__ and __may_alias__ on the pointer and returning its __v member.
   The member declaration is on a line missing from this excerpt. */
1750 static __inline __m512i __DEFAULT_FN_ATTRS512
1751 _mm512_loadu_epi16 (void const *__P)
1753 struct __loadu_epi16 {
1755 } __attribute__((__packed__, __may_alias__));
1756 return ((struct __loadu_epi16*)__P)->__v;
/* Masked load: calls __builtin_ia32_loaddquhi512_mask with __P cast to
   __v32hi *. The remaining arguments are on lines missing from this excerpt
   (presumably __W and __U -- confirm). */
1759 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1760 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1762 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
/* Zero-masked load: calls __builtin_ia32_loaddquhi512_mask with __P cast to
   __v32hi * and _mm512_setzero_si512(). The cast on the zero operand and the
   mask argument are on lines missing from this excerpt (mask is presumably
   __U -- confirm). */
1767 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1768 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1770 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1772 _mm512_setzero_si512 (),
/* Loads a __m512i from __P by overlaying a local struct declared
   __packed__ and __may_alias__ on the pointer and returning its __v member.
   The member declaration is on a line missing from this excerpt. */
1776 static __inline __m512i __DEFAULT_FN_ATTRS512
1777 _mm512_loadu_epi8 (void const *__P)
1779 struct __loadu_epi8 {
1781 } __attribute__((__packed__, __may_alias__));
1782 return ((struct __loadu_epi8*)__P)->__v;
/* Masked load: calls __builtin_ia32_loaddquqi512_mask with __P cast to
   __v64qi *. The remaining arguments are on lines missing from this excerpt
   (presumably __W and __U -- confirm). */
1785 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1786 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1788 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
/* Zero-masked load: calls __builtin_ia32_loaddquqi512_mask with __P cast to
   __v64qi * and _mm512_setzero_si512(). The cast on the zero operand and the
   mask argument are on lines missing from this excerpt (mask is presumably
   __U -- confirm). */
1793 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1794 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1796 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1798 _mm512_setzero_si512 (),
/* Stores __A to __P by overlaying a local struct declared __packed__ and
   __may_alias__ on the pointer and assigning its __v member. The member
   declaration is on a line missing from this excerpt. */
1802 static __inline void __DEFAULT_FN_ATTRS512
1803 _mm512_storeu_epi16 (void *__P, __m512i __A)
1805 struct __storeu_epi16 {
1807 } __attribute__((__packed__, __may_alias__));
1808 ((struct __storeu_epi16*)__P)->__v = __A;
/* Masked store: calls __builtin_ia32_storedquhi512_mask with __P cast to
   __v32hi *. The remaining arguments are on lines missing from this excerpt
   (presumably __A and __U -- confirm). */
1811 static __inline__ void __DEFAULT_FN_ATTRS512
1812 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1814 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
/* Stores __A to __P by overlaying a local struct declared __packed__ and
   __may_alias__ on the pointer and assigning its __v member. The member
   declaration is on a line missing from this excerpt. */
1819 static __inline void __DEFAULT_FN_ATTRS512
1820 _mm512_storeu_epi8 (void *__P, __m512i __A)
1822 struct __storeu_epi8 {
1824 } __attribute__((__packed__, __may_alias__));
1825 ((struct __storeu_epi8*)__P)->__v = __A;
/* Masked store: calls __builtin_ia32_storedquqi512_mask with __P cast to
   __v64qi *. The remaining arguments are on lines missing from this excerpt
   (presumably __A and __U -- confirm). */
1828 static __inline__ void __DEFAULT_FN_ATTRS512
1829 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1831 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
/* Returns the mask produced by _mm512_cmpneq_epi8_mask when
   _mm512_and_epi32(__A, __B) is compared against _mm512_setzero_si512(). */
1836 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1837 _mm512_test_epi8_mask (__m512i __A, __m512i __B)
1839 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1840 _mm512_setzero_si512());
/* Same comparison as _mm512_test_epi8_mask, but routed through
   _mm512_mask_cmpneq_epi8_mask with __U as its mask argument. */
1843 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1844 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1846 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1847 _mm512_setzero_si512());
/* Returns the mask produced by _mm512_cmpneq_epi16_mask when
   _mm512_and_epi32(__A, __B) is compared against _mm512_setzero_si512(). */
1850 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1851 _mm512_test_epi16_mask (__m512i __A, __m512i __B)
1853 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1854 _mm512_setzero_si512());
/* Same comparison as _mm512_test_epi16_mask, but routed through
   _mm512_mask_cmpneq_epi16_mask with __U as its mask argument. */
1857 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1858 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1860 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1861 _mm512_setzero_si512());
/* Returns the mask produced by _mm512_cmpeq_epi8_mask when
   _mm512_and_epi32(__A, __B) is compared against _mm512_setzero_si512(). */
1864 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1865 _mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1867 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
/* Same comparison as _mm512_testn_epi8_mask, but routed through
   _mm512_mask_cmpeq_epi8_mask with __U as its mask argument. */
1870 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1871 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1873 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1874 _mm512_setzero_si512());
/* Returns the mask produced by _mm512_cmpeq_epi16_mask when
   _mm512_and_epi32(__A, __B) is compared against _mm512_setzero_si512(). */
1877 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1878 _mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1880 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1881 _mm512_setzero_si512());
/* Same comparison as _mm512_testn_epi16_mask, but routed through
   _mm512_mask_cmpeq_epi16_mask with __U as its mask argument. */
1884 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1885 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1887 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1888 _mm512_setzero_si512());
/* Converts the vector __A (as __v64qi) to a __mmask64 via
   __builtin_ia32_cvtb2mask512. */
1891 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
1892 _mm512_movepi8_mask (__m512i __A)
1894 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
/* Converts the vector __A (as __v32hi) to a __mmask32 via
   __builtin_ia32_cvtw2mask512. */
1897 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
1898 _mm512_movepi16_mask (__m512i __A)
1900 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
/* Converts the mask __A to a __m512i vector via
   __builtin_ia32_cvtmask2b512. */
1903 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1904 _mm512_movm_epi8 (__mmask64 __A)
1906 return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
/* Converts the mask __A to a __m512i vector via
   __builtin_ia32_cvtmask2w512. */
1909 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1910 _mm512_movm_epi16 (__mmask32 __A)
1912 return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
/* Replicates byte 0 of __A into every byte of the result:
   __builtin_shufflevector is given __A (as __v16qi) for both inputs and 64
   indices that are all 0. */
1915 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1916 _mm512_broadcastb_epi8 (__m128i __A)
1918 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1919 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1920 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1921 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1922 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/* Masked form of _mm512_broadcastb_epi8: passes __M and the broadcast result
   to __builtin_ia32_selectb_512. The third select operand is on a line
   missing from this excerpt (presumably __O -- confirm). */
1925 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1926 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1928 return (__m512i)__builtin_ia32_selectb_512(__M,
1929 (__v64qi) _mm512_broadcastb_epi8(__A),
/* Zero-masked form of _mm512_broadcastb_epi8: __builtin_ia32_selectb_512
   chooses, under mask __M, between the broadcast result and
   _mm512_setzero_si512(). */
1933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1934 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1936 return (__m512i)__builtin_ia32_selectb_512(__M,
1937 (__v64qi) _mm512_broadcastb_epi8(__A),
1938 (__v64qi) _mm512_setzero_si512());
/* Masked broadcast of the scalar __A: passes __M and _mm512_set1_epi16(__A)
   to __builtin_ia32_selectw_512. The third select operand is on a line
   missing from this excerpt (presumably __O -- confirm). */
1941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1942 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1944 return (__m512i) __builtin_ia32_selectw_512(__M,
1945 (__v32hi) _mm512_set1_epi16(__A),
/* Zero-masked broadcast of the scalar __A: __builtin_ia32_selectw_512
   chooses, under mask __M, between _mm512_set1_epi16(__A) and
   _mm512_setzero_si512(). */
1949 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1950 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1952 return (__m512i) __builtin_ia32_selectw_512(__M,
1953 (__v32hi) _mm512_set1_epi16(__A),
1954 (__v32hi) _mm512_setzero_si512());
/* Replicates 16-bit element 0 of __A into every element of the result:
   __builtin_shufflevector is given __A (as __v8hi) for both inputs and 32
   indices that are all 0. */
1957 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1958 _mm512_broadcastw_epi16 (__m128i __A)
1960 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/* Masked form of _mm512_broadcastw_epi16: passes __M and the broadcast
   result to __builtin_ia32_selectw_512. The third select operand is on a
   line missing from this excerpt (presumably __O -- confirm). */
1965 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1966 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1968 return (__m512i)__builtin_ia32_selectw_512(__M,
1969 (__v32hi) _mm512_broadcastw_epi16(__A),
/* Zero-masked form of _mm512_broadcastw_epi16: __builtin_ia32_selectw_512
   chooses, under mask __M, between the broadcast result and
   _mm512_setzero_si512(). */
1973 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1974 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1976 return (__m512i)__builtin_ia32_selectw_512(__M,
1977 (__v32hi) _mm512_broadcastw_epi16(__A),
1978 (__v32hi) _mm512_setzero_si512());
/* Returns __builtin_ia32_permvarhi512 of the two operands viewed as __v32hi.
   Note the argument order is swapped relative to the parameter list: __B is
   passed first and __A second. */
1981 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1982 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1984 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
/* Zero-masked form of _mm512_permutexvar_epi16: __builtin_ia32_selectw_512
   chooses, under mask __M, between the permute result and
   _mm512_setzero_si512(). The end of the parameter list is on a line missing
   from this excerpt. */
1987 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1988 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1991 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1992 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1993 (__v32hi)_mm512_setzero_si512());
/* Masked form of _mm512_permutexvar_epi16: passes __M and the permute result
   to __builtin_ia32_selectw_512. The end of the parameter list and the third
   select operand are on lines missing from this excerpt (fallback is
   presumably __W -- confirm). */
1996 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1997 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
2000 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
2001 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
/* Forwards A and B (as __v64qi) and the immediate N to
   __builtin_ia32_palignr512 and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses a subscript applied to
   the macro result would bind to the builtin call before the cast. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
/* Masked form of _mm512_alignr_epi8: __builtin_ia32_selectb_512 chooses,
   under mask U, between _mm512_alignr_epi8(A, B, N) and W.

   The whole expansion is parenthesized so the leading cast cannot be split
   from the call by a postfix operator at the use site. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                     (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                     (__v64qi)(__m512i)(W)))
/* Zero-masked form of _mm512_alignr_epi8: __builtin_ia32_selectb_512
   chooses, under mask U, between _mm512_alignr_epi8(A, B, N) and
   _mm512_setzero_si512().

   The whole expansion is parenthesized so the leading cast cannot be split
   from the call by a postfix operator at the use site. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                     (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                     (__v64qi)(__m512i)_mm512_setzero_si512()))
/* Forwards A and B (as __v64qi) and the immediate imm to
   __builtin_ia32_dbpsadbw512 and yields the result as __m512i.

   The whole expansion is parenthesized: a cast binds less tightly than
   postfix operators, so without the outer parentheses a subscript applied to
   the macro result would bind to the builtin call before the cast. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
/* Masked form of _mm512_dbsad_epu8: __builtin_ia32_selectw_512 chooses,
   under mask U, between _mm512_dbsad_epu8(A, B, imm) (as __v32hi) and W.

   The whole expansion is parenthesized so the leading cast cannot be split
   from the call by a postfix operator at the use site. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                       (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                       (__v32hi)(__m512i)(W)))
/* Zero-masked form of _mm512_dbsad_epu8: __builtin_ia32_selectw_512 chooses,
   under mask U, between _mm512_dbsad_epu8(A, B, imm) (as __v32hi) and
   _mm512_setzero_si512().

   The whole expansion is parenthesized so the leading cast cannot be split
   from the call by a postfix operator at the use site. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                       (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                       (__v32hi)_mm512_setzero_si512()))
/* Calls __builtin_ia32_psadbw512 with __A as __v64qi and returns the result
   as __m512i. The second argument is on a line missing from this excerpt
   (presumably __B -- confirm). */
2033 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2034 _mm512_sad_epu8 (__m512i __A, __m512i __B)
2036 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
2040 #undef __DEFAULT_FN_ATTRS512
2041 #undef __DEFAULT_FN_ATTRS