1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
25 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512BWINTRIN_H
29 #define __AVX512BWINTRIN_H
/* Mask types used by the intrinsics in this header.
 *   __mmask32 - plain unsigned int; used below as the mask argument of the
 *               word (epi16/epu16) operations.
 *   __mmask64 - unsigned long long; used below as the mask argument of the
 *               byte (epi8/epu8) operations.
 */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
/* Attribute list attached to the inline functions defined in this header:
 * __always_inline__, __nodebug__, the "avx512bw" __target__ feature, and a
 * __min_vector_width__ of 512.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"),       \
                 __min_vector_width__(512)))
39 #define _mm512_cmp_epi8_mask(a, b, p) \
40 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
41 (__v64qi)(__m512i)(b), (int)(p), \
44 #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
45 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
46 (__v64qi)(__m512i)(b), (int)(p), \
49 #define _mm512_cmp_epu8_mask(a, b, p) \
50 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
51 (__v64qi)(__m512i)(b), (int)(p), \
54 #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
55 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
56 (__v64qi)(__m512i)(b), (int)(p), \
59 #define _mm512_cmp_epi16_mask(a, b, p) \
60 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
61 (__v32hi)(__m512i)(b), (int)(p), \
64 #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
65 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
66 (__v32hi)(__m512i)(b), (int)(p), \
69 #define _mm512_cmp_epu16_mask(a, b, p) \
70 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
71 (__v32hi)(__m512i)(b), (int)(p), \
74 #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
75 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
76 (__v32hi)(__m512i)(b), (int)(p), \
/* Predicate shorthands for the epi8 comparison macros.
 * Each one forwards its operands unchanged to _mm512_cmp_epi8_mask and
 * appends the matching _MM_CMPINT_* predicate constant.
 */
#define _mm512_cmpeq_epi8_mask(a, b)                                           \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi8_mask(a, b)                                          \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_cmplt_epi8_mask(a, b)                                           \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_cmple_epi8_mask(a, b)                                           \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi8_mask(a, b)                                           \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_cmpge_epi8_mask(a, b)                                           \
  _mm512_cmp_epi8_mask((a), (b), _MM_CMPINT_GE)

/* Masked forms: the leading mask operand is passed through as the first
 * argument of _mm512_mask_cmp_epi8_mask.
 */
#define _mm512_mask_cmpeq_epi8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi8_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epi8_mask((m), (a), (b), _MM_CMPINT_GE)
/* Predicate shorthands for the epu8 comparison macros.
 * Each one forwards its operands unchanged to _mm512_cmp_epu8_mask and
 * appends the matching _MM_CMPINT_* predicate constant.
 */
#define _mm512_cmpeq_epu8_mask(a, b)                                           \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu8_mask(a, b)                                          \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_cmplt_epu8_mask(a, b)                                           \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_cmple_epu8_mask(a, b)                                           \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu8_mask(a, b)                                           \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_cmpge_epu8_mask(a, b)                                           \
  _mm512_cmp_epu8_mask((a), (b), _MM_CMPINT_GE)

/* Masked forms: the leading mask operand is passed through as the first
 * argument of _mm512_mask_cmp_epu8_mask.
 */
#define _mm512_mask_cmpeq_epu8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu8_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu8_mask(m, a, b)                                   \
  _mm512_mask_cmp_epu8_mask((m), (a), (b), _MM_CMPINT_GE)
/* Predicate shorthands for the epi16 comparison macros.
 * Each one forwards its operands unchanged to _mm512_cmp_epi16_mask and
 * appends the matching _MM_CMPINT_* predicate constant.
 */
#define _mm512_cmpeq_epi16_mask(a, b)                                          \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi16_mask(a, b)                                         \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_cmplt_epi16_mask(a, b)                                          \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_cmple_epi16_mask(a, b)                                          \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi16_mask(a, b)                                          \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_cmpge_epi16_mask(a, b)                                          \
  _mm512_cmp_epi16_mask((a), (b), _MM_CMPINT_GE)

/* Masked forms: the leading mask operand is passed through as the first
 * argument of _mm512_mask_cmp_epi16_mask.
 */
#define _mm512_mask_cmpeq_epi16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi16_mask(m, a, b)                                 \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epi16_mask((m), (a), (b), _MM_CMPINT_GE)
/* Predicate shorthands for the epu16 comparison macros.
 * Each one forwards its operands unchanged to _mm512_cmp_epu16_mask and
 * appends the matching _MM_CMPINT_* predicate constant.
 */
#define _mm512_cmpeq_epu16_mask(a, b)                                          \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu16_mask(a, b)                                         \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_cmplt_epu16_mask(a, b)                                          \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_cmple_epu16_mask(a, b)                                          \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu16_mask(a, b)                                          \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_cmpge_epu16_mask(a, b)                                          \
  _mm512_cmp_epu16_mask((a), (b), _MM_CMPINT_GE)

/* Masked forms: the leading mask operand is passed through as the first
 * argument of _mm512_mask_cmp_epu16_mask.
 */
#define _mm512_mask_cmpeq_epu16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu16_mask(m, a, b)                                 \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu16_mask(m, a, b)                                  \
  _mm512_mask_cmp_epu16_mask((m), (a), (b), _MM_CMPINT_GE)
179 static __inline__ __m512i __DEFAULT_FN_ATTRS
180 _mm512_add_epi8 (__m512i __A, __m512i __B) {
181 return (__m512i) ((__v64qu) __A + (__v64qu) __B);
184 static __inline__ __m512i __DEFAULT_FN_ATTRS
185 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
186 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
187 (__v64qi)_mm512_add_epi8(__A, __B),
191 static __inline__ __m512i __DEFAULT_FN_ATTRS
192 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
193 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
194 (__v64qi)_mm512_add_epi8(__A, __B),
195 (__v64qi)_mm512_setzero_si512());
198 static __inline__ __m512i __DEFAULT_FN_ATTRS
199 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
200 return (__m512i) ((__v64qu) __A - (__v64qu) __B);
203 static __inline__ __m512i __DEFAULT_FN_ATTRS
204 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
205 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
206 (__v64qi)_mm512_sub_epi8(__A, __B),
210 static __inline__ __m512i __DEFAULT_FN_ATTRS
211 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
212 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
213 (__v64qi)_mm512_sub_epi8(__A, __B),
214 (__v64qi)_mm512_setzero_si512());
217 static __inline__ __m512i __DEFAULT_FN_ATTRS
218 _mm512_add_epi16 (__m512i __A, __m512i __B) {
219 return (__m512i) ((__v32hu) __A + (__v32hu) __B);
222 static __inline__ __m512i __DEFAULT_FN_ATTRS
223 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
224 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
225 (__v32hi)_mm512_add_epi16(__A, __B),
229 static __inline__ __m512i __DEFAULT_FN_ATTRS
230 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
231 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
232 (__v32hi)_mm512_add_epi16(__A, __B),
233 (__v32hi)_mm512_setzero_si512());
236 static __inline__ __m512i __DEFAULT_FN_ATTRS
237 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
238 return (__m512i) ((__v32hu) __A - (__v32hu) __B);
241 static __inline__ __m512i __DEFAULT_FN_ATTRS
242 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
243 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
244 (__v32hi)_mm512_sub_epi16(__A, __B),
248 static __inline__ __m512i __DEFAULT_FN_ATTRS
249 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
250 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
251 (__v32hi)_mm512_sub_epi16(__A, __B),
252 (__v32hi)_mm512_setzero_si512());
255 static __inline__ __m512i __DEFAULT_FN_ATTRS
256 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
257 return (__m512i) ((__v32hu) __A * (__v32hu) __B);
260 static __inline__ __m512i __DEFAULT_FN_ATTRS
261 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
262 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
263 (__v32hi)_mm512_mullo_epi16(__A, __B),
267 static __inline__ __m512i __DEFAULT_FN_ATTRS
268 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
269 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
270 (__v32hi)_mm512_mullo_epi16(__A, __B),
271 (__v32hi)_mm512_setzero_si512());
274 static __inline__ __m512i __DEFAULT_FN_ATTRS
275 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
277 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
282 static __inline__ __m512i __DEFAULT_FN_ATTRS
283 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
285 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
290 static __inline__ __m512i __DEFAULT_FN_ATTRS
291 _mm512_abs_epi8 (__m512i __A)
293 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
296 static __inline__ __m512i __DEFAULT_FN_ATTRS
297 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
299 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
300 (__v64qi)_mm512_abs_epi8(__A),
304 static __inline__ __m512i __DEFAULT_FN_ATTRS
305 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
307 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
308 (__v64qi)_mm512_abs_epi8(__A),
309 (__v64qi)_mm512_setzero_si512());
312 static __inline__ __m512i __DEFAULT_FN_ATTRS
313 _mm512_abs_epi16 (__m512i __A)
315 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
318 static __inline__ __m512i __DEFAULT_FN_ATTRS
319 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
321 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
322 (__v32hi)_mm512_abs_epi16(__A),
326 static __inline__ __m512i __DEFAULT_FN_ATTRS
327 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
329 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
330 (__v32hi)_mm512_abs_epi16(__A),
331 (__v32hi)_mm512_setzero_si512());
334 static __inline__ __m512i __DEFAULT_FN_ATTRS
335 _mm512_packs_epi32(__m512i __A, __m512i __B)
337 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
340 static __inline__ __m512i __DEFAULT_FN_ATTRS
341 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
343 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
344 (__v32hi)_mm512_packs_epi32(__A, __B),
345 (__v32hi)_mm512_setzero_si512());
348 static __inline__ __m512i __DEFAULT_FN_ATTRS
349 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
351 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
352 (__v32hi)_mm512_packs_epi32(__A, __B),
356 static __inline__ __m512i __DEFAULT_FN_ATTRS
357 _mm512_packs_epi16(__m512i __A, __m512i __B)
359 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
362 static __inline__ __m512i __DEFAULT_FN_ATTRS
363 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
365 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
366 (__v64qi)_mm512_packs_epi16(__A, __B),
370 static __inline__ __m512i __DEFAULT_FN_ATTRS
371 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
373 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
374 (__v64qi)_mm512_packs_epi16(__A, __B),
375 (__v64qi)_mm512_setzero_si512());
378 static __inline__ __m512i __DEFAULT_FN_ATTRS
379 _mm512_packus_epi32(__m512i __A, __m512i __B)
381 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
384 static __inline__ __m512i __DEFAULT_FN_ATTRS
385 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
387 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
388 (__v32hi)_mm512_packus_epi32(__A, __B),
389 (__v32hi)_mm512_setzero_si512());
392 static __inline__ __m512i __DEFAULT_FN_ATTRS
393 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
395 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
396 (__v32hi)_mm512_packus_epi32(__A, __B),
400 static __inline__ __m512i __DEFAULT_FN_ATTRS
401 _mm512_packus_epi16(__m512i __A, __m512i __B)
403 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
406 static __inline__ __m512i __DEFAULT_FN_ATTRS
407 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
409 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
410 (__v64qi)_mm512_packus_epi16(__A, __B),
414 static __inline__ __m512i __DEFAULT_FN_ATTRS
415 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
417 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
418 (__v64qi)_mm512_packus_epi16(__A, __B),
419 (__v64qi)_mm512_setzero_si512());
422 static __inline__ __m512i __DEFAULT_FN_ATTRS
423 _mm512_adds_epi8 (__m512i __A, __m512i __B)
425 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
427 (__v64qi) _mm512_setzero_si512(),
431 static __inline__ __m512i __DEFAULT_FN_ATTRS
432 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
435 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
441 static __inline__ __m512i __DEFAULT_FN_ATTRS
442 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
444 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A,
446 (__v64qi) _mm512_setzero_si512(),
450 static __inline__ __m512i __DEFAULT_FN_ATTRS
451 _mm512_adds_epi16 (__m512i __A, __m512i __B)
453 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
455 (__v32hi) _mm512_setzero_si512(),
459 static __inline__ __m512i __DEFAULT_FN_ATTRS
460 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
463 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
469 static __inline__ __m512i __DEFAULT_FN_ATTRS
470 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
472 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A,
474 (__v32hi) _mm512_setzero_si512(),
478 static __inline__ __m512i __DEFAULT_FN_ATTRS
479 _mm512_adds_epu8 (__m512i __A, __m512i __B)
481 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
483 (__v64qi) _mm512_setzero_si512(),
487 static __inline__ __m512i __DEFAULT_FN_ATTRS
488 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
491 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
497 static __inline__ __m512i __DEFAULT_FN_ATTRS
498 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
500 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A,
502 (__v64qi) _mm512_setzero_si512(),
506 static __inline__ __m512i __DEFAULT_FN_ATTRS
507 _mm512_adds_epu16 (__m512i __A, __m512i __B)
509 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
511 (__v32hi) _mm512_setzero_si512(),
515 static __inline__ __m512i __DEFAULT_FN_ATTRS
516 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
519 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
525 static __inline__ __m512i __DEFAULT_FN_ATTRS
526 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
528 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A,
530 (__v32hi) _mm512_setzero_si512(),
534 static __inline__ __m512i __DEFAULT_FN_ATTRS
535 _mm512_avg_epu8 (__m512i __A, __m512i __B)
537 typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
538 return (__m512i)__builtin_convertvector(
539 ((__builtin_convertvector((__v64qu) __A, __v64hu) +
540 __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
544 static __inline__ __m512i __DEFAULT_FN_ATTRS
545 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
548 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
549 (__v64qi)_mm512_avg_epu8(__A, __B),
553 static __inline__ __m512i __DEFAULT_FN_ATTRS
554 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
556 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
557 (__v64qi)_mm512_avg_epu8(__A, __B),
558 (__v64qi)_mm512_setzero_si512());
561 static __inline__ __m512i __DEFAULT_FN_ATTRS
562 _mm512_avg_epu16 (__m512i __A, __m512i __B)
564 typedef unsigned int __v32su __attribute__((__vector_size__(128)));
565 return (__m512i)__builtin_convertvector(
566 ((__builtin_convertvector((__v32hu) __A, __v32su) +
567 __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
571 static __inline__ __m512i __DEFAULT_FN_ATTRS
572 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
575 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
576 (__v32hi)_mm512_avg_epu16(__A, __B),
580 static __inline__ __m512i __DEFAULT_FN_ATTRS
581 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
583 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
584 (__v32hi)_mm512_avg_epu16(__A, __B),
585 (__v32hi) _mm512_setzero_si512());
588 static __inline__ __m512i __DEFAULT_FN_ATTRS
589 _mm512_max_epi8 (__m512i __A, __m512i __B)
591 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
594 static __inline__ __m512i __DEFAULT_FN_ATTRS
595 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
597 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
598 (__v64qi)_mm512_max_epi8(__A, __B),
599 (__v64qi)_mm512_setzero_si512());
602 static __inline__ __m512i __DEFAULT_FN_ATTRS
603 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
605 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
606 (__v64qi)_mm512_max_epi8(__A, __B),
610 static __inline__ __m512i __DEFAULT_FN_ATTRS
611 _mm512_max_epi16 (__m512i __A, __m512i __B)
613 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
616 static __inline__ __m512i __DEFAULT_FN_ATTRS
617 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
619 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
620 (__v32hi)_mm512_max_epi16(__A, __B),
621 (__v32hi)_mm512_setzero_si512());
624 static __inline__ __m512i __DEFAULT_FN_ATTRS
625 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
629 (__v32hi)_mm512_max_epi16(__A, __B),
633 static __inline__ __m512i __DEFAULT_FN_ATTRS
634 _mm512_max_epu8 (__m512i __A, __m512i __B)
636 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
639 static __inline__ __m512i __DEFAULT_FN_ATTRS
640 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
642 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
643 (__v64qi)_mm512_max_epu8(__A, __B),
644 (__v64qi)_mm512_setzero_si512());
647 static __inline__ __m512i __DEFAULT_FN_ATTRS
648 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
650 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
651 (__v64qi)_mm512_max_epu8(__A, __B),
655 static __inline__ __m512i __DEFAULT_FN_ATTRS
656 _mm512_max_epu16 (__m512i __A, __m512i __B)
658 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
661 static __inline__ __m512i __DEFAULT_FN_ATTRS
662 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
664 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
665 (__v32hi)_mm512_max_epu16(__A, __B),
666 (__v32hi)_mm512_setzero_si512());
669 static __inline__ __m512i __DEFAULT_FN_ATTRS
670 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
672 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
673 (__v32hi)_mm512_max_epu16(__A, __B),
677 static __inline__ __m512i __DEFAULT_FN_ATTRS
678 _mm512_min_epi8 (__m512i __A, __m512i __B)
680 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
683 static __inline__ __m512i __DEFAULT_FN_ATTRS
684 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
686 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
687 (__v64qi)_mm512_min_epi8(__A, __B),
688 (__v64qi)_mm512_setzero_si512());
691 static __inline__ __m512i __DEFAULT_FN_ATTRS
692 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
694 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
695 (__v64qi)_mm512_min_epi8(__A, __B),
699 static __inline__ __m512i __DEFAULT_FN_ATTRS
700 _mm512_min_epi16 (__m512i __A, __m512i __B)
702 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
705 static __inline__ __m512i __DEFAULT_FN_ATTRS
706 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
708 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
709 (__v32hi)_mm512_min_epi16(__A, __B),
710 (__v32hi)_mm512_setzero_si512());
713 static __inline__ __m512i __DEFAULT_FN_ATTRS
714 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
716 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
717 (__v32hi)_mm512_min_epi16(__A, __B),
721 static __inline__ __m512i __DEFAULT_FN_ATTRS
722 _mm512_min_epu8 (__m512i __A, __m512i __B)
724 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
727 static __inline__ __m512i __DEFAULT_FN_ATTRS
728 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
730 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
731 (__v64qi)_mm512_min_epu8(__A, __B),
732 (__v64qi)_mm512_setzero_si512());
735 static __inline__ __m512i __DEFAULT_FN_ATTRS
736 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
738 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
739 (__v64qi)_mm512_min_epu8(__A, __B),
743 static __inline__ __m512i __DEFAULT_FN_ATTRS
744 _mm512_min_epu16 (__m512i __A, __m512i __B)
746 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
749 static __inline__ __m512i __DEFAULT_FN_ATTRS
750 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
752 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
753 (__v32hi)_mm512_min_epu16(__A, __B),
754 (__v32hi)_mm512_setzero_si512());
757 static __inline__ __m512i __DEFAULT_FN_ATTRS
758 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
760 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
761 (__v32hi)_mm512_min_epu16(__A, __B),
765 static __inline__ __m512i __DEFAULT_FN_ATTRS
766 _mm512_shuffle_epi8(__m512i __A, __m512i __B)
768 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
771 static __inline__ __m512i __DEFAULT_FN_ATTRS
772 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
774 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
775 (__v64qi)_mm512_shuffle_epi8(__A, __B),
779 static __inline__ __m512i __DEFAULT_FN_ATTRS
780 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
782 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
783 (__v64qi)_mm512_shuffle_epi8(__A, __B),
784 (__v64qi)_mm512_setzero_si512());
787 static __inline__ __m512i __DEFAULT_FN_ATTRS
788 _mm512_subs_epi8 (__m512i __A, __m512i __B)
790 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
792 (__v64qi) _mm512_setzero_si512(),
796 static __inline__ __m512i __DEFAULT_FN_ATTRS
797 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A,
800 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
806 static __inline__ __m512i __DEFAULT_FN_ATTRS
807 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
809 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A,
811 (__v64qi) _mm512_setzero_si512(),
815 static __inline__ __m512i __DEFAULT_FN_ATTRS
816 _mm512_subs_epi16 (__m512i __A, __m512i __B)
818 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
820 (__v32hi) _mm512_setzero_si512(),
824 static __inline__ __m512i __DEFAULT_FN_ATTRS
825 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
828 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
834 static __inline__ __m512i __DEFAULT_FN_ATTRS
835 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
837 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A,
839 (__v32hi) _mm512_setzero_si512(),
843 static __inline__ __m512i __DEFAULT_FN_ATTRS
844 _mm512_subs_epu8 (__m512i __A, __m512i __B)
846 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
848 (__v64qi) _mm512_setzero_si512(),
852 static __inline__ __m512i __DEFAULT_FN_ATTRS
853 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
856 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
862 static __inline__ __m512i __DEFAULT_FN_ATTRS
863 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
865 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A,
867 (__v64qi) _mm512_setzero_si512(),
871 static __inline__ __m512i __DEFAULT_FN_ATTRS
872 _mm512_subs_epu16 (__m512i __A, __m512i __B)
874 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
876 (__v32hi) _mm512_setzero_si512(),
880 static __inline__ __m512i __DEFAULT_FN_ATTRS
881 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
884 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
890 static __inline__ __m512i __DEFAULT_FN_ATTRS
891 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
893 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A,
895 (__v32hi) _mm512_setzero_si512(),
899 static __inline__ __m512i __DEFAULT_FN_ATTRS
900 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
902 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
906 static __inline__ __m512i __DEFAULT_FN_ATTRS
907 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
910 return (__m512i)__builtin_ia32_selectw_512(__U,
911 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
915 static __inline__ __m512i __DEFAULT_FN_ATTRS
916 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
919 return (__m512i)__builtin_ia32_selectw_512(__U,
920 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
924 static __inline__ __m512i __DEFAULT_FN_ATTRS
925 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
928 return (__m512i)__builtin_ia32_selectw_512(__U,
929 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
930 (__v32hi)_mm512_setzero_si512());
933 static __inline__ __m512i __DEFAULT_FN_ATTRS
934 _mm512_mulhrs_epi16(__m512i __A, __m512i __B)
936 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
939 static __inline__ __m512i __DEFAULT_FN_ATTRS
940 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
942 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
943 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
947 static __inline__ __m512i __DEFAULT_FN_ATTRS
948 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
950 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
951 (__v32hi)_mm512_mulhrs_epi16(__A, __B),
952 (__v32hi)_mm512_setzero_si512());
955 static __inline__ __m512i __DEFAULT_FN_ATTRS
956 _mm512_mulhi_epi16(__m512i __A, __m512i __B)
958 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
961 static __inline__ __m512i __DEFAULT_FN_ATTRS
962 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
965 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
966 (__v32hi)_mm512_mulhi_epi16(__A, __B),
970 static __inline__ __m512i __DEFAULT_FN_ATTRS
971 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
973 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
974 (__v32hi)_mm512_mulhi_epi16(__A, __B),
975 (__v32hi)_mm512_setzero_si512());
978 static __inline__ __m512i __DEFAULT_FN_ATTRS
979 _mm512_mulhi_epu16(__m512i __A, __m512i __B)
981 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
984 static __inline__ __m512i __DEFAULT_FN_ATTRS
985 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
987 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
988 (__v32hi)_mm512_mulhi_epu16(__A, __B),
992 static __inline__ __m512i __DEFAULT_FN_ATTRS
993 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
995 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
996 (__v32hi)_mm512_mulhi_epu16(__A, __B),
997 (__v32hi)_mm512_setzero_si512());
1000 static __inline__ __m512i __DEFAULT_FN_ATTRS
1001 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
1002 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
1005 static __inline__ __m512i __DEFAULT_FN_ATTRS
1006 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
1008 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1009 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1013 static __inline__ __m512i __DEFAULT_FN_ATTRS
1014 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
1015 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
1016 (__v32hi)_mm512_maddubs_epi16(__X, __Y),
1017 (__v32hi)_mm512_setzero_si512());
1020 static __inline__ __m512i __DEFAULT_FN_ATTRS
1021 _mm512_madd_epi16(__m512i __A, __m512i __B) {
1022 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
1025 static __inline__ __m512i __DEFAULT_FN_ATTRS
1026 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
1027 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1028 (__v16si)_mm512_madd_epi16(__A, __B),
/* Zero-masked form of _mm512_madd_epi16: __builtin_ia32_selectd_512 picks,
   per 32-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1032 static __inline__ __m512i __DEFAULT_FN_ATTRS
1033 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
1034 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
1035 (__v16si)_mm512_madd_epi16(__A, __B),
1036 (__v16si)_mm512_setzero_si512());
/* Narrows the 32 x 16-bit lanes of __A to a 256-bit vector of bytes through
   __builtin_ia32_pmovswb512_mask (VPMOVSWB, the signed-saturating narrow),
   with a zero vector as the pass-through operand.
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably an all-ones __mmask32 -- confirm. */
1039 static __inline__ __m256i __DEFAULT_FN_ATTRS
1040 _mm512_cvtsepi16_epi8 (__m512i __A) {
1041 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1042 (__v32qi)_mm256_setzero_si256(),
/* Merge-masked form of the signed-saturating 16-to-8-bit narrow; calls
   __builtin_ia32_pmovswb512_mask with __A as the source.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v32qi)__O and __M -- confirm. */
1046 static __inline__ __m256i __DEFAULT_FN_ATTRS
1047 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1048 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
/* Zero-masked form of the signed-saturating 16-to-8-bit narrow: calls
   __builtin_ia32_pmovswb512_mask with a zero vector as the pass-through.
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably __M -- confirm. */
1053 static __inline__ __m256i __DEFAULT_FN_ATTRS
1054 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
1055 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
1056 (__v32qi) _mm256_setzero_si256(),
/* Narrows the 32 x 16-bit lanes of __A to a 256-bit vector of bytes through
   __builtin_ia32_pmovuswb512_mask (VPMOVUSWB, the unsigned-saturating
   narrow), with a zero vector as the pass-through operand.
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably an all-ones __mmask32 -- confirm. */
1060 static __inline__ __m256i __DEFAULT_FN_ATTRS
1061 _mm512_cvtusepi16_epi8 (__m512i __A) {
1062 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1063 (__v32qi) _mm256_setzero_si256(),
/* Merge-masked form of the unsigned-saturating 16-to-8-bit narrow; calls
   __builtin_ia32_pmovuswb512_mask with __A as the source.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v32qi)__O and __M -- confirm. */
1067 static __inline__ __m256i __DEFAULT_FN_ATTRS
1068 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1069 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
/* Zero-masked form of the unsigned-saturating 16-to-8-bit narrow: calls
   __builtin_ia32_pmovuswb512_mask with a zero vector as the pass-through.
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably __M -- confirm. */
1074 static __inline__ __m256i __DEFAULT_FN_ATTRS
1075 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
1076 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
1077 (__v32qi) _mm256_setzero_si256(),
/* Narrows the 32 x 16-bit lanes of __A to a 256-bit vector of bytes through
   __builtin_ia32_pmovwb512_mask (VPMOVWB, the truncating narrow). Unlike the
   saturating siblings, the pass-through here is _mm256_undefined_si256().
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably an all-ones __mmask32 -- confirm. */
1081 static __inline__ __m256i __DEFAULT_FN_ATTRS
1082 _mm512_cvtepi16_epi8 (__m512i __A) {
1083 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1084 (__v32qi) _mm256_undefined_si256(),
/* Merge-masked form of the truncating 16-to-8-bit narrow; calls
   __builtin_ia32_pmovwb512_mask with __A as the source.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v32qi)__O and __M -- confirm. */
1088 static __inline__ __m256i __DEFAULT_FN_ATTRS
1089 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
1090 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
/* Zero-masked form of the truncating 16-to-8-bit narrow: calls
   __builtin_ia32_pmovwb512_mask with a zero vector as the pass-through.
   NOTE(review): the trailing mask argument is cut off in this copy;
   presumably __M -- confirm. */
1095 static __inline__ __m256i __DEFAULT_FN_ATTRS
1096 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
1097 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
1098 (__v32qi) _mm256_setzero_si256(),
/* Masked narrowing store: passes the destination __P (as __v32qi *), the
   source __A (as 32 x 16-bit lanes) and the mask __M to
   __builtin_ia32_pmovwb512mem_mask (truncating narrow). Returns nothing. */
1102 static __inline__ void __DEFAULT_FN_ATTRS
1103 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1105 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Masked narrowing store: passes the destination __P (as __v32qi *), the
   source __A (as 32 x 16-bit lanes) and the mask __M to
   __builtin_ia32_pmovswb512mem_mask (signed-saturating narrow). Returns
   nothing. */
1108 static __inline__ void __DEFAULT_FN_ATTRS
1109 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1111 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Masked narrowing store: passes the destination __P (as __v32qi *), the
   source __A (as 32 x 16-bit lanes) and the mask __M to
   __builtin_ia32_pmovuswb512mem_mask (unsigned-saturating narrow). Returns
   nothing. */
1114 static __inline__ void __DEFAULT_FN_ATTRS
1115 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
1117 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
/* Interleaves bytes from the upper half of each 128-bit lane of __A and __B
   with __builtin_shufflevector. An index k selects byte k of __A and 64+k
   selects byte k of __B, so each row below emits A,B,A,B for two
   consecutive byte positions.
   NOTE(review): the first index row is cut off in this copy (the list
   starts at 10; presumably "8, 64+8, 9, 64+9," precedes it) -- confirm. */
1120 static __inline__ __m512i __DEFAULT_FN_ATTRS
1121 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
1122 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1124 10, 64+10, 11, 64+11,
1125 12, 64+12, 13, 64+13,
1126 14, 64+14, 15, 64+15,
1127 24, 64+24, 25, 64+25,
1128 26, 64+26, 27, 64+27,
1129 28, 64+28, 29, 64+29,
1130 30, 64+30, 31, 64+31,
1131 40, 64+40, 41, 64+41,
1132 42, 64+42, 43, 64+43,
1133 44, 64+44, 45, 64+45,
1134 46, 64+46, 47, 64+47,
1135 56, 64+56, 57, 64+57,
1136 58, 64+58, 59, 64+59,
1137 60, 64+60, 61, 64+61,
1138 62, 64+62, 63, 64+63);
/* Merge-masked form of _mm512_unpackhi_epi8: hands __U and the unmasked
   result to the byte-granular __builtin_ia32_selectb_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v64qi)__W -- confirm. */
1141 static __inline__ __m512i __DEFAULT_FN_ATTRS
1142 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1143 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1144 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
/* Zero-masked form of _mm512_unpackhi_epi8: __builtin_ia32_selectb_512
   picks, per byte under __U, the unmasked result or a byte of the zero
   vector. */
1148 static __inline__ __m512i __DEFAULT_FN_ATTRS
1149 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1150 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1151 (__v64qi)_mm512_unpackhi_epi8(__A, __B),
1152 (__v64qi)_mm512_setzero_si512());
/* Interleaves 16-bit words from the upper half of each 128-bit lane of __A
   and __B with __builtin_shufflevector. An index k selects word k of __A
   and 32+k selects word k of __B.
   NOTE(review): the leading index rows are cut off in this copy (the list
   starts at 12; presumably the rows for words 4..7 precede it) --
   confirm. */
1155 static __inline__ __m512i __DEFAULT_FN_ATTRS
1156 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
1157 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1160 12, 32+12, 13, 32+13,
1161 14, 32+14, 15, 32+15,
1162 20, 32+20, 21, 32+21,
1163 22, 32+22, 23, 32+23,
1164 28, 32+28, 29, 32+29,
1165 30, 32+30, 31, 32+31);
/* Merge-masked form of _mm512_unpackhi_epi16: hands __U and the unmasked
   result to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1168 static __inline__ __m512i __DEFAULT_FN_ATTRS
1169 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1170 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1171 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
/* Zero-masked form of _mm512_unpackhi_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __U, the unmasked result or a lane of the
   zero vector. */
1175 static __inline__ __m512i __DEFAULT_FN_ATTRS
1176 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1177 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1178 (__v32hi)_mm512_unpackhi_epi16(__A, __B),
1179 (__v32hi)_mm512_setzero_si512());
/* Interleaves bytes from the lower half of each 128-bit lane of __A and __B
   with __builtin_shufflevector. An index k selects byte k of __A and 64+k
   selects byte k of __B.
   NOTE(review): the leading index rows are cut off in this copy (the list
   starts at 16; presumably the rows for bytes 0..7 precede it) --
   confirm. */
1182 static __inline__ __m512i __DEFAULT_FN_ATTRS
1183 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
1184 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
1189 16, 64+16, 17, 64+17,
1190 18, 64+18, 19, 64+19,
1191 20, 64+20, 21, 64+21,
1192 22, 64+22, 23, 64+23,
1193 32, 64+32, 33, 64+33,
1194 34, 64+34, 35, 64+35,
1195 36, 64+36, 37, 64+37,
1196 38, 64+38, 39, 64+39,
1197 48, 64+48, 49, 64+49,
1198 50, 64+50, 51, 64+51,
1199 52, 64+52, 53, 64+53,
1200 54, 64+54, 55, 64+55);
/* Merge-masked form of _mm512_unpacklo_epi8: hands __U and the unmasked
   result to the byte-granular __builtin_ia32_selectb_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v64qi)__W -- confirm. */
1203 static __inline__ __m512i __DEFAULT_FN_ATTRS
1204 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
1205 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1206 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
/* Zero-masked form of _mm512_unpacklo_epi8: __builtin_ia32_selectb_512
   picks, per byte under __U, the unmasked result or a byte of the zero
   vector. */
1210 static __inline__ __m512i __DEFAULT_FN_ATTRS
1211 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
1212 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
1213 (__v64qi)_mm512_unpacklo_epi8(__A, __B),
1214 (__v64qi)_mm512_setzero_si512());
/* Interleaves 16-bit words from the lower half of each 128-bit lane of __A
   and __B with __builtin_shufflevector. An index k selects word k of __A
   and 32+k selects word k of __B.
   NOTE(review): the leading index rows are cut off in this copy (the list
   starts at 10; presumably the rows for words 0..3 and 8..9 precede it) --
   confirm. */
1217 static __inline__ __m512i __DEFAULT_FN_ATTRS
1218 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
1219 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
1223 10, 32+10, 11, 32+11,
1224 16, 32+16, 17, 32+17,
1225 18, 32+18, 19, 32+19,
1226 24, 32+24, 25, 32+25,
1227 26, 32+26, 27, 32+27);
/* Merge-masked form of _mm512_unpacklo_epi16: hands __U and the unmasked
   result to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1230 static __inline__ __m512i __DEFAULT_FN_ATTRS
1231 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
1232 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1233 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
/* Zero-masked form of _mm512_unpacklo_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __U, the unmasked result or a lane of the
   zero vector. */
1237 static __inline__ __m512i __DEFAULT_FN_ATTRS
1238 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
1239 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1240 (__v32hi)_mm512_unpacklo_epi16(__A, __B),
1241 (__v32hi)_mm512_setzero_si512());
/* Widens the 32 bytes of the 256-bit vector __A to 32 x 16-bit lanes with
   __builtin_convertvector, reading the source as explicitly signed bytes
   (__v32qs) so the widening is a sign extension. */
1244 static __inline__ __m512i __DEFAULT_FN_ATTRS
1245 _mm512_cvtepi8_epi16(__m256i __A)
1247 /* This function always performs a signed extension, but __v32qi is a char
1248 which may be signed or unsigned, so use __v32qs. */
1249 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi);
/* Merge-masked form of _mm512_cvtepi8_epi16: hands __U and the unmasked
   result to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1252 static __inline__ __m512i __DEFAULT_FN_ATTRS
1253 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1255 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1256 (__v32hi)_mm512_cvtepi8_epi16(__A),
/* Zero-masked form of _mm512_cvtepi8_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __U, the unmasked result or a lane of the
   zero vector. */
1260 static __inline__ __m512i __DEFAULT_FN_ATTRS
1261 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
1263 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1264 (__v32hi)_mm512_cvtepi8_epi16(__A),
1265 (__v32hi)_mm512_setzero_si512());
/* Widens the 32 bytes of the 256-bit vector __A to 32 x 16-bit lanes with
   __builtin_convertvector, reading the source as unsigned bytes (__v32qu)
   so the widening is a zero extension. */
1268 static __inline__ __m512i __DEFAULT_FN_ATTRS
1269 _mm512_cvtepu8_epi16(__m256i __A)
1271 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi);
/* Merge-masked form of _mm512_cvtepu8_epi16: hands __U and the unmasked
   result to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1274 static __inline__ __m512i __DEFAULT_FN_ATTRS
1275 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
1277 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1278 (__v32hi)_mm512_cvtepu8_epi16(__A),
/* Zero-masked form of _mm512_cvtepu8_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __U, the unmasked result or a lane of the
   zero vector. */
1282 static __inline__ __m512i __DEFAULT_FN_ATTRS
1283 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
1285 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1286 (__v32hi)_mm512_cvtepu8_epi16(__A),
1287 (__v32hi)_mm512_setzero_si512());
/* Shuffles 16-bit words of A under the immediate imm by forwarding both to
   __builtin_ia32_pshufhw512 (VPSHUFHW). A is converted to __m512i and then
   viewed as 32 x 16-bit lanes; imm is converted to int.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_shufflehi_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)))
/* Merge-masked form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under U, between the shuffled vector and W.
   NOTE(review): the continuation line that closes the inner call is cut
   off in this copy (presumably "(imm)), \") -- confirm. */
1294 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
1295 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1296 (__v32hi)_mm512_shufflehi_epi16((A), \
1298 (__v32hi)(__m512i)(W))
/* Zero-masked form of _mm512_shufflehi_epi16: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under U, between the shuffled vector and zero.
   NOTE(review): the continuation line that closes the inner call is cut
   off in this copy (presumably "(imm)), \") -- confirm. */
1300 #define _mm512_maskz_shufflehi_epi16(U, A, imm) \
1301 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1302 (__v32hi)_mm512_shufflehi_epi16((A), \
1304 (__v32hi)_mm512_setzero_si512())
/* Shuffles 16-bit words of A under the immediate imm by forwarding both to
   __builtin_ia32_pshuflw512 (VPSHUFLW). A is converted to __m512i and then
   viewed as 32 x 16-bit lanes; imm is converted to int.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_shufflelo_epi16(A, imm) \
  ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)))
/* Merge-masked form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under U, between the shuffled vector and W.
   NOTE(review): the continuation line that closes the inner call is cut
   off in this copy (presumably "(imm)), \") -- confirm. */
1310 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
1311 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1312 (__v32hi)_mm512_shufflelo_epi16((A), \
1314 (__v32hi)(__m512i)(W))
/* Zero-masked form of _mm512_shufflelo_epi16: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under U, between the shuffled vector and zero.
   NOTE(review): the continuation line that closes the inner call is cut
   off in this copy (presumably "(imm)), \") -- confirm. */
1317 #define _mm512_maskz_shufflelo_epi16(U, A, imm) \
1318 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
1319 (__v32hi)_mm512_shufflelo_epi16((A), \
1321 (__v32hi)_mm512_setzero_si512())
/* Per-lane variable left shift: forwards __A (values) and __B (counts),
   both viewed as 32 x 16-bit lanes, to __builtin_ia32_psllv32hi. */
1323 static __inline__ __m512i __DEFAULT_FN_ATTRS
1324 _mm512_sllv_epi16(__m512i __A, __m512i __B)
1326 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B);
/* Merge-masked form of _mm512_sllv_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1329 static __inline__ __m512i __DEFAULT_FN_ATTRS
1330 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1332 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1333 (__v32hi)_mm512_sllv_epi16(__A, __B),
/* Zero-masked form of _mm512_sllv_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1337 static __inline__ __m512i __DEFAULT_FN_ATTRS
1338 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1340 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1341 (__v32hi)_mm512_sllv_epi16(__A, __B),
1342 (__v32hi)_mm512_setzero_si512());
/* Uniform left shift of the 16-bit lanes of __A: forwards __A and the
   128-bit count operand __B (viewed as __v8hi) to
   __builtin_ia32_psllw512. */
1345 static __inline__ __m512i __DEFAULT_FN_ATTRS
1346 _mm512_sll_epi16(__m512i __A, __m128i __B)
1348 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B);
/* Merge-masked form of _mm512_sll_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1351 static __inline__ __m512i __DEFAULT_FN_ATTRS
1352 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1354 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1355 (__v32hi)_mm512_sll_epi16(__A, __B),
/* Zero-masked form of _mm512_sll_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1359 static __inline__ __m512i __DEFAULT_FN_ATTRS
1360 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1362 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1363 (__v32hi)_mm512_sll_epi16(__A, __B),
1364 (__v32hi)_mm512_setzero_si512());
/* Uniform left shift of the 16-bit lanes of __A by the integer count __B:
   forwards both to __builtin_ia32_psllwi512. */
1367 static __inline__ __m512i __DEFAULT_FN_ATTRS
1368 _mm512_slli_epi16(__m512i __A, int __B)
1370 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B);
/* Merge-masked form of _mm512_slli_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1373 static __inline__ __m512i __DEFAULT_FN_ATTRS
1374 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1376 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1377 (__v32hi)_mm512_slli_epi16(__A, __B),
/* Zero-masked form of _mm512_slli_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1381 static __inline__ __m512i __DEFAULT_FN_ATTRS
1382 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B)
1384 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1385 (__v32hi)_mm512_slli_epi16(__A, __B),
1386 (__v32hi)_mm512_setzero_si512());
/* Byte-wise left shift: forwards a (converted to __m512i, viewed as
   __v8di) and the immediate imm (converted to int) to
   __builtin_ia32_pslldqi512_byteshift.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_bslli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
/* Per-lane variable logical right shift: forwards __A (values) and __B
   (counts), both viewed as 32 x 16-bit lanes, to
   __builtin_ia32_psrlv32hi. */
1392 static __inline__ __m512i __DEFAULT_FN_ATTRS
1393 _mm512_srlv_epi16(__m512i __A, __m512i __B)
1395 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B);
/* Merge-masked form of _mm512_srlv_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1398 static __inline__ __m512i __DEFAULT_FN_ATTRS
1399 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1401 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1402 (__v32hi)_mm512_srlv_epi16(__A, __B),
/* Zero-masked form of _mm512_srlv_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1406 static __inline__ __m512i __DEFAULT_FN_ATTRS
1407 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1409 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1410 (__v32hi)_mm512_srlv_epi16(__A, __B),
1411 (__v32hi)_mm512_setzero_si512());
/* Per-lane variable arithmetic right shift: forwards __A (values) and __B
   (counts), both viewed as 32 x 16-bit lanes, to
   __builtin_ia32_psrav32hi. */
1414 static __inline__ __m512i __DEFAULT_FN_ATTRS
1415 _mm512_srav_epi16(__m512i __A, __m512i __B)
1417 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B);
/* Merge-masked form of _mm512_srav_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1420 static __inline__ __m512i __DEFAULT_FN_ATTRS
1421 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
1423 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1424 (__v32hi)_mm512_srav_epi16(__A, __B),
/* Zero-masked form of _mm512_srav_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1428 static __inline__ __m512i __DEFAULT_FN_ATTRS
1429 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
1431 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1432 (__v32hi)_mm512_srav_epi16(__A, __B),
1433 (__v32hi)_mm512_setzero_si512());
/* Uniform arithmetic right shift of the 16-bit lanes of __A: forwards __A
   and the 128-bit count operand __B (viewed as __v8hi) to
   __builtin_ia32_psraw512. */
1436 static __inline__ __m512i __DEFAULT_FN_ATTRS
1437 _mm512_sra_epi16(__m512i __A, __m128i __B)
1439 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B);
/* Merge-masked form of _mm512_sra_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1442 static __inline__ __m512i __DEFAULT_FN_ATTRS
1443 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1446 (__v32hi)_mm512_sra_epi16(__A, __B),
/* Zero-masked form of _mm512_sra_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1450 static __inline__ __m512i __DEFAULT_FN_ATTRS
1451 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1453 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1454 (__v32hi)_mm512_sra_epi16(__A, __B),
1455 (__v32hi)_mm512_setzero_si512());
/* Uniform arithmetic right shift of the 16-bit lanes of __A by the integer
   count __B: forwards both to __builtin_ia32_psrawi512. */
1458 static __inline__ __m512i __DEFAULT_FN_ATTRS
1459 _mm512_srai_epi16(__m512i __A, int __B)
1461 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B);
/* Merge-masked form of _mm512_srai_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1464 static __inline__ __m512i __DEFAULT_FN_ATTRS
1465 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1467 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1468 (__v32hi)_mm512_srai_epi16(__A, __B),
/* Zero-masked form of _mm512_srai_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1472 static __inline__ __m512i __DEFAULT_FN_ATTRS
1473 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B)
1475 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1476 (__v32hi)_mm512_srai_epi16(__A, __B),
1477 (__v32hi)_mm512_setzero_si512());
/* Uniform logical right shift of the 16-bit lanes of __A: forwards __A and
   the 128-bit count operand __B (viewed as __v8hi) to
   __builtin_ia32_psrlw512. */
1480 static __inline__ __m512i __DEFAULT_FN_ATTRS
1481 _mm512_srl_epi16(__m512i __A, __m128i __B)
1483 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B);
/* Merge-masked form of _mm512_srl_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1486 static __inline__ __m512i __DEFAULT_FN_ATTRS
1487 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
1489 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1490 (__v32hi)_mm512_srl_epi16(__A, __B),
/* Zero-masked form of _mm512_srl_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1494 static __inline__ __m512i __DEFAULT_FN_ATTRS
1495 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
1497 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1498 (__v32hi)_mm512_srl_epi16(__A, __B),
1499 (__v32hi)_mm512_setzero_si512());
/* Uniform logical right shift of the 16-bit lanes of __A by the integer
   count __B: forwards both to __builtin_ia32_psrlwi512. */
1502 static __inline__ __m512i __DEFAULT_FN_ATTRS
1503 _mm512_srli_epi16(__m512i __A, int __B)
1505 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B);
/* Merge-masked form of _mm512_srli_epi16: hands __U and the unmasked result
   to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi)__W -- confirm. */
1508 static __inline__ __m512i __DEFAULT_FN_ATTRS
1509 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B)
1511 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1512 (__v32hi)_mm512_srli_epi16(__A, __B),
/* Zero-masked form of _mm512_srli_epi16: __builtin_ia32_selectw_512 picks,
   per 16-bit lane under __U, the unmasked result or a lane of the zero
   vector. */
1516 static __inline__ __m512i __DEFAULT_FN_ATTRS
1517 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
1519 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
1520 (__v32hi)_mm512_srli_epi16(__A, __B),
1521 (__v32hi)_mm512_setzero_si512());
/* Byte-wise right shift: forwards a (converted to __m512i, viewed as
   __v8di) and the immediate imm (converted to int) to
   __builtin_ia32_psrldqi512_byteshift.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_bsrli_epi128(a, imm) \
  ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)))
/* Masked 16-bit move: returns the result of __builtin_ia32_selectw_512
   driven by __U.
   NOTE(review): both value operands of the select are cut off in this
   copy; presumably (__v32hi)__A and (__v32hi)__W -- confirm. */
1527 static __inline__ __m512i __DEFAULT_FN_ATTRS
1528 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
1530 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
/* Zero-masked 16-bit move: __builtin_ia32_selectw_512 driven by __U with a
   zero vector as the final operand.
   NOTE(review): the middle operand is cut off in this copy; presumably
   (__v32hi)__A -- confirm. */
1535 static __inline__ __m512i __DEFAULT_FN_ATTRS
1536 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
1538 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
1540 (__v32hi) _mm512_setzero_si512 ());
/* Masked byte move: returns the result of __builtin_ia32_selectb_512
   driven by __U.
   NOTE(review): both value operands of the select are cut off in this
   copy; presumably (__v64qi)__A and (__v64qi)__W -- confirm. */
1543 static __inline__ __m512i __DEFAULT_FN_ATTRS
1544 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
1546 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
/* Zero-masked byte move: __builtin_ia32_selectb_512 driven by __U with a
   zero vector as the final operand.
   NOTE(review): the middle operand is cut off in this copy; presumably
   (__v64qi)__A -- confirm. */
1551 static __inline__ __m512i __DEFAULT_FN_ATTRS
1552 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
1554 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
1556 (__v64qi) _mm512_setzero_si512 ());
/* Merge-masked byte broadcast: hands __M and _mm512_set1_epi8(__A) to
   __builtin_ia32_selectb_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v64qi)__O -- confirm. */
1559 static __inline__ __m512i __DEFAULT_FN_ATTRS
1560 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
1562 return (__m512i) __builtin_ia32_selectb_512(__M,
1563 (__v64qi)_mm512_set1_epi8(__A),
/* Zero-masked byte broadcast: __builtin_ia32_selectb_512 picks, per byte
   under __M, a byte of _mm512_set1_epi8(__A) or a byte of the zero
   vector. */
1567 static __inline__ __m512i __DEFAULT_FN_ATTRS
1568 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
1570 return (__m512i) __builtin_ia32_selectb_512(__M,
1571 (__v64qi) _mm512_set1_epi8(__A),
1572 (__v64qi) _mm512_setzero_si512());
/* Combines two mask operands into a 64-bit mask via
   __builtin_ia32_kunpckdi.
   NOTE(review): the builtin's second argument is cut off in this copy;
   presumably (__mmask64) __B -- confirm. */
1575 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1576 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
1578 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
/* Combines two mask operands into a 32-bit mask via
   __builtin_ia32_kunpcksi.
   NOTE(review): the builtin's second argument is cut off in this copy;
   presumably (__mmask32) __B -- confirm. */
1582 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1583 _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
1585 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
/* Merge-masked unaligned load of 16-bit lanes from __P through
   __builtin_ia32_loaddquhi512_mask. Note the cast drops the const
   qualifier of __P.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v32hi) __W and (__mmask32) __U -- confirm. */
1589 static __inline__ __m512i __DEFAULT_FN_ATTRS
1590 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
1592 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
/* Zero-masked unaligned load of 16-bit lanes from __P through
   __builtin_ia32_loaddquhi512_mask, with a zero vector as the pass-through.
   Note the cast drops the const qualifier of __P.
   NOTE(review): the cast on the zero vector and the trailing mask argument
   are cut off in this copy; presumably (__v32hi) and (__mmask32) __U --
   confirm. */
1597 static __inline__ __m512i __DEFAULT_FN_ATTRS
1598 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
1600 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
1602 _mm512_setzero_si512 (),
/* Merge-masked unaligned load of bytes from __P through
   __builtin_ia32_loaddquqi512_mask. Note the cast drops the const
   qualifier of __P.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v64qi) __W and (__mmask64) __U -- confirm. */
1606 static __inline__ __m512i __DEFAULT_FN_ATTRS
1607 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
1609 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
/* Zero-masked unaligned load of bytes from __P through
   __builtin_ia32_loaddquqi512_mask, with a zero vector as the pass-through.
   Note the cast drops the const qualifier of __P.
   NOTE(review): the cast on the zero vector and the trailing mask argument
   are cut off in this copy; presumably (__v64qi) and (__mmask64) __U --
   confirm. */
1614 static __inline__ __m512i __DEFAULT_FN_ATTRS
1615 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
1617 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
1619 _mm512_setzero_si512 (),
/* Masked unaligned store of 16-bit lanes to __P through
   __builtin_ia32_storedquhi512_mask. Returns nothing.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v32hi) __A and (__mmask32) __U -- confirm. */
1622 static __inline__ void __DEFAULT_FN_ATTRS
1623 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
1625 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
/* Masked unaligned store of bytes to __P through
   __builtin_ia32_storedquqi512_mask. Returns nothing.
   NOTE(review): the remaining arguments are cut off in this copy;
   presumably (__v64qi) __A and (__mmask64) __U -- confirm. */
1630 static __inline__ void __DEFAULT_FN_ATTRS
1631 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
1633 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
/* Byte-wise bit test: ANDs __A with __B (_mm512_and_epi32) and returns the
   _mm512_cmpneq_epi8_mask of that AND against a zero vector, i.e. one mask
   bit per byte whose AND is non-zero. */
1638 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1639 _mm512_test_epi8_mask (__m512i __A, __m512i __B)
1641 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B),
1642 _mm512_setzero_si512());
/* Masked byte-wise bit test: same AND-then-compare-not-equal-to-zero as
   _mm512_test_epi8_mask, but the comparison is the masked
   _mm512_mask_cmpneq_epi8_mask with __U as its mask operand. */
1645 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1646 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1648 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1649 _mm512_setzero_si512());
/* Word-wise bit test: ANDs __A with __B (_mm512_and_epi32) and returns the
   _mm512_cmpneq_epi16_mask of that AND against a zero vector, i.e. one mask
   bit per 16-bit lane whose AND is non-zero. */
1652 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1653 _mm512_test_epi16_mask (__m512i __A, __m512i __B)
1655 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B),
1656 _mm512_setzero_si512());
/* Masked word-wise bit test: same AND-then-compare-not-equal-to-zero as
   _mm512_test_epi16_mask, but the comparison is the masked
   _mm512_mask_cmpneq_epi16_mask with __U as its mask operand. */
1659 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1660 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1662 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1663 _mm512_setzero_si512());
/* Negated byte-wise bit test: ANDs __A with __B (_mm512_and_epi32) and
   returns the _mm512_cmpeq_epi8_mask of that AND against a zero vector,
   i.e. one mask bit per byte whose AND is zero. */
1666 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1667 _mm512_testn_epi8_mask (__m512i __A, __m512i __B)
1669 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512());
/* Masked negated byte-wise bit test: same AND-then-compare-equal-to-zero as
   _mm512_testn_epi8_mask, but the comparison is the masked
   _mm512_mask_cmpeq_epi8_mask with __U as its mask operand. */
1672 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1673 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B)
1675 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B),
1676 _mm512_setzero_si512());
/* Negated word-wise bit test: ANDs __A with __B (_mm512_and_epi32) and
   returns the _mm512_cmpeq_epi16_mask of that AND against a zero vector,
   i.e. one mask bit per 16-bit lane whose AND is zero. */
1679 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1680 _mm512_testn_epi16_mask (__m512i __A, __m512i __B)
1682 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B),
1683 _mm512_setzero_si512());
/* Masked negated word-wise bit test: same AND-then-compare-equal-to-zero as
   _mm512_testn_epi16_mask, but the comparison is the masked
   _mm512_mask_cmpeq_epi16_mask with __U as its mask operand. */
1686 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1687 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B)
1689 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B),
1690 _mm512_setzero_si512());
/* Converts the 64 byte lanes of __A to a 64-bit mask via
   __builtin_ia32_cvtb2mask512 (VPMOVB2M). */
1693 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
1694 _mm512_movepi8_mask (__m512i __A)
1696 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A);
/* Converts the 32 x 16-bit lanes of __A to a 32-bit mask via
   __builtin_ia32_cvtw2mask512 (VPMOVW2M). */
1699 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
1700 _mm512_movepi16_mask (__m512i __A)
1702 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A);
/* Expands the 64-bit mask __A into a 512-bit vector of bytes via
   __builtin_ia32_cvtmask2b512 (VPMOVM2B). */
1705 static __inline__ __m512i __DEFAULT_FN_ATTRS
1706 _mm512_movm_epi8 (__mmask64 __A)
1708 return (__m512i) __builtin_ia32_cvtmask2b512 (__A);
/* Expands the 32-bit mask __A into a 512-bit vector of 16-bit lanes via
   __builtin_ia32_cvtmask2w512 (VPMOVM2W). */
1711 static __inline__ __m512i __DEFAULT_FN_ATTRS
1712 _mm512_movm_epi16 (__mmask32 __A)
1714 return (__m512i) __builtin_ia32_cvtmask2w512 (__A);
/* Broadcasts byte 0 of the 128-bit vector __A to all 64 byte lanes of the
   result: a __builtin_shufflevector whose 64 indices (4 rows of 16) are
   all 0. */
1717 static __inline__ __m512i __DEFAULT_FN_ATTRS
1718 _mm512_broadcastb_epi8 (__m128i __A)
1720 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A,
1721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/* Merge-masked form of _mm512_broadcastb_epi8: hands __M and the unmasked
   broadcast to __builtin_ia32_selectb_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v64qi) __O -- confirm. */
1727 static __inline__ __m512i __DEFAULT_FN_ATTRS
1728 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
1730 return (__m512i)__builtin_ia32_selectb_512(__M,
1731 (__v64qi) _mm512_broadcastb_epi8(__A),
/* Zero-masked form of _mm512_broadcastb_epi8: __builtin_ia32_selectb_512
   picks, per byte under __M, the broadcast byte or a byte of the zero
   vector. */
1735 static __inline__ __m512i __DEFAULT_FN_ATTRS
1736 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
1738 return (__m512i)__builtin_ia32_selectb_512(__M,
1739 (__v64qi) _mm512_broadcastb_epi8(__A),
1740 (__v64qi) _mm512_setzero_si512());
/* Merge-masked 16-bit broadcast: hands __M and _mm512_set1_epi16(__A) to
   __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi) __O -- confirm. */
1743 static __inline__ __m512i __DEFAULT_FN_ATTRS
1744 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
1746 return (__m512i) __builtin_ia32_selectw_512(__M,
1747 (__v32hi) _mm512_set1_epi16(__A),
/* Zero-masked 16-bit broadcast: __builtin_ia32_selectw_512 picks, per
   16-bit lane under __M, a lane of _mm512_set1_epi16(__A) or a lane of the
   zero vector. */
1751 static __inline__ __m512i __DEFAULT_FN_ATTRS
1752 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
1754 return (__m512i) __builtin_ia32_selectw_512(__M,
1755 (__v32hi) _mm512_set1_epi16(__A),
1756 (__v32hi) _mm512_setzero_si512());
/* Broadcasts 16-bit word 0 of the 128-bit vector __A to all 32 word lanes
   of the result: a __builtin_shufflevector whose 32 indices (2 rows of 16)
   are all 0. */
1759 static __inline__ __m512i __DEFAULT_FN_ATTRS
1760 _mm512_broadcastw_epi16 (__m128i __A)
1762 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A,
1763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/* Merge-masked form of _mm512_broadcastw_epi16: hands __M and the unmasked
   broadcast to __builtin_ia32_selectw_512.
   NOTE(review): the select's final operand is cut off in this copy;
   presumably (__v32hi) __O -- confirm. */
1767 static __inline__ __m512i __DEFAULT_FN_ATTRS
1768 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
1770 return (__m512i)__builtin_ia32_selectw_512(__M,
1771 (__v32hi) _mm512_broadcastw_epi16(__A),
/* Zero-masked form of _mm512_broadcastw_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __M, the broadcast word or a lane of the
   zero vector. */
1775 static __inline__ __m512i __DEFAULT_FN_ATTRS
1776 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
1778 return (__m512i)__builtin_ia32_selectw_512(__M,
1779 (__v32hi) _mm512_broadcastw_epi16(__A),
1780 (__v32hi) _mm512_setzero_si512());
/* 16-bit lane permute through __builtin_ia32_permvarhi512. The builtin
   receives the operands in swapped order: __B first, then __A.
   NOTE(review): per Intel's guide the first intrinsic parameter is the
   index vector and the second the data -- confirm that matches the
   builtin's (data, index) order. */
1783 static __inline__ __m512i __DEFAULT_FN_ATTRS
1784 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B)
1786 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
/* Zero-masked form of _mm512_permutexvar_epi16: __builtin_ia32_selectw_512
   picks, per 16-bit lane under __M, the permuted lane or a lane of the zero
   vector.
   NOTE(review): the end of the parameter list is cut off in this copy;
   presumably "__m512i __B)" -- confirm. */
1789 static __inline__ __m512i __DEFAULT_FN_ATTRS
1790 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A,
1793 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1794 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
1795 (__v32hi)_mm512_setzero_si512());
/* Merge-masked form of _mm512_permutexvar_epi16: hands __M and the unmasked
   permute to __builtin_ia32_selectw_512.
   NOTE(review): this copy is missing the end of the parameter list
   (presumably "__m512i __B)") and the select's final operand (presumably
   (__v32hi)__W) -- confirm. */
1798 static __inline__ __m512i __DEFAULT_FN_ATTRS
1799 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
1802 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
1803 (__v32hi)_mm512_permutexvar_epi16(__A, __B),
/* Byte alignment of A and B under the immediate N: forwards A and B (each
   converted to __m512i and viewed as 64 byte lanes) and N (converted to
   int) to __builtin_ia32_palignr512 (VPALIGNR).
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_alignr_epi8(A, B, N) \
  ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \
                                      (__v64qi)(__m512i)(B), (int)(N)))
/* Merge-masked form of _mm512_alignr_epi8: __builtin_ia32_selectb_512
   chooses, per byte under the mask U, between the _mm512_alignr_epi8(A, B,
   N) result and the pass-through vector W.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_mask_alignr_epi8(W, U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)(W)))
/* Zero-masked form of _mm512_alignr_epi8: __builtin_ia32_selectb_512
   chooses, per byte under the mask U, between the _mm512_alignr_epi8(A, B,
   N) result and a zero vector.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_maskz_alignr_epi8(U, A, B, N) \
  ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
                              (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \
                              (__v64qi)(__m512i)_mm512_setzero_si512()))
/* Forwards A and B (each converted to __m512i and viewed as 64 byte lanes)
   and the immediate imm (converted to int) to __builtin_ia32_dbpsadbw512
   (VDBPSADBW).
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_dbsad_epu8(A, B, imm) \
  ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \
                                       (__v64qi)(__m512i)(B), (int)(imm)))
/* Merge-masked form of _mm512_dbsad_epu8: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under the mask U, between the
   _mm512_dbsad_epu8(A, B, imm) result and the pass-through vector W.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)(__m512i)(W)))
/* Zero-masked form of _mm512_dbsad_epu8: __builtin_ia32_selectw_512
   chooses, per 16-bit lane under the mask U, between the
   _mm512_dbsad_epu8(A, B, imm) result and a zero vector.
   The whole expansion is parenthesized so that a postfix operator written
   after the macro call (for example a [] subscript) applies to the __m512i
   result rather than binding to the builtin call ahead of the cast. */
#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                  (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \
                                  (__v32hi)_mm512_setzero_si512()))
/* Sum of absolute differences over unsigned bytes: forwards __A, viewed as
   64 byte lanes, to __builtin_ia32_psadbw512 (VPSADBW).
   NOTE(review): the builtin's second argument is cut off in this copy;
   presumably (__v64qi) __B -- confirm. */
1835 static __inline__ __m512i __DEFAULT_FN_ATTRS
1836 _mm512_sad_epu8 (__m512i __A, __m512i __B)
1838 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A,
/* Retire the file-local attribute macro defined at the top of this header
   so it does not leak past the end of the file. */
1844 #undef __DEFAULT_FN_ATTRS