/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
30 typedef char __v64qi __attribute__((__vector_size__(64)));
31 typedef short __v32hi __attribute__((__vector_size__(64)));
32 typedef double __v8df __attribute__((__vector_size__(64)));
33 typedef float __v16sf __attribute__((__vector_size__(64)));
34 typedef long long __v8di __attribute__((__vector_size__(64)));
35 typedef int __v16si __attribute__((__vector_size__(64)));
38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
43 typedef float __m512 __attribute__((__vector_size__(64)));
44 typedef double __m512d __attribute__((__vector_size__(64)));
45 typedef long long __m512i __attribute__((__vector_size__(64)));
47 typedef unsigned char __mmask8;
48 typedef unsigned short __mmask16;
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Predicate 3 is reserved; keeps NE at value 4. */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
/* Two-bit selector values for the four 32-bit lanes of each 128-bit chunk:
 * letter positions map MSB-to-LSB onto the immediate byte (A=0 .. D=3). */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
/* Normalization interval selectors for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
/* Sign-control selectors for the getmant intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,     /* sign = 0             */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
178 /* Create vectors with repeated elements */
180 static __inline __m512i __DEFAULT_FN_ATTRS
181 _mm512_setzero_si512(void)
183 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
186 #define _mm512_setzero_epi32 _mm512_setzero_si512
188 static __inline__ __m512d __DEFAULT_FN_ATTRS
189 _mm512_undefined_pd(void)
191 return (__m512d)__builtin_ia32_undef512();
194 static __inline__ __m512 __DEFAULT_FN_ATTRS
195 _mm512_undefined(void)
197 return (__m512)__builtin_ia32_undef512();
200 static __inline__ __m512 __DEFAULT_FN_ATTRS
201 _mm512_undefined_ps(void)
203 return (__m512)__builtin_ia32_undef512();
206 static __inline__ __m512i __DEFAULT_FN_ATTRS
207 _mm512_undefined_epi32(void)
209 return (__m512i)__builtin_ia32_undef512();
212 static __inline__ __m512i __DEFAULT_FN_ATTRS
213 _mm512_broadcastd_epi32 (__m128i __A)
215 return (__m512i)__builtin_shufflevector((__v4si) __A,
216 (__v4si)_mm_undefined_si128(),
217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
220 static __inline__ __m512i __DEFAULT_FN_ATTRS
221 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
223 return (__m512i)__builtin_ia32_selectd_512(__M,
224 (__v16si) _mm512_broadcastd_epi32(__A),
228 static __inline__ __m512i __DEFAULT_FN_ATTRS
229 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
231 return (__m512i)__builtin_ia32_selectd_512(__M,
232 (__v16si) _mm512_broadcastd_epi32(__A),
233 (__v16si) _mm512_setzero_si512());
236 static __inline__ __m512i __DEFAULT_FN_ATTRS
237 _mm512_broadcastq_epi64 (__m128i __A)
239 return (__m512i)__builtin_shufflevector((__v2di) __A,
240 (__v2di) _mm_undefined_si128(),
241 0, 0, 0, 0, 0, 0, 0, 0);
244 static __inline__ __m512i __DEFAULT_FN_ATTRS
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
247 return (__m512i)__builtin_ia32_selectq_512(__M,
248 (__v8di) _mm512_broadcastq_epi64(__A),
253 static __inline__ __m512i __DEFAULT_FN_ATTRS
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
256 return (__m512i)__builtin_ia32_selectq_512(__M,
257 (__v8di) _mm512_broadcastq_epi64(__A),
258 (__v8di) _mm512_setzero_si512());
262 static __inline __m512 __DEFAULT_FN_ATTRS
263 _mm512_setzero_ps(void)
265 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
269 #define _mm512_setzero _mm512_setzero_ps
271 static __inline __m512d __DEFAULT_FN_ATTRS
272 _mm512_setzero_pd(void)
274 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
277 static __inline __m512 __DEFAULT_FN_ATTRS
278 _mm512_set1_ps(float __w)
280 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281 __w, __w, __w, __w, __w, __w, __w, __w };
284 static __inline __m512d __DEFAULT_FN_ATTRS
285 _mm512_set1_pd(double __w)
287 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
290 static __inline __m512i __DEFAULT_FN_ATTRS
291 _mm512_set1_epi8(char __w)
293 return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w };
303 static __inline __m512i __DEFAULT_FN_ATTRS
304 _mm512_set1_epi16(short __w)
306 return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
307 __w, __w, __w, __w, __w, __w, __w, __w,
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w };
312 static __inline __m512i __DEFAULT_FN_ATTRS
313 _mm512_set1_epi32(int __s)
315 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
316 __s, __s, __s, __s, __s, __s, __s, __s };
319 static __inline __m512i __DEFAULT_FN_ATTRS
320 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
322 return (__m512i)__builtin_ia32_selectd_512(__M,
323 (__v16si)_mm512_set1_epi32(__A),
324 (__v16si)_mm512_setzero_si512());
327 static __inline __m512i __DEFAULT_FN_ATTRS
328 _mm512_set1_epi64(long long __d)
330 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
334 static __inline __m512i __DEFAULT_FN_ATTRS
335 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
337 return (__m512i)__builtin_ia32_selectq_512(__M,
338 (__v8di)_mm512_set1_epi64(__A),
339 (__v8di)_mm512_setzero_si512());
343 static __inline__ __m512 __DEFAULT_FN_ATTRS
344 _mm512_broadcastss_ps(__m128 __A)
346 return (__m512)__builtin_shufflevector((__v4sf) __A,
347 (__v4sf)_mm_undefined_ps(),
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
351 static __inline __m512i __DEFAULT_FN_ATTRS
352 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
354 return (__m512i)(__v16si)
355 { __D, __C, __B, __A, __D, __C, __B, __A,
356 __D, __C, __B, __A, __D, __C, __B, __A };
359 static __inline __m512i __DEFAULT_FN_ATTRS
360 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
363 return (__m512i) (__v8di)
364 { __D, __C, __B, __A, __D, __C, __B, __A };
367 static __inline __m512d __DEFAULT_FN_ATTRS
368 _mm512_set4_pd (double __A, double __B, double __C, double __D)
371 { __D, __C, __B, __A, __D, __C, __B, __A };
374 static __inline __m512 __DEFAULT_FN_ATTRS
375 _mm512_set4_ps (float __A, float __B, float __C, float __D)
378 { __D, __C, __B, __A, __D, __C, __B, __A,
379 __D, __C, __B, __A, __D, __C, __B, __A };
/* "Reversed" set4 variants: arguments are given in memory order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
394 static __inline__ __m512d __DEFAULT_FN_ATTRS
395 _mm512_broadcastsd_pd(__m128d __A)
397 return (__m512d)__builtin_shufflevector((__v2df) __A,
398 (__v2df) _mm_undefined_pd(),
399 0, 0, 0, 0, 0, 0, 0, 0);
402 /* Cast between vector types */
404 static __inline __m512d __DEFAULT_FN_ATTRS
405 _mm512_castpd256_pd512(__m256d __a)
407 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
410 static __inline __m512 __DEFAULT_FN_ATTRS
411 _mm512_castps256_ps512(__m256 __a)
413 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
414 -1, -1, -1, -1, -1, -1, -1, -1);
417 static __inline __m128d __DEFAULT_FN_ATTRS
418 _mm512_castpd512_pd128(__m512d __a)
420 return __builtin_shufflevector(__a, __a, 0, 1);
423 static __inline __m256d __DEFAULT_FN_ATTRS
424 _mm512_castpd512_pd256 (__m512d __A)
426 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
429 static __inline __m128 __DEFAULT_FN_ATTRS
430 _mm512_castps512_ps128(__m512 __a)
432 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
435 static __inline __m256 __DEFAULT_FN_ATTRS
436 _mm512_castps512_ps256 (__m512 __A)
438 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
441 static __inline __m512 __DEFAULT_FN_ATTRS
442 _mm512_castpd_ps (__m512d __A)
444 return (__m512) (__A);
447 static __inline __m512i __DEFAULT_FN_ATTRS
448 _mm512_castpd_si512 (__m512d __A)
450 return (__m512i) (__A);
453 static __inline__ __m512d __DEFAULT_FN_ATTRS
454 _mm512_castpd128_pd512 (__m128d __A)
456 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
459 static __inline __m512d __DEFAULT_FN_ATTRS
460 _mm512_castps_pd (__m512 __A)
462 return (__m512d) (__A);
465 static __inline __m512i __DEFAULT_FN_ATTRS
466 _mm512_castps_si512 (__m512 __A)
468 return (__m512i) (__A);
471 static __inline__ __m512 __DEFAULT_FN_ATTRS
472 _mm512_castps128_ps512 (__m128 __A)
474 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
477 static __inline__ __m512i __DEFAULT_FN_ATTRS
478 _mm512_castsi128_si512 (__m128i __A)
480 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
483 static __inline__ __m512i __DEFAULT_FN_ATTRS
484 _mm512_castsi256_si512 (__m256i __A)
486 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
489 static __inline __m512 __DEFAULT_FN_ATTRS
490 _mm512_castsi512_ps (__m512i __A)
492 return (__m512) (__A);
495 static __inline __m512d __DEFAULT_FN_ATTRS
496 _mm512_castsi512_pd (__m512i __A)
498 return (__m512d) (__A);
501 static __inline __m128i __DEFAULT_FN_ATTRS
502 _mm512_castsi512_si128 (__m512i __A)
504 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
507 static __inline __m256i __DEFAULT_FN_ATTRS
508 _mm512_castsi512_si256 (__m512i __A)
510 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
513 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
514 _mm512_int2mask(int __a)
516 return (__mmask16)__a;
519 static __inline__ int __DEFAULT_FN_ATTRS
520 _mm512_mask2int(__mmask16 __a)
525 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
526 /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
527 /// contain the value of the source vector. The upper 384 bits are set
530 /// \headerfile <x86intrin.h>
532 /// This intrinsic has no corresponding instruction.
535 /// A 128-bit vector of [2 x double].
536 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537 /// contain the value of the parameter. The upper 384 bits are set to zero.
538 static __inline __m512d __DEFAULT_FN_ATTRS
539 _mm512_zextpd128_pd512(__m128d __a)
541 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
544 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
545 /// 256-bit floating-point vector of [4 x double]. The lower 256 bits
546 /// contain the value of the source vector. The upper 256 bits are set
549 /// \headerfile <x86intrin.h>
551 /// This intrinsic has no corresponding instruction.
554 /// A 256-bit vector of [4 x double].
555 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
556 /// contain the value of the parameter. The upper 256 bits are set to zero.
557 static __inline __m512d __DEFAULT_FN_ATTRS
558 _mm512_zextpd256_pd512(__m256d __a)
560 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
563 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
564 /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
565 /// the value of the source vector. The upper 384 bits are set to zero.
567 /// \headerfile <x86intrin.h>
569 /// This intrinsic has no corresponding instruction.
572 /// A 128-bit vector of [4 x float].
573 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
574 /// contain the value of the parameter. The upper 384 bits are set to zero.
575 static __inline __m512 __DEFAULT_FN_ATTRS
576 _mm512_zextps128_ps512(__m128 __a)
578 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
581 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
582 /// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain
583 /// the value of the source vector. The upper 256 bits are set to zero.
585 /// \headerfile <x86intrin.h>
587 /// This intrinsic has no corresponding instruction.
590 /// A 256-bit vector of [8 x float].
591 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
592 /// contain the value of the parameter. The upper 256 bits are set to zero.
593 static __inline __m512 __DEFAULT_FN_ATTRS
594 _mm512_zextps256_ps512(__m256 __a)
596 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
599 /// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
600 /// The lower 128 bits contain the value of the source vector. The upper
601 /// 384 bits are set to zero.
603 /// \headerfile <x86intrin.h>
605 /// This intrinsic has no corresponding instruction.
608 /// A 128-bit integer vector.
609 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
610 /// the parameter. The upper 384 bits are set to zero.
611 static __inline __m512i __DEFAULT_FN_ATTRS
612 _mm512_zextsi128_si512(__m128i __a)
614 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
617 /// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
618 /// The lower 256 bits contain the value of the source vector. The upper
619 /// 256 bits are set to zero.
621 /// \headerfile <x86intrin.h>
623 /// This intrinsic has no corresponding instruction.
626 /// A 256-bit integer vector.
627 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
628 /// the parameter. The upper 256 bits are set to zero.
629 static __inline __m512i __DEFAULT_FN_ATTRS
630 _mm512_zextsi256_si512(__m256i __a)
632 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
635 /* Bitwise operators */
636 static __inline__ __m512i __DEFAULT_FN_ATTRS
637 _mm512_and_epi32(__m512i __a, __m512i __b)
639 return (__m512i)((__v16su)__a & (__v16su)__b);
642 static __inline__ __m512i __DEFAULT_FN_ATTRS
643 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
645 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
646 (__v16si) _mm512_and_epi32(__a, __b),
650 static __inline__ __m512i __DEFAULT_FN_ATTRS
651 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
653 return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
657 static __inline__ __m512i __DEFAULT_FN_ATTRS
658 _mm512_and_epi64(__m512i __a, __m512i __b)
660 return (__m512i)((__v8du)__a & (__v8du)__b);
663 static __inline__ __m512i __DEFAULT_FN_ATTRS
664 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
666 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
667 (__v8di) _mm512_and_epi64(__a, __b),
671 static __inline__ __m512i __DEFAULT_FN_ATTRS
672 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
674 return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
678 static __inline__ __m512i __DEFAULT_FN_ATTRS
679 _mm512_andnot_si512 (__m512i __A, __m512i __B)
681 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
684 static __inline__ __m512i __DEFAULT_FN_ATTRS
685 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
687 return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
690 static __inline__ __m512i __DEFAULT_FN_ATTRS
691 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
693 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
694 (__v16si)_mm512_andnot_epi32(__A, __B),
698 static __inline__ __m512i __DEFAULT_FN_ATTRS
699 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
701 return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
705 static __inline__ __m512i __DEFAULT_FN_ATTRS
706 _mm512_andnot_epi64(__m512i __A, __m512i __B)
708 return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
711 static __inline__ __m512i __DEFAULT_FN_ATTRS
712 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
714 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
715 (__v8di)_mm512_andnot_epi64(__A, __B),
719 static __inline__ __m512i __DEFAULT_FN_ATTRS
720 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
722 return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
726 static __inline__ __m512i __DEFAULT_FN_ATTRS
727 _mm512_or_epi32(__m512i __a, __m512i __b)
729 return (__m512i)((__v16su)__a | (__v16su)__b);
732 static __inline__ __m512i __DEFAULT_FN_ATTRS
733 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
735 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
736 (__v16si)_mm512_or_epi32(__a, __b),
740 static __inline__ __m512i __DEFAULT_FN_ATTRS
741 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
743 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
746 static __inline__ __m512i __DEFAULT_FN_ATTRS
747 _mm512_or_epi64(__m512i __a, __m512i __b)
749 return (__m512i)((__v8du)__a | (__v8du)__b);
752 static __inline__ __m512i __DEFAULT_FN_ATTRS
753 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
755 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
756 (__v8di)_mm512_or_epi64(__a, __b),
760 static __inline__ __m512i __DEFAULT_FN_ATTRS
761 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
763 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
766 static __inline__ __m512i __DEFAULT_FN_ATTRS
767 _mm512_xor_epi32(__m512i __a, __m512i __b)
769 return (__m512i)((__v16su)__a ^ (__v16su)__b);
772 static __inline__ __m512i __DEFAULT_FN_ATTRS
773 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
775 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
776 (__v16si)_mm512_xor_epi32(__a, __b),
780 static __inline__ __m512i __DEFAULT_FN_ATTRS
781 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
783 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
786 static __inline__ __m512i __DEFAULT_FN_ATTRS
787 _mm512_xor_epi64(__m512i __a, __m512i __b)
789 return (__m512i)((__v8du)__a ^ (__v8du)__b);
792 static __inline__ __m512i __DEFAULT_FN_ATTRS
793 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
795 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
796 (__v8di)_mm512_xor_epi64(__a, __b),
800 static __inline__ __m512i __DEFAULT_FN_ATTRS
801 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
803 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
806 static __inline__ __m512i __DEFAULT_FN_ATTRS
807 _mm512_and_si512(__m512i __a, __m512i __b)
809 return (__m512i)((__v8du)__a & (__v8du)__b);
812 static __inline__ __m512i __DEFAULT_FN_ATTRS
813 _mm512_or_si512(__m512i __a, __m512i __b)
815 return (__m512i)((__v8du)__a | (__v8du)__b);
818 static __inline__ __m512i __DEFAULT_FN_ATTRS
819 _mm512_xor_si512(__m512i __a, __m512i __b)
821 return (__m512i)((__v8du)__a ^ (__v8du)__b);
826 static __inline __m512d __DEFAULT_FN_ATTRS
827 _mm512_add_pd(__m512d __a, __m512d __b)
829 return (__m512d)((__v8df)__a + (__v8df)__b);
832 static __inline __m512 __DEFAULT_FN_ATTRS
833 _mm512_add_ps(__m512 __a, __m512 __b)
835 return (__m512)((__v16sf)__a + (__v16sf)__b);
838 static __inline __m512d __DEFAULT_FN_ATTRS
839 _mm512_mul_pd(__m512d __a, __m512d __b)
841 return (__m512d)((__v8df)__a * (__v8df)__b);
844 static __inline __m512 __DEFAULT_FN_ATTRS
845 _mm512_mul_ps(__m512 __a, __m512 __b)
847 return (__m512)((__v16sf)__a * (__v16sf)__b);
850 static __inline __m512d __DEFAULT_FN_ATTRS
851 _mm512_sub_pd(__m512d __a, __m512d __b)
853 return (__m512d)((__v8df)__a - (__v8df)__b);
856 static __inline __m512 __DEFAULT_FN_ATTRS
857 _mm512_sub_ps(__m512 __a, __m512 __b)
859 return (__m512)((__v16sf)__a - (__v16sf)__b);
862 static __inline__ __m512i __DEFAULT_FN_ATTRS
863 _mm512_add_epi64 (__m512i __A, __m512i __B)
865 return (__m512i) ((__v8du) __A + (__v8du) __B);
868 static __inline__ __m512i __DEFAULT_FN_ATTRS
869 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
872 (__v8di)_mm512_add_epi64(__A, __B),
876 static __inline__ __m512i __DEFAULT_FN_ATTRS
877 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
880 (__v8di)_mm512_add_epi64(__A, __B),
881 (__v8di)_mm512_setzero_si512());
884 static __inline__ __m512i __DEFAULT_FN_ATTRS
885 _mm512_sub_epi64 (__m512i __A, __m512i __B)
887 return (__m512i) ((__v8du) __A - (__v8du) __B);
890 static __inline__ __m512i __DEFAULT_FN_ATTRS
891 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
893 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
894 (__v8di)_mm512_sub_epi64(__A, __B),
898 static __inline__ __m512i __DEFAULT_FN_ATTRS
899 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
901 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
902 (__v8di)_mm512_sub_epi64(__A, __B),
903 (__v8di)_mm512_setzero_si512());
906 static __inline__ __m512i __DEFAULT_FN_ATTRS
907 _mm512_add_epi32 (__m512i __A, __m512i __B)
909 return (__m512i) ((__v16su) __A + (__v16su) __B);
912 static __inline__ __m512i __DEFAULT_FN_ATTRS
913 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
915 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
916 (__v16si)_mm512_add_epi32(__A, __B),
920 static __inline__ __m512i __DEFAULT_FN_ATTRS
921 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
923 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
924 (__v16si)_mm512_add_epi32(__A, __B),
925 (__v16si)_mm512_setzero_si512());
928 static __inline__ __m512i __DEFAULT_FN_ATTRS
929 _mm512_sub_epi32 (__m512i __A, __m512i __B)
931 return (__m512i) ((__v16su) __A - (__v16su) __B);
934 static __inline__ __m512i __DEFAULT_FN_ATTRS
935 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
937 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
938 (__v16si)_mm512_sub_epi32(__A, __B),
942 static __inline__ __m512i __DEFAULT_FN_ATTRS
943 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
945 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
946 (__v16si)_mm512_sub_epi32(__A, __B),
947 (__v16si)_mm512_setzero_si512());
950 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
951 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
952 (__v8df)(__m512d)(B), \
953 (__v8df)(__m512d)(W), (__mmask8)(U), \
956 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
957 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
958 (__v8df)(__m512d)(B), \
959 (__v8df)_mm512_setzero_pd(), \
960 (__mmask8)(U), (int)(R)); })
962 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
963 (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
964 (__v8df)(__m512d)(B), \
965 (__v8df)_mm512_undefined_pd(), \
966 (__mmask8)-1, (int)(R)); })
968 static __inline__ __m512d __DEFAULT_FN_ATTRS
969 _mm512_max_pd(__m512d __A, __m512d __B)
971 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
974 _mm512_setzero_pd (),
976 _MM_FROUND_CUR_DIRECTION);
979 static __inline__ __m512d __DEFAULT_FN_ATTRS
980 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
982 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
986 _MM_FROUND_CUR_DIRECTION);
989 static __inline__ __m512d __DEFAULT_FN_ATTRS
990 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
992 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
995 _mm512_setzero_pd (),
997 _MM_FROUND_CUR_DIRECTION);
/* VMAXPS with explicit rounding/SAE control R; merge-masked form. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* VMAXPS with rounding control; zero-masked form. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* VMAXPS with rounding control; unmasked form. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1018 static __inline__ __m512 __DEFAULT_FN_ATTRS
1019 _mm512_max_ps(__m512 __A, __m512 __B)
1021 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1024 _mm512_setzero_ps (),
1026 _MM_FROUND_CUR_DIRECTION);
1029 static __inline__ __m512 __DEFAULT_FN_ATTRS
1030 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1032 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1036 _MM_FROUND_CUR_DIRECTION);
1039 static __inline__ __m512 __DEFAULT_FN_ATTRS
1040 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1042 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1045 _mm512_setzero_ps (),
1047 _MM_FROUND_CUR_DIRECTION);
1050 static __inline__ __m128 __DEFAULT_FN_ATTRS
1051 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1052 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1056 _MM_FROUND_CUR_DIRECTION);
1059 static __inline__ __m128 __DEFAULT_FN_ATTRS
1060 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1061 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1063 (__v4sf) _mm_setzero_ps (),
1065 _MM_FROUND_CUR_DIRECTION);
1068 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
1069 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1070 (__v4sf)(__m128)(B), \
1071 (__v4sf)_mm_setzero_ps(), \
1072 (__mmask8)-1, (int)(R)); })
1074 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
1075 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1076 (__v4sf)(__m128)(B), \
1077 (__v4sf)(__m128)(W), (__mmask8)(U), \
1080 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
1081 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1082 (__v4sf)(__m128)(B), \
1083 (__v4sf)_mm_setzero_ps(), \
1084 (__mmask8)(U), (int)(R)); })
1086 static __inline__ __m128d __DEFAULT_FN_ATTRS
1087 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1088 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1092 _MM_FROUND_CUR_DIRECTION);
1095 static __inline__ __m128d __DEFAULT_FN_ATTRS
1096 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1097 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1099 (__v2df) _mm_setzero_pd (),
1101 _MM_FROUND_CUR_DIRECTION);
1104 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
1105 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1106 (__v2df)(__m128d)(B), \
1107 (__v2df)_mm_setzero_pd(), \
1108 (__mmask8)-1, (int)(R)); })
1110 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
1111 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1112 (__v2df)(__m128d)(B), \
1113 (__v2df)(__m128d)(W), \
1114 (__mmask8)(U), (int)(R)); })
1116 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
1117 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1118 (__v2df)(__m128d)(B), \
1119 (__v2df)_mm_setzero_pd(), \
1120 (__mmask8)(U), (int)(R)); })
1122 static __inline __m512i
1124 _mm512_max_epi32(__m512i __A, __m512i __B)
1126 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1129 _mm512_setzero_si512 (),
1133 static __inline__ __m512i __DEFAULT_FN_ATTRS
1134 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1136 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1138 (__v16si) __W, __M);
1141 static __inline__ __m512i __DEFAULT_FN_ATTRS
1142 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1144 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1147 _mm512_setzero_si512 (),
1151 static __inline __m512i __DEFAULT_FN_ATTRS
1152 _mm512_max_epu32(__m512i __A, __m512i __B)
1154 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1157 _mm512_setzero_si512 (),
1161 static __inline__ __m512i __DEFAULT_FN_ATTRS
1162 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1164 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1166 (__v16si) __W, __M);
1169 static __inline__ __m512i __DEFAULT_FN_ATTRS
1170 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1172 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1175 _mm512_setzero_si512 (),
1179 static __inline __m512i __DEFAULT_FN_ATTRS
1180 _mm512_max_epi64(__m512i __A, __m512i __B)
1182 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1185 _mm512_setzero_si512 (),
1189 static __inline__ __m512i __DEFAULT_FN_ATTRS
1190 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1192 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1197 static __inline__ __m512i __DEFAULT_FN_ATTRS
1198 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1200 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1203 _mm512_setzero_si512 (),
1207 static __inline __m512i __DEFAULT_FN_ATTRS
1208 _mm512_max_epu64(__m512i __A, __m512i __B)
1210 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1213 _mm512_setzero_si512 (),
1217 static __inline__ __m512i __DEFAULT_FN_ATTRS
1218 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1220 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1225 static __inline__ __m512i __DEFAULT_FN_ATTRS
1226 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1228 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1231 _mm512_setzero_si512 (),
/* VMINPD with explicit rounding/SAE control R; merge-masked form. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* VMINPD with rounding control; zero-masked form. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* VMINPD with rounding control; unmasked form. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
1253 static __inline__ __m512d __DEFAULT_FN_ATTRS
1254 _mm512_min_pd(__m512d __A, __m512d __B)
1256 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1259 _mm512_setzero_pd (),
1261 _MM_FROUND_CUR_DIRECTION);
1264 static __inline__ __m512d __DEFAULT_FN_ATTRS
1265 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1267 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1271 _MM_FROUND_CUR_DIRECTION);
/* VMINPS with explicit rounding/SAE control R; merge-masked form. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* VMINPS with rounding control; zero-masked form. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

/* VMINPS with rounding control; unmasked form. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1292 static __inline__ __m512d __DEFAULT_FN_ATTRS
1293 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1295 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1298 _mm512_setzero_pd (),
1300 _MM_FROUND_CUR_DIRECTION);
1303 static __inline__ __m512 __DEFAULT_FN_ATTRS
1304 _mm512_min_ps(__m512 __A, __m512 __B)
1306 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1309 _mm512_setzero_ps (),
1311 _MM_FROUND_CUR_DIRECTION);
1314 static __inline__ __m512 __DEFAULT_FN_ATTRS
1315 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1317 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1321 _MM_FROUND_CUR_DIRECTION);
1324 static __inline__ __m512 __DEFAULT_FN_ATTRS
1325 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1327 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1330 _mm512_setzero_ps (),
1332 _MM_FROUND_CUR_DIRECTION);
1335 static __inline__ __m128 __DEFAULT_FN_ATTRS
1336 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1337 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1341 _MM_FROUND_CUR_DIRECTION);
1344 static __inline__ __m128 __DEFAULT_FN_ATTRS
1345 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1346 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1348 (__v4sf) _mm_setzero_ps (),
1350 _MM_FROUND_CUR_DIRECTION);
1353 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
1354 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1355 (__v4sf)(__m128)(B), \
1356 (__v4sf)_mm_setzero_ps(), \
1357 (__mmask8)-1, (int)(R)); })
1359 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
1360 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1361 (__v4sf)(__m128)(B), \
1362 (__v4sf)(__m128)(W), (__mmask8)(U), \
1365 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
1366 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1367 (__v4sf)(__m128)(B), \
1368 (__v4sf)_mm_setzero_ps(), \
1369 (__mmask8)(U), (int)(R)); })
1371 static __inline__ __m128d __DEFAULT_FN_ATTRS
1372 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1373 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1377 _MM_FROUND_CUR_DIRECTION);
1380 static __inline__ __m128d __DEFAULT_FN_ATTRS
1381 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1382 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1384 (__v2df) _mm_setzero_pd (),
1386 _MM_FROUND_CUR_DIRECTION);
1389 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
1390 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1391 (__v2df)(__m128d)(B), \
1392 (__v2df)_mm_setzero_pd(), \
1393 (__mmask8)-1, (int)(R)); })
1395 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
1396 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1397 (__v2df)(__m128d)(B), \
1398 (__v2df)(__m128d)(W), \
1399 (__mmask8)(U), (int)(R)); })
1401 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
1402 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1403 (__v2df)(__m128d)(B), \
1404 (__v2df)_mm_setzero_pd(), \
1405 (__mmask8)(U), (int)(R)); })
1407 static __inline __m512i
1409 _mm512_min_epi32(__m512i __A, __m512i __B)
1411 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1414 _mm512_setzero_si512 (),
1418 static __inline__ __m512i __DEFAULT_FN_ATTRS
1419 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1421 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1423 (__v16si) __W, __M);
1426 static __inline__ __m512i __DEFAULT_FN_ATTRS
1427 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1429 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1432 _mm512_setzero_si512 (),
1436 static __inline __m512i __DEFAULT_FN_ATTRS
1437 _mm512_min_epu32(__m512i __A, __m512i __B)
1439 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1442 _mm512_setzero_si512 (),
1446 static __inline__ __m512i __DEFAULT_FN_ATTRS
1447 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1449 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1451 (__v16si) __W, __M);
1454 static __inline__ __m512i __DEFAULT_FN_ATTRS
1455 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1457 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1460 _mm512_setzero_si512 (),
1464 static __inline __m512i __DEFAULT_FN_ATTRS
1465 _mm512_min_epi64(__m512i __A, __m512i __B)
1467 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1470 _mm512_setzero_si512 (),
1474 static __inline__ __m512i __DEFAULT_FN_ATTRS
1475 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1477 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1482 static __inline__ __m512i __DEFAULT_FN_ATTRS
1483 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1485 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1488 _mm512_setzero_si512 (),
1492 static __inline __m512i __DEFAULT_FN_ATTRS
1493 _mm512_min_epu64(__m512i __A, __m512i __B)
1495 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1498 _mm512_setzero_si512 (),
1502 static __inline__ __m512i __DEFAULT_FN_ATTRS
1503 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1505 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1510 static __inline__ __m512i __DEFAULT_FN_ATTRS
1511 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1513 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1516 _mm512_setzero_si512 (),
1520 static __inline __m512i __DEFAULT_FN_ATTRS
1521 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1523 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1526 static __inline __m512i __DEFAULT_FN_ATTRS
1527 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1529 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1530 (__v8di)_mm512_mul_epi32(__X, __Y),
1534 static __inline __m512i __DEFAULT_FN_ATTRS
1535 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1537 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1538 (__v8di)_mm512_mul_epi32(__X, __Y),
1539 (__v8di)_mm512_setzero_si512 ());
1542 static __inline __m512i __DEFAULT_FN_ATTRS
1543 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1545 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1548 static __inline __m512i __DEFAULT_FN_ATTRS
1549 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1551 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1552 (__v8di)_mm512_mul_epu32(__X, __Y),
1556 static __inline __m512i __DEFAULT_FN_ATTRS
1557 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1559 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1560 (__v8di)_mm512_mul_epu32(__X, __Y),
1561 (__v8di)_mm512_setzero_si512 ());
1564 static __inline __m512i __DEFAULT_FN_ATTRS
1565 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1567 return (__m512i) ((__v16su) __A * (__v16su) __B);
1570 static __inline __m512i __DEFAULT_FN_ATTRS
1571 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1573 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1574 (__v16si)_mm512_mullo_epi32(__A, __B),
1575 (__v16si)_mm512_setzero_si512());
1578 static __inline __m512i __DEFAULT_FN_ATTRS
1579 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1581 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1582 (__v16si)_mm512_mullo_epi32(__A, __B),
/* VSQRTPD with explicit rounding control R; merge-masked form. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

/* VSQRTPD with rounding control; zero-masked form. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

/* VSQRTPD with rounding control; unmasked form. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
1601 static __inline__ __m512d __DEFAULT_FN_ATTRS
1602 _mm512_sqrt_pd(__m512d __a)
1604 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1605 (__v8df) _mm512_setzero_pd (),
1607 _MM_FROUND_CUR_DIRECTION);
1610 static __inline__ __m512d __DEFAULT_FN_ATTRS
1611 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1613 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1616 _MM_FROUND_CUR_DIRECTION);
1619 static __inline__ __m512d __DEFAULT_FN_ATTRS
1620 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1622 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1624 _mm512_setzero_pd (),
1626 _MM_FROUND_CUR_DIRECTION);
/* VSQRTPS with explicit rounding control R; merge-masked form. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

/* VSQRTPS with rounding control; zero-masked form. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

/* VSQRTPS with rounding control; unmasked form. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
1644 static __inline__ __m512 __DEFAULT_FN_ATTRS
1645 _mm512_sqrt_ps(__m512 __a)
1647 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1648 (__v16sf) _mm512_setzero_ps (),
1650 _MM_FROUND_CUR_DIRECTION);
1653 static __inline__ __m512 __DEFAULT_FN_ATTRS
1654 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1656 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1659 _MM_FROUND_CUR_DIRECTION);
1662 static __inline__ __m512 __DEFAULT_FN_ATTRS
1663 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1665 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1666 (__v16sf) _mm512_setzero_ps (),
1668 _MM_FROUND_CUR_DIRECTION);
1671 static __inline__ __m512d __DEFAULT_FN_ATTRS
1672 _mm512_rsqrt14_pd(__m512d __A)
1674 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1676 _mm512_setzero_pd (),
1679 static __inline__ __m512d __DEFAULT_FN_ATTRS
1680 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1682 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1687 static __inline__ __m512d __DEFAULT_FN_ATTRS
1688 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1690 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1692 _mm512_setzero_pd (),
1696 static __inline__ __m512 __DEFAULT_FN_ATTRS
1697 _mm512_rsqrt14_ps(__m512 __A)
1699 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1701 _mm512_setzero_ps (),
1705 static __inline__ __m512 __DEFAULT_FN_ATTRS
1706 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1708 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1713 static __inline__ __m512 __DEFAULT_FN_ATTRS
1714 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1716 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1718 _mm512_setzero_ps (),
1722 static __inline__ __m128 __DEFAULT_FN_ATTRS
1723 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1725 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1732 static __inline__ __m128 __DEFAULT_FN_ATTRS
1733 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1735 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1741 static __inline__ __m128 __DEFAULT_FN_ATTRS
1742 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1744 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1746 (__v4sf) _mm_setzero_ps (),
1750 static __inline__ __m128d __DEFAULT_FN_ATTRS
1751 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1753 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1760 static __inline__ __m128d __DEFAULT_FN_ATTRS
1761 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1763 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1769 static __inline__ __m128d __DEFAULT_FN_ATTRS
1770 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1772 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1774 (__v2df) _mm_setzero_pd (),
1778 static __inline__ __m512d __DEFAULT_FN_ATTRS
1779 _mm512_rcp14_pd(__m512d __A)
1781 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1783 _mm512_setzero_pd (),
1787 static __inline__ __m512d __DEFAULT_FN_ATTRS
1788 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1790 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1795 static __inline__ __m512d __DEFAULT_FN_ATTRS
1796 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1798 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1800 _mm512_setzero_pd (),
1804 static __inline__ __m512 __DEFAULT_FN_ATTRS
1805 _mm512_rcp14_ps(__m512 __A)
1807 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1809 _mm512_setzero_ps (),
1813 static __inline__ __m512 __DEFAULT_FN_ATTRS
1814 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1816 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1821 static __inline__ __m512 __DEFAULT_FN_ATTRS
1822 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1824 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1826 _mm512_setzero_ps (),
1830 static __inline__ __m128 __DEFAULT_FN_ATTRS
1831 _mm_rcp14_ss(__m128 __A, __m128 __B)
1833 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1840 static __inline__ __m128 __DEFAULT_FN_ATTRS
1841 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1843 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1849 static __inline__ __m128 __DEFAULT_FN_ATTRS
1850 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1852 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1854 (__v4sf) _mm_setzero_ps (),
1858 static __inline__ __m128d __DEFAULT_FN_ATTRS
1859 _mm_rcp14_sd(__m128d __A, __m128d __B)
1861 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1868 static __inline__ __m128d __DEFAULT_FN_ATTRS
1869 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1871 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1877 static __inline__ __m128d __DEFAULT_FN_ATTRS
1878 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1880 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1882 (__v2df) _mm_setzero_pd (),
1886 static __inline __m512 __DEFAULT_FN_ATTRS
1887 _mm512_floor_ps(__m512 __A)
1889 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1892 _MM_FROUND_CUR_DIRECTION);
1895 static __inline__ __m512 __DEFAULT_FN_ATTRS
1896 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1898 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1901 _MM_FROUND_CUR_DIRECTION);
1904 static __inline __m512d __DEFAULT_FN_ATTRS
1905 _mm512_floor_pd(__m512d __A)
1907 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1910 _MM_FROUND_CUR_DIRECTION);
1913 static __inline__ __m512d __DEFAULT_FN_ATTRS
1914 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1916 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1919 _MM_FROUND_CUR_DIRECTION);
1922 static __inline__ __m512 __DEFAULT_FN_ATTRS
1923 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1925 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1928 _MM_FROUND_CUR_DIRECTION);
1931 static __inline __m512 __DEFAULT_FN_ATTRS
1932 _mm512_ceil_ps(__m512 __A)
1934 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1937 _MM_FROUND_CUR_DIRECTION);
1940 static __inline __m512d __DEFAULT_FN_ATTRS
1941 _mm512_ceil_pd(__m512d __A)
1943 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1946 _MM_FROUND_CUR_DIRECTION);
1949 static __inline__ __m512d __DEFAULT_FN_ATTRS
1950 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1952 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1955 _MM_FROUND_CUR_DIRECTION);
1958 static __inline __m512i __DEFAULT_FN_ATTRS
1959 _mm512_abs_epi64(__m512i __A)
1961 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1963 _mm512_setzero_si512 (),
1967 static __inline__ __m512i __DEFAULT_FN_ATTRS
1968 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1970 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1975 static __inline__ __m512i __DEFAULT_FN_ATTRS
1976 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1978 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1980 _mm512_setzero_si512 (),
1984 static __inline __m512i __DEFAULT_FN_ATTRS
1985 _mm512_abs_epi32(__m512i __A)
1987 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1989 _mm512_setzero_si512 (),
1993 static __inline__ __m512i __DEFAULT_FN_ATTRS
1994 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1996 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2001 static __inline__ __m512i __DEFAULT_FN_ATTRS
2002 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
2004 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2006 _mm512_setzero_si512 (),
2010 static __inline__ __m128 __DEFAULT_FN_ATTRS
2011 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2012 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2016 _MM_FROUND_CUR_DIRECTION);
2019 static __inline__ __m128 __DEFAULT_FN_ATTRS
2020 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2021 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2023 (__v4sf) _mm_setzero_ps (),
2025 _MM_FROUND_CUR_DIRECTION);
2028 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
2029 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2030 (__v4sf)(__m128)(B), \
2031 (__v4sf)_mm_setzero_ps(), \
2032 (__mmask8)-1, (int)(R)); })
2034 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
2035 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2036 (__v4sf)(__m128)(B), \
2037 (__v4sf)(__m128)(W), (__mmask8)(U), \
2040 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
2041 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2042 (__v4sf)(__m128)(B), \
2043 (__v4sf)_mm_setzero_ps(), \
2044 (__mmask8)(U), (int)(R)); })
2046 static __inline__ __m128d __DEFAULT_FN_ATTRS
2047 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2048 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2052 _MM_FROUND_CUR_DIRECTION);
2055 static __inline__ __m128d __DEFAULT_FN_ATTRS
2056 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2057 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2059 (__v2df) _mm_setzero_pd (),
2061 _MM_FROUND_CUR_DIRECTION);
2063 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
2064 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2065 (__v2df)(__m128d)(B), \
2066 (__v2df)_mm_setzero_pd(), \
2067 (__mmask8)-1, (int)(R)); })
2069 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
2070 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2071 (__v2df)(__m128d)(B), \
2072 (__v2df)(__m128d)(W), \
2073 (__mmask8)(U), (int)(R)); })
2075 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
2076 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2077 (__v2df)(__m128d)(B), \
2078 (__v2df)_mm_setzero_pd(), \
2079 (__mmask8)(U), (int)(R)); })
2081 static __inline__ __m512d __DEFAULT_FN_ATTRS
2082 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2083 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2084 (__v8df)_mm512_add_pd(__A, __B),
2088 static __inline__ __m512d __DEFAULT_FN_ATTRS
2089 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2090 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2091 (__v8df)_mm512_add_pd(__A, __B),
2092 (__v8df)_mm512_setzero_pd());
2095 static __inline__ __m512 __DEFAULT_FN_ATTRS
2096 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2097 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2098 (__v16sf)_mm512_add_ps(__A, __B),
2102 static __inline__ __m512 __DEFAULT_FN_ATTRS
2103 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2104 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2105 (__v16sf)_mm512_add_ps(__A, __B),
2106 (__v16sf)_mm512_setzero_ps());
/* VADDPD with explicit rounding control R; unmasked form. */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* VADDPD with rounding control; merge-masked form. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* VADDPD with rounding control; zero-masked form. */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* VADDPS with explicit rounding control R; unmasked form. */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* VADDPS with rounding control; merge-masked form. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* VADDPS with rounding control; zero-masked form. */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2145 static __inline__ __m128 __DEFAULT_FN_ATTRS
2146 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2147 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2151 _MM_FROUND_CUR_DIRECTION);
2154 static __inline__ __m128 __DEFAULT_FN_ATTRS
2155 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2156 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2158 (__v4sf) _mm_setzero_ps (),
2160 _MM_FROUND_CUR_DIRECTION);
2162 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
2163 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2164 (__v4sf)(__m128)(B), \
2165 (__v4sf)_mm_setzero_ps(), \
2166 (__mmask8)-1, (int)(R)); })
2168 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
2169 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2170 (__v4sf)(__m128)(B), \
2171 (__v4sf)(__m128)(W), (__mmask8)(U), \
2174 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
2175 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2176 (__v4sf)(__m128)(B), \
2177 (__v4sf)_mm_setzero_ps(), \
2178 (__mmask8)(U), (int)(R)); })
2180 static __inline__ __m128d __DEFAULT_FN_ATTRS
2181 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2182 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2186 _MM_FROUND_CUR_DIRECTION);
2189 static __inline__ __m128d __DEFAULT_FN_ATTRS
2190 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2191 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2193 (__v2df) _mm_setzero_pd (),
2195 _MM_FROUND_CUR_DIRECTION);
2198 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
2199 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2200 (__v2df)(__m128d)(B), \
2201 (__v2df)_mm_setzero_pd(), \
2202 (__mmask8)-1, (int)(R)); })
2204 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
2205 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2206 (__v2df)(__m128d)(B), \
2207 (__v2df)(__m128d)(W), \
2208 (__mmask8)(U), (int)(R)); })
2210 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
2211 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2212 (__v2df)(__m128d)(B), \
2213 (__v2df)_mm_setzero_pd(), \
2214 (__mmask8)(U), (int)(R)); })
2216 static __inline__ __m512d __DEFAULT_FN_ATTRS
2217 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2218 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2219 (__v8df)_mm512_sub_pd(__A, __B),
2223 static __inline__ __m512d __DEFAULT_FN_ATTRS
2224 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2225 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2226 (__v8df)_mm512_sub_pd(__A, __B),
2227 (__v8df)_mm512_setzero_pd());
2230 static __inline__ __m512 __DEFAULT_FN_ATTRS
2231 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2232 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2233 (__v16sf)_mm512_sub_ps(__A, __B),
2237 static __inline__ __m512 __DEFAULT_FN_ATTRS
2238 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2239 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2240 (__v16sf)_mm512_sub_ps(__A, __B),
2241 (__v16sf)_mm512_setzero_ps());
2244 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
2245 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2246 (__v8df)(__m512d)(B), \
2247 (__v8df)_mm512_setzero_pd(), \
2248 (__mmask8)-1, (int)(R)); })
/* Merge-masked packed double subtract with static rounding R; lanes with a
   clear bit in U take W's value. FIX: truncated body — final rounding
   argument and close restored. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
2256 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
2257 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2258 (__v8df)(__m512d)(B), \
2259 (__v8df)_mm512_setzero_pd(), \
2260 (__mmask8)(U), (int)(R)); })
2262 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
2263 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
2264 (__v16sf)(__m512)(B), \
2265 (__v16sf)_mm512_setzero_ps(), \
2266 (__mmask16)-1, (int)(R)); })
/* Merge-masked packed single subtract with static rounding R; lanes with a
   clear bit in U take W's value. FIX: truncated body — final rounding
   argument and close restored. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
/* Zero-masked packed single subtract with static rounding R.
   FIX: removed the stray ';' after the statement expression — it made the
   macro illegal inside larger expressions (e.g. conditional operands). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2280 static __inline__ __m128 __DEFAULT_FN_ATTRS
2281 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2282 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2286 _MM_FROUND_CUR_DIRECTION);
2289 static __inline__ __m128 __DEFAULT_FN_ATTRS
2290 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2291 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2293 (__v4sf) _mm_setzero_ps (),
2295 _MM_FROUND_CUR_DIRECTION);
2297 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \
2298 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2299 (__v4sf)(__m128)(B), \
2300 (__v4sf)_mm_setzero_ps(), \
2301 (__mmask8)-1, (int)(R)); })
/* Merge-masked scalar single-precision multiply with rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
2309 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
2310 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2311 (__v4sf)(__m128)(B), \
2312 (__v4sf)_mm_setzero_ps(), \
2313 (__mmask8)(U), (int)(R)); })
2315 static __inline__ __m128d __DEFAULT_FN_ATTRS
2316 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2317 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2321 _MM_FROUND_CUR_DIRECTION);
2324 static __inline__ __m128d __DEFAULT_FN_ATTRS
2325 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2326 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2328 (__v2df) _mm_setzero_pd (),
2330 _MM_FROUND_CUR_DIRECTION);
2333 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \
2334 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2335 (__v2df)(__m128d)(B), \
2336 (__v2df)_mm_setzero_pd(), \
2337 (__mmask8)-1, (int)(R)); })
2339 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
2340 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2341 (__v2df)(__m128d)(B), \
2342 (__v2df)(__m128d)(W), \
2343 (__mmask8)(U), (int)(R)); })
2345 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
2346 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2347 (__v2df)(__m128d)(B), \
2348 (__v2df)_mm_setzero_pd(), \
2349 (__mmask8)(U), (int)(R)); })
2351 static __inline__ __m512d __DEFAULT_FN_ATTRS
2352 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2353 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2354 (__v8df)_mm512_mul_pd(__A, __B),
2358 static __inline__ __m512d __DEFAULT_FN_ATTRS
2359 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2360 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2361 (__v8df)_mm512_mul_pd(__A, __B),
2362 (__v8df)_mm512_setzero_pd());
2365 static __inline__ __m512 __DEFAULT_FN_ATTRS
2366 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2367 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2368 (__v16sf)_mm512_mul_ps(__A, __B),
2372 static __inline__ __m512 __DEFAULT_FN_ATTRS
2373 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2374 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2375 (__v16sf)_mm512_mul_ps(__A, __B),
2376 (__v16sf)_mm512_setzero_ps());
2379 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
2380 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
2381 (__v8df)(__m512d)(B), \
2382 (__v8df)_mm512_setzero_pd(), \
2383 (__mmask8)-1, (int)(R)); })
/* Merge-masked packed double multiply with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
2391 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
2392 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
2393 (__v8df)(__m512d)(B), \
2394 (__v8df)_mm512_setzero_pd(), \
2395 (__mmask8)(U), (int)(R)); })
2397 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
2398 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
2399 (__v16sf)(__m512)(B), \
2400 (__v16sf)_mm512_setzero_ps(), \
2401 (__mmask16)-1, (int)(R)); })
/* Merge-masked packed single multiply with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
/* Zero-masked packed single multiply with static rounding R.
   FIX: removed the stray ';' after the statement expression — it made the
   macro illegal inside larger expressions. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2415 static __inline__ __m128 __DEFAULT_FN_ATTRS
2416 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2417 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2421 _MM_FROUND_CUR_DIRECTION);
2424 static __inline__ __m128 __DEFAULT_FN_ATTRS
2425 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2426 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2428 (__v4sf) _mm_setzero_ps (),
2430 _MM_FROUND_CUR_DIRECTION);
2433 #define _mm_div_round_ss(A, B, R) __extension__ ({ \
2434 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2435 (__v4sf)(__m128)(B), \
2436 (__v4sf)_mm_setzero_ps(), \
2437 (__mmask8)-1, (int)(R)); })
/* Merge-masked scalar single-precision divide with rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
2445 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
2446 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2447 (__v4sf)(__m128)(B), \
2448 (__v4sf)_mm_setzero_ps(), \
2449 (__mmask8)(U), (int)(R)); })
2451 static __inline__ __m128d __DEFAULT_FN_ATTRS
2452 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2453 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2457 _MM_FROUND_CUR_DIRECTION);
2460 static __inline__ __m128d __DEFAULT_FN_ATTRS
2461 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2462 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2464 (__v2df) _mm_setzero_pd (),
2466 _MM_FROUND_CUR_DIRECTION);
2469 #define _mm_div_round_sd(A, B, R) __extension__ ({ \
2470 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2471 (__v2df)(__m128d)(B), \
2472 (__v2df)_mm_setzero_pd(), \
2473 (__mmask8)-1, (int)(R)); })
2475 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
2476 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2477 (__v2df)(__m128d)(B), \
2478 (__v2df)(__m128d)(W), \
2479 (__mmask8)(U), (int)(R)); })
2481 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
2482 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2483 (__v2df)(__m128d)(B), \
2484 (__v2df)_mm_setzero_pd(), \
2485 (__mmask8)(U), (int)(R)); })
2487 static __inline __m512d __DEFAULT_FN_ATTRS
2488 _mm512_div_pd(__m512d __a, __m512d __b)
2490 return (__m512d)((__v8df)__a/(__v8df)__b);
2493 static __inline__ __m512d __DEFAULT_FN_ATTRS
2494 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2495 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2496 (__v8df)_mm512_div_pd(__A, __B),
2500 static __inline__ __m512d __DEFAULT_FN_ATTRS
2501 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2502 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2503 (__v8df)_mm512_div_pd(__A, __B),
2504 (__v8df)_mm512_setzero_pd());
2507 static __inline __m512 __DEFAULT_FN_ATTRS
2508 _mm512_div_ps(__m512 __a, __m512 __b)
2510 return (__m512)((__v16sf)__a/(__v16sf)__b);
2513 static __inline__ __m512 __DEFAULT_FN_ATTRS
2514 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2515 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2516 (__v16sf)_mm512_div_ps(__A, __B),
2520 static __inline__ __m512 __DEFAULT_FN_ATTRS
2521 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2522 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2523 (__v16sf)_mm512_div_ps(__A, __B),
2524 (__v16sf)_mm512_setzero_ps());
2527 #define _mm512_div_round_pd(A, B, R) __extension__ ({ \
2528 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
2529 (__v8df)(__m512d)(B), \
2530 (__v8df)_mm512_setzero_pd(), \
2531 (__mmask8)-1, (int)(R)); })
/* Merge-masked packed double divide with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
2539 #define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
2540 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
2541 (__v8df)(__m512d)(B), \
2542 (__v8df)_mm512_setzero_pd(), \
2543 (__mmask8)(U), (int)(R)); })
2545 #define _mm512_div_round_ps(A, B, R) __extension__ ({ \
2546 (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
2547 (__v16sf)(__m512)(B), \
2548 (__v16sf)_mm512_setzero_ps(), \
2549 (__mmask16)-1, (int)(R)); })
/* Merge-masked packed single divide with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
/* Zero-masked packed single divide with static rounding R.
   FIX: removed the stray ';' after the statement expression — it made the
   macro illegal inside larger expressions. */
#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2563 #define _mm512_roundscale_ps(A, B) __extension__ ({ \
2564 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2565 (__v16sf)(__m512)(A), (__mmask16)-1, \
2566 _MM_FROUND_CUR_DIRECTION); })
2568 #define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
2569 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2570 (__v16sf)(__m512)(A), (__mmask16)(B), \
2571 _MM_FROUND_CUR_DIRECTION); })
/* Round packed floats to the precision given by imm; lanes whose bit in
   mask A is clear are zeroed. FIX: the write-mask argument line was lost
   in truncation and is restored. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })
/* Merge-masked roundscale of packed floats with explicit rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })
2584 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
2585 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2586 (__v16sf)_mm512_setzero_ps(), \
2587 (__mmask16)(A), (int)(R)); })
2589 #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
2590 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2591 (__v16sf)_mm512_undefined_ps(), \
2592 (__mmask16)-1, (int)(R)); })
2594 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
2595 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2596 (__v8df)(__m512d)(A), (__mmask8)-1, \
2597 _MM_FROUND_CUR_DIRECTION); })
2599 #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
2600 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2601 (__v8df)(__m512d)(A), (__mmask8)(B), \
2602 _MM_FROUND_CUR_DIRECTION); })
/* Round packed doubles to the precision given by imm; lanes whose bit in
   mask A is clear are zeroed. FIX: the write-mask argument line was lost
   in truncation and is restored. */
#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })
/* Merge-masked roundscale of packed doubles with explicit rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })
2615 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
2616 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2617 (__v8df)_mm512_setzero_pd(), \
2618 (__mmask8)(A), (int)(R)); })
2620 #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
2621 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2622 (__v8df)_mm512_undefined_pd(), \
2623 (__mmask8)-1, (int)(R)); })
/* Fused multiply-add of packed doubles (A*B+C) with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })
2632 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
2633 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2634 (__v8df)(__m512d)(B), \
2635 (__v8df)(__m512d)(C), \
2636 (__mmask8)(U), (int)(R)); })
2639 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
2640 (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2641 (__v8df)(__m512d)(B), \
2642 (__v8df)(__m512d)(C), \
2643 (__mmask8)(U), (int)(R)); })
2646 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
2647 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2648 (__v8df)(__m512d)(B), \
2649 (__v8df)(__m512d)(C), \
2650 (__mmask8)(U), (int)(R)); })
2653 #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
2654 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2655 (__v8df)(__m512d)(B), \
2656 -(__v8df)(__m512d)(C), \
2657 (__mmask8)-1, (int)(R)); })
2660 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
2661 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2662 (__v8df)(__m512d)(B), \
2663 -(__v8df)(__m512d)(C), \
2664 (__mmask8)(U), (int)(R)); })
2667 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
2668 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2669 (__v8df)(__m512d)(B), \
2670 -(__v8df)(__m512d)(C), \
2671 (__mmask8)(U), (int)(R)); })
/* Fused negated multiply-add of packed doubles (-(A*B)+C) with static
   rounding R. FIX: truncated body — final rounding argument and close
   restored. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })
2681 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
2682 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2683 (__v8df)(__m512d)(B), \
2684 (__v8df)(__m512d)(C), \
2685 (__mmask8)(U), (int)(R)); })
2688 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
2689 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2690 (__v8df)(__m512d)(B), \
2691 (__v8df)(__m512d)(C), \
2692 (__mmask8)(U), (int)(R)); })
2695 #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
2696 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2697 (__v8df)(__m512d)(B), \
2698 -(__v8df)(__m512d)(C), \
2699 (__mmask8)-1, (int)(R)); })
2702 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
2703 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2704 (__v8df)(__m512d)(B), \
2705 -(__v8df)(__m512d)(C), \
2706 (__mmask8)(U), (int)(R)); })
2709 static __inline__ __m512d __DEFAULT_FN_ATTRS
2710 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2712 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2716 _MM_FROUND_CUR_DIRECTION);
2719 static __inline__ __m512d __DEFAULT_FN_ATTRS
2720 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2722 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2726 _MM_FROUND_CUR_DIRECTION);
2729 static __inline__ __m512d __DEFAULT_FN_ATTRS
2730 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2732 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2736 _MM_FROUND_CUR_DIRECTION);
2739 static __inline__ __m512d __DEFAULT_FN_ATTRS
2740 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2742 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2746 _MM_FROUND_CUR_DIRECTION);
2749 static __inline__ __m512d __DEFAULT_FN_ATTRS
2750 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2752 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2756 _MM_FROUND_CUR_DIRECTION);
2759 static __inline__ __m512d __DEFAULT_FN_ATTRS
2760 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2762 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2766 _MM_FROUND_CUR_DIRECTION);
2769 static __inline__ __m512d __DEFAULT_FN_ATTRS
2770 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2772 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2776 _MM_FROUND_CUR_DIRECTION);
2779 static __inline__ __m512d __DEFAULT_FN_ATTRS
2780 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2782 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2786 _MM_FROUND_CUR_DIRECTION);
2789 static __inline__ __m512d __DEFAULT_FN_ATTRS
2790 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2792 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2796 _MM_FROUND_CUR_DIRECTION);
2799 static __inline__ __m512d __DEFAULT_FN_ATTRS
2800 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2802 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2806 _MM_FROUND_CUR_DIRECTION);
2809 static __inline__ __m512d __DEFAULT_FN_ATTRS
2810 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2812 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2816 _MM_FROUND_CUR_DIRECTION);
2819 static __inline__ __m512d __DEFAULT_FN_ATTRS
2820 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2822 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2826 _MM_FROUND_CUR_DIRECTION);
/* Fused multiply-add of packed floats (A*B+C) with static rounding R.
   FIX: truncated body — final rounding argument and close restored. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })
2836 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
2837 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2838 (__v16sf)(__m512)(B), \
2839 (__v16sf)(__m512)(C), \
2840 (__mmask16)(U), (int)(R)); })
2843 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
2844 (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2845 (__v16sf)(__m512)(B), \
2846 (__v16sf)(__m512)(C), \
2847 (__mmask16)(U), (int)(R)); })
2850 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
2851 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2852 (__v16sf)(__m512)(B), \
2853 (__v16sf)(__m512)(C), \
2854 (__mmask16)(U), (int)(R)); })
2857 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
2858 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2859 (__v16sf)(__m512)(B), \
2860 -(__v16sf)(__m512)(C), \
2861 (__mmask16)-1, (int)(R)); })
2864 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
2865 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2866 (__v16sf)(__m512)(B), \
2867 -(__v16sf)(__m512)(C), \
2868 (__mmask16)(U), (int)(R)); })
2871 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
2872 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2873 (__v16sf)(__m512)(B), \
2874 -(__v16sf)(__m512)(C), \
2875 (__mmask16)(U), (int)(R)); })
/* Fused negated multiply-add of packed floats (-(A*B)+C) with static
   rounding R. FIX: truncated body — final rounding argument and close
   restored. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })
2885 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
2886 (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2887 (__v16sf)(__m512)(B), \
2888 (__v16sf)(__m512)(C), \
2889 (__mmask16)(U), (int)(R)); })
2892 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
2893 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2894 (__v16sf)(__m512)(B), \
2895 (__v16sf)(__m512)(C), \
2896 (__mmask16)(U), (int)(R)); })
2899 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
2900 (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
2901 (__v16sf)(__m512)(B), \
2902 -(__v16sf)(__m512)(C), \
2903 (__mmask16)-1, (int)(R)); })
2906 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
2907 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2908 (__v16sf)(__m512)(B), \
2909 -(__v16sf)(__m512)(C), \
2910 (__mmask16)(U), (int)(R)); })
2913 static __inline__ __m512 __DEFAULT_FN_ATTRS
2914 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2916 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2920 _MM_FROUND_CUR_DIRECTION);
2923 static __inline__ __m512 __DEFAULT_FN_ATTRS
2924 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2926 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2930 _MM_FROUND_CUR_DIRECTION);
2933 static __inline__ __m512 __DEFAULT_FN_ATTRS
2934 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2936 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2940 _MM_FROUND_CUR_DIRECTION);
2943 static __inline__ __m512 __DEFAULT_FN_ATTRS
2944 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2946 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2950 _MM_FROUND_CUR_DIRECTION);
2953 static __inline__ __m512 __DEFAULT_FN_ATTRS
2954 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2956 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2960 _MM_FROUND_CUR_DIRECTION);
2963 static __inline__ __m512 __DEFAULT_FN_ATTRS
2964 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2966 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2970 _MM_FROUND_CUR_DIRECTION);
2973 static __inline__ __m512 __DEFAULT_FN_ATTRS
2974 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2976 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2980 _MM_FROUND_CUR_DIRECTION);
2983 static __inline__ __m512 __DEFAULT_FN_ATTRS
2984 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2986 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2990 _MM_FROUND_CUR_DIRECTION);
2993 static __inline__ __m512 __DEFAULT_FN_ATTRS
2994 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2996 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3000 _MM_FROUND_CUR_DIRECTION);
3003 static __inline__ __m512 __DEFAULT_FN_ATTRS
3004 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3006 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3010 _MM_FROUND_CUR_DIRECTION);
3013 static __inline__ __m512 __DEFAULT_FN_ATTRS
3014 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3016 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3020 _MM_FROUND_CUR_DIRECTION);
3023 static __inline__ __m512 __DEFAULT_FN_ATTRS
3024 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3026 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3030 _MM_FROUND_CUR_DIRECTION);
3033 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
3034 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3035 (__v8df)(__m512d)(B), \
3036 (__v8df)(__m512d)(C), \
3037 (__mmask8)-1, (int)(R)); })
3040 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
3041 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3042 (__v8df)(__m512d)(B), \
3043 (__v8df)(__m512d)(C), \
3044 (__mmask8)(U), (int)(R)); })
3047 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
3048 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
3049 (__v8df)(__m512d)(B), \
3050 (__v8df)(__m512d)(C), \
3051 (__mmask8)(U), (int)(R)); })
3054 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
3055 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
3056 (__v8df)(__m512d)(B), \
3057 (__v8df)(__m512d)(C), \
3058 (__mmask8)(U), (int)(R)); })
3061 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
3062 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3063 (__v8df)(__m512d)(B), \
3064 -(__v8df)(__m512d)(C), \
3065 (__mmask8)-1, (int)(R)); })
3068 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
3069 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3070 (__v8df)(__m512d)(B), \
3071 -(__v8df)(__m512d)(C), \
3072 (__mmask8)(U), (int)(R)); })
3075 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
3076 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
3077 (__v8df)(__m512d)(B), \
3078 -(__v8df)(__m512d)(C), \
3079 (__mmask8)(U), (int)(R)); })
3082 static __inline__ __m512d __DEFAULT_FN_ATTRS
3083 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3085 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3089 _MM_FROUND_CUR_DIRECTION);
3092 static __inline__ __m512d __DEFAULT_FN_ATTRS
3093 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3095 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3099 _MM_FROUND_CUR_DIRECTION);
3102 static __inline__ __m512d __DEFAULT_FN_ATTRS
3103 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3105 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3109 _MM_FROUND_CUR_DIRECTION);
3112 static __inline__ __m512d __DEFAULT_FN_ATTRS
3113 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3115 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3119 _MM_FROUND_CUR_DIRECTION);
3122 static __inline__ __m512d __DEFAULT_FN_ATTRS
3123 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3125 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3129 _MM_FROUND_CUR_DIRECTION);
3132 static __inline__ __m512d __DEFAULT_FN_ATTRS
3133 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3135 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3139 _MM_FROUND_CUR_DIRECTION);
3142 static __inline__ __m512d __DEFAULT_FN_ATTRS
3143 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3145 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3149 _MM_FROUND_CUR_DIRECTION);
3152 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
3153 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3154 (__v16sf)(__m512)(B), \
3155 (__v16sf)(__m512)(C), \
3156 (__mmask16)-1, (int)(R)); })
3159 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
3160 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3161 (__v16sf)(__m512)(B), \
3162 (__v16sf)(__m512)(C), \
3163 (__mmask16)(U), (int)(R)); })
3166 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
3167 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
3168 (__v16sf)(__m512)(B), \
3169 (__v16sf)(__m512)(C), \
3170 (__mmask16)(U), (int)(R)); })
3173 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
3174 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3175 (__v16sf)(__m512)(B), \
3176 (__v16sf)(__m512)(C), \
3177 (__mmask16)(U), (int)(R)); })
3180 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
3181 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3182 (__v16sf)(__m512)(B), \
3183 -(__v16sf)(__m512)(C), \
3184 (__mmask16)-1, (int)(R)); })
3187 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
3188 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3189 (__v16sf)(__m512)(B), \
3190 -(__v16sf)(__m512)(C), \
3191 (__mmask16)(U), (int)(R)); })
3194 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
3195 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3196 (__v16sf)(__m512)(B), \
3197 -(__v16sf)(__m512)(C), \
3198 (__mmask16)(U), (int)(R)); })
3201 static __inline__ __m512 __DEFAULT_FN_ATTRS
3202 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3204 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3208 _MM_FROUND_CUR_DIRECTION);
3211 static __inline__ __m512 __DEFAULT_FN_ATTRS
3212 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3214 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3218 _MM_FROUND_CUR_DIRECTION);
3221 static __inline__ __m512 __DEFAULT_FN_ATTRS
3222 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3224 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3228 _MM_FROUND_CUR_DIRECTION);
3231 static __inline__ __m512 __DEFAULT_FN_ATTRS
3232 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3234 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3238 _MM_FROUND_CUR_DIRECTION);
3241 static __inline__ __m512 __DEFAULT_FN_ATTRS
3242 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3244 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3248 _MM_FROUND_CUR_DIRECTION);
3251 static __inline__ __m512 __DEFAULT_FN_ATTRS
3252 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3254 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3258 _MM_FROUND_CUR_DIRECTION);
3261 static __inline__ __m512 __DEFAULT_FN_ATTRS
3262 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3264 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3268 _MM_FROUND_CUR_DIRECTION);
3271 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3272 (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3273 (__v8df)(__m512d)(B), \
3274 (__v8df)(__m512d)(C), \
3275 (__mmask8)(U), (int)(R)); })
3278 static __inline__ __m512d __DEFAULT_FN_ATTRS
3279 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3281 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3285 _MM_FROUND_CUR_DIRECTION);
3288 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3289 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3290 (__v16sf)(__m512)(B), \
3291 (__v16sf)(__m512)(C), \
3292 (__mmask16)(U), (int)(R)); })
3295 static __inline__ __m512 __DEFAULT_FN_ATTRS
3296 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3298 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3302 _MM_FROUND_CUR_DIRECTION);
3305 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
3306 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3307 (__v8df)(__m512d)(B), \
3308 (__v8df)(__m512d)(C), \
3309 (__mmask8)(U), (int)(R)); })
3312 static __inline__ __m512d __DEFAULT_FN_ATTRS
3313 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3315 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3319 _MM_FROUND_CUR_DIRECTION);
3322 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
3323 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3324 (__v16sf)(__m512)(B), \
3325 (__v16sf)(__m512)(C), \
3326 (__mmask16)(U), (int)(R)); })
3329 static __inline__ __m512 __DEFAULT_FN_ATTRS
3330 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3332 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3336 _MM_FROUND_CUR_DIRECTION);
3339 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
3340 (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
3341 (__v8df)(__m512d)(B), \
3342 (__v8df)(__m512d)(C), \
3343 (__mmask8)(U), (int)(R)); })
3346 static __inline__ __m512d __DEFAULT_FN_ATTRS
3347 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3349 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3353 _MM_FROUND_CUR_DIRECTION);
3356 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
3357 (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
3358 (__v16sf)(__m512)(B), \
3359 (__v16sf)(__m512)(C), \
3360 (__mmask16)(U), (int)(R)); })
3363 static __inline__ __m512 __DEFAULT_FN_ATTRS
3364 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3366 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3370 _MM_FROUND_CUR_DIRECTION);
3373 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
3374 (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
3375 (__v8df)(__m512d)(B), \
3376 (__v8df)(__m512d)(C), \
3377 (__mmask8)(U), (int)(R)); })
3380 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3381 (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
3382 (__v8df)(__m512d)(B), \
3383 (__v8df)(__m512d)(C), \
3384 (__mmask8)(U), (int)(R)); })
3387 static __inline__ __m512d __DEFAULT_FN_ATTRS
3388 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3390 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3394 _MM_FROUND_CUR_DIRECTION);
3397 static __inline__ __m512d __DEFAULT_FN_ATTRS
3398 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3400 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3404 _MM_FROUND_CUR_DIRECTION);
3407 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
3408 (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
3409 (__v16sf)(__m512)(B), \
3410 (__v16sf)(__m512)(C), \
3411 (__mmask16)(U), (int)(R)); })
3414 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3415 (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
3416 (__v16sf)(__m512)(B), \
3417 (__v16sf)(__m512)(C), \
3418 (__mmask16)(U), (int)(R)); })
3421 static __inline__ __m512 __DEFAULT_FN_ATTRS
3422 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3424 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3428 _MM_FROUND_CUR_DIRECTION);
3431 static __inline__ __m512 __DEFAULT_FN_ATTRS
3432 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3434 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3438 _MM_FROUND_CUR_DIRECTION);
/* Vector permutations */
3445 static __inline __m512i __DEFAULT_FN_ATTRS
3446 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3448 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3455 static __inline__ __m512i __DEFAULT_FN_ATTRS
3456 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3457 __m512i __I, __m512i __B)
3459 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3466 static __inline__ __m512i __DEFAULT_FN_ATTRS
3467 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3468 __m512i __I, __m512i __B)
3470 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3477 static __inline __m512i __DEFAULT_FN_ATTRS
3478 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3480 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3487 static __inline__ __m512i __DEFAULT_FN_ATTRS
3488 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3491 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3499 static __inline__ __m512i __DEFAULT_FN_ATTRS
3500 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3501 __m512i __I, __m512i __B)
3503 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
/* Concatenate B:A and shift right by I elements, implemented as a compile-
   time shuffle.  I is masked to the element count, matching VALIGNQ/VALIGND. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                 (__v8di)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                 (__v8di)_mm512_setzero_si512()); })

#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                (__v16si)(__m512i)(W)); })

#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                (__v16si)_mm512_setzero_si512()); })
/* Vector Extract */
/* Extract a 256-bit (epi64x4) or 128-bit (ps x4) lane from a 512-bit vector,
   implemented as a compile-time shuffle; the mask/maskz forms blend or zero
   through the select builtins. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0, \
                                   ((I) & 1) ? 5 : 1, \
                                   ((I) & 1) ? 6 : 2, \
                                   ((I) & 1) ? 7 : 3); })

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(__m256d)(W)); })

#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })

#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4, \
                                  1 + ((I) & 0x3) * 4, \
                                  2 + ((I) & 0x3) * 4, \
                                  3 + ((I) & 0x3) * 4); })

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(__m128)(W)); })

#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })
3601 static __inline __m512d __DEFAULT_FN_ATTRS
3602 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3604 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3609 static __inline __m512 __DEFAULT_FN_ATTRS
3610 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3612 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3617 static __inline __m512i __DEFAULT_FN_ATTRS
3618 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3620 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3625 static __inline __m512i __DEFAULT_FN_ATTRS
3626 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3628 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
/* Packed single-precision comparisons producing a 16-bit lane mask.
   P is a _CMP_* predicate; the named variants below fix the predicate. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
/* Packed double-precision comparisons producing an 8-bit lane mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3747 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
3748 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3749 (__v16si)_mm512_undefined_epi32(), \
3750 (__mmask16)-1, (int)(R)); })
3752 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
3753 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3754 (__v16si)(__m512i)(W), \
3755 (__mmask16)(U), (int)(R)); })
3757 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
3758 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3759 (__v16si)_mm512_setzero_si512(), \
3760 (__mmask16)(U), (int)(R)); })
3763 static __inline __m512i __DEFAULT_FN_ATTRS
3764 _mm512_cvttps_epu32(__m512 __A)
3766 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3768 _mm512_setzero_si512 (),
3770 _MM_FROUND_CUR_DIRECTION);
3773 static __inline__ __m512i __DEFAULT_FN_ATTRS
3774 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3776 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3779 _MM_FROUND_CUR_DIRECTION);
3782 static __inline__ __m512i __DEFAULT_FN_ATTRS
3783 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3785 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3786 (__v16si) _mm512_setzero_si512 (),
3788 _MM_FROUND_CUR_DIRECTION);
3791 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
3792 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3793 (__v16sf)_mm512_setzero_ps(), \
3794 (__mmask16)-1, (int)(R)); })
3796 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
3797 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3798 (__v16sf)(__m512)(W), \
3799 (__mmask16)(U), (int)(R)); })
3801 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
3802 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3803 (__v16sf)_mm512_setzero_ps(), \
3804 (__mmask16)(U), (int)(R)); })
3806 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
3807 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3808 (__v16sf)_mm512_setzero_ps(), \
3809 (__mmask16)-1, (int)(R)); })
3811 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
3812 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3813 (__v16sf)(__m512)(W), \
3814 (__mmask16)(U), (int)(R)); })
3816 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
3817 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3818 (__v16sf)_mm512_setzero_ps(), \
3819 (__mmask16)(U), (int)(R)); })
3821 static __inline__ __m512 __DEFAULT_FN_ATTRS
3822 _mm512_cvtepu32_ps (__m512i __A)
3824 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3825 (__v16sf) _mm512_undefined_ps (),
3827 _MM_FROUND_CUR_DIRECTION);
3830 static __inline__ __m512 __DEFAULT_FN_ATTRS
3831 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3833 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3836 _MM_FROUND_CUR_DIRECTION);
3839 static __inline__ __m512 __DEFAULT_FN_ATTRS
3840 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3842 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3843 (__v16sf) _mm512_setzero_ps (),
3845 _MM_FROUND_CUR_DIRECTION);
3848 static __inline __m512d __DEFAULT_FN_ATTRS
3849 _mm512_cvtepi32_pd(__m256i __A)
3851 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3854 static __inline__ __m512d __DEFAULT_FN_ATTRS
3855 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3857 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3858 (__v8df)_mm512_cvtepi32_pd(__A),
3862 static __inline__ __m512d __DEFAULT_FN_ATTRS
3863 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3865 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3866 (__v8df)_mm512_cvtepi32_pd(__A),
3867 (__v8df)_mm512_setzero_pd());
3870 static __inline__ __m512d __DEFAULT_FN_ATTRS
3871 _mm512_cvtepi32lo_pd(__m512i __A)
3873 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3876 static __inline__ __m512d __DEFAULT_FN_ATTRS
3877 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3879 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3882 static __inline__ __m512 __DEFAULT_FN_ATTRS
3883 _mm512_cvtepi32_ps (__m512i __A)
3885 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3886 (__v16sf) _mm512_undefined_ps (),
3888 _MM_FROUND_CUR_DIRECTION);
3891 static __inline__ __m512 __DEFAULT_FN_ATTRS
3892 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3894 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3897 _MM_FROUND_CUR_DIRECTION);
3900 static __inline__ __m512 __DEFAULT_FN_ATTRS
3901 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3903 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3904 (__v16sf) _mm512_setzero_ps (),
3906 _MM_FROUND_CUR_DIRECTION);
3909 static __inline __m512d __DEFAULT_FN_ATTRS
3910 _mm512_cvtepu32_pd(__m256i __A)
3912 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3915 static __inline__ __m512d __DEFAULT_FN_ATTRS
3916 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3918 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3919 (__v8df)_mm512_cvtepu32_pd(__A),
3923 static __inline__ __m512d __DEFAULT_FN_ATTRS
3924 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3926 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3927 (__v8df)_mm512_cvtepu32_pd(__A),
3928 (__v8df)_mm512_setzero_pd());
3931 static __inline__ __m512d __DEFAULT_FN_ATTRS
3932 _mm512_cvtepu32lo_pd(__m512i __A)
3934 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3937 static __inline__ __m512d __DEFAULT_FN_ATTRS
3938 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3940 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3943 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
3944 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3945 (__v8sf)_mm256_setzero_ps(), \
3946 (__mmask8)-1, (int)(R)); })
3948 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
3949 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3950 (__v8sf)(__m256)(W), (__mmask8)(U), \
3953 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
3954 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3955 (__v8sf)_mm256_setzero_ps(), \
3956 (__mmask8)(U), (int)(R)); })
3958 static __inline__ __m256 __DEFAULT_FN_ATTRS
3959 _mm512_cvtpd_ps (__m512d __A)
3961 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3962 (__v8sf) _mm256_undefined_ps (),
3964 _MM_FROUND_CUR_DIRECTION);
3967 static __inline__ __m256 __DEFAULT_FN_ATTRS
3968 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3970 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3973 _MM_FROUND_CUR_DIRECTION);
3976 static __inline__ __m256 __DEFAULT_FN_ATTRS
3977 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3979 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3980 (__v8sf) _mm256_setzero_ps (),
3982 _MM_FROUND_CUR_DIRECTION);
3985 static __inline__ __m512 __DEFAULT_FN_ATTRS
3986 _mm512_cvtpd_pslo (__m512d __A)
3988 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3989 (__v8sf) _mm256_setzero_ps (),
3990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3993 static __inline__ __m512 __DEFAULT_FN_ATTRS
3994 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3996 return (__m512) __builtin_shufflevector (
3997 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3999 (__v8sf) _mm256_setzero_ps (),
4000 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4003 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
4004 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4005 (__v16hi)_mm256_undefined_si256(), \
4008 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
4009 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4010 (__v16hi)(__m256i)(U), \
4013 #define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
4014 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4015 (__v16hi)_mm256_setzero_si256(), \
4018 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
4019 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4020 (__v16hi)_mm256_setzero_si256(), \
4023 #define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
4024 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4025 (__v16hi)(__m256i)(U), \
4028 #define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
4029 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
4030 (__v16hi)_mm256_setzero_si256(), \
4033 #define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
4034 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
4035 (__v16sf)_mm512_undefined_ps(), \
4036 (__mmask16)-1, (int)(R)); })
4038 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
4039 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
4040 (__v16sf)(__m512)(W), \
4041 (__mmask16)(U), (int)(R)); })
4043 #define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
4044 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
4045 (__v16sf)_mm512_setzero_ps(), \
4046 (__mmask16)(U), (int)(R)); })
4049 static __inline __m512 __DEFAULT_FN_ATTRS
4050 _mm512_cvtph_ps(__m256i __A)
4052 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4054 _mm512_setzero_ps (),
4056 _MM_FROUND_CUR_DIRECTION);
4059 static __inline__ __m512 __DEFAULT_FN_ATTRS
4060 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
4062 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4065 _MM_FROUND_CUR_DIRECTION);
4068 static __inline__ __m512 __DEFAULT_FN_ATTRS
4069 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4071 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4072 (__v16sf) _mm512_setzero_ps (),
4074 _MM_FROUND_CUR_DIRECTION);
4077 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
4078 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
4079 (__v8si)_mm256_setzero_si256(), \
4080 (__mmask8)-1, (int)(R)); })
4082 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
4083 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
4084 (__v8si)(__m256i)(W), \
4085 (__mmask8)(U), (int)(R)); })
4087 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
4088 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
4089 (__v8si)_mm256_setzero_si256(), \
4090 (__mmask8)(U), (int)(R)); })
4092 static __inline __m256i __DEFAULT_FN_ATTRS
4093 _mm512_cvttpd_epi32(__m512d __a)
4095 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
4096 (__v8si)_mm256_setzero_si256(),
4098 _MM_FROUND_CUR_DIRECTION);
4101 static __inline__ __m256i __DEFAULT_FN_ATTRS
4102 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4104 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4107 _MM_FROUND_CUR_DIRECTION);
4110 static __inline__ __m256i __DEFAULT_FN_ATTRS
4111 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4113 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4114 (__v8si) _mm256_setzero_si256 (),
4116 _MM_FROUND_CUR_DIRECTION);
4119 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
4120 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4121 (__v16si)_mm512_setzero_si512(), \
4122 (__mmask16)-1, (int)(R)); })
4124 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
4125 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4126 (__v16si)(__m512i)(W), \
4127 (__mmask16)(U), (int)(R)); })
4129 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
4130 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4131 (__v16si)_mm512_setzero_si512(), \
4132 (__mmask16)(U), (int)(R)); })
4134 static __inline __m512i __DEFAULT_FN_ATTRS
4135 _mm512_cvttps_epi32(__m512 __a)
4138 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4139 (__v16si) _mm512_setzero_si512 (),
4140 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4143 static __inline__ __m512i __DEFAULT_FN_ATTRS
4144 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4146 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4149 _MM_FROUND_CUR_DIRECTION);
4152 static __inline__ __m512i __DEFAULT_FN_ATTRS
4153 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4155 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4156 (__v16si) _mm512_setzero_si512 (),
4158 _MM_FROUND_CUR_DIRECTION);
4161 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
4162 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4163 (__v16si)_mm512_setzero_si512(), \
4164 (__mmask16)-1, (int)(R)); })
4166 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
4167 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4168 (__v16si)(__m512i)(W), \
4169 (__mmask16)(U), (int)(R)); })
4171 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
4172 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4173 (__v16si)_mm512_setzero_si512(), \
4174 (__mmask16)(U), (int)(R)); })
4176 static __inline__ __m512i __DEFAULT_FN_ATTRS
4177 _mm512_cvtps_epi32 (__m512 __A)
4179 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4180 (__v16si) _mm512_undefined_epi32 (),
4182 _MM_FROUND_CUR_DIRECTION);
4185 static __inline__ __m512i __DEFAULT_FN_ATTRS
4186 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4188 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4191 _MM_FROUND_CUR_DIRECTION);
4194 static __inline__ __m512i __DEFAULT_FN_ATTRS
4195 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4197 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4199 _mm512_setzero_si512 (),
4201 _MM_FROUND_CUR_DIRECTION);
4204 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
4205 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4206 (__v8si)_mm256_setzero_si256(), \
4207 (__mmask8)-1, (int)(R)); })
4209 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
4210 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4211 (__v8si)(__m256i)(W), \
4212 (__mmask8)(U), (int)(R)); })
4214 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
4215 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4216 (__v8si)_mm256_setzero_si256(), \
4217 (__mmask8)(U), (int)(R)); })
4219 static __inline__ __m256i __DEFAULT_FN_ATTRS
4220 _mm512_cvtpd_epi32 (__m512d __A)
4222 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4224 _mm256_undefined_si256 (),
4226 _MM_FROUND_CUR_DIRECTION);
4229 static __inline__ __m256i __DEFAULT_FN_ATTRS
4230 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4232 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4235 _MM_FROUND_CUR_DIRECTION);
4238 static __inline__ __m256i __DEFAULT_FN_ATTRS
4239 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4241 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4243 _mm256_setzero_si256 (),
4245 _MM_FROUND_CUR_DIRECTION);
4248 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
4249 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4250 (__v16si)_mm512_setzero_si512(), \
4251 (__mmask16)-1, (int)(R)); })
4253 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
4254 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4255 (__v16si)(__m512i)(W), \
4256 (__mmask16)(U), (int)(R)); })
4258 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
4259 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4260 (__v16si)_mm512_setzero_si512(), \
4261 (__mmask16)(U), (int)(R)); })
4263 static __inline__ __m512i __DEFAULT_FN_ATTRS
4264 _mm512_cvtps_epu32 ( __m512 __A)
4266 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4268 _mm512_undefined_epi32 (),\
4270 _MM_FROUND_CUR_DIRECTION);\
4273 static __inline__ __m512i __DEFAULT_FN_ATTRS
4274 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4276 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4279 _MM_FROUND_CUR_DIRECTION);
4282 static __inline__ __m512i __DEFAULT_FN_ATTRS
4283 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4285 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4287 _mm512_setzero_si512 (),
4289 _MM_FROUND_CUR_DIRECTION);
/* Convert 8 packed doubles to unsigned 32-bit integers with explicit
   rounding mode R (unmasked / merge-masked / zero-masked forms). */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4307 static __inline__ __m256i __DEFAULT_FN_ATTRS
4308 _mm512_cvtpd_epu32 (__m512d __A)
4310 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4312 _mm256_undefined_si256 (),
4314 _MM_FROUND_CUR_DIRECTION);
4317 static __inline__ __m256i __DEFAULT_FN_ATTRS
4318 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4320 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4323 _MM_FROUND_CUR_DIRECTION);
4326 static __inline__ __m256i __DEFAULT_FN_ATTRS
4327 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4329 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4331 _mm256_setzero_si256 (),
4333 _MM_FROUND_CUR_DIRECTION);
4336 static __inline__ double __DEFAULT_FN_ATTRS
4337 _mm512_cvtsd_f64(__m512d __a)
4342 static __inline__ float __DEFAULT_FN_ATTRS
4343 _mm512_cvtss_f32(__m512 __a)
4348 /* Unpack and Interleave */
4350 static __inline __m512d __DEFAULT_FN_ATTRS
4351 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4353 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4354 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4357 static __inline__ __m512d __DEFAULT_FN_ATTRS
4358 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4360 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4361 (__v8df)_mm512_unpackhi_pd(__A, __B),
4365 static __inline__ __m512d __DEFAULT_FN_ATTRS
4366 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4368 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4369 (__v8df)_mm512_unpackhi_pd(__A, __B),
4370 (__v8df)_mm512_setzero_pd());
4373 static __inline __m512d __DEFAULT_FN_ATTRS
4374 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4376 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4377 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4380 static __inline__ __m512d __DEFAULT_FN_ATTRS
4381 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4383 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4384 (__v8df)_mm512_unpacklo_pd(__A, __B),
4388 static __inline__ __m512d __DEFAULT_FN_ATTRS
4389 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4391 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4392 (__v8df)_mm512_unpacklo_pd(__A, __B),
4393 (__v8df)_mm512_setzero_pd());
4396 static __inline __m512 __DEFAULT_FN_ATTRS
4397 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4399 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4401 2+4, 18+4, 3+4, 19+4,
4402 2+8, 18+8, 3+8, 19+8,
4403 2+12, 18+12, 3+12, 19+12);
4406 static __inline__ __m512 __DEFAULT_FN_ATTRS
4407 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4409 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4410 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4414 static __inline__ __m512 __DEFAULT_FN_ATTRS
4415 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4417 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4418 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4419 (__v16sf)_mm512_setzero_ps());
4422 static __inline __m512 __DEFAULT_FN_ATTRS
4423 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4425 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4427 0+4, 16+4, 1+4, 17+4,
4428 0+8, 16+8, 1+8, 17+8,
4429 0+12, 16+12, 1+12, 17+12);
4432 static __inline__ __m512 __DEFAULT_FN_ATTRS
4433 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4435 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4436 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4440 static __inline__ __m512 __DEFAULT_FN_ATTRS
4441 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4443 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4444 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4445 (__v16sf)_mm512_setzero_ps());
4448 static __inline__ __m512i __DEFAULT_FN_ATTRS
4449 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4451 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4453 2+4, 18+4, 3+4, 19+4,
4454 2+8, 18+8, 3+8, 19+8,
4455 2+12, 18+12, 3+12, 19+12);
4458 static __inline__ __m512i __DEFAULT_FN_ATTRS
4459 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4461 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4462 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4466 static __inline__ __m512i __DEFAULT_FN_ATTRS
4467 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4469 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4470 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4471 (__v16si)_mm512_setzero_si512());
4474 static __inline__ __m512i __DEFAULT_FN_ATTRS
4475 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4477 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4479 0+4, 16+4, 1+4, 17+4,
4480 0+8, 16+8, 1+8, 17+8,
4481 0+12, 16+12, 1+12, 17+12);
4484 static __inline__ __m512i __DEFAULT_FN_ATTRS
4485 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4487 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4488 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4492 static __inline__ __m512i __DEFAULT_FN_ATTRS
4493 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4495 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4496 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4497 (__v16si)_mm512_setzero_si512());
4500 static __inline__ __m512i __DEFAULT_FN_ATTRS
4501 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4503 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4504 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4507 static __inline__ __m512i __DEFAULT_FN_ATTRS
4508 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4510 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4511 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4515 static __inline__ __m512i __DEFAULT_FN_ATTRS
4516 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4518 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4519 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4520 (__v8di)_mm512_setzero_si512());
4523 static __inline__ __m512i __DEFAULT_FN_ATTRS
4524 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4526 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4527 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4530 static __inline__ __m512i __DEFAULT_FN_ATTRS
4531 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4533 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4534 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4538 static __inline__ __m512i __DEFAULT_FN_ATTRS
4539 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4541 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4542 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4543 (__v8di)_mm512_setzero_si512());
4549 static __inline __m512i __DEFAULT_FN_ATTRS
4550 _mm512_loadu_si512 (void const *__P)
4552 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4554 _mm512_setzero_si512 (),
4558 static __inline __m512i __DEFAULT_FN_ATTRS
4559 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4561 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4567 static __inline __m512i __DEFAULT_FN_ATTRS
4568 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4570 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4572 _mm512_setzero_si512 (),
4576 static __inline __m512i __DEFAULT_FN_ATTRS
4577 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4579 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4584 static __inline __m512i __DEFAULT_FN_ATTRS
4585 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4587 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4589 _mm512_setzero_si512 (),
4593 static __inline __m512 __DEFAULT_FN_ATTRS
4594 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4596 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4601 static __inline __m512 __DEFAULT_FN_ATTRS
4602 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4604 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4606 _mm512_setzero_ps (),
4610 static __inline __m512d __DEFAULT_FN_ATTRS
4611 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4613 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4618 static __inline __m512d __DEFAULT_FN_ATTRS
4619 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4621 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4623 _mm512_setzero_pd (),
4627 static __inline __m512d __DEFAULT_FN_ATTRS
4628 _mm512_loadu_pd(void const *__p)
4632 } __attribute__((__packed__, __may_alias__));
4633 return ((struct __loadu_pd*)__p)->__v;
4636 static __inline __m512 __DEFAULT_FN_ATTRS
4637 _mm512_loadu_ps(void const *__p)
4641 } __attribute__((__packed__, __may_alias__));
4642 return ((struct __loadu_ps*)__p)->__v;
4645 static __inline __m512 __DEFAULT_FN_ATTRS
4646 _mm512_load_ps(void const *__p)
4648 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4650 _mm512_setzero_ps (),
4654 static __inline __m512 __DEFAULT_FN_ATTRS
4655 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4657 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4662 static __inline __m512 __DEFAULT_FN_ATTRS
4663 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4665 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4667 _mm512_setzero_ps (),
4671 static __inline __m512d __DEFAULT_FN_ATTRS
4672 _mm512_load_pd(void const *__p)
4674 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4676 _mm512_setzero_pd (),
4680 static __inline __m512d __DEFAULT_FN_ATTRS
4681 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4683 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4688 static __inline __m512d __DEFAULT_FN_ATTRS
4689 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4691 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4693 _mm512_setzero_pd (),
4697 static __inline __m512i __DEFAULT_FN_ATTRS
4698 _mm512_load_si512 (void const *__P)
4700 return *(__m512i *) __P;
4703 static __inline __m512i __DEFAULT_FN_ATTRS
4704 _mm512_load_epi32 (void const *__P)
4706 return *(__m512i *) __P;
4709 static __inline __m512i __DEFAULT_FN_ATTRS
4710 _mm512_load_epi64 (void const *__P)
4712 return *(__m512i *) __P;
4715 /* SIMD store ops */
4717 static __inline void __DEFAULT_FN_ATTRS
4718 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4720 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4724 static __inline void __DEFAULT_FN_ATTRS
4725 _mm512_storeu_si512 (void *__P, __m512i __A)
4727 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4731 static __inline void __DEFAULT_FN_ATTRS
4732 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4734 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4738 static __inline void __DEFAULT_FN_ATTRS
4739 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4741 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4744 static __inline void __DEFAULT_FN_ATTRS
4745 _mm512_storeu_pd(void *__P, __m512d __A)
4747 __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4750 static __inline void __DEFAULT_FN_ATTRS
4751 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4753 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4757 static __inline void __DEFAULT_FN_ATTRS
4758 _mm512_storeu_ps(void *__P, __m512 __A)
4760 __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4763 static __inline void __DEFAULT_FN_ATTRS
4764 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4766 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4769 static __inline void __DEFAULT_FN_ATTRS
4770 _mm512_store_pd(void *__P, __m512d __A)
4772 *(__m512d*)__P = __A;
4775 static __inline void __DEFAULT_FN_ATTRS
4776 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4778 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4782 static __inline void __DEFAULT_FN_ATTRS
4783 _mm512_store_ps(void *__P, __m512 __A)
4785 *(__m512*)__P = __A;
4788 static __inline void __DEFAULT_FN_ATTRS
4789 _mm512_store_si512 (void *__P, __m512i __A)
4791 *(__m512i *) __P = __A;
4794 static __inline void __DEFAULT_FN_ATTRS
4795 _mm512_store_epi32 (void *__P, __m512i __A)
4797 *(__m512i *) __P = __A;
4800 static __inline void __DEFAULT_FN_ATTRS
4801 _mm512_store_epi64 (void *__P, __m512i __A)
4803 *(__m512i *) __P = __A;
4808 static __inline __mmask16 __DEFAULT_FN_ATTRS
4809 _mm512_knot(__mmask16 __M)
4811 return __builtin_ia32_knothi(__M);
/* Integer compare */

/* Convenience wrappers over _mm512_[mask_]cmp_ep{i,u}{32,64}_mask with a
   fixed comparison predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4916 static __inline__ __m512i __DEFAULT_FN_ATTRS
4917 _mm512_cvtepi8_epi32(__m128i __A)
4919 /* This function always performs a signed extension, but __v16qi is a char
4920 which may be signed or unsigned, so use __v16qs. */
4921 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4924 static __inline__ __m512i __DEFAULT_FN_ATTRS
4925 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4927 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4928 (__v16si)_mm512_cvtepi8_epi32(__A),
4932 static __inline__ __m512i __DEFAULT_FN_ATTRS
4933 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4935 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4936 (__v16si)_mm512_cvtepi8_epi32(__A),
4937 (__v16si)_mm512_setzero_si512());
4940 static __inline__ __m512i __DEFAULT_FN_ATTRS
4941 _mm512_cvtepi8_epi64(__m128i __A)
4943 /* This function always performs a signed extension, but __v16qi is a char
4944 which may be signed or unsigned, so use __v16qs. */
4945 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4948 static __inline__ __m512i __DEFAULT_FN_ATTRS
4949 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4951 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4952 (__v8di)_mm512_cvtepi8_epi64(__A),
4956 static __inline__ __m512i __DEFAULT_FN_ATTRS
4957 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4959 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4960 (__v8di)_mm512_cvtepi8_epi64(__A),
4961 (__v8di)_mm512_setzero_si512 ());
4964 static __inline__ __m512i __DEFAULT_FN_ATTRS
4965 _mm512_cvtepi32_epi64(__m256i __X)
4967 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4970 static __inline__ __m512i __DEFAULT_FN_ATTRS
4971 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4973 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4974 (__v8di)_mm512_cvtepi32_epi64(__X),
4978 static __inline__ __m512i __DEFAULT_FN_ATTRS
4979 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
4981 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4982 (__v8di)_mm512_cvtepi32_epi64(__X),
4983 (__v8di)_mm512_setzero_si512());
4986 static __inline__ __m512i __DEFAULT_FN_ATTRS
4987 _mm512_cvtepi16_epi32(__m256i __A)
4989 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4992 static __inline__ __m512i __DEFAULT_FN_ATTRS
4993 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4995 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4996 (__v16si)_mm512_cvtepi16_epi32(__A),
5000 static __inline__ __m512i __DEFAULT_FN_ATTRS
5001 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5003 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5004 (__v16si)_mm512_cvtepi16_epi32(__A),
5005 (__v16si)_mm512_setzero_si512 ());
5008 static __inline__ __m512i __DEFAULT_FN_ATTRS
5009 _mm512_cvtepi16_epi64(__m128i __A)
5011 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5014 static __inline__ __m512i __DEFAULT_FN_ATTRS
5015 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5017 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5018 (__v8di)_mm512_cvtepi16_epi64(__A),
5022 static __inline__ __m512i __DEFAULT_FN_ATTRS
5023 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5025 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5026 (__v8di)_mm512_cvtepi16_epi64(__A),
5027 (__v8di)_mm512_setzero_si512());
5030 static __inline__ __m512i __DEFAULT_FN_ATTRS
5031 _mm512_cvtepu8_epi32(__m128i __A)
5033 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5036 static __inline__ __m512i __DEFAULT_FN_ATTRS
5037 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5039 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5040 (__v16si)_mm512_cvtepu8_epi32(__A),
5044 static __inline__ __m512i __DEFAULT_FN_ATTRS
5045 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5047 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5048 (__v16si)_mm512_cvtepu8_epi32(__A),
5049 (__v16si)_mm512_setzero_si512());
5052 static __inline__ __m512i __DEFAULT_FN_ATTRS
5053 _mm512_cvtepu8_epi64(__m128i __A)
5055 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5058 static __inline__ __m512i __DEFAULT_FN_ATTRS
5059 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5061 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5062 (__v8di)_mm512_cvtepu8_epi64(__A),
5066 static __inline__ __m512i __DEFAULT_FN_ATTRS
5067 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5069 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5070 (__v8di)_mm512_cvtepu8_epi64(__A),
5071 (__v8di)_mm512_setzero_si512());
5074 static __inline__ __m512i __DEFAULT_FN_ATTRS
5075 _mm512_cvtepu32_epi64(__m256i __X)
5077 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5080 static __inline__ __m512i __DEFAULT_FN_ATTRS
5081 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5083 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5084 (__v8di)_mm512_cvtepu32_epi64(__X),
5088 static __inline__ __m512i __DEFAULT_FN_ATTRS
5089 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5091 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5092 (__v8di)_mm512_cvtepu32_epi64(__X),
5093 (__v8di)_mm512_setzero_si512());
5096 static __inline__ __m512i __DEFAULT_FN_ATTRS
5097 _mm512_cvtepu16_epi32(__m256i __A)
5099 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5102 static __inline__ __m512i __DEFAULT_FN_ATTRS
5103 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5105 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5106 (__v16si)_mm512_cvtepu16_epi32(__A),
5110 static __inline__ __m512i __DEFAULT_FN_ATTRS
5111 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5113 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5114 (__v16si)_mm512_cvtepu16_epi32(__A),
5115 (__v16si)_mm512_setzero_si512());
5118 static __inline__ __m512i __DEFAULT_FN_ATTRS
5119 _mm512_cvtepu16_epi64(__m128i __A)
5121 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5124 static __inline__ __m512i __DEFAULT_FN_ATTRS
5125 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5127 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5128 (__v8di)_mm512_cvtepu16_epi64(__A),
5132 static __inline__ __m512i __DEFAULT_FN_ATTRS
5133 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5135 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5136 (__v8di)_mm512_cvtepu16_epi64(__A),
5137 (__v8di)_mm512_setzero_si512());
5140 static __inline__ __m512i __DEFAULT_FN_ATTRS
5141 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
5143 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5146 _mm512_setzero_si512 (),
5150 static __inline__ __m512i __DEFAULT_FN_ATTRS
5151 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5153 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5159 static __inline__ __m512i __DEFAULT_FN_ATTRS
5160 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5162 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5165 _mm512_setzero_si512 (),
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS
5170 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
5172 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5175 _mm512_setzero_si512 (),
5179 static __inline__ __m512i __DEFAULT_FN_ATTRS
5180 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5182 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5188 static __inline__ __m512i __DEFAULT_FN_ATTRS
5189 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5191 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5194 _mm512_setzero_si512 (),
/* Generic integer comparisons: predicate p selects eq/lt/le/ne/ge/gt. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
/* Rotate-left by immediate count b (32- and 64-bit element forms). */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5268 static __inline__ __m512i __DEFAULT_FN_ATTRS
5269 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5271 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5274 _mm512_setzero_si512 (),
5278 static __inline__ __m512i __DEFAULT_FN_ATTRS
5279 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5281 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5287 static __inline__ __m512i __DEFAULT_FN_ATTRS
5288 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5290 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5293 _mm512_setzero_si512 (),
5297 static __inline__ __m512i __DEFAULT_FN_ATTRS
5298 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5300 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5303 _mm512_setzero_si512 (),
5307 static __inline__ __m512i __DEFAULT_FN_ATTRS
5308 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5310 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5316 static __inline__ __m512i __DEFAULT_FN_ATTRS
5317 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5319 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5322 _mm512_setzero_si512 (),
/* Rotate-right by immediate count B (32- and 64-bit element forms). */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5355 static __inline__ __m512i __DEFAULT_FN_ATTRS
5356 _mm512_slli_epi32(__m512i __A, int __B)
5358 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5361 static __inline__ __m512i __DEFAULT_FN_ATTRS
5362 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5364 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5365 (__v16si)_mm512_slli_epi32(__A, __B),
5369 static __inline__ __m512i __DEFAULT_FN_ATTRS
5370 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5371 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5372 (__v16si)_mm512_slli_epi32(__A, __B),
5373 (__v16si)_mm512_setzero_si512());
5376 static __inline__ __m512i __DEFAULT_FN_ATTRS
5377 _mm512_slli_epi64(__m512i __A, int __B)
5379 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5382 static __inline__ __m512i __DEFAULT_FN_ATTRS
5383 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5385 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5386 (__v8di)_mm512_slli_epi64(__A, __B),
5390 static __inline__ __m512i __DEFAULT_FN_ATTRS
5391 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5393 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5394 (__v8di)_mm512_slli_epi64(__A, __B),
5395 (__v8di)_mm512_setzero_si512());
5398 static __inline__ __m512i __DEFAULT_FN_ATTRS
5399 _mm512_srli_epi32(__m512i __A, int __B)
5401 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5404 static __inline__ __m512i __DEFAULT_FN_ATTRS
5405 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5407 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5408 (__v16si)_mm512_srli_epi32(__A, __B),
5412 static __inline__ __m512i __DEFAULT_FN_ATTRS
5413 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5414 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5415 (__v16si)_mm512_srli_epi32(__A, __B),
5416 (__v16si)_mm512_setzero_si512());
5419 static __inline__ __m512i __DEFAULT_FN_ATTRS
5420 _mm512_srli_epi64(__m512i __A, int __B)
5422 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5425 static __inline__ __m512i __DEFAULT_FN_ATTRS
5426 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5428 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5429 (__v8di)_mm512_srli_epi64(__A, __B),
5433 static __inline__ __m512i __DEFAULT_FN_ATTRS
5434 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5436 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5437 (__v8di)_mm512_srli_epi64(__A, __B),
5438 (__v8di)_mm512_setzero_si512());
5441 static __inline__ __m512i __DEFAULT_FN_ATTRS
5442 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5444 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5449 static __inline__ __m512i __DEFAULT_FN_ATTRS
5450 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5452 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5454 _mm512_setzero_si512 (),
5458 static __inline__ void __DEFAULT_FN_ATTRS
5459 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5461 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5465 static __inline__ __m512i __DEFAULT_FN_ATTRS
5466 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5468 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5473 static __inline__ __m512i __DEFAULT_FN_ATTRS
5474 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5476 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5478 (__v16si) _mm512_setzero_si512 ());
5481 static __inline__ __m512i __DEFAULT_FN_ATTRS
5482 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5484 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5489 static __inline__ __m512i __DEFAULT_FN_ATTRS
5490 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5492 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5494 (__v8di) _mm512_setzero_si512 ());
5497 static __inline__ __m512i __DEFAULT_FN_ATTRS
5498 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5500 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5505 static __inline__ __m512i __DEFAULT_FN_ATTRS
5506 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5508 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5510 _mm512_setzero_si512 (),
5514 static __inline__ void __DEFAULT_FN_ATTRS
5515 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5517 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5521 static __inline__ __m512d __DEFAULT_FN_ATTRS
5522 _mm512_movedup_pd (__m512d __A)
5524 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5525 0, 0, 2, 2, 4, 4, 6, 6);
5528 static __inline__ __m512d __DEFAULT_FN_ATTRS
5529 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5531 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5532 (__v8df)_mm512_movedup_pd(__A),
5536 static __inline__ __m512d __DEFAULT_FN_ATTRS
5537 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5539 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5540 (__v8df)_mm512_movedup_pd(__A),
5541 (__v8df)_mm512_setzero_pd());
/* Fix up special values (packed double) in A/B using the per-element
   lookup table in C; imm selects the responses, R is the SAE/rounding
   control.  The all-ones mask leaves every element enabled. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })
5550 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
5551 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5552 (__v8df)(__m512d)(B), \
5553 (__v8di)(__m512i)(C), (int)(imm), \
5554 (__mmask8)(U), (int)(R)); })
5556 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5557 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5558 (__v8df)(__m512d)(B), \
5559 (__v8di)(__m512i)(C), (int)(imm), \
5561 _MM_FROUND_CUR_DIRECTION); })
5563 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5564 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5565 (__v8df)(__m512d)(B), \
5566 (__v8di)(__m512i)(C), (int)(imm), \
5568 _MM_FROUND_CUR_DIRECTION); })
5570 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
5571 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5572 (__v8df)(__m512d)(B), \
5573 (__v8di)(__m512i)(C), \
5574 (int)(imm), (__mmask8)(U), \
5577 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
5578 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5579 (__v8df)(__m512d)(B), \
5580 (__v8di)(__m512i)(C), \
5581 (int)(imm), (__mmask8)(U), \
5582 _MM_FROUND_CUR_DIRECTION); })
5584 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
5585 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5586 (__v16sf)(__m512)(B), \
5587 (__v16si)(__m512i)(C), (int)(imm), \
5588 (__mmask16)-1, (int)(R)); })
5590 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
5591 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5592 (__v16sf)(__m512)(B), \
5593 (__v16si)(__m512i)(C), (int)(imm), \
5594 (__mmask16)(U), (int)(R)); })
5596 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5597 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5598 (__v16sf)(__m512)(B), \
5599 (__v16si)(__m512i)(C), (int)(imm), \
5601 _MM_FROUND_CUR_DIRECTION); })
5603 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5604 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5605 (__v16sf)(__m512)(B), \
5606 (__v16si)(__m512i)(C), (int)(imm), \
5608 _MM_FROUND_CUR_DIRECTION); })
5610 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
5611 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5612 (__v16sf)(__m512)(B), \
5613 (__v16si)(__m512i)(C), \
5614 (int)(imm), (__mmask16)(U), \
5617 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5618 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5619 (__v16sf)(__m512)(B), \
5620 (__v16si)(__m512i)(C), \
5621 (int)(imm), (__mmask16)(U), \
5622 _MM_FROUND_CUR_DIRECTION); })
/* Fix up special values in the lowest double-precision element of B using
   the table element in C; imm selects the responses and R the SAE control.
   Upper element is passed through from A; mask is all-ones (no masking). */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })
5630 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
5631 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5632 (__v2df)(__m128d)(B), \
5633 (__v2di)(__m128i)(C), (int)(imm), \
5634 (__mmask8)(U), (int)(R)); })
5636 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
5637 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5638 (__v2df)(__m128d)(B), \
5639 (__v2di)(__m128i)(C), (int)(imm), \
5641 _MM_FROUND_CUR_DIRECTION); })
5643 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
5644 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5645 (__v2df)(__m128d)(B), \
5646 (__v2di)(__m128i)(C), (int)(imm), \
5648 _MM_FROUND_CUR_DIRECTION); })
5650 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
5651 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5652 (__v2df)(__m128d)(B), \
5653 (__v2di)(__m128i)(C), (int)(imm), \
5654 (__mmask8)(U), (int)(R)); })
5656 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
5657 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5658 (__v2df)(__m128d)(B), \
5659 (__v2di)(__m128i)(C), (int)(imm), \
5661 _MM_FROUND_CUR_DIRECTION); })
5663 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
5664 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5665 (__v4sf)(__m128)(B), \
5666 (__v4si)(__m128i)(C), (int)(imm), \
5667 (__mmask8)-1, (int)(R)); })
5669 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
5670 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5671 (__v4sf)(__m128)(B), \
5672 (__v4si)(__m128i)(C), (int)(imm), \
5673 (__mmask8)(U), (int)(R)); })
5675 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
5676 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5677 (__v4sf)(__m128)(B), \
5678 (__v4si)(__m128i)(C), (int)(imm), \
5680 _MM_FROUND_CUR_DIRECTION); })
5682 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
5683 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5684 (__v4sf)(__m128)(B), \
5685 (__v4si)(__m128i)(C), (int)(imm), \
5687 _MM_FROUND_CUR_DIRECTION); })
5689 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
5690 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5691 (__v4sf)(__m128)(B), \
5692 (__v4si)(__m128i)(C), (int)(imm), \
5693 (__mmask8)(U), (int)(R)); })
5695 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
5696 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5697 (__v4sf)(__m128)(B), \
5698 (__v4si)(__m128i)(C), (int)(imm), \
5700 _MM_FROUND_CUR_DIRECTION); })
/* Extract the biased exponent of the lowest double-precision element of B
   as a double (upper element passed through from A), with SAE control R;
   no masking (mask = all ones).  The pd zero is only the unused
   pass-through operand of the masked builtin. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })
5709 static __inline__ __m128d __DEFAULT_FN_ATTRS
5710 _mm_getexp_sd (__m128d __A, __m128d __B)
5712 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5713 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5716 static __inline__ __m128d __DEFAULT_FN_ATTRS
5717 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5719 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5723 _MM_FROUND_CUR_DIRECTION);
5726 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
5727 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5728 (__v2df)(__m128d)(B), \
5729 (__v2df)(__m128d)(W), \
5730 (__mmask8)(U), (int)(R)); })
5732 static __inline__ __m128d __DEFAULT_FN_ATTRS
5733 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5735 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5737 (__v2df) _mm_setzero_pd (),
5739 _MM_FROUND_CUR_DIRECTION);
/* Zero-masked getexp of the lowest double-precision element of B with SAE
   control R: the low result element is zeroed when bit 0 of U is clear;
   upper element comes from A. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R)); })
/* Extract the biased exponent of the lowest single-precision element of B
   as a float (upper elements passed through from A), with SAE control R;
   no masking (mask = all ones). */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R)); })
5754 static __inline__ __m128 __DEFAULT_FN_ATTRS
5755 _mm_getexp_ss (__m128 __A, __m128 __B)
5757 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5758 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5761 static __inline__ __m128 __DEFAULT_FN_ATTRS
5762 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5764 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5768 _MM_FROUND_CUR_DIRECTION);
5771 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
5772 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5773 (__v4sf)(__m128)(B), \
5774 (__v4sf)(__m128)(W), \
5775 (__mmask8)(U), (int)(R)); })
5777 static __inline__ __m128 __DEFAULT_FN_ATTRS
5778 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5780 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5782 (__v4sf) _mm_setzero_pd (),
5784 _MM_FROUND_CUR_DIRECTION);
/* Zero-masked getexp of the lowest single-precision element of B with SAE
   control R: the low result element is zeroed when bit 0 of U is clear;
   upper elements come from A. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)); })
/* Extract the normalized mantissa of the lowest double-precision element
   of B.  C selects the normalization interval and D the sign control;
   they are packed into one immediate as (D<<2)|C.  Upper element passed
   through from A; SAE control R; no masking. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)); })
5800 #define _mm_getmant_sd(A, B, C, D) __extension__ ({ \
5801 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5802 (__v2df)(__m128d)(B), \
5803 (int)(((D)<<2) | (C)), \
5804 (__v2df)_mm_setzero_pd(), \
5806 _MM_FROUND_CUR_DIRECTION); })
5808 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
5809 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5810 (__v2df)(__m128d)(B), \
5811 (int)(((D)<<2) | (C)), \
5812 (__v2df)(__m128d)(W), \
5814 _MM_FROUND_CUR_DIRECTION); })
/* Merge-masked getmant of the lowest double-precision element of B with
   SAE control R: the low result comes from W when bit 0 of U is clear;
   (D<<2)|C packs the interval/sign-control immediates.
   Fix: add __extension__ before the GNU statement expression — every
   sibling macro in this header carries it, and without it the macro
   triggers -pedantic diagnostics in strict-conformance builds. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
5823 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
5824 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5825 (__v2df)(__m128d)(B), \
5826 (int)(((D)<<2) | (C)), \
5827 (__v2df)_mm_setzero_pd(), \
5829 _MM_FROUND_CUR_DIRECTION); })
5831 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
5832 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5833 (__v2df)(__m128d)(B), \
5834 (int)(((D)<<2) | (C)), \
5835 (__v2df)_mm_setzero_pd(), \
5836 (__mmask8)(U), (int)(R)); })
5838 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
5839 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5840 (__v4sf)(__m128)(B), \
5841 (int)(((D)<<2) | (C)), \
5842 (__v4sf)_mm_setzero_ps(), \
5843 (__mmask8)-1, (int)(R)); })
5845 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
5846 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5847 (__v4sf)(__m128)(B), \
5848 (int)(((D)<<2) | (C)), \
5849 (__v4sf)_mm_setzero_ps(), \
5851 _MM_FROUND_CUR_DIRECTION); })
5853 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
5854 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5855 (__v4sf)(__m128)(B), \
5856 (int)(((D)<<2) | (C)), \
5857 (__v4sf)(__m128)(W), \
5859 _MM_FROUND_CUR_DIRECTION); })
/* Merge-masked getmant of the lowest single-precision element of B with
   SAE control R: the low result comes from W when bit 0 of U is clear;
   (D<<2)|C packs the interval/sign-control immediates.
   Fix: add __extension__ before the GNU statement expression — every
   sibling macro in this header carries it, and without it the macro
   triggers -pedantic diagnostics in strict-conformance builds. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
/* Zero-masked getmant of the lowest single-precision element of B
   (interval C, sign control D, packed as (D<<2)|C); upper elements come
   from A, and the low result element is zeroed when bit 0 of U is clear.
   Fix: the zero fallback must be _mm_setzero_ps() — the original used the
   double-precision _mm_setzero_pd(), giving the builtin a __v2df operand
   where a __v4sf is required. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5876 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
5877 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5878 (__v4sf)(__m128)(B), \
5879 (int)(((D)<<2) | (C)), \
5880 (__v4sf)_mm_setzero_ps(), \
5881 (__mmask8)(U), (int)(R)); })
5883 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5884 _mm512_kmov (__mmask16 __A)
/* Compare the lowest double-precision elements of A and B using comparison
   predicate P, with SAE control R; yields the scalar comparison result as
   an int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R)); })
/* Compare the lowest single-precision elements of A and B using comparison
   predicate P, with SAE control R; yields the scalar comparison result as
   an int. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R)); })
/* Convert the lowest double-precision element of A to a signed 64-bit
   integer using rounding control R (64-bit targets only). */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
5902 static __inline__ __m512i __DEFAULT_FN_ATTRS
5903 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5904 __mmask16 __U, __m512i __B)
5906 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5913 static __inline__ __m512i __DEFAULT_FN_ATTRS
5914 _mm512_sll_epi32(__m512i __A, __m128i __B)
5916 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5919 static __inline__ __m512i __DEFAULT_FN_ATTRS
5920 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5922 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5923 (__v16si)_mm512_sll_epi32(__A, __B),
5927 static __inline__ __m512i __DEFAULT_FN_ATTRS
5928 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5930 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5931 (__v16si)_mm512_sll_epi32(__A, __B),
5932 (__v16si)_mm512_setzero_si512());
5935 static __inline__ __m512i __DEFAULT_FN_ATTRS
5936 _mm512_sll_epi64(__m512i __A, __m128i __B)
5938 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5941 static __inline__ __m512i __DEFAULT_FN_ATTRS
5942 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5944 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5945 (__v8di)_mm512_sll_epi64(__A, __B),
5949 static __inline__ __m512i __DEFAULT_FN_ATTRS
5950 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5952 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5953 (__v8di)_mm512_sll_epi64(__A, __B),
5954 (__v8di)_mm512_setzero_si512());
5957 static __inline__ __m512i __DEFAULT_FN_ATTRS
5958 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
5960 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5963 static __inline__ __m512i __DEFAULT_FN_ATTRS
5964 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5966 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5967 (__v16si)_mm512_sllv_epi32(__X, __Y),
5971 static __inline__ __m512i __DEFAULT_FN_ATTRS
5972 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5974 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5975 (__v16si)_mm512_sllv_epi32(__X, __Y),
5976 (__v16si)_mm512_setzero_si512());
5979 static __inline__ __m512i __DEFAULT_FN_ATTRS
5980 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
5982 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5985 static __inline__ __m512i __DEFAULT_FN_ATTRS
5986 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5988 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5989 (__v8di)_mm512_sllv_epi64(__X, __Y),
5993 static __inline__ __m512i __DEFAULT_FN_ATTRS
5994 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5996 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5997 (__v8di)_mm512_sllv_epi64(__X, __Y),
5998 (__v8di)_mm512_setzero_si512());
6001 static __inline__ __m512i __DEFAULT_FN_ATTRS
6002 _mm512_sra_epi32(__m512i __A, __m128i __B)
6004 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6007 static __inline__ __m512i __DEFAULT_FN_ATTRS
6008 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6010 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6011 (__v16si)_mm512_sra_epi32(__A, __B),
6015 static __inline__ __m512i __DEFAULT_FN_ATTRS
6016 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6018 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6019 (__v16si)_mm512_sra_epi32(__A, __B),
6020 (__v16si)_mm512_setzero_si512());
6023 static __inline__ __m512i __DEFAULT_FN_ATTRS
6024 _mm512_sra_epi64(__m512i __A, __m128i __B)
6026 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6029 static __inline__ __m512i __DEFAULT_FN_ATTRS
6030 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6032 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6033 (__v8di)_mm512_sra_epi64(__A, __B),
6037 static __inline__ __m512i __DEFAULT_FN_ATTRS
6038 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6040 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6041 (__v8di)_mm512_sra_epi64(__A, __B),
6042 (__v8di)_mm512_setzero_si512());
6045 static __inline__ __m512i __DEFAULT_FN_ATTRS
6046 _mm512_srav_epi32(__m512i __X, __m512i __Y)
6048 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6051 static __inline__ __m512i __DEFAULT_FN_ATTRS
6052 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6054 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6055 (__v16si)_mm512_srav_epi32(__X, __Y),
6059 static __inline__ __m512i __DEFAULT_FN_ATTRS
6060 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6062 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6063 (__v16si)_mm512_srav_epi32(__X, __Y),
6064 (__v16si)_mm512_setzero_si512());
6067 static __inline__ __m512i __DEFAULT_FN_ATTRS
6068 _mm512_srav_epi64(__m512i __X, __m512i __Y)
6070 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6073 static __inline__ __m512i __DEFAULT_FN_ATTRS
6074 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6076 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6077 (__v8di)_mm512_srav_epi64(__X, __Y),
6081 static __inline__ __m512i __DEFAULT_FN_ATTRS
6082 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6084 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6085 (__v8di)_mm512_srav_epi64(__X, __Y),
6086 (__v8di)_mm512_setzero_si512());
6089 static __inline__ __m512i __DEFAULT_FN_ATTRS
6090 _mm512_srl_epi32(__m512i __A, __m128i __B)
6092 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6095 static __inline__ __m512i __DEFAULT_FN_ATTRS
6096 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6098 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6099 (__v16si)_mm512_srl_epi32(__A, __B),
6103 static __inline__ __m512i __DEFAULT_FN_ATTRS
6104 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6106 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6107 (__v16si)_mm512_srl_epi32(__A, __B),
6108 (__v16si)_mm512_setzero_si512());
6111 static __inline__ __m512i __DEFAULT_FN_ATTRS
6112 _mm512_srl_epi64(__m512i __A, __m128i __B)
6114 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6117 static __inline__ __m512i __DEFAULT_FN_ATTRS
6118 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6120 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6121 (__v8di)_mm512_srl_epi64(__A, __B),
6125 static __inline__ __m512i __DEFAULT_FN_ATTRS
6126 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6128 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6129 (__v8di)_mm512_srl_epi64(__A, __B),
6130 (__v8di)_mm512_setzero_si512());
6133 static __inline__ __m512i __DEFAULT_FN_ATTRS
6134 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
6136 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6139 static __inline__ __m512i __DEFAULT_FN_ATTRS
6140 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6142 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6143 (__v16si)_mm512_srlv_epi32(__X, __Y),
6147 static __inline__ __m512i __DEFAULT_FN_ATTRS
6148 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6150 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6151 (__v16si)_mm512_srlv_epi32(__X, __Y),
6152 (__v16si)_mm512_setzero_si512());
6155 static __inline__ __m512i __DEFAULT_FN_ATTRS
6156 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6158 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6161 static __inline__ __m512i __DEFAULT_FN_ATTRS
6162 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6164 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6165 (__v8di)_mm512_srlv_epi64(__X, __Y),
6169 static __inline__ __m512i __DEFAULT_FN_ATTRS
6170 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6172 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6173 (__v8di)_mm512_srlv_epi64(__X, __Y),
6174 (__v8di)_mm512_setzero_si512());
6177 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6178 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6179 (__v16si)(__m512i)(B), \
6180 (__v16si)(__m512i)(C), (int)(imm), \
6183 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6184 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6185 (__v16si)(__m512i)(B), \
6186 (__v16si)(__m512i)(C), (int)(imm), \
6189 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6190 (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
6191 (__v16si)(__m512i)(B), \
6192 (__v16si)(__m512i)(C), \
6193 (int)(imm), (__mmask16)(U)); })
6195 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6196 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6197 (__v8di)(__m512i)(B), \
6198 (__v8di)(__m512i)(C), (int)(imm), \
6201 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6202 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6203 (__v8di)(__m512i)(B), \
6204 (__v8di)(__m512i)(C), (int)(imm), \
6207 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6208 (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
6209 (__v8di)(__m512i)(B), \
6210 (__v8di)(__m512i)(C), (int)(imm), \
6214 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
6215 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6218 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
6219 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6221 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
6222 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6224 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
6225 (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
/* Convert the lowest double-precision element of __A to an unsigned 32-bit
   integer using the current rounding mode.
   NOTE(review): the function's brace lines appear to have been lost in
   this extract — body statement is intact. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
6235 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
6236 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6239 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6240 _mm_cvtsd_u64 (__m128d __A)
6242 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6244 _MM_FROUND_CUR_DIRECTION);
6248 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
6249 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6251 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
6252 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6255 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
6256 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6258 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
6259 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6262 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
6263 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6265 static __inline__ unsigned __DEFAULT_FN_ATTRS
6266 _mm_cvtss_u32 (__m128 __A)
6268 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6269 _MM_FROUND_CUR_DIRECTION);
6273 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6274 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6277 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6278 _mm_cvtss_u64 (__m128 __A)
6280 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6282 _MM_FROUND_CUR_DIRECTION);
6286 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
6287 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6289 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
6290 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6292 static __inline__ int __DEFAULT_FN_ATTRS
6293 _mm_cvttsd_i32 (__m128d __A)
6295 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6296 _MM_FROUND_CUR_DIRECTION);
6300 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6301 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6303 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6304 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6306 static __inline__ long long __DEFAULT_FN_ATTRS
6307 _mm_cvttsd_i64 (__m128d __A)
6309 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6310 _MM_FROUND_CUR_DIRECTION);
6314 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
6315 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6317 static __inline__ unsigned __DEFAULT_FN_ATTRS
6318 _mm_cvttsd_u32 (__m128d __A)
6320 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6321 _MM_FROUND_CUR_DIRECTION);
6325 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6326 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6329 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6330 _mm_cvttsd_u64 (__m128d __A)
6332 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6334 _MM_FROUND_CUR_DIRECTION);
6338 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
6339 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6341 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
6342 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6344 static __inline__ int __DEFAULT_FN_ATTRS
6345 _mm_cvttss_i32 (__m128 __A)
6347 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6348 _MM_FROUND_CUR_DIRECTION);
6352 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6353 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6355 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6356 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6358 static __inline__ long long __DEFAULT_FN_ATTRS
6359 _mm_cvttss_i64 (__m128 __A)
6361 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6362 _MM_FROUND_CUR_DIRECTION);
6366 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
6367 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6369 static __inline__ unsigned __DEFAULT_FN_ATTRS
6370 _mm_cvttss_u32 (__m128 __A)
6372 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6373 _MM_FROUND_CUR_DIRECTION);
6377 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6378 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6381 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6382 _mm_cvttss_u64 (__m128 __A)
6384 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6386 _MM_FROUND_CUR_DIRECTION);
6390 static __inline__ __m512d __DEFAULT_FN_ATTRS
6391 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6394 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6401 static __inline__ __m512 __DEFAULT_FN_ATTRS
6402 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6405 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6412 static __inline__ __m512i __DEFAULT_FN_ATTRS
6413 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6414 __mmask8 __U, __m512i __B)
6416 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
/* VPERMILPD/VPERMILPS with immediate control, lowered to a compile-time
   shufflevector; each 128-bit lane is permuted independently. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0  + (((C) >> 0) & 0x3), \
                                  0  + (((C) >> 2) & 0x3), \
                                  0  + (((C) >> 4) & 0x3), \
                                  0  + (((C) >> 6) & 0x3), \
                                  4  + (((C) >> 0) & 0x3), \
                                  4  + (((C) >> 2) & 0x3), \
                                  4  + (((C) >> 4) & 0x3), \
                                  4  + (((C) >> 6) & 0x3), \
                                  8  + (((C) >> 0) & 0x3), \
                                  8  + (((C) >> 2) & 0x3), \
                                  8  + (((C) >> 4) & 0x3), \
                                  8  + (((C) >> 6) & 0x3), \
                                  12 + (((C) >> 0) & 0x3), \
                                  12 + (((C) >> 2) & 0x3), \
                                  12 + (((C) >> 4) & 0x3), \
                                  12 + (((C) >> 6) & 0x3)); })

#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })
6475 static __inline__ __m512d __DEFAULT_FN_ATTRS
6476 _mm512_permutevar_pd(__m512d __A, __m512i __C)
6478 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6481 static __inline__ __m512d __DEFAULT_FN_ATTRS
6482 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6484 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6485 (__v8df)_mm512_permutevar_pd(__A, __C),
6489 static __inline__ __m512d __DEFAULT_FN_ATTRS
6490 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6492 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6493 (__v8df)_mm512_permutevar_pd(__A, __C),
6494 (__v8df)_mm512_setzero_pd());
6497 static __inline__ __m512 __DEFAULT_FN_ATTRS
6498 _mm512_permutevar_ps(__m512 __A, __m512i __C)
6500 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6503 static __inline__ __m512 __DEFAULT_FN_ATTRS
6504 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6506 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6507 (__v16sf)_mm512_permutevar_ps(__A, __C),
6511 static __inline__ __m512 __DEFAULT_FN_ATTRS
6512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6514 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6515 (__v16sf)_mm512_permutevar_ps(__A, __C),
6516 (__v16sf)_mm512_setzero_ps());
6519 static __inline __m512d __DEFAULT_FN_ATTRS
6520 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6522 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6529 static __inline__ __m512d __DEFAULT_FN_ATTRS
6530 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6532 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6539 static __inline__ __m512d __DEFAULT_FN_ATTRS
6540 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6543 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6550 static __inline __m512 __DEFAULT_FN_ATTRS
6551 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6553 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6560 static __inline__ __m512 __DEFAULT_FN_ATTRS
6561 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6563 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6570 static __inline__ __m512 __DEFAULT_FN_ATTRS
6571 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6574 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6582 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
6583 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6584 (__v8si)_mm256_undefined_si256(), \
6585 (__mmask8)-1, (int)(R)); })
6587 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
6588 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6589 (__v8si)(__m256i)(W), \
6590 (__mmask8)(U), (int)(R)); })
6592 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
6593 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6594 (__v8si)_mm256_setzero_si256(), \
6595 (__mmask8)(U), (int)(R)); })
6597 static __inline__ __m256i __DEFAULT_FN_ATTRS
6598 _mm512_cvttpd_epu32 (__m512d __A)
6600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6602 _mm256_undefined_si256 (),
6604 _MM_FROUND_CUR_DIRECTION);
6607 static __inline__ __m256i __DEFAULT_FN_ATTRS
6608 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6610 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6613 _MM_FROUND_CUR_DIRECTION);
6616 static __inline__ __m256i __DEFAULT_FN_ATTRS
6617 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6619 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6621 _mm256_setzero_si256 (),
6623 _MM_FROUND_CUR_DIRECTION);
/* Scalar VRNDSCALESD: round the lower double of B per imm, pass the
   upper element of A through.  *_round_* variants forward R. */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)); })

#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })

#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })
/* Scalar VRNDSCALESS: round the lower float of B per imm, pass the
   upper elements of A through.  *_round_* variants forward R. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               (int)(R)); })

#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })

#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })
6710 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
6711 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6712 (__v8df)(__m512d)(B), \
6713 (__v8df)_mm512_undefined_pd(), \
6714 (__mmask8)-1, (int)(R)); })
6716 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
6717 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6718 (__v8df)(__m512d)(B), \
6719 (__v8df)(__m512d)(W), \
6720 (__mmask8)(U), (int)(R)); })
6722 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
6723 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6724 (__v8df)(__m512d)(B), \
6725 (__v8df)_mm512_setzero_pd(), \
6726 (__mmask8)(U), (int)(R)); })
6728 static __inline__ __m512d __DEFAULT_FN_ATTRS
6729 _mm512_scalef_pd (__m512d __A, __m512d __B)
6731 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6734 _mm512_undefined_pd (),
6736 _MM_FROUND_CUR_DIRECTION);
6739 static __inline__ __m512d __DEFAULT_FN_ATTRS
6740 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6742 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6746 _MM_FROUND_CUR_DIRECTION);
6749 static __inline__ __m512d __DEFAULT_FN_ATTRS
6750 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6752 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6755 _mm512_setzero_pd (),
6757 _MM_FROUND_CUR_DIRECTION);
6760 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
6761 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6762 (__v16sf)(__m512)(B), \
6763 (__v16sf)_mm512_undefined_ps(), \
6764 (__mmask16)-1, (int)(R)); })
6766 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
6767 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6768 (__v16sf)(__m512)(B), \
6769 (__v16sf)(__m512)(W), \
6770 (__mmask16)(U), (int)(R)); })
6772 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
6773 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6774 (__v16sf)(__m512)(B), \
6775 (__v16sf)_mm512_setzero_ps(), \
6776 (__mmask16)(U), (int)(R)); })
6778 static __inline__ __m512 __DEFAULT_FN_ATTRS
6779 _mm512_scalef_ps (__m512 __A, __m512 __B)
6781 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6784 _mm512_undefined_ps (),
6786 _MM_FROUND_CUR_DIRECTION);
6789 static __inline__ __m512 __DEFAULT_FN_ATTRS
6790 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6792 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6796 _MM_FROUND_CUR_DIRECTION);
6799 static __inline__ __m512 __DEFAULT_FN_ATTRS
6800 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6802 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6805 _mm512_setzero_ps (),
6807 _MM_FROUND_CUR_DIRECTION);
6810 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
6811 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6812 (__v2df)(__m128d)(B), \
6813 (__v2df)_mm_setzero_pd(), \
6814 (__mmask8)-1, (int)(R)); })
6816 static __inline__ __m128d __DEFAULT_FN_ATTRS
6817 _mm_scalef_sd (__m128d __A, __m128d __B)
6819 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6820 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6822 _MM_FROUND_CUR_DIRECTION);
6825 static __inline__ __m128d __DEFAULT_FN_ATTRS
6826 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6828 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6832 _MM_FROUND_CUR_DIRECTION);
6835 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
6836 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6837 (__v2df)(__m128d)(B), \
6838 (__v2df)(__m128d)(W), \
6839 (__mmask8)(U), (int)(R)); })
6841 static __inline__ __m128d __DEFAULT_FN_ATTRS
6842 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6844 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6846 (__v2df) _mm_setzero_pd (),
6848 _MM_FROUND_CUR_DIRECTION);
6851 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
6852 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6853 (__v2df)(__m128d)(B), \
6854 (__v2df)_mm_setzero_pd(), \
6855 (__mmask8)(U), (int)(R)); })
6857 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
6858 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6859 (__v4sf)(__m128)(B), \
6860 (__v4sf)_mm_setzero_ps(), \
6861 (__mmask8)-1, (int)(R)); })
6863 static __inline__ __m128 __DEFAULT_FN_ATTRS
6864 _mm_scalef_ss (__m128 __A, __m128 __B)
6866 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6867 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6869 _MM_FROUND_CUR_DIRECTION);
6872 static __inline__ __m128 __DEFAULT_FN_ATTRS
6873 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6875 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6879 _MM_FROUND_CUR_DIRECTION);
6882 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
6883 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6884 (__v4sf)(__m128)(B), \
6885 (__v4sf)(__m128)(W), \
6886 (__mmask8)(U), (int)(R)); })
6888 static __inline__ __m128 __DEFAULT_FN_ATTRS
6889 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6891 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6893 (__v4sf) _mm_setzero_ps (),
6895 _MM_FROUND_CUR_DIRECTION);
6898 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
6899 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6900 (__v4sf)(__m128)(B), \
6901 (__v4sf)_mm_setzero_ps(), \
6903 _MM_FROUND_CUR_DIRECTION); })
6905 static __inline__ __m512i __DEFAULT_FN_ATTRS
6906 _mm512_srai_epi32(__m512i __A, int __B)
6908 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6911 static __inline__ __m512i __DEFAULT_FN_ATTRS
6912 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
6914 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6915 (__v16si)_mm512_srai_epi32(__A, __B), \
6919 static __inline__ __m512i __DEFAULT_FN_ATTRS
6920 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
6921 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
6922 (__v16si)_mm512_srai_epi32(__A, __B), \
6923 (__v16si)_mm512_setzero_si512());
6926 static __inline__ __m512i __DEFAULT_FN_ATTRS
6927 _mm512_srai_epi64(__m512i __A, int __B)
6929 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6932 static __inline__ __m512i __DEFAULT_FN_ATTRS
6933 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
6935 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6936 (__v8di)_mm512_srai_epi64(__A, __B), \
6940 static __inline__ __m512i __DEFAULT_FN_ATTRS
6941 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
6943 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
6944 (__v8di)_mm512_srai_epi64(__A, __B), \
6945 (__v8di)_mm512_setzero_si512());
/* VSHUFF32X4/VSHUFF64X2: select 128-bit lanes — the two low result
   lanes come from A, the two high lanes from B, per the immediate. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + ((((imm) >> 0) & 0x3) * 4), \
                                  1  + ((((imm) >> 0) & 0x3) * 4), \
                                  2  + ((((imm) >> 0) & 0x3) * 4), \
                                  3  + ((((imm) >> 0) & 0x3) * 4), \
                                  0  + ((((imm) >> 2) & 0x3) * 4), \
                                  1  + ((((imm) >> 2) & 0x3) * 4), \
                                  2  + ((((imm) >> 2) & 0x3) * 4), \
                                  3  + ((((imm) >> 2) & 0x3) * 4), \
                                  16 + ((((imm) >> 4) & 0x3) * 4), \
                                  17 + ((((imm) >> 4) & 0x3) * 4), \
                                  18 + ((((imm) >> 4) & 0x3) * 4), \
                                  19 + ((((imm) >> 4) & 0x3) * 4), \
                                  16 + ((((imm) >> 6) & 0x3) * 4), \
                                  17 + ((((imm) >> 6) & 0x3) * 4), \
                                  18 + ((((imm) >> 6) & 0x3) * 4), \
                                  19 + ((((imm) >> 6) & 0x3) * 4)); })

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })

#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })
/* VSHUFI32X4/VSHUFI64X2: 128-bit-lane select.  Both are expressed as a
   shuffle of 64-bit elements (__v8di) — a 128-bit lane move is the same
   regardless of element width; the 32-bit mask/maskz forms re-view the
   result as __v16si for the per-dword select. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W)); })

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })

#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W)); })

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
/* SHUFPD across 512 bits: per 128-bit lane, the low result element is
   picked from A and the high one from B, one control bit each. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
/* SHUFPS across 512 bits.  Bug fix: the result was cast to __m512d
   (packed double) instead of __m512 — _mm512_shuffle_ps must yield a
   single-precision vector, matching the Intel intrinsic signature. */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })

#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
7096 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
7097 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7098 (__v2df)(__m128d)(B), \
7099 (__v2df)_mm_setzero_pd(), \
7100 (__mmask8)-1, (int)(R)); })
7102 static __inline__ __m128d __DEFAULT_FN_ATTRS
7103 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7105 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7109 _MM_FROUND_CUR_DIRECTION);
7112 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
7113 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7114 (__v2df)(__m128d)(B), \
7115 (__v2df)(__m128d)(W), \
7116 (__mmask8)(U), (int)(R)); })
7118 static __inline__ __m128d __DEFAULT_FN_ATTRS
7119 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7121 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7123 (__v2df) _mm_setzero_pd (),
7125 _MM_FROUND_CUR_DIRECTION);
7128 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
7129 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7130 (__v2df)(__m128d)(B), \
7131 (__v2df)_mm_setzero_pd(), \
7132 (__mmask8)(U), (int)(R)); })
7134 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
7135 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7136 (__v4sf)(__m128)(B), \
7137 (__v4sf)_mm_setzero_ps(), \
7138 (__mmask8)-1, (int)(R)); })
7140 static __inline__ __m128 __DEFAULT_FN_ATTRS
7141 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7143 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7147 _MM_FROUND_CUR_DIRECTION);
7150 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
7151 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7152 (__v4sf)(__m128)(B), \
7153 (__v4sf)(__m128)(W), (__mmask8)(U), \
7156 static __inline__ __m128 __DEFAULT_FN_ATTRS
7157 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7159 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7161 (__v4sf) _mm_setzero_ps (),
7163 _MM_FROUND_CUR_DIRECTION);
7166 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
7167 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7168 (__v4sf)(__m128)(B), \
7169 (__v4sf)_mm_setzero_ps(), \
7170 (__mmask8)(U), (int)(R)); })
7172 static __inline__ __m512 __DEFAULT_FN_ATTRS
7173 _mm512_broadcast_f32x4(__m128 __A)
7175 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7176 0, 1, 2, 3, 0, 1, 2, 3,
7177 0, 1, 2, 3, 0, 1, 2, 3);
7180 static __inline__ __m512 __DEFAULT_FN_ATTRS
7181 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
7183 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7184 (__v16sf)_mm512_broadcast_f32x4(__A),
7188 static __inline__ __m512 __DEFAULT_FN_ATTRS
7189 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
7191 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7192 (__v16sf)_mm512_broadcast_f32x4(__A),
7193 (__v16sf)_mm512_setzero_ps());
7196 static __inline__ __m512d __DEFAULT_FN_ATTRS
7197 _mm512_broadcast_f64x4(__m256d __A)
7199 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7200 0, 1, 2, 3, 0, 1, 2, 3);
7203 static __inline__ __m512d __DEFAULT_FN_ATTRS
7204 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
7206 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7207 (__v8df)_mm512_broadcast_f64x4(__A),
7211 static __inline__ __m512d __DEFAULT_FN_ATTRS
7212 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
7214 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7215 (__v8df)_mm512_broadcast_f64x4(__A),
7216 (__v8df)_mm512_setzero_pd());
7219 static __inline__ __m512i __DEFAULT_FN_ATTRS
7220 _mm512_broadcast_i32x4(__m128i __A)
7222 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7223 0, 1, 2, 3, 0, 1, 2, 3,
7224 0, 1, 2, 3, 0, 1, 2, 3);
7227 static __inline__ __m512i __DEFAULT_FN_ATTRS
7228 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
7230 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7231 (__v16si)_mm512_broadcast_i32x4(__A),
7235 static __inline__ __m512i __DEFAULT_FN_ATTRS
7236 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
7238 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7239 (__v16si)_mm512_broadcast_i32x4(__A),
7240 (__v16si)_mm512_setzero_si512());
7243 static __inline__ __m512i __DEFAULT_FN_ATTRS
7244 _mm512_broadcast_i64x4(__m256i __A)
7246 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7247 0, 1, 2, 3, 0, 1, 2, 3);
7250 static __inline__ __m512i __DEFAULT_FN_ATTRS
7251 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
7253 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7254 (__v8di)_mm512_broadcast_i64x4(__A),
7258 static __inline__ __m512i __DEFAULT_FN_ATTRS
7259 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
7261 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7262 (__v8di)_mm512_broadcast_i64x4(__A),
7263 (__v8di)_mm512_setzero_si512());
7266 static __inline__ __m512d __DEFAULT_FN_ATTRS
7267 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7269 return (__m512d)__builtin_ia32_selectpd_512(__M,
7270 (__v8df) _mm512_broadcastsd_pd(__A),
7274 static __inline__ __m512d __DEFAULT_FN_ATTRS
7275 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7277 return (__m512d)__builtin_ia32_selectpd_512(__M,
7278 (__v8df) _mm512_broadcastsd_pd(__A),
7279 (__v8df) _mm512_setzero_pd());
7282 static __inline__ __m512 __DEFAULT_FN_ATTRS
7283 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7285 return (__m512)__builtin_ia32_selectps_512(__M,
7286 (__v16sf) _mm512_broadcastss_ps(__A),
7290 static __inline__ __m512 __DEFAULT_FN_ATTRS
7291 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7293 return (__m512)__builtin_ia32_selectps_512(__M,
7294 (__v16sf) _mm512_broadcastss_ps(__A),
7295 (__v16sf) _mm512_setzero_ps());
7298 static __inline__ __m128i __DEFAULT_FN_ATTRS
7299 _mm512_cvtsepi32_epi8 (__m512i __A)
7301 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7302 (__v16qi) _mm_undefined_si128 (),
7306 static __inline__ __m128i __DEFAULT_FN_ATTRS
7307 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7309 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7310 (__v16qi) __O, __M);
7313 static __inline__ __m128i __DEFAULT_FN_ATTRS
7314 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7316 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7317 (__v16qi) _mm_setzero_si128 (),
7321 static __inline__ void __DEFAULT_FN_ATTRS
7322 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7324 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7327 static __inline__ __m256i __DEFAULT_FN_ATTRS
7328 _mm512_cvtsepi32_epi16 (__m512i __A)
7330 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7331 (__v16hi) _mm256_undefined_si256 (),
7335 static __inline__ __m256i __DEFAULT_FN_ATTRS
7336 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7338 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7339 (__v16hi) __O, __M);
7342 static __inline__ __m256i __DEFAULT_FN_ATTRS
7343 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7345 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7346 (__v16hi) _mm256_setzero_si256 (),
7350 static __inline__ void __DEFAULT_FN_ATTRS
7351 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7353 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7356 static __inline__ __m128i __DEFAULT_FN_ATTRS
7357 _mm512_cvtsepi64_epi8 (__m512i __A)
7359 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7360 (__v16qi) _mm_undefined_si128 (),
7364 static __inline__ __m128i __DEFAULT_FN_ATTRS
7365 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7367 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7368 (__v16qi) __O, __M);
7371 static __inline__ __m128i __DEFAULT_FN_ATTRS
7372 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7374 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7375 (__v16qi) _mm_setzero_si128 (),
7379 static __inline__ void __DEFAULT_FN_ATTRS
7380 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7382 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7385 static __inline__ __m256i __DEFAULT_FN_ATTRS
7386 _mm512_cvtsepi64_epi32 (__m512i __A)
7388 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7389 (__v8si) _mm256_undefined_si256 (),
7393 static __inline__ __m256i __DEFAULT_FN_ATTRS
7394 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7396 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7400 static __inline__ __m256i __DEFAULT_FN_ATTRS
7401 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7403 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7404 (__v8si) _mm256_setzero_si256 (),
7408 static __inline__ void __DEFAULT_FN_ATTRS
7409 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7411 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7414 static __inline__ __m128i __DEFAULT_FN_ATTRS
7415 _mm512_cvtsepi64_epi16 (__m512i __A)
7417 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7418 (__v8hi) _mm_undefined_si128 (),
7422 static __inline__ __m128i __DEFAULT_FN_ATTRS
7423 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7425 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7429 static __inline__ __m128i __DEFAULT_FN_ATTRS
7430 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7432 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7433 (__v8hi) _mm_setzero_si128 (),
7437 static __inline__ void __DEFAULT_FN_ATTRS
7438 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7440 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7443 static __inline__ __m128i __DEFAULT_FN_ATTRS
7444 _mm512_cvtusepi32_epi8 (__m512i __A)
7446 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7447 (__v16qi) _mm_undefined_si128 (),
7451 static __inline__ __m128i __DEFAULT_FN_ATTRS
7452 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7454 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7459 static __inline__ __m128i __DEFAULT_FN_ATTRS
7460 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7462 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7463 (__v16qi) _mm_setzero_si128 (),
7467 static __inline__ void __DEFAULT_FN_ATTRS
7468 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7470 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7473 static __inline__ __m256i __DEFAULT_FN_ATTRS
7474 _mm512_cvtusepi32_epi16 (__m512i __A)
7476 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7477 (__v16hi) _mm256_undefined_si256 (),
7481 static __inline__ __m256i __DEFAULT_FN_ATTRS
7482 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7484 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7489 static __inline__ __m256i __DEFAULT_FN_ATTRS
7490 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7492 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7493 (__v16hi) _mm256_setzero_si256 (),
7497 static __inline__ void __DEFAULT_FN_ATTRS
7498 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7500 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7503 static __inline__ __m128i __DEFAULT_FN_ATTRS
7504 _mm512_cvtusepi64_epi8 (__m512i __A)
7506 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7507 (__v16qi) _mm_undefined_si128 (),
7511 static __inline__ __m128i __DEFAULT_FN_ATTRS
7512 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7514 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7519 static __inline__ __m128i __DEFAULT_FN_ATTRS
7520 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7522 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7523 (__v16qi) _mm_setzero_si128 (),
7527 static __inline__ void __DEFAULT_FN_ATTRS
7528 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7530 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7533 static __inline__ __m256i __DEFAULT_FN_ATTRS
7534 _mm512_cvtusepi64_epi32 (__m512i __A)
7536 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7537 (__v8si) _mm256_undefined_si256 (),
7541 static __inline__ __m256i __DEFAULT_FN_ATTRS
7542 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7544 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7548 static __inline__ __m256i __DEFAULT_FN_ATTRS
7549 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7551 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7552 (__v8si) _mm256_setzero_si256 (),
7556 static __inline__ void __DEFAULT_FN_ATTRS
7557 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7559 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7562 static __inline__ __m128i __DEFAULT_FN_ATTRS
7563 _mm512_cvtusepi64_epi16 (__m512i __A)
7565 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7566 (__v8hi) _mm_undefined_si128 (),
7570 static __inline__ __m128i __DEFAULT_FN_ATTRS
7571 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7573 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7577 static __inline__ __m128i __DEFAULT_FN_ATTRS
7578 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7580 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7581 (__v8hi) _mm_setzero_si128 (),
7585 static __inline__ void __DEFAULT_FN_ATTRS
7586 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7588 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7591 static __inline__ __m128i __DEFAULT_FN_ATTRS
7592 _mm512_cvtepi32_epi8 (__m512i __A)
7594 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7595 (__v16qi) _mm_undefined_si128 (),
7599 static __inline__ __m128i __DEFAULT_FN_ATTRS
7600 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7602 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7603 (__v16qi) __O, __M);
7606 static __inline__ __m128i __DEFAULT_FN_ATTRS
7607 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7609 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7610 (__v16qi) _mm_setzero_si128 (),
7614 static __inline__ void __DEFAULT_FN_ATTRS
7615 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7617 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7620 static __inline__ __m256i __DEFAULT_FN_ATTRS
7621 _mm512_cvtepi32_epi16 (__m512i __A)
7623 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7624 (__v16hi) _mm256_undefined_si256 (),
7628 static __inline__ __m256i __DEFAULT_FN_ATTRS
7629 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7631 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7632 (__v16hi) __O, __M);
7635 static __inline__ __m256i __DEFAULT_FN_ATTRS
7636 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7638 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7639 (__v16hi) _mm256_setzero_si256 (),
7643 static __inline__ void __DEFAULT_FN_ATTRS
7644 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7646 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS
7650 _mm512_cvtepi64_epi8 (__m512i __A)
7652 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7653 (__v16qi) _mm_undefined_si128 (),
7657 static __inline__ __m128i __DEFAULT_FN_ATTRS
7658 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7660 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7661 (__v16qi) __O, __M);
7664 static __inline__ __m128i __DEFAULT_FN_ATTRS
7665 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7667 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7668 (__v16qi) _mm_setzero_si128 (),
7672 static __inline__ void __DEFAULT_FN_ATTRS
7673 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7675 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7678 static __inline__ __m256i __DEFAULT_FN_ATTRS
7679 _mm512_cvtepi64_epi32 (__m512i __A)
7681 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7682 (__v8si) _mm256_undefined_si256 (),
7686 static __inline__ __m256i __DEFAULT_FN_ATTRS
7687 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7689 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7693 static __inline__ __m256i __DEFAULT_FN_ATTRS
7694 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7696 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7697 (__v8si) _mm256_setzero_si256 (),
7701 static __inline__ void __DEFAULT_FN_ATTRS
7702 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7704 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7707 static __inline__ __m128i __DEFAULT_FN_ATTRS
7708 _mm512_cvtepi64_epi16 (__m512i __A)
7710 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7711 (__v8hi) _mm_undefined_si128 (),
7715 static __inline__ __m128i __DEFAULT_FN_ATTRS
7716 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7718 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7722 static __inline__ __m128i __DEFAULT_FN_ATTRS
7723 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7725 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7726 (__v8hi) _mm_setzero_si128 (),
7730 static __inline__ void __DEFAULT_FN_ATTRS
7731 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7733 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
/* Extract the 128-bit lane selected by (imm & 3) from a 512-bit integer
   vector, as 4 x i32.  Lowered to a compile-time shuffle; the mask/maskz
   variants blend the extracted lane with W / zero per bit of U.  */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4, \
                                   1 + ((imm) & 0x3) * 4, \
                                   2 + ((imm) & 0x3) * 4, \
                                   3 + ((imm) & 0x3) * 4); })

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(__m128i)(W)); })

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })
/* Extract the upper (imm & 1) or lower 256-bit half of a 512-bit integer
   vector, as 4 x i64.  Mask/maskz variants blend with W / zero per U.  */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ? 4 : 0, \
                                   ((imm) & 1) ? 5 : 1, \
                                   ((imm) & 1) ? 6 : 2, \
                                   ((imm) & 1) ? 7 : 3); })

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(__m256i)(W)); })

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
/* Insert the 256-bit vector B into the upper (imm & 1) or lower half of A.
   The cast B is widened to 512 bits, then a shuffle picks, per element,
   either the original A element or the corresponding B element (indices
   8..11 address B's lanes).  Mask/maskz variants blend with W / zero.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                   ((imm) & 0x1) ? 0 : 8, \
                                   ((imm) & 0x1) ? 1 : 9, \
                                   ((imm) & 0x1) ? 2 : 10, \
                                   ((imm) & 0x1) ? 3 : 11, \
                                   ((imm) & 0x1) ? 8 : 4, \
                                   ((imm) & 0x1) ? 9 : 5, \
                                   ((imm) & 0x1) ? 10 : 6, \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)); })

#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })
/* Integer counterpart of _mm512_insertf64x4: insert 256-bit B into the
   upper (imm & 1) or lower half of A.  Mask/maskz blend with W / zero.  */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                   ((imm) & 0x1) ? 0 : 8, \
                                   ((imm) & 0x1) ? 1 : 9, \
                                   ((imm) & 0x1) ? 2 : 10, \
                                   ((imm) & 0x1) ? 3 : 11, \
                                   ((imm) & 0x1) ? 8 : 4, \
                                   ((imm) & 0x1) ? 9 : 5, \
                                   ((imm) & 0x1) ? 10 : 6, \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)); })

#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })
/* Insert the 128-bit vector B into the 128-bit lane (imm & 3) of A.
   B is widened to 512 bits; indices 16..19 address B's four lanes, so for
   the selected quadrant the shuffle takes B, elsewhere it keeps A.
   Mask/maskz variants blend with W / zero per bit of U.  */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 : 0, \
                                  (((imm) & 0x3) == 0) ? 17 : 1, \
                                  (((imm) & 0x3) == 0) ? 18 : 2, \
                                  (((imm) & 0x3) == 0) ? 19 : 3, \
                                  (((imm) & 0x3) == 1) ? 16 : 4, \
                                  (((imm) & 0x3) == 1) ? 17 : 5, \
                                  (((imm) & 0x3) == 1) ? 18 : 6, \
                                  (((imm) & 0x3) == 1) ? 19 : 7, \
                                  (((imm) & 0x3) == 2) ? 16 : 8, \
                                  (((imm) & 0x3) == 2) ? 17 : 9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)); })

#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })
/* Integer counterpart of _mm512_insertf32x4: insert 128-bit B into the
   128-bit lane (imm & 3) of A.  Mask/maskz blend with W / zero per U.  */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                   (((imm) & 0x3) == 0) ? 16 : 0, \
                                   (((imm) & 0x3) == 0) ? 17 : 1, \
                                   (((imm) & 0x3) == 0) ? 18 : 2, \
                                   (((imm) & 0x3) == 0) ? 19 : 3, \
                                   (((imm) & 0x3) == 1) ? 16 : 4, \
                                   (((imm) & 0x3) == 1) ? 17 : 5, \
                                   (((imm) & 0x3) == 1) ? 18 : 6, \
                                   (((imm) & 0x3) == 1) ? 19 : 7, \
                                   (((imm) & 0x3) == 2) ? 16 : 8, \
                                   (((imm) & 0x3) == 2) ? 17 : 9, \
                                   (((imm) & 0x3) == 2) ? 18 : 10, \
                                   (((imm) & 0x3) == 2) ? 19 : 11, \
                                   (((imm) & 0x3) == 3) ? 16 : 12, \
                                   (((imm) & 0x3) == 3) ? 17 : 13, \
                                   (((imm) & 0x3) == 3) ? 18 : 14, \
                                   (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
/* VGETMANTPD: extract the normalized mantissa of each double.  B selects
   the normalization interval, C the sign control; they are packed into the
   builtin's immediate as (C << 2) | B.  _round_ variants take an explicit
   rounding/SAE argument R; the plain forms use the current direction.  */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })
/* VGETMANTPS: extract the normalized mantissa of each float.  B selects
   the normalization interval, C the sign control, packed as (C << 2) | B.
   _round_ variants take an explicit rounding/SAE argument R.  */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
7954 #define _mm512_getexp_round_pd(A, R) __extension__ ({ \
7955 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7956 (__v8df)_mm512_undefined_pd(), \
7957 (__mmask8)-1, (int)(R)); })
7959 #define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
7960 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7961 (__v8df)(__m512d)(W), \
7962 (__mmask8)(U), (int)(R)); })
7964 #define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
7965 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7966 (__v8df)_mm512_setzero_pd(), \
7967 (__mmask8)(U), (int)(R)); })
7969 static __inline__ __m512d __DEFAULT_FN_ATTRS
7970 _mm512_getexp_pd (__m512d __A)
7972 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7973 (__v8df) _mm512_undefined_pd (),
7975 _MM_FROUND_CUR_DIRECTION);
7978 static __inline__ __m512d __DEFAULT_FN_ATTRS
7979 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7981 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7984 _MM_FROUND_CUR_DIRECTION);
7987 static __inline__ __m512d __DEFAULT_FN_ATTRS
7988 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
7990 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7991 (__v8df) _mm512_setzero_pd (),
7993 _MM_FROUND_CUR_DIRECTION);
7996 #define _mm512_getexp_round_ps(A, R) __extension__ ({ \
7997 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7998 (__v16sf)_mm512_undefined_ps(), \
7999 (__mmask16)-1, (int)(R)); })
8001 #define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
8002 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8003 (__v16sf)(__m512)(W), \
8004 (__mmask16)(U), (int)(R)); })
8006 #define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
8007 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8008 (__v16sf)_mm512_setzero_ps(), \
8009 (__mmask16)(U), (int)(R)); })
8011 static __inline__ __m512 __DEFAULT_FN_ATTRS
8012 _mm512_getexp_ps (__m512 __A)
8014 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8015 (__v16sf) _mm512_undefined_ps (),
8017 _MM_FROUND_CUR_DIRECTION);
8020 static __inline__ __m512 __DEFAULT_FN_ATTRS
8021 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8023 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8026 _MM_FROUND_CUR_DIRECTION);
8029 static __inline__ __m512 __DEFAULT_FN_ATTRS
8030 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8032 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8033 (__v16sf) _mm512_setzero_ps (),
8035 _MM_FROUND_CUR_DIRECTION);
/* Gathers with 64-bit indices: load from addr + index[i]*scale into each
   lane; the mask variants load only lanes whose mask bit is set and keep
   v1_old elsewhere.
   BUGFIX: the unmasked epi32/epi64 forms previously built their undefined
   passthrough from a float vector (_mm256_undefined_ps /
   _mm512_undefined_pd) and relied on a reinterpreting vector cast; they
   now use the matching integer "undefined" intrinsics, as upstream does.  */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
/* Gathers with 32-bit indices.
   BUGFIX: _mm512_i32gather_ps and its mask variant previously cast the
   index operand as (__v16sf)(__m512) — but the index is an integer vector
   (__m512i), as in every sibling gather and per Intel's definition; the
   cast is now (__v16si)(__m512i), matching upstream.  */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16si)(__m512i)(index), \
                                       (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16si)(__m512i)(index), \
                                       (__mmask16)(mask), (int)(scale)); })

#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })

#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
/* Scatters with 64-bit indices: store each element of v1 to
   addr + index[i]*scale.  The unmasked forms pass an all-ones mask; the
   _mask_ forms store only lanes whose mask bit is set.  */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                              (__v8di)(__m512i)(index), \
                              (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                              (__v8di)(__m512i)(index), \
                              (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                              (__v8di)(__m512i)(index), \
                              (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                              (__v8di)(__m512i)(index), \
                              (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                             (__v8di)(__m512i)(index), \
                             (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                             (__v8di)(__m512i)(index), \
                             (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                             (__v8di)(__m512i)(index), \
                             (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                             (__v8di)(__m512i)(index), \
                             (__v8di)(__m512i)(v1), (int)(scale)); })
/* Scatters with 32-bit indices: store each element of v1 to
   addr + index[i]*scale.  The unmasked forms pass an all-ones mask; the
   _mask_ forms store only lanes whose mask bit is set.  */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                              (__v16si)(__m512i)(index), \
                              (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                              (__v16si)(__m512i)(index), \
                              (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                              (__v16si)(__m512i)(index), \
                              (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                              (__v16si)(__m512i)(index), \
                              (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                             (__v8si)(__m256i)(index), \
                             (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                             (__v8si)(__m256i)(index), \
                             (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                             (__v8si)(__m256i)(index), \
                             (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
__builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                             (__v8si)(__m256i)(index), \
                             (__v8di)(__m512i)(v1), (int)(scale)); })
8214 static __inline__ __m128 __DEFAULT_FN_ATTRS
8215 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8217 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8221 _MM_FROUND_CUR_DIRECTION);
8224 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
8225 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8226 (__v4sf)(__m128)(A), \
8227 (__v4sf)(__m128)(B), (__mmask8)(U), \
8230 static __inline__ __m128 __DEFAULT_FN_ATTRS
8231 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8233 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8237 _MM_FROUND_CUR_DIRECTION);
8240 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
8241 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8242 (__v4sf)(__m128)(B), \
8243 (__v4sf)(__m128)(C), (__mmask8)(U), \
8244 _MM_FROUND_CUR_DIRECTION); })
8246 static __inline__ __m128 __DEFAULT_FN_ATTRS
8247 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8249 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8253 _MM_FROUND_CUR_DIRECTION);
8256 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
8257 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
8258 (__v4sf)(__m128)(X), \
8259 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8262 static __inline__ __m128 __DEFAULT_FN_ATTRS
8263 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8265 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8269 _MM_FROUND_CUR_DIRECTION);
8272 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
8273 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8274 (__v4sf)(__m128)(A), \
8275 (__v4sf)(__m128)(B), (__mmask8)(U), \
8278 static __inline__ __m128 __DEFAULT_FN_ATTRS
8279 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8281 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8285 _MM_FROUND_CUR_DIRECTION);
8288 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
8289 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8290 (__v4sf)(__m128)(B), \
8291 -(__v4sf)(__m128)(C), (__mmask8)(U), \
8294 static __inline__ __m128 __DEFAULT_FN_ATTRS
8295 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8297 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8301 _MM_FROUND_CUR_DIRECTION);
8304 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
8305 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8306 (__v4sf)(__m128)(X), \
8307 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8310 static __inline__ __m128 __DEFAULT_FN_ATTRS
8311 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8313 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8317 _MM_FROUND_CUR_DIRECTION);
8320 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
8321 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8322 -(__v4sf)(__m128)(A), \
8323 (__v4sf)(__m128)(B), (__mmask8)(U), \
8326 static __inline__ __m128 __DEFAULT_FN_ATTRS
8327 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8329 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8333 _MM_FROUND_CUR_DIRECTION);
8336 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
8337 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8338 (__v4sf)(__m128)(B), \
8339 (__v4sf)(__m128)(C), (__mmask8)(U), \
8342 static __inline__ __m128 __DEFAULT_FN_ATTRS
8343 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8345 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8349 _MM_FROUND_CUR_DIRECTION);
8352 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
8353 (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
8354 (__v4sf)(__m128)(X), \
8355 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8358 static __inline__ __m128 __DEFAULT_FN_ATTRS
8359 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8361 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8365 _MM_FROUND_CUR_DIRECTION);
8368 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
8369 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8370 -(__v4sf)(__m128)(A), \
8371 -(__v4sf)(__m128)(B), (__mmask8)(U), \
8374 static __inline__ __m128 __DEFAULT_FN_ATTRS
8375 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8377 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8381 _MM_FROUND_CUR_DIRECTION);
8384 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
8385 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8386 (__v4sf)(__m128)(B), \
8387 -(__v4sf)(__m128)(C), (__mmask8)(U), \
8388 _MM_FROUND_CUR_DIRECTION); })
8390 static __inline__ __m128 __DEFAULT_FN_ATTRS
8391 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8393 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8397 _MM_FROUND_CUR_DIRECTION);
8400 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
8401 (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
8402 (__v4sf)(__m128)(X), \
8403 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8406 static __inline__ __m128d __DEFAULT_FN_ATTRS
8407 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8409 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8413 _MM_FROUND_CUR_DIRECTION);
8416 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
8417 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8418 (__v2df)(__m128d)(A), \
8419 (__v2df)(__m128d)(B), (__mmask8)(U), \
8422 static __inline__ __m128d __DEFAULT_FN_ATTRS
8423 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8425 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8429 _MM_FROUND_CUR_DIRECTION);
8432 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
8433 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8434 (__v2df)(__m128d)(B), \
8435 (__v2df)(__m128d)(C), (__mmask8)(U), \
8436 _MM_FROUND_CUR_DIRECTION); })
8438 static __inline__ __m128d __DEFAULT_FN_ATTRS
8439 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8441 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8445 _MM_FROUND_CUR_DIRECTION);
8448 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
8449 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8450 (__v2df)(__m128d)(X), \
8451 (__v2df)(__m128d)(Y), (__mmask8)(U), \
8454 static __inline__ __m128d __DEFAULT_FN_ATTRS
8455 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8457 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8461 _MM_FROUND_CUR_DIRECTION);
8464 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
8465 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8466 (__v2df)(__m128d)(A), \
8467 -(__v2df)(__m128d)(B), (__mmask8)(U), \
8470 static __inline__ __m128d __DEFAULT_FN_ATTRS
8471 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8473 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8477 _MM_FROUND_CUR_DIRECTION);
8480 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
8481 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8482 (__v2df)(__m128d)(B), \
8483 -(__v2df)(__m128d)(C), \
8484 (__mmask8)(U), (int)(R)); })
8486 static __inline__ __m128d __DEFAULT_FN_ATTRS
8487 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8489 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8493 _MM_FROUND_CUR_DIRECTION);
8496 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
8497 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8498 (__v2df)(__m128d)(X), \
8499 (__v2df)(__m128d)(Y), \
8500 (__mmask8)(U), (int)(R)); })
8502 static __inline__ __m128d __DEFAULT_FN_ATTRS
8503 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8505 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8509 _MM_FROUND_CUR_DIRECTION);
8512 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
8513 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8514 -(__v2df)(__m128d)(A), \
8515 (__v2df)(__m128d)(B), (__mmask8)(U), \
8518 static __inline__ __m128d __DEFAULT_FN_ATTRS
8519 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8521 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8525 _MM_FROUND_CUR_DIRECTION);
8528 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
8529 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
8530 (__v2df)(__m128d)(B), \
8531 (__v2df)(__m128d)(C), (__mmask8)(U), \
8534 static __inline__ __m128d __DEFAULT_FN_ATTRS
8535 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8537 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8541 _MM_FROUND_CUR_DIRECTION);
8544 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
8545 (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
8546 (__v2df)(__m128d)(X), \
8547 (__v2df)(__m128d)(Y), (__mmask8)(U), \
8550 static __inline__ __m128d __DEFAULT_FN_ATTRS
8551 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8553 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8557 _MM_FROUND_CUR_DIRECTION);
8560 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
8561 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8562 -(__v2df)(__m128d)(A), \
8563 -(__v2df)(__m128d)(B), (__mmask8)(U), \
8566 static __inline__ __m128d __DEFAULT_FN_ATTRS
8567 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8569 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8573 _MM_FROUND_CUR_DIRECTION);
8576 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
8577 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
8578 (__v2df)(__m128d)(B), \
8579 -(__v2df)(__m128d)(C), \
8581 _MM_FROUND_CUR_DIRECTION); })
8583 static __inline__ __m128d __DEFAULT_FN_ATTRS
8584 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8586 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8590 _MM_FROUND_CUR_DIRECTION);
8593 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
8594 (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
8595 (__v2df)(__m128d)(X), \
8596 (__v2df)(__m128d)(Y), \
8597 (__mmask8)(U), (int)(R)); })
/* Permute the 64-bit lanes of X within each 256-bit half; each 2-bit
   field of the immediate C selects a source lane, and both halves use
   the same pattern (indices 0-3 for the low half, 4-7 for the high). */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masking form: lanes with a 0 bit in U become 0.0. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
/* Permute the 64-bit integer lanes of X within each 256-bit half; each
   2-bit field of the immediate C selects a source lane, and both halves
   use the same pattern. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masking form: lanes with a 0 bit in U are taken from W. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masking form: lanes with a 0 bit in U become 0. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
8643 static __inline__ __m512d __DEFAULT_FN_ATTRS
8644 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8646 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8648 (__v8df) _mm512_undefined_pd (),
8652 static __inline__ __m512d __DEFAULT_FN_ATTRS
8653 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8655 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8661 static __inline__ __m512d __DEFAULT_FN_ATTRS
8662 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8664 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8666 (__v8df) _mm512_setzero_pd (),
8670 static __inline__ __m512i __DEFAULT_FN_ATTRS
8671 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8673 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8675 (__v8di) _mm512_setzero_si512 (),
8679 static __inline__ __m512i __DEFAULT_FN_ATTRS
8680 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8682 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8684 (__v8di) _mm512_undefined_epi32 (),
8688 static __inline__ __m512i __DEFAULT_FN_ATTRS
8689 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8692 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8698 static __inline__ __m512 __DEFAULT_FN_ATTRS
8699 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8701 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8703 (__v16sf) _mm512_undefined_ps (),
8707 static __inline__ __m512 __DEFAULT_FN_ATTRS
8708 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8710 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8716 static __inline__ __m512 __DEFAULT_FN_ATTRS
8717 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8719 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8721 (__v16sf) _mm512_setzero_ps (),
8725 static __inline__ __m512i __DEFAULT_FN_ATTRS
8726 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8728 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8730 (__v16si) _mm512_setzero_si512 (),
8734 static __inline__ __m512i __DEFAULT_FN_ATTRS
8735 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8737 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8739 (__v16si) _mm512_undefined_epi32 (),
/* Alternate name for _mm512_permutexvar_epi32. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8745 static __inline__ __m512i __DEFAULT_FN_ATTRS
8746 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8749 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
/* Alternate name for _mm512_mask_permutexvar_epi32. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8757 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8758 _mm512_kand (__mmask16 __A, __mmask16 __B)
8760 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8763 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8764 _mm512_kandn (__mmask16 __A, __mmask16 __B)
8766 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8769 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8770 _mm512_kor (__mmask16 __A, __mmask16 __B)
8772 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8775 static __inline__ int __DEFAULT_FN_ATTRS
8776 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
8778 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8781 static __inline__ int __DEFAULT_FN_ATTRS
8782 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
8784 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8787 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8788 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8790 return (__mmask16) (( __A & 0xFF) | ( __B << 8));
8793 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8794 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
8796 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8799 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8800 _mm512_kxor (__mmask16 __A, __mmask16 __B)
8802 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8805 static __inline__ void __DEFAULT_FN_ATTRS
8806 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8808 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8809 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8812 static __inline__ __m512i __DEFAULT_FN_ATTRS
8813 _mm512_stream_load_si512 (void const *__P)
8815 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8816 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8819 static __inline__ void __DEFAULT_FN_ATTRS
8820 _mm512_stream_pd (double *__P, __m512d __A)
8822 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8823 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8826 static __inline__ void __DEFAULT_FN_ATTRS
8827 _mm512_stream_ps (float *__P, __m512 __A)
8829 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8830 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8833 static __inline__ __m512d __DEFAULT_FN_ATTRS
8834 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8836 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8841 static __inline__ __m512d __DEFAULT_FN_ATTRS
8842 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8844 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8846 _mm512_setzero_pd (),
8850 static __inline__ __m512i __DEFAULT_FN_ATTRS
8851 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8853 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8858 static __inline__ __m512i __DEFAULT_FN_ATTRS
8859 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8861 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8863 _mm512_setzero_si512 (),
8867 static __inline__ __m512 __DEFAULT_FN_ATTRS
8868 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8870 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8875 static __inline__ __m512 __DEFAULT_FN_ATTRS
8876 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8878 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8880 _mm512_setzero_ps (),
8884 static __inline__ __m512i __DEFAULT_FN_ATTRS
8885 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8887 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8892 static __inline__ __m512i __DEFAULT_FN_ATTRS
8893 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
8895 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8897 _mm512_setzero_si512 (),
/* Scalar single-precision compare producing a 1-bit mask; P selects the
   comparison predicate and R the rounding/SAE control. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

/* As above, but the result is zeroed where mask M is 0. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })
8911 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
8912 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8913 (__v4sf)(__m128)(Y), (int)(P), \
8915 _MM_FROUND_CUR_DIRECTION); })
8917 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
8918 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
8919 (__v4sf)(__m128)(Y), (int)(P), \
8921 _MM_FROUND_CUR_DIRECTION); })
/* Scalar double-precision compare producing a 1-bit mask; P selects the
   comparison predicate and R the rounding/SAE control. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

/* As above, but the result is zeroed where mask M is 0. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })
8933 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
8934 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8935 (__v2df)(__m128d)(Y), (int)(P), \
8937 _MM_FROUND_CUR_DIRECTION); })
8939 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
8940 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
8941 (__v2df)(__m128d)(Y), (int)(P), \
8943 _MM_FROUND_CUR_DIRECTION); })
8947 static __inline __mmask16 __DEFAULT_FN_ATTRS
8948 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
8950 return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
8951 _mm512_setzero_epi32());
8954 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8955 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8957 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8958 _mm512_setzero_epi32());
8961 static __inline __mmask8 __DEFAULT_FN_ATTRS
8962 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
8964 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8965 _mm512_setzero_epi32());
8968 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8969 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8971 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8972 _mm512_setzero_epi32());
8975 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8976 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8978 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8979 _mm512_setzero_epi32());
8982 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8983 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8985 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8986 _mm512_setzero_epi32());
8989 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8990 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8992 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8993 _mm512_setzero_epi32());
8996 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
8997 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8999 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
9000 _mm512_setzero_epi32());
9003 static __inline__ __m512 __DEFAULT_FN_ATTRS
9004 _mm512_movehdup_ps (__m512 __A)
9006 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9007 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9010 static __inline__ __m512 __DEFAULT_FN_ATTRS
9011 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9013 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9014 (__v16sf)_mm512_movehdup_ps(__A),
9018 static __inline__ __m512 __DEFAULT_FN_ATTRS
9019 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9021 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9022 (__v16sf)_mm512_movehdup_ps(__A),
9023 (__v16sf)_mm512_setzero_ps());
9026 static __inline__ __m512 __DEFAULT_FN_ATTRS
9027 _mm512_moveldup_ps (__m512 __A)
9029 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9030 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9033 static __inline__ __m512 __DEFAULT_FN_ATTRS
9034 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9036 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9037 (__v16sf)_mm512_moveldup_ps(__A),
9041 static __inline__ __m512 __DEFAULT_FN_ATTRS
9042 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9044 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9045 (__v16sf)_mm512_moveldup_ps(__A),
9046 (__v16sf)_mm512_setzero_ps());
9049 static __inline__ __m128 __DEFAULT_FN_ATTRS
9050 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9053 res[0] = (__U & 1) ? __B[0] : __W[0];
9057 static __inline__ __m128 __DEFAULT_FN_ATTRS
9058 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9061 res[0] = (__U & 1) ? __B[0] : 0;
9065 static __inline__ __m128d __DEFAULT_FN_ATTRS
9066 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9069 res[0] = (__U & 1) ? __B[0] : __W[0];
9073 static __inline__ __m128d __DEFAULT_FN_ATTRS
9074 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9077 res[0] = (__U & 1) ? __B[0] : 0;
9081 static __inline__ void __DEFAULT_FN_ATTRS
9082 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9084 __builtin_ia32_storess128_mask ((__v16sf *)__W,
9085 (__v16sf) _mm512_castps128_ps512(__A),
9086 (__mmask16) __U & (__mmask16)1);
9089 static __inline__ void __DEFAULT_FN_ATTRS
9090 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9092 __builtin_ia32_storesd128_mask ((__v8df *)__W,
9093 (__v8df) _mm512_castpd128_pd512(__A),
9094 (__mmask8) __U & 1);
9097 static __inline__ __m128 __DEFAULT_FN_ATTRS
9098 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9100 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9101 (__v4sf) {0.0, 0.0, 0.0, 0.0},
9104 return (__m128) __builtin_shufflevector(
9105 __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9106 (__v16sf) _mm512_castps128_ps512(src),
9107 (__mmask16) __U & 1),
9108 _mm512_undefined_ps(), 0, 1, 2, 3);
9111 static __inline__ __m128 __DEFAULT_FN_ATTRS
9112 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
9114 return (__m128) __builtin_shufflevector(
9115 __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9116 (__v16sf) _mm512_setzero_ps(),
9117 (__mmask16) __U & 1),
9118 _mm512_undefined_ps(), 0, 1, 2, 3);
9121 static __inline__ __m128d __DEFAULT_FN_ATTRS
9122 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9124 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9125 (__v2df) {0.0, 0.0}, 0, 2);
9127 return (__m128d) __builtin_shufflevector(
9128 __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9129 (__v8df) _mm512_castpd128_pd512(src),
9130 (__mmask8) __U & 1),
9131 _mm512_undefined_pd(), 0, 1);
9134 static __inline__ __m128d __DEFAULT_FN_ATTRS
9135 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
9137 return (__m128d) __builtin_shufflevector(
9138 __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9139 (__v8df) _mm512_setzero_pd(),
9140 (__mmask8) __U & 1),
9141 _mm512_undefined_pd(), 0, 1);
/* Shuffle 32-bit elements within each 128-bit lane of A; the same 2-bit
   fields of the immediate I are applied to all four lanes. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

/* Merge-masking form: elements with a 0 bit in U are taken from W. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masking form: elements with a 0 bit in U become 0. */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
9174 static __inline__ __m512d __DEFAULT_FN_ATTRS
9175 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9177 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9182 static __inline__ __m512d __DEFAULT_FN_ATTRS
9183 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9185 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9186 (__v8df) _mm512_setzero_pd (),
9190 static __inline__ __m512i __DEFAULT_FN_ATTRS
9191 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9193 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9198 static __inline__ __m512i __DEFAULT_FN_ATTRS
9199 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9201 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9202 (__v8di) _mm512_setzero_pd (),
9206 static __inline__ __m512d __DEFAULT_FN_ATTRS
9207 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9209 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9214 static __inline__ __m512d __DEFAULT_FN_ATTRS
9215 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9217 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9218 (__v8df) _mm512_setzero_pd(),
9222 static __inline__ __m512i __DEFAULT_FN_ATTRS
9223 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9225 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9230 static __inline__ __m512i __DEFAULT_FN_ATTRS
9231 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9233 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9234 (__v8di) _mm512_setzero_pd(),
9238 static __inline__ __m512 __DEFAULT_FN_ATTRS
9239 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9241 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9246 static __inline__ __m512 __DEFAULT_FN_ATTRS
9247 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9249 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9250 (__v16sf) _mm512_setzero_ps(),
9254 static __inline__ __m512i __DEFAULT_FN_ATTRS
9255 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9257 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9262 static __inline__ __m512i __DEFAULT_FN_ATTRS
9263 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9265 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9266 (__v16si) _mm512_setzero_ps(),
9270 static __inline__ __m512 __DEFAULT_FN_ATTRS
9271 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9273 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9278 static __inline__ __m512 __DEFAULT_FN_ATTRS
9279 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9281 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9282 (__v16sf) _mm512_setzero_ps(),
9286 static __inline__ __m512i __DEFAULT_FN_ATTRS
9287 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9289 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9294 static __inline__ __m512i __DEFAULT_FN_ATTRS
9295 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9297 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9298 (__v16si) _mm512_setzero_ps(),
/* Convert 8 packed floats to 8 packed doubles with explicit rounding
   control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
9317 static __inline__ __m512d __DEFAULT_FN_ATTRS
9318 _mm512_cvtps_pd (__m256 __A)
9320 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9322 _mm512_undefined_pd (),
9324 _MM_FROUND_CUR_DIRECTION);
9327 static __inline__ __m512d __DEFAULT_FN_ATTRS
9328 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9330 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9333 _MM_FROUND_CUR_DIRECTION);
9336 static __inline__ __m512d __DEFAULT_FN_ATTRS
9337 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9339 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9341 _mm512_setzero_pd (),
9343 _MM_FROUND_CUR_DIRECTION);
9346 static __inline__ __m512 __DEFAULT_FN_ATTRS
9347 _mm512_cvtpslo_pd (__m512 __A)
9349 return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9352 static __inline__ __m512 __DEFAULT_FN_ATTRS
9353 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9355 return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9358 static __inline__ __m512d __DEFAULT_FN_ATTRS
9359 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9361 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9366 static __inline__ __m512d __DEFAULT_FN_ATTRS
9367 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9369 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9371 (__v8df) _mm512_setzero_pd ());
9374 static __inline__ __m512 __DEFAULT_FN_ATTRS
9375 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9377 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9382 static __inline__ __m512 __DEFAULT_FN_ATTRS
9383 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9385 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9387 (__v16sf) _mm512_setzero_ps ());
9390 static __inline__ void __DEFAULT_FN_ATTRS
9391 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9393 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9397 static __inline__ void __DEFAULT_FN_ATTRS
9398 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9400 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9404 static __inline__ void __DEFAULT_FN_ATTRS
9405 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9407 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9411 static __inline__ void __DEFAULT_FN_ATTRS
9412 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9414 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
/* Convert the low double of B to a float with explicit rounding control
   R; upper result elements come from A.  Unmasked, merge-masked, and
   zero-masked forms. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
9436 static __inline__ __m128 __DEFAULT_FN_ATTRS
9437 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9439 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9442 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9445 static __inline__ __m128 __DEFAULT_FN_ATTRS
9446 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9448 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9450 (__v4sf)_mm_setzero_ps(),
9451 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
/* Intel-guide alias names: the *_i32 forms map to the *_si32 intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
/* 64-bit alias names: the *_i64 forms map to the *_si64 intrinsics. */
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
9466 #define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
9467 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9470 #define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
9471 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
/* Convert a signed 32-bit integer B to float in the low element of A,
   with explicit rounding control R; the i32 form is an alias spelling. */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9482 #define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
9483 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9486 #define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
9487 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
/* Convert the low float of B to a double with explicit rounding control
   R; the upper result element comes from A.  Unmasked, merge-masked,
   and zero-masked forms. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
9509 static __inline__ __m128d __DEFAULT_FN_ATTRS
9510 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9512 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9515 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9518 static __inline__ __m128d __DEFAULT_FN_ATTRS
9519 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9521 return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9523 (__v2df)_mm_setzero_pd(),
9524 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9527 static __inline__ __m128d __DEFAULT_FN_ATTRS
9528 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9530 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9534 #define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
9535 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9536 (unsigned long long)(B), (int)(R)); })
9538 static __inline__ __m128d __DEFAULT_FN_ATTRS
9539 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9541 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9542 _MM_FROUND_CUR_DIRECTION);
9546 #define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
9547 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9550 static __inline__ __m128 __DEFAULT_FN_ATTRS
9551 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9553 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9554 _MM_FROUND_CUR_DIRECTION);
9558 #define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
9559 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9560 (unsigned long long)(B), (int)(R)); })
9562 static __inline__ __m128 __DEFAULT_FN_ATTRS
9563 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9565 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9566 _MM_FROUND_CUR_DIRECTION);
9570 static __inline__ __m512i __DEFAULT_FN_ATTRS
9571 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9573 return (__m512i) __builtin_ia32_selectd_512(__M,
9574 (__v16si) _mm512_set1_epi32(__A),
9579 static __inline__ __m512i __DEFAULT_FN_ATTRS
9580 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9582 return (__m512i) __builtin_ia32_selectq_512(__M,
9583 (__v8di) _mm512_set1_epi64(__A),
9588 static __inline __m512i __DEFAULT_FN_ATTRS
9589 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9590 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9591 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9592 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9593 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9594 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9595 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9596 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9597 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9598 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9599 char __e4, char __e3, char __e2, char __e1, char __e0) {
9601 return __extension__ (__m512i)(__v64qi)
9602 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9603 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9604 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9605 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9606 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9607 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9608 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9609 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9612 static __inline __m512i __DEFAULT_FN_ATTRS
9613 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9614 short __e27, short __e26, short __e25, short __e24, short __e23,
9615 short __e22, short __e21, short __e20, short __e19, short __e18,
9616 short __e17, short __e16, short __e15, short __e14, short __e13,
9617 short __e12, short __e11, short __e10, short __e9, short __e8,
9618 short __e7, short __e6, short __e5, short __e4, short __e3,
9619 short __e2, short __e1, short __e0) {
9620 return __extension__ (__m512i)(__v32hi)
9621 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9622 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9623 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9624 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9627 static __inline __m512i __DEFAULT_FN_ATTRS
9628 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
9629 int __E, int __F, int __G, int __H,
9630 int __I, int __J, int __K, int __L,
9631 int __M, int __N, int __O, int __P)
9633 return __extension__ (__m512i)(__v16si)
9634 { __P, __O, __N, __M, __L, __K, __J, __I,
9635 __H, __G, __F, __E, __D, __C, __B, __A };
9638 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9639 e8,e9,e10,e11,e12,e13,e14,e15) \
9640 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9641 (e5),(e4),(e3),(e2),(e1),(e0))
9643 static __inline__ __m512i __DEFAULT_FN_ATTRS
9644 _mm512_set_epi64 (long long __A, long long __B, long long __C,
9645 long long __D, long long __E, long long __F,
9646 long long __G, long long __H)
9648 return __extension__ (__m512i) (__v8di)
9649 { __H, __G, __F, __E, __D, __C, __B, __A };
9652 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
9653 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9655 static __inline__ __m512d __DEFAULT_FN_ATTRS
9656 _mm512_set_pd (double __A, double __B, double __C, double __D,
9657 double __E, double __F, double __G, double __H)
9659 return __extension__ (__m512d)
9660 { __H, __G, __F, __E, __D, __C, __B, __A };
9663 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
9664 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9666 static __inline__ __m512 __DEFAULT_FN_ATTRS
9667 _mm512_set_ps (float __A, float __B, float __C, float __D,
9668 float __E, float __F, float __G, float __H,
9669 float __I, float __J, float __K, float __L,
9670 float __M, float __N, float __O, float __P)
9672 return __extension__ (__m512)
9673 { __P, __O, __N, __M, __L, __K, __J, __I,
9674 __H, __G, __F, __E, __D, __C, __B, __A };
9677 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9678 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9679 (e4),(e3),(e2),(e1),(e0))
9681 static __inline__ __m512 __DEFAULT_FN_ATTRS
9682 _mm512_abs_ps(__m512 __A)
9684 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9687 static __inline__ __m512 __DEFAULT_FN_ATTRS
9688 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9690 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9693 static __inline__ __m512d __DEFAULT_FN_ATTRS
9694 _mm512_abs_pd(__m512d __A)
9696 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9699 static __inline__ __m512d __DEFAULT_FN_ATTRS
9700 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9702 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation of the operator
// is independent of the order of the input elements of V.

// Used bisection method. At each step, we partition the vector with previous
// step in half, and the operation is performed on its two halves.
// This takes log2(n) steps where n is the number of elements in the vector.

// Vec512 - Vector with size 512.
// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for double.
// NOTE: the 'return' inside the GNU statement expression returns from the
// enclosing intrinsic function, not from the expression itself.
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)                 \
  __extension__({                                                              \
    __m256##T1 Vec256 = __builtin_shufflevector(                               \
                            (__v8d##T2)Vec512,                                 \
                            (__v8d##T2)Vec512,                                 \
                            0, 1, 2, 3)                                        \
                        Operator                                               \
                        __builtin_shufflevector(                               \
                            (__v8d##T2)Vec512,                                 \
                            (__v8d##T2)Vec512,                                 \
                            4, 5, 6, 7);                                       \
    __m128##T1 Vec128 = __builtin_shufflevector(                               \
                            (__v4d##T2)Vec256,                                 \
                            (__v4d##T2)Vec256,                                 \
                            0, 1)                                              \
                        Operator                                               \
                        __builtin_shufflevector(                               \
                            (__v4d##T2)Vec256,                                 \
                            (__v4d##T2)Vec256,                                 \
                            2, 3);                                             \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                        \
                                     (__v2d##T2)Vec128, 0, -1)                 \
             Operator                                                          \
             __builtin_shufflevector((__v2d##T2)Vec128,                        \
                                     (__v2d##T2)Vec128, 1, -1);                \
    return Vec128[0];                                                          \
  })
9747 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9748 _mm512_reduce_operator_64bit(__W, +, i, i);
9751 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9752 _mm512_reduce_operator_64bit(__W, *, i, i);
9755 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9756 _mm512_reduce_operator_64bit(__W, &, i, i);
9759 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9760 _mm512_reduce_operator_64bit(__W, |, i, i);
9763 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9764 _mm512_reduce_operator_64bit(__W, +, f, d);
9767 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9768 _mm512_reduce_operator_64bit(__W, *, f, d);
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be 'pd' for packed double or 'q' for q-word.
// Masked-off lanes are replaced by the operator's identity element before
// the unmasked reduction runs.
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)Mask,                                               \
                 (__v8d##T2)Vec512,                                            \
                 (__v8d##T2)Vec512Neutral);                                    \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9790 static __inline__ long long __DEFAULT_FN_ATTRS
9791 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9792 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
9795 static __inline__ long long __DEFAULT_FN_ATTRS
9796 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9797 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
9800 static __inline__ long long __DEFAULT_FN_ATTRS
9801 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9802 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
9806 static __inline__ long long __DEFAULT_FN_ATTRS
9807 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9808 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
9812 static __inline__ double __DEFAULT_FN_ATTRS
9813 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9814 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
9818 static __inline__ double __DEFAULT_FN_ATTRS
9819 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9820 _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
// Vec512 - Vector with size 512.
// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' for packed single.
// NOTE: the 'return' inside the GNU statement expression returns from the
// enclosing intrinsic function, not from the expression itself.
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
  __m256##T1 Vec256 =                                                          \
          (__m256##T1)(__builtin_shufflevector(                                \
                           (__v16s##T2)Vec512,                                 \
                           (__v16s##T2)Vec512,                                 \
                           0, 1, 2, 3, 4, 5, 6, 7)                             \
                       Operator                                                \
                       __builtin_shufflevector(                                \
                           (__v16s##T2)Vec512,                                 \
                           (__v16s##T2)Vec512,                                 \
                           8, 9, 10, 11, 12, 13, 14, 15));                     \
  __m128##T1 Vec128 =                                                          \
          (__m128##T1)(__builtin_shufflevector(                                \
                           (__v8s##T2)Vec256,                                  \
                           (__v8s##T2)Vec256,                                  \
                           0, 1, 2, 3)                                         \
                       Operator                                                \
                       __builtin_shufflevector(                                \
                           (__v8s##T2)Vec256,                                  \
                           (__v8s##T2)Vec256,                                  \
                           4, 5, 6, 7));                                       \
  Vec128 = (__m128##T1)(__builtin_shufflevector(                               \
                            (__v4s##T2)Vec128,                                 \
                            (__v4s##T2)Vec128,                                 \
                            0, 1, -1, -1)                                      \
                        Operator                                               \
                        __builtin_shufflevector(                               \
                            (__v4s##T2)Vec128,                                 \
                            (__v4s##T2)Vec128,                                 \
                            2, 3, -1, -1));                                    \
  Vec128 = (__m128##T1)(__builtin_shufflevector(                               \
                            (__v4s##T2)Vec128,                                 \
                            (__v4s##T2)Vec128,                                 \
                            0, -1, -1, -1)                                     \
                        Operator                                               \
                        __builtin_shufflevector(                               \
                            (__v4s##T2)Vec128,                                 \
                            (__v4s##T2)Vec128,                                 \
                            1, -1, -1, -1));                                   \
  return Vec128[0];                                                            \
  })
9871 static __inline__ int __DEFAULT_FN_ATTRS
9872 _mm512_reduce_add_epi32(__m512i __W) {
9873 _mm512_reduce_operator_32bit(__W, +, i, i);
9876 static __inline__ int __DEFAULT_FN_ATTRS
9877 _mm512_reduce_mul_epi32(__m512i __W) {
9878 _mm512_reduce_operator_32bit(__W, *, i, i);
9881 static __inline__ int __DEFAULT_FN_ATTRS
9882 _mm512_reduce_and_epi32(__m512i __W) {
9883 _mm512_reduce_operator_32bit(__W, &, i, i);
9886 static __inline__ int __DEFAULT_FN_ATTRS
9887 _mm512_reduce_or_epi32(__m512i __W) {
9888 _mm512_reduce_operator_32bit(__W, |, i, i);
9891 static __inline__ float __DEFAULT_FN_ATTRS
9892 _mm512_reduce_add_ps(__m512 __W) {
9893 _mm512_reduce_operator_32bit(__W, +, f, );
9896 static __inline__ float __DEFAULT_FN_ATTRS
9897 _mm512_reduce_mul_ps(__m512 __W) {
9898 _mm512_reduce_operator_32bit(__W, *, f, );
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' for packed single.
// T3 - Can be 'ps' for packed single or 'd' for d-word.
// Masked-off lanes are replaced by the operator's identity element before
// the unmasked reduction runs.
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
9920 static __inline__ int __DEFAULT_FN_ATTRS
9921 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9922 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
9925 static __inline__ int __DEFAULT_FN_ATTRS
9926 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9927 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
9930 static __inline__ int __DEFAULT_FN_ATTRS
9931 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9932 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
9936 static __inline__ int __DEFAULT_FN_ATTRS
9937 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9938 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
9941 static __inline__ float __DEFAULT_FN_ATTRS
9942 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9943 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
9946 static __inline__ float __DEFAULT_FN_ATTRS
9947 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9948 _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
// Used bisection method. At each step, we partition the vector with previous
// step in half, and the operation is performed on its two halves.
// This takes log2(n) steps where n is the number of elements in the vector.
// This macro uses only intrinsics from the AVX512F feature.

// Vec512 - Vector with size of 512.
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               0, 1, 2, 3, -1, -1, -1, -1),                    \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               4, 5, 6, 7, -1, -1, -1, -1));                   \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               0, 1, -1, -1, -1, -1, -1, -1),                  \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               2, 3, -1, -1, -1, -1, -1, -1));                 \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               0, -1, -1, -1, -1, -1, -1, -1),                 \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v8d##T2)Vec512,                              \
                               (__v8d##T2)Vec512,                              \
                               1, -1, -1, -1, -1, -1, -1, -1));                \
  return Vec512[0];                                                            \
  })
9995 static __inline__ long long __DEFAULT_FN_ATTRS
9996 _mm512_reduce_max_epi64(__m512i __V) {
9997 _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
10000 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10001 _mm512_reduce_max_epu64(__m512i __V) {
10002 _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
10005 static __inline__ double __DEFAULT_FN_ATTRS
10006 _mm512_reduce_max_pd(__m512d __V) {
10007 _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
10010 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
10012 _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
10015 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10016 _mm512_reduce_min_epu64(__m512i __V) {
10017 _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
10020 static __inline__ double __DEFAULT_FN_ATTRS
10021 _mm512_reduce_min_pd(__m512d __V) {
10022 _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
// Vec512 - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x8000000000000000}
//                   {max_epu,0x0000000000000000}
//                   {max_pd, 0xFFF0000000000000}
//                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                   {min_pd, 0x7FF0000000000000}
//
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
// T3 - Can get 'q' q word and 'pd' for packed double.
//        [__builtin_ia32_select{q|pd}_512]
// Mask - Intrinsic Mask
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10052 static __inline__ long long __DEFAULT_FN_ATTRS
10053 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10054 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10055 max_epi64, i, i, q, __M);
10058 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10059 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10060 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10061 max_epu64, i, i, q, __M);
10064 static __inline__ double __DEFAULT_FN_ATTRS
10065 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
10066 _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
10067 max_pd, d, f, pd, __M);
10070 static __inline__ long long __DEFAULT_FN_ATTRS
10071 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10072 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10073 min_epi64, i, i, q, __M);
10076 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10077 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10078 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10079 min_epu64, i, i, q, __M);
10082 static __inline__ double __DEFAULT_FN_ATTRS
10083 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
10084 _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10085 min_pd, d, f, pd, __M);
// Vec512 - Vector with size 512.
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and ' ' .[__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               0, 1, 2, 3, 4, 5, 6, 7,                         \
                               -1, -1, -1, -1, -1, -1, -1, -1),                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               8, 9, 10, 11, 12, 13, 14, 15,                   \
                               -1, -1, -1, -1, -1, -1, -1, -1));               \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               0, 1, 2, 3, -1, -1, -1, -1,                     \
                               -1, -1, -1, -1, -1, -1, -1, -1),                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               4, 5, 6, 7, -1, -1, -1, -1,                     \
                               -1, -1, -1, -1, -1, -1, -1, -1));               \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               0, 1, -1, -1, -1, -1, -1, -1,                   \
                               -1, -1, -1, -1, -1, -1, -1, -1),                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               2, 3, -1, -1, -1, -1, -1, -1,                   \
                               -1, -1, -1, -1, -1, -1, -1, -1));               \
  Vec512 = _mm512_##IntrinName(                                                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               0, -1, -1, -1, -1, -1, -1, -1,                  \
                               -1, -1, -1, -1, -1, -1, -1, -1),                \
               (__m512##T1)__builtin_shufflevector(                            \
                               (__v16s##T2)Vec512,                             \
                               (__v16s##T2)Vec512,                             \
                               1, -1, -1, -1, -1, -1, -1, -1,                  \
                               -1, -1, -1, -1, -1, -1, -1, -1));               \
  return Vec512[0];                                                            \
  })
10142 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10143 _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
10146 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10147 _mm512_reduce_max_epu32(__m512i a) {
10148 _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10151 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10152 _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10155 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10156 _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10159 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10160 _mm512_reduce_min_epu32(__m512i a) {
10161 _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10164 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10165 _mm512_reduce_maxMin_32bit(a, min_ps, , f);
// Vec512 - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x80000000}
//                   {max_epu,0x00000000}
//                   {max_ps, 0xFF800000}
//                   {min_epi,0x7FFFFFFF}
//                   {min_epu,0xFFFFFFFF}
//                   {min_ps, 0x7F800000}
//
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and ' ' .[__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' d word and 'ps' for packed single.
//        [__builtin_ia32_select{d|ps}_512]
// Mask - Intrinsic Mask
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
  })
10195 static __inline__ int __DEFAULT_FN_ATTRS
10196 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10197 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10201 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10202 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10203 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10207 static __inline__ float __DEFAULT_FN_ATTRS
10208 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
10209 _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
10213 static __inline__ int __DEFAULT_FN_ATTRS
10214 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10215 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10219 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10220 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10221 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10225 static __inline__ float __DEFAULT_FN_ATTRS
10226 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
10227 _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10231 #undef __DEFAULT_FN_ATTRS
10233 #endif // __AVX512FINTRIN_H