/* contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h (FreeBSD, MFV r331712) */
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29
/* Internal 512-bit (64-byte) signed vector types.  The name encodes the
 * element count and width: qi = 8-bit, hi = 16-bit, si = 32-bit,
 * di = 64-bit, sf = float, df = double. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* The public 512-bit vector types exposed to users of <immintrin.h>. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per vector lane (8 lanes of 64-bit,
 * 16 lanes of 32-bit). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding mode macros.  The first four select a static rounding mode;
 * _MM_FROUND_CUR_DIRECTION keeps whatever the current FP environment
 * (MXCSR) specifies.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00  /* round to nearest */
#define _MM_FROUND_TO_NEG_INF       0x01  /* round toward -inf */
#define _MM_FROUND_TO_POS_INF       0x02  /* round toward +inf */
#define _MM_FROUND_TO_ZERO          0x03  /* truncate */
#define _MM_FROUND_CUR_DIRECTION    0x04  /* use current rounding mode */
56
57 /* Constants for integer comparison predicates */
/* Constants for integer comparison predicates.  The enumerator value is
 * the comparison-predicate immediate used by the masked integer compare
 * intrinsics; encoding 3 is reserved/unused. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* Encoding 3 is not used */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
69
/* Shuffle-selector constants.  Each of the four letters picks one of the
 * four source elements within a group (A = element 0 ... D = element 3);
 * the enumerator value is the corresponding 8-bit immediate, two bits per
 * letter, with the first letter occupying the high bit pair.  E.g.
 * _MM_PERM_ABCD == 0x1B encodes selectors 0,1,2,3. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Normalization interval selectors for the get-mantissa (getmant)
 * family of intrinsics: the interval the extracted mantissa is
 * normalized into. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign-control selectors for the get-mantissa intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Default attributes for the functions in this file: always inlined,
 * omitted from debug info, and compiled for the AVX512F target so each
 * intrinsic works without building the whole translation unit with
 * -mavx512f. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178 /* Create vectors with repeated elements */
179
/* Return a 512-bit integer vector with all bits cleared. */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* _mm512_setzero_epi32 is an alias: the element width is irrelevant
 * when every bit is zero. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
187
/* Return a 512-bit vector of [8 x double] with unspecified contents.
 * __builtin_ia32_undef512 tells the compiler the value is deliberately
 * undefined, so no zeroing code is emitted. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/* Return a 512-bit vector of [16 x float] with unspecified contents. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Same as _mm512_undefined(); kept for naming symmetry with the other
 * element types. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Return a 512-bit integer vector with unspecified contents. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
211
/* Broadcast the low 32-bit element of __A to all 16 lanes of the result.
 * Every shuffle index is 0, so only element 0 of __A is read; the second
 * shufflevector operand is a don't-care placeholder. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
219
220 static __inline__ __m512i __DEFAULT_FN_ATTRS
221 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222 {
223   return (__m512i)__builtin_ia32_selectd_512(__M,
224                                              (__v16si) _mm512_broadcastd_epi32(__A),
225                                              (__v16si) __O);
226 }
227
228 static __inline__ __m512i __DEFAULT_FN_ATTRS
229 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230 {
231   return (__m512i)__builtin_ia32_selectd_512(__M,
232                                              (__v16si) _mm512_broadcastd_epi32(__A),
233                                              (__v16si) _mm512_setzero_si512());
234 }
235
/* Broadcast the low 64-bit element of __A to all 8 lanes of the result.
 * Every shuffle index is 0, so only element 0 of __A is read; the second
 * shufflevector operand is a don't-care placeholder. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A,
                                          (__v2di) _mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
243
244 static __inline__ __m512i __DEFAULT_FN_ATTRS
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246 {
247   return (__m512i)__builtin_ia32_selectq_512(__M,
248                                              (__v8di) _mm512_broadcastq_epi64(__A),
249                                              (__v8di) __O);
250
251 }
252
253 static __inline__ __m512i __DEFAULT_FN_ATTRS
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255 {
256   return (__m512i)__builtin_ia32_selectq_512(__M,
257                                              (__v8di) _mm512_broadcastq_epi64(__A),
258                                              (__v8di) _mm512_setzero_si512());
259 }
260
261
/* Return a 512-bit vector of [16 x float] with all elements 0.0f. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Unsuffixed alias for the float variant. */
#define _mm512_setzero _mm512_setzero_ps

/* Return a 512-bit vector of [8 x double] with all elements 0.0. */
static  __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Splat the float __w into all 16 lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Splat the double __w into all 8 lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
289
/* Splat the 8-bit value __w into all 64 byte lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)
{
  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Splat the 16-bit value __w into all 32 word lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)
{
  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Splat the 32-bit value __s into all 16 dword lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}
318
319 static __inline __m512i __DEFAULT_FN_ATTRS
320 _mm512_maskz_set1_epi32(__mmask16 __M, int __A) 
321 {
322   return (__m512i)__builtin_ia32_selectd_512(__M, 
323                                              (__v16si)_mm512_set1_epi32(__A),
324                                              (__v16si)_mm512_setzero_si512());
325 }
326
/* Splat the 64-bit value __d into all 8 qword lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
332
333 #ifdef __x86_64__
334 static __inline __m512i __DEFAULT_FN_ATTRS
335 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
336 {
337   return (__m512i)__builtin_ia32_selectq_512(__M,
338                                              (__v8di)_mm512_set1_epi64(__A),
339                                              (__v8di)_mm512_setzero_si512());
340 }
341 #endif
342
/* Broadcast the low float of __A to all 16 lanes of the result.  Every
 * shuffle index is 0; the second shufflevector operand is a don't-care
 * placeholder. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
350
/* Repeat the four 32-bit values across the vector.  Per the "set"
 * convention the arguments are given from the highest element down, so
 * __D lands in lane 0 and the pattern D,C,B,A repeats four times. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return  (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the four 64-bit values across the vector (pattern D,C,B,A
 * twice). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return  (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the four doubles across the vector (pattern D,C,B,A twice). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return  (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat the four floats across the vector (pattern D,C,B,A four
 * times). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return  (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* "setr4" variants take the elements in memory (reversed) order and
 * simply forward to the set4 forms with the arguments flipped. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
393
/* Broadcast the low double of __A to all 8 lanes of the result.  Every
 * shuffle index is 0; the second shufflevector operand is a don't-care
 * placeholder. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A,
                                          (__v2df) _mm_undefined_pd(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
401
402 /* Cast between vector types */
403
/* Widen a 256-bit double vector to 512 bits; a shuffle index of -1 marks
 * the upper elements as undefined (no zeroing is emitted). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Widen a 256-bit float vector to 512 bits; upper half undefined. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                          -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Return the low 128 bits of a 512-bit double vector. */
static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/* Return the low 256 bits of a 512-bit double vector. */
static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/* Return the low 128 bits of a 512-bit float vector. */
static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Return the low 256 bits of a 512-bit float vector. */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Bit-preserving reinterpretations between the 512-bit vector types;
 * these compile to nothing. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/* Widen a 128-bit double vector to 512 bits; upper 384 bits undefined. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/* Widen a 128-bit float vector to 512 bits; upper 384 bits undefined. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512 (__m128 __A)
{
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 128-bit integer vector to 512 bits; upper 384 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512 (__m128i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Widen a 256-bit integer vector to 512 bits; upper 256 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/* Return the low 128 bits of a 512-bit integer vector. */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/* Return the low 256 bits of a 512-bit integer vector. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
512
/* Truncate the int __a to its low 16 bits and use them as a 16-lane
 * write-mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

/* Zero-extend a 16-bit mask to an int. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}
524
525 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
526 ///    128-bit floating-point vector of [2 x double]. The lower 128 bits
527 ///    contain the value of the source vector. The upper 384 bits are set
528 ///    to zero.
529 ///
530 /// \headerfile <x86intrin.h>
531 ///
532 /// This intrinsic has no corresponding instruction.
533 ///
534 /// \param __a
535 ///    A 128-bit vector of [2 x double].
536 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537 ///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices >= 2 select elements of _mm_setzero_pd(), zeroing bits 511:128. */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}
543
544 /// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
545 ///    256-bit floating-point vector of [4 x double]. The lower 256 bits
546 ///    contain the value of the source vector. The upper 256 bits are set
547 ///    to zero.
548 ///
549 /// \headerfile <x86intrin.h>
550 ///
551 /// This intrinsic has no corresponding instruction.
552 ///
553 /// \param __a
554 ///    A 256-bit vector of [4 x double].
555 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
556 ///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 4-7 select elements of _mm256_setzero_pd(), zeroing bits 511:256. */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}
562
563 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
564 ///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
565 ///    the value of the source vector. The upper 384 bits are set to zero.
566 ///
567 /// \headerfile <x86intrin.h>
568 ///
569 /// This intrinsic has no corresponding instruction.
570 ///
571 /// \param __a
572 ///    A 128-bit vector of [4 x float].
573 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
574 ///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices >= 4 select elements of _mm_setzero_ps(), zeroing bits 511:128. */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}
580
581 /// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
582 ///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
583 ///    the value of the source vector. The upper 256 bits are set to zero.
584 ///
585 /// \headerfile <x86intrin.h>
586 ///
587 /// This intrinsic has no corresponding instruction.
588 ///
589 /// \param __a
590 ///    A 256-bit vector of [8 x float].
591 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
592 ///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 8-15 select elements of _mm256_setzero_ps(), zeroing bits 511:256. */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
598
599 /// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
600 ///    The lower 128 bits contain the value of the source vector. The upper
601 ///    384 bits are set to zero.
602 ///
603 /// \headerfile <x86intrin.h>
604 ///
605 /// This intrinsic has no corresponding instruction.
606 ///
607 /// \param __a
608 ///    A 128-bit integer vector.
609 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
610 ///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices >= 2 select elements of _mm_setzero_si128(), zeroing bits 511:128. */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}
616
617 /// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
618 ///    The lower 256 bits contain the value of the source vector. The upper
619 ///    256 bits are set to zero.
620 ///
621 /// \headerfile <x86intrin.h>
622 ///
623 /// This intrinsic has no corresponding instruction.
624 ///
625 /// \param __a
626 ///    A 256-bit integer vector.
627 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
628 ///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 4-7 select elements of _mm256_setzero_si256(), zeroing bits 511:256. */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
634
635 /* Bitwise operators */
636 static __inline__ __m512i __DEFAULT_FN_ATTRS
637 _mm512_and_epi32(__m512i __a, __m512i __b)
638 {
639   return (__m512i)((__v16su)__a & (__v16su)__b);
640 }
641
642 static __inline__ __m512i __DEFAULT_FN_ATTRS
643 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
644 {
645   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
646                 (__v16si) _mm512_and_epi32(__a, __b),
647                 (__v16si) __src);
648 }
649
650 static __inline__ __m512i __DEFAULT_FN_ATTRS
651 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
652 {
653   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
654                                          __k, __a, __b);
655 }
656
/* Lanewise bitwise AND of two 512-bit vectors (64-bit lane flavour). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Bitwise AND; 64-bit lanes whose bit in __k is clear take the
 * corresponding element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}

/* Bitwise AND with zero-masking (merge against an all-zero source). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}
677
/* Bitwise AND-NOT: the complement is applied to the FIRST operand,
 * i.e. the result is (~__A) & __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* (~__A) & __B; the 32-bit element width only matters for masking. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}

/* (~__A) & __B; masked-off 32-bit lanes take the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

/* (~__A) & __B; masked-off 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/* (~__A) & __B; identical bit result to _mm512_andnot_si512. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* (~__A) & __B; masked-off 64-bit lanes take the element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

/* (~__A) & __B; masked-off 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}
725
/* Lanewise bitwise OR of two 512-bit vectors (32-bit lane flavour). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Bitwise OR; masked-off 32-bit lanes take the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/* Bitwise OR; masked-off 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Lanewise bitwise OR of two 512-bit vectors (64-bit lane flavour). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Bitwise OR; masked-off 64-bit lanes take the element of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Bitwise OR; masked-off 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
765
766 static __inline__ __m512i __DEFAULT_FN_ATTRS
767 _mm512_xor_epi32(__m512i __a, __m512i __b)
768 {
769   return (__m512i)((__v16su)__a ^ (__v16su)__b);
770 }
771
772 static __inline__ __m512i __DEFAULT_FN_ATTRS
773 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
774 {
775   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
776                                             (__v16si)_mm512_xor_epi32(__a, __b),
777                                             (__v16si)__src);
778 }
779
780 static __inline__ __m512i __DEFAULT_FN_ATTRS
781 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
782 {
783   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
784 }
785
786 static __inline__ __m512i __DEFAULT_FN_ATTRS
787 _mm512_xor_epi64(__m512i __a, __m512i __b)
788 {
789   return (__m512i)((__v8du)__a ^ (__v8du)__b);
790 }
791
/* Merge-masked 64-bit XOR: lane i is (__a ^ __b)[i] when bit i of __k is
   set, otherwise __src[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}
799
/* Zero-masked 64-bit XOR: merge-masked form with an all-zero source. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
805
806 static __inline__ __m512i __DEFAULT_FN_ATTRS
807 _mm512_and_si512(__m512i __a, __m512i __b)
808 {
809   return (__m512i)((__v8du)__a & (__v8du)__b);
810 }
811
812 static __inline__ __m512i __DEFAULT_FN_ATTRS
813 _mm512_or_si512(__m512i __a, __m512i __b)
814 {
815   return (__m512i)((__v8du)__a | (__v8du)__b);
816 }
817
818 static __inline__ __m512i __DEFAULT_FN_ATTRS
819 _mm512_xor_si512(__m512i __a, __m512i __b)
820 {
821   return (__m512i)((__v8du)__a ^ (__v8du)__b);
822 }
823
824 /* Arithmetic */
825
/* Packed double-precision add (8 x f64), expressed as a native vector
   operation so the compiler can fold/optimize it freely. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}
831
/* Packed single-precision add (16 x f32). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}
837
/* Packed double-precision multiply (8 x f64). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}
843
/* Packed single-precision multiply (16 x f32). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}
849
/* Packed double-precision subtract: __a - __b (8 x f64). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}
855
/* Packed single-precision subtract: __a - __b (16 x f32). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
861
/* Packed 64-bit integer add; unsigned lane type gives well-defined
   wraparound on overflow (signed overflow would be UB in C). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}
867
/* Merge-masked 64-bit add: lane i is (__A + __B)[i] when bit i of __U
   is set, otherwise __W[i]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}
875
/* Zero-masked 64-bit add: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
883
/* Packed 64-bit integer subtract (__A - __B), wrapping via unsigned
   lane arithmetic. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}
889
/* Merge-masked 64-bit subtract: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}
897
/* Zero-masked 64-bit subtract: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
905
/* Packed 32-bit integer add, wrapping via unsigned lane arithmetic. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}
911
/* Merge-masked 32-bit add: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}
919
/* Zero-masked 32-bit add: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
927
/* Packed 32-bit integer subtract (__A - __B), wrapping. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}
933
/* Merge-masked 32-bit subtract: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}
941
/* Zero-masked 32-bit subtract: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
949
/* Rounding-control variants of packed double max.  These must be macros
   (not functions) because R is encoded as an immediate in the builtin.
   Masked form merges from W; maskz form zeroes; unmasked form uses an
   undefined passthrough since mask -1 selects every lane. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
967
/* Packed double max (current rounding direction).  Mask -1 enables all
   lanes, so the setzero passthrough argument is never selected. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}
978
/* Merge-masked packed double max: clear mask bits keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
988
/* Zero-masked packed double max: clear mask bits yield 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
999
/* Rounding-control variants of packed float max; macro forms so the
   rounding immediate R reaches the builtin as a constant. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1017
/* Packed float max (current rounding direction); mask -1 = all lanes,
   so the zero passthrough is never used. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}
1028
/* Merge-masked packed float max: clear mask bits keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
1038
/* Zero-masked packed float max: clear mask bits yield 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
1049
/* Scalar (low-element) single-precision max with merge masking: the low
   lane takes the max when bit 0 of __U is set, otherwise __W's low lane
   (upper-lane handling is defined by the maxss builtin). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1058
/* Scalar single-precision max with zero masking: a clear bit 0 of __U
   zeroes the low lane (zero vector used as the merge source). */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1067
/* Rounding-control variants of scalar single-precision max; macros so
   the rounding immediate R stays a compile-time constant. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1085
/* Scalar (low-element) double-precision max with merge masking from
   __W's low lane when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1094
/* Scalar double-precision max with zero masking of the low lane. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1103
/* Rounding-control variants of scalar double-precision max. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1121
/* Packed signed 32-bit max; mask -1 = all lanes, zero passthrough is an
   ignored placeholder. */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}
1132
/* Merge-masked signed 32-bit max: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}
1140
/* Zero-masked signed 32-bit max: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}
1150
/* Packed unsigned 32-bit max (pmaxud); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}
1160
/* Merge-masked unsigned 32-bit max: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}
1168
/* Zero-masked unsigned 32-bit max: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}
1178
/* Packed signed 64-bit max (pmaxsq); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
1188
/* Merge-masked signed 64-bit max: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}
1196
/* Zero-masked signed 64-bit max: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
1206
/* Packed unsigned 64-bit max (pmaxuq); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
1216
/* Merge-masked unsigned 64-bit max: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}
1224
/* Zero-masked unsigned 64-bit max: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
1234
/* Rounding-control variants of packed double min; macro forms keep the
   rounding immediate R a compile-time constant. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
1252
/* Packed double min (current rounding direction); mask -1 = all lanes,
   so the zero passthrough is never selected. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
             (__v8df) __B,
             (__v8df)
             _mm512_setzero_pd (),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}
1263
/* Merge-masked packed double min: clear mask bits keep __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
1273
/* Rounding-control variants of packed float min. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })

#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1291
/* Zero-masked packed double min: clear mask bits yield 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
                  (__v8df) __B,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
1302
/* Packed float min (current rounding direction); all lanes active. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
            (__v16sf) __B,
            (__v16sf)
            _mm512_setzero_ps (),
            (__mmask16) -1,
            _MM_FROUND_CUR_DIRECTION);
}
1313
/* Merge-masked packed float min: clear mask bits keep __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
1323
/* Zero-masked packed float min: clear mask bits yield 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
                 (__v16sf) __B,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
1334
/* Scalar (low-element) single-precision min with merge masking from
   __W's low lane when bit 0 of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1343
/* Scalar single-precision min with zero masking of the low lane. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1352
/* Rounding-control variants of scalar single-precision min. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1370
/* Scalar (low-element) double-precision min with merge masking from
   __W's low lane when bit 0 of __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1379
/* Scalar double-precision min with zero masking of the low lane. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
1388
/* Rounding-control variants of scalar double-precision min. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1406
/* Packed signed 32-bit min (pminsd); all lanes active. */
static __inline __m512i
__DEFAULT_FN_ATTRS
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}
1417
/* Merge-masked signed 32-bit min: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}
1425
/* Zero-masked signed 32-bit min: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}
1435
/* Packed unsigned 32-bit min (pminud); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}
1445
/* Merge-masked unsigned 32-bit min: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si) __W, __M);
}
1453
/* Zero-masked unsigned 32-bit min: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
                   (__v16si) __B,
                   (__v16si)
                   _mm512_setzero_si512 (),
                   __M);
}
1463
/* Packed signed 64-bit min (pminsq); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
1473
/* Merge-masked signed 64-bit min: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}
1481
/* Zero-masked signed 64-bit min: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
1491
/* Packed unsigned 64-bit min (pminuq); all lanes active. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}
1501
/* Merge-masked unsigned 64-bit min: clear mask bits keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di) __W, __M);
}
1509
/* Zero-masked unsigned 64-bit min: clear mask bits yield 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
                   (__v8di) __B,
                   (__v8di)
                   _mm512_setzero_si512 (),
                   __M);
}
1519
/* Signed 32x32 -> 64-bit widening multiply (VPMULDQ semantics: one
   32-bit source element per 64-bit result lane). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
1525
/* Merge-masked widening signed multiply: clear bits of __M (per 64-bit
   result lane) keep __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}
1533
/* Zero-masked widening signed multiply: clear mask bits yield 0. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
1541
/* Unsigned 32x32 -> 64-bit widening multiply (VPMULUDQ semantics). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
1547
/* Merge-masked widening unsigned multiply: clear mask bits keep __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}
1555
/* Zero-masked widening unsigned multiply: clear mask bits yield 0. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
1563
/* Packed 32-bit multiply keeping the low 32 bits of each product;
   unsigned lane type makes the wraparound well-defined in C. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}
1569
/* Zero-masked low-half 32-bit multiply: clear mask bits yield 0. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
1577
/* Merge-masked low-half 32-bit multiply: clear mask bits keep __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}
1585
/* Square root of packed doubles with an explicit rounding mode R;
   result lanes whose bit in U is clear are copied from W. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

/* As above, but lanes masked off by U are zeroed. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

/* Unmasked form: mask of all ones, so every lane is written; the
   pass-through operand is therefore unobservable (undefined vector). */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
1600
/* Square root of eight packed doubles, using the current MXCSR
   rounding direction. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_sqrt_pd(__m512d __a)
{
  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
                                                (__v8df) _mm512_setzero_pd (),
                                                (__mmask8) -1,
                                                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt: lanes whose bit in __U is clear become 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                   (__v8df)
                   _mm512_setzero_pd (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
1628
/* Square root of packed floats with an explicit rounding mode R;
   result lanes whose bit in U is clear are copied from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

/* As above, but lanes masked off by U are zeroed. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

/* Unmasked form: mask of all ones; pass-through is unobservable. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
1643
/* Square root of sixteen packed floats, using the current MXCSR
   rounding direction. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_sqrt_ps(__m512 __a)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt: lanes whose bit in __U is clear come from __W. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) __W,
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt: lanes whose bit in __U is clear become 0.0f. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
                                               (__v16sf) _mm512_setzero_ps (),
                                               (__mmask16) __U,
                                               _MM_FROUND_CUR_DIRECTION);
}
1670
/* Approximate reciprocal square root (VRSQRT14PD) of packed doubles.
   Per Intel's definition the "14" denotes a 2^-14 relative-error
   approximation. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rsqrt14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                 (__v8df)
                 _mm512_setzero_pd (),
                 (__mmask8) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

/* Approximate reciprocal square root (VRSQRT14PS) of packed floats. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rsqrt14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}
1721
/* Scalar approximate reciprocal sqrt: computes rsqrt14 of the low
   float of __B; upper elements follow the usual scalar-intrinsic
   convention (taken from __A — NOTE(review): convention per Intel's
   intrinsic definition, not visible in this wrapper). */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rsqrt14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) -1);
}

/* Merge-masked scalar rsqrt14: low lane falls back to __W when bit 0
   of __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rsqrt14: low lane zeroed when bit 0 of __U is
   clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}

/* Scalar approximate reciprocal sqrt of the low double of __B. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rsqrt14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df)
              _mm_setzero_pd (),
              (__mmask8) -1);
}

/* Merge-masked scalar double rsqrt14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar double rsqrt14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
1777
/* Approximate reciprocal (VRCP14PD) of packed doubles; "14" denotes a
   2^-14 relative-error approximation per Intel's definition. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_rcp14_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
               (__v8df)
               _mm512_setzero_pd (),
               (__mmask8) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U);
}

/* Approximate reciprocal (VRCP14PS) of packed floats. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_rcp14_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
              (__v16sf)
              _mm512_setzero_ps (),
              (__mmask16) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                   (__v16sf)
                   _mm512_setzero_ps (),
                   (__mmask16) __U);
}
1829
/* Scalar approximate reciprocal of the low float of __B; upper
   elements follow the scalar-intrinsic convention (taken from __A —
   NOTE(review): convention per Intel's definition, not visible in this
   wrapper). */
static  __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_rcp14_ss(__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)
                 _mm_setzero_ps (),
                 (__mmask8) -1);
}

/* Merge-masked scalar rcp14: low lane falls back to __W when bit 0 of
   __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar rcp14: low lane zeroed when bit 0 of __U is
   clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U);
}

/* Scalar approximate reciprocal of the low double of __B. */
static  __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_rcp14_sd(__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df)
            _mm_setzero_pd (),
            (__mmask8) -1);
}

/* Merge-masked scalar double rcp14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U);
}

/* Zero-masked scalar double rcp14. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U);
}
1885
/* Round each float down to an integral value (VRNDSCALE with
   _MM_FROUND_FLOOR).  Unmasked: mask -1 means every lane is written,
   so passing __A as the pass-through operand is harmless. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_floor_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_FLOOR,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked floor: lanes clear in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_FLOOR,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Round each double down to an integral value. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_floor_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_FLOOR,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked double floor: lanes clear in __U are copied from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_FLOOR,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
1921
/* Merge-masked ceiling of packed floats (VRNDSCALE with
   _MM_FROUND_CEIL); lanes clear in __U are copied from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                   _MM_FROUND_CEIL,
                   (__v16sf) __W, __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Round each float up to an integral value; mask -1 writes all lanes,
   so passing __A as the pass-through operand is harmless. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_ceil_ps(__m512 __A)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
                                                  _MM_FROUND_CEIL,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

/* Round each double up to an integral value. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_ceil_pd(__m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                                                   _MM_FROUND_CEIL,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked double ceiling: lanes clear in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
                _MM_FROUND_CEIL,
                (__v8df) __W, __U,
                _MM_FROUND_CUR_DIRECTION);
}
1957
/* Lane-wise absolute value of eight 64-bit integers (VPABSQ). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi64(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
             (__v8di)
             _mm512_setzero_si512 (),
             (__mmask8) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}
1983
/* Lane-wise absolute value of sixteen 32-bit integers (VPABSD). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_abs_epi32(__m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
             (__v16si)
             _mm512_setzero_si512 (),
             (__mmask16) -1);
}

/* Merge-masked variant: lanes clear in __U are copied from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

/* Zero-masked variant: lanes clear in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
2009
/* Merge-masked scalar float add with the current rounding direction:
   the low lane is __A+__B if bit 0 of __U is set, otherwise taken from
   __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float add: low lane zeroed when bit 0 of __U is
   clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
2027
/* Scalar float add with an explicit rounding mode R (unmasked). */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: low lane falls back to W when bit 0 of U is clear. */
#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: low lane zeroed when bit 0 of U is clear. */
#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2045
/* Merge-masked scalar double add with the current rounding direction:
   the low lane is __A+__B if bit 0 of __U is set, otherwise taken from
   __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double add: low lane zeroed when bit 0 of __U is
   clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar double add with an explicit rounding mode R (unmasked). */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: low lane falls back to W when bit 0 of U is clear. */
#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: low lane zeroed when bit 0 of U is clear. */
#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2080
/* Merge-masked packed double add: lanes clear in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed double add: lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_add_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Merge-masked packed float add: lanes clear in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed float add: lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_add_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2108
/* Packed double add with an explicit rounding mode R (unmasked). */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes clear in U come from W. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked: lanes clear in U are zeroed. */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Packed float add with an explicit rounding mode R (unmasked). */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })

/* Merge-masked: lanes clear in U come from W. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })

/* Zero-masked: lanes clear in U are zeroed. */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2144
/* Merge-masked scalar float subtract with the current rounding
   direction: low lane is __A-__B if bit 0 of __U is set, else from
   __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float subtract: low lane zeroed when bit 0 of
   __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar float subtract with an explicit rounding mode R (unmasked). */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: low lane falls back to W when bit 0 of U is clear. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: low lane zeroed when bit 0 of U is clear. */
#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2179
/* Merge-masked scalar double subtract with the current rounding
   direction: low lane is __A-__B if bit 0 of __U is set, else from
   __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double subtract: low lane zeroed when bit 0 of
   __U is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
2197
/* Scalar double subtract with an explicit rounding mode R (unmasked). */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: low lane falls back to W when bit 0 of U is clear. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: low lane zeroed when bit 0 of U is clear. */
#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2215
/* Merge-masked packed double subtract: lanes clear in __U come from
   __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked packed double subtract: lanes clear in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_sub_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Merge-masked packed float subtract: lanes clear in __U come from
   __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked packed float subtract: lanes clear in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_sub_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2243
/* Packed double subtract with an explicit rounding mode R (unmasked). */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked: lanes clear in U come from W. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked: lanes clear in U are zeroed. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
2261
/* Packed float subtract with an explicit rounding mode R (unmasked). */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
2267
/* Merge-masked packed float subtract with an explicit rounding mode R;
   lanes clear in U come from W.
   Fix: removed the stray trailing ';' after the statement expression.
   The macro body previously ended '});', so the intrinsic expanded to
   '({ ... });' and was a syntax error wherever it was used as a
   subexpression (e.g. as a function argument or operand of a larger
   expression).  Sibling macros such as _mm512_mask_add_round_ps and
   _mm512_mask_sub_round_pd correctly end with '})'. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2273
/* Zero-masked packed float subtract with an explicit rounding mode R;
   lanes clear in U are zeroed.
   Fix: removed the stray trailing ';' after the statement expression
   (the macro body ended '});'), which made the intrinsic a syntax
   error when used as a subexpression.  This matches the form of the
   other *_round macros in this file, which end with '})'. */
#define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2279
/* Merge-masked scalar float multiply with the current rounding
   direction: low lane is __A*__B if bit 0 of __U is set, else from
   __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float multiply: low lane zeroed when bit 0 of
   __U is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
/* Scalar float multiply with an explicit rounding mode R (unmasked). */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: low lane falls back to W when bit 0 of U is clear. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked: low lane zeroed when bit 0 of U is clear. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2314
/* Low-element DP multiply of __A and __B (current rounding mode), merge-masked
   with passthrough __W per bit 0 of __U.  Upper-lane behavior is supplied by
   the builtin's lowering. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: passthrough operand is a zero vector. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
2332
/* Low-element DP multiply with caller-supplied rounding mode R.
   Unmasked: all-ones mask; zero passthrough unused. */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: passthrough is W, selected per U. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: passthrough is a zero vector. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2350
/* 512-bit DP multiply, merge-masked: per-element select between the product
   and the corresponding element of __W, controlled by __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked: masked-off elements are selected from a zero vector. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_mul_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* 512-bit SP multiply, merge-masked with __W per __U. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked SP multiply. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_mul_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2378
/* 512-bit DP multiply with explicit rounding mode R.
   Unmasked: all-ones mask; zero passthrough unused. */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked: passthrough is W, selected per U. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* 512-bit SP multiply with explicit rounding mode R, unmasked. */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
2402
/* Merge-masked 512-bit SP multiply with explicit rounding mode R; passthrough
   is W, selected per U.
   Fix: removed the stray ';' after the statement-expression body, which was
   expanded into call sites and broke use of this macro inside expressions. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2408
/* Zero-masked 512-bit SP multiply with explicit rounding mode R.
   Fix: removed the stray ';' after the statement-expression body (it was
   pasted into every expansion, breaking expression contexts). */
#define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2414
/* Low-element SP divide of __A by __B (current rounding mode), merge-masked
   with passthrough __W per bit 0 of __U. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: passthrough operand is a zero vector. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
2432
/* Low-element SP divide with caller-supplied rounding mode R.
   Unmasked: all-ones mask; zero passthrough unused. */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked: passthrough is W, selected per U. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2450
/* Low-element DP divide of __A by __B (current rounding mode), merge-masked
   with passthrough __W per bit 0 of __U. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: passthrough operand is a zero vector. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
2468
/* Low-element DP divide with caller-supplied rounding mode R.
   Unmasked: all-ones mask; zero passthrough unused. */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: passthrough is W, selected per U. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2486
/* 512-bit DP divide: expressed as a plain vector '/' so the optimizer can
   reason about it like scalar IEEE division. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_div_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a/(__v8df)__b);
}

/* Merge-masked DP divide: per-element select between the quotient and __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked DP divide. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* 512-bit SP divide via vector '/'. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_div_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

/* Merge-masked SP divide. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked SP divide. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2526
/* 512-bit DP divide with explicit rounding mode R, unmasked. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked: passthrough is W, selected per U. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* 512-bit SP divide with explicit rounding mode R, unmasked. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
2550
/* Merge-masked 512-bit SP divide with explicit rounding mode R; passthrough
   is W, selected per U.
   Fix: removed the stray ';' after the statement-expression body so the macro
   can be used inside a larger expression. */
#define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2556
/* Zero-masked 512-bit SP divide with explicit rounding mode R.
   Fix: removed the stray ';' after the statement-expression body so the macro
   can be used inside a larger expression. */
#define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2562
/* Round packed SP elements of A to the precision selected by immediate B,
   current rounding mode.  Unmasked (all-ones mask); A doubles as the (unused)
   passthrough operand. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: operates on C, passthrough A, mask B (argument order is
   historical -- note it differs from the usual (W, U, A) convention). */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: operates on B, mask A. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Same three forms with an explicit rounding/SAE argument R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)); })

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })

/* DP counterparts of the six roundscale forms above. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
2624
/* FMA family, DP, with explicit rounding mode R.  All variants lower onto the
   single vfmaddpd512 builtin; fmsub/fnmadd/fnmsub are expressed by negating
   the C and/or A operand.  _mask merges with A, _mask3 merges with C, _maskz
   zeroes masked-off elements (merge target chosen by the builtin variant). */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })

/* fmsub = fmadd with C negated. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })

/* fnmadd = fmadd with A negated. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })

/* fnmsub = fmadd with both A and C negated. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
2708
/* DP FMA function forms (current rounding mode).  Same builtin-variant and
   operand-negation scheme as the _round_ macros above: _mask merges with __A,
   _mask3 with __C, _maskz zeroes masked-off elements. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* fmsub: __C negated. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* fnmadd: __A negated. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* fnmsub: both __A and __C negated. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2828
/* FMA family, SP, with explicit rounding mode R.  Mirrors the DP macros:
   one vfmaddps512 builtin per mask kind; fmsub/fnmadd/fnmsub are expressed
   by negating the C and/or A operand. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })

/* fmsub = fmadd with C negated. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })

/* fnmadd = fmadd with A negated. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })

/* fnmsub = fmadd with both A and C negated. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
2912
/* FMA family for 16 packed floats with the current rounding mode
   (_MM_FROUND_CUR_DIRECTION).  Negations of A/C select the fnmadd/fmsub/
   fnmsub flavors; the mask argument selects unmasked (-1), write-masked
   (__U via _mask/_mask3 builtin) or zero-masked (__U via _maskz builtin). */

/* (A*B)+C, unmasked. */
2913 static __inline__ __m512 __DEFAULT_FN_ATTRS
2914 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2915 {
2916   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2917                                                    (__v16sf) __B,
2918                                                    (__v16sf) __C,
2919                                                    (__mmask16) -1,
2920                                                    _MM_FROUND_CUR_DIRECTION);
2921 }
2922
/* (A*B)+C under write-mask __U. */
2923 static __inline__ __m512 __DEFAULT_FN_ATTRS
2924 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2925 {
2926   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2927                                                    (__v16sf) __B,
2928                                                    (__v16sf) __C,
2929                                                    (__mmask16) __U,
2930                                                    _MM_FROUND_CUR_DIRECTION);
2931 }
2932
/* (A*B)+C under write-mask __U, _mask3 builtin variant. */
2933 static __inline__ __m512 __DEFAULT_FN_ATTRS
2934 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2935 {
2936   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2937                                                     (__v16sf) __B,
2938                                                     (__v16sf) __C,
2939                                                     (__mmask16) __U,
2940                                                     _MM_FROUND_CUR_DIRECTION);
2941 }
2942
/* (A*B)+C with zeroing mask __U. */
2943 static __inline__ __m512 __DEFAULT_FN_ATTRS
2944 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2945 {
2946   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2947                                                     (__v16sf) __B,
2948                                                     (__v16sf) __C,
2949                                                     (__mmask16) __U,
2950                                                     _MM_FROUND_CUR_DIRECTION);
2951 }
2952
/* (A*B)-C, unmasked (C negated). */
2953 static __inline__ __m512 __DEFAULT_FN_ATTRS
2954 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2955 {
2956   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2957                                                    (__v16sf) __B,
2958                                                    -(__v16sf) __C,
2959                                                    (__mmask16) -1,
2960                                                    _MM_FROUND_CUR_DIRECTION);
2961 }
2962
/* (A*B)-C under write-mask __U. */
2963 static __inline__ __m512 __DEFAULT_FN_ATTRS
2964 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2965 {
2966   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2967                                                    (__v16sf) __B,
2968                                                    -(__v16sf) __C,
2969                                                    (__mmask16) __U,
2970                                                    _MM_FROUND_CUR_DIRECTION);
2971 }
2972
/* (A*B)-C with zeroing mask __U. */
2973 static __inline__ __m512 __DEFAULT_FN_ATTRS
2974 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2975 {
2976   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2977                                                     (__v16sf) __B,
2978                                                     -(__v16sf) __C,
2979                                                     (__mmask16) __U,
2980                                                     _MM_FROUND_CUR_DIRECTION);
2981 }
2982
/* -(A*B)+C, unmasked (A negated). */
2983 static __inline__ __m512 __DEFAULT_FN_ATTRS
2984 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2985 {
2986   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2987                                                    (__v16sf) __B,
2988                                                    (__v16sf) __C,
2989                                                    (__mmask16) -1,
2990                                                    _MM_FROUND_CUR_DIRECTION);
2991 }
2992
/* -(A*B)+C under write-mask __U, _mask3 builtin variant. */
2993 static __inline__ __m512 __DEFAULT_FN_ATTRS
2994 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2995 {
2996   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2997                                                     (__v16sf) __B,
2998                                                     (__v16sf) __C,
2999                                                     (__mmask16) __U,
3000                                                     _MM_FROUND_CUR_DIRECTION);
3001 }
3002
/* -(A*B)+C with zeroing mask __U. */
3003 static __inline__ __m512 __DEFAULT_FN_ATTRS
3004 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3005 {
3006   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3007                                                     (__v16sf) __B,
3008                                                     (__v16sf) __C,
3009                                                     (__mmask16) __U,
3010                                                     _MM_FROUND_CUR_DIRECTION);
3011 }
3012
/* -(A*B)-C, unmasked (A and C negated). */
3013 static __inline__ __m512 __DEFAULT_FN_ATTRS
3014 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3015 {
3016   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3017                                                    (__v16sf) __B,
3018                                                    -(__v16sf) __C,
3019                                                    (__mmask16) -1,
3020                                                    _MM_FROUND_CUR_DIRECTION);
3021 }
3022
/* -(A*B)-C with zeroing mask __U. */
3023 static __inline__ __m512 __DEFAULT_FN_ATTRS
3024 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3025 {
3026   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3027                                                     (__v16sf) __B,
3028                                                     -(__v16sf) __C,
3029                                                     (__mmask16) __U,
3030                                                     _MM_FROUND_CUR_DIRECTION);
3031 }
3032
/* FMADDSUB/FMSUBADD rounding-mode macros for 8 packed doubles, built on the
   vfmaddsubpd512 builtins.  FMSUBADD is expressed as FMADDSUB with C
   negated; R is the rounding-mode immediate. */
/* Alternating (A*B)+/-C, unmasked. */
3033 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
3034   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3035                                               (__v8df)(__m512d)(B), \
3036                                               (__v8df)(__m512d)(C), \
3037                                               (__mmask8)-1, (int)(R)); })
3038
3039
/* Alternating (A*B)+/-C under write-mask U. */
3040 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
3041   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3042                                               (__v8df)(__m512d)(B), \
3043                                               (__v8df)(__m512d)(C), \
3044                                               (__mmask8)(U), (int)(R)); })
3045
3046
/* Alternating (A*B)+/-C under write-mask U, _mask3 builtin variant. */
3047 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
3048   (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
3049                                                (__v8df)(__m512d)(B), \
3050                                                (__v8df)(__m512d)(C), \
3051                                                (__mmask8)(U), (int)(R)); })
3052
3053
/* Alternating (A*B)+/-C with zeroing mask U. */
3054 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
3055   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
3056                                                (__v8df)(__m512d)(B), \
3057                                                (__v8df)(__m512d)(C), \
3058                                                (__mmask8)(U), (int)(R)); })
3059
3060
/* Alternating (A*B)-/+C, unmasked (C negated). */
3061 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
3062   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3063                                               (__v8df)(__m512d)(B), \
3064                                               -(__v8df)(__m512d)(C), \
3065                                               (__mmask8)-1, (int)(R)); })
3066
3067
/* Alternating (A*B)-/+C under write-mask U. */
3068 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
3069   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
3070                                               (__v8df)(__m512d)(B), \
3071                                               -(__v8df)(__m512d)(C), \
3072                                               (__mmask8)(U), (int)(R)); })
3073
3074
/* Alternating (A*B)-/+C with zeroing mask U. */
3075 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
3076   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
3077                                                (__v8df)(__m512d)(B), \
3078                                                -(__v8df)(__m512d)(C), \
3079                                                (__mmask8)(U), (int)(R)); })
3080
3081
/* FMADDSUB/FMSUBADD for 8 packed doubles, current rounding mode.
   FMSUBADD is FMADDSUB with C negated. */

/* Alternating (A*B)+/-C, unmasked. */
3082 static __inline__ __m512d __DEFAULT_FN_ATTRS
3083 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3084 {
3085   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3086                                                        (__v8df) __B,
3087                                                        (__v8df) __C,
3088                                                        (__mmask8) -1,
3089                                                        _MM_FROUND_CUR_DIRECTION);
3090 }
3091
/* Alternating (A*B)+/-C under write-mask __U. */
3092 static __inline__ __m512d __DEFAULT_FN_ATTRS
3093 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3094 {
3095   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3096                                                        (__v8df) __B,
3097                                                        (__v8df) __C,
3098                                                        (__mmask8) __U,
3099                                                        _MM_FROUND_CUR_DIRECTION);
3100 }
3101
/* Alternating (A*B)+/-C under write-mask __U, _mask3 builtin variant. */
3102 static __inline__ __m512d __DEFAULT_FN_ATTRS
3103 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3104 {
3105   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3106                                                         (__v8df) __B,
3107                                                         (__v8df) __C,
3108                                                         (__mmask8) __U,
3109                                                         _MM_FROUND_CUR_DIRECTION);
3110 }
3111
/* Alternating (A*B)+/-C with zeroing mask __U. */
3112 static __inline__ __m512d __DEFAULT_FN_ATTRS
3113 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3114 {
3115   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3116                                                         (__v8df) __B,
3117                                                         (__v8df) __C,
3118                                                         (__mmask8) __U,
3119                                                         _MM_FROUND_CUR_DIRECTION);
3120 }
3121
/* Alternating (A*B)-/+C, unmasked (C negated). */
3122 static __inline__ __m512d __DEFAULT_FN_ATTRS
3123 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3124 {
3125   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3126                                                        (__v8df) __B,
3127                                                        -(__v8df) __C,
3128                                                        (__mmask8) -1,
3129                                                        _MM_FROUND_CUR_DIRECTION);
3130 }
3131
/* Alternating (A*B)-/+C under write-mask __U. */
3132 static __inline__ __m512d __DEFAULT_FN_ATTRS
3133 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3134 {
3135   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3136                                                        (__v8df) __B,
3137                                                        -(__v8df) __C,
3138                                                        (__mmask8) __U,
3139                                                        _MM_FROUND_CUR_DIRECTION);
3140 }
3141
/* Alternating (A*B)-/+C with zeroing mask __U. */
3142 static __inline__ __m512d __DEFAULT_FN_ATTRS
3143 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3144 {
3145   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3146                                                         (__v8df) __B,
3147                                                         -(__v8df) __C,
3148                                                         (__mmask8) __U,
3149                                                         _MM_FROUND_CUR_DIRECTION);
3150 }
3151
/* FMADDSUB/FMSUBADD rounding-mode macros for 16 packed floats,
   mirroring the _pd family above via the vfmaddsubps512 builtins. */
/* Alternating (A*B)+/-C, unmasked. */
3152 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
3153   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3154                                              (__v16sf)(__m512)(B), \
3155                                              (__v16sf)(__m512)(C), \
3156                                              (__mmask16)-1, (int)(R)); })
3157
3158
/* Alternating (A*B)+/-C under write-mask U. */
3159 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
3160   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3161                                              (__v16sf)(__m512)(B), \
3162                                              (__v16sf)(__m512)(C), \
3163                                              (__mmask16)(U), (int)(R)); })
3164
3165
/* Alternating (A*B)+/-C under write-mask U, _mask3 builtin variant. */
3166 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
3167   (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
3168                                               (__v16sf)(__m512)(B), \
3169                                               (__v16sf)(__m512)(C), \
3170                                               (__mmask16)(U), (int)(R)); })
3171
3172
/* Alternating (A*B)+/-C with zeroing mask U. */
3173 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
3174   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3175                                               (__v16sf)(__m512)(B), \
3176                                               (__v16sf)(__m512)(C), \
3177                                               (__mmask16)(U), (int)(R)); })
3178
3179
/* Alternating (A*B)-/+C, unmasked (C negated). */
3180 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
3181   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3182                                              (__v16sf)(__m512)(B), \
3183                                              -(__v16sf)(__m512)(C), \
3184                                              (__mmask16)-1, (int)(R)); })
3185
3186
/* Alternating (A*B)-/+C under write-mask U. */
3187 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
3188   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3189                                              (__v16sf)(__m512)(B), \
3190                                              -(__v16sf)(__m512)(C), \
3191                                              (__mmask16)(U), (int)(R)); })
3192
3193
/* Alternating (A*B)-/+C with zeroing mask U. */
3194 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
3195   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3196                                               (__v16sf)(__m512)(B), \
3197                                               -(__v16sf)(__m512)(C), \
3198                                               (__mmask16)(U), (int)(R)); })
3199
3200
/* FMADDSUB/FMSUBADD for 16 packed floats, current rounding mode. */

/* Alternating (A*B)+/-C, unmasked. */
3201 static __inline__ __m512 __DEFAULT_FN_ATTRS
3202 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3203 {
3204   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3205                                                       (__v16sf) __B,
3206                                                       (__v16sf) __C,
3207                                                       (__mmask16) -1,
3208                                                       _MM_FROUND_CUR_DIRECTION);
3209 }
3210
/* Alternating (A*B)+/-C under write-mask __U. */
3211 static __inline__ __m512 __DEFAULT_FN_ATTRS
3212 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3213 {
3214   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3215                                                       (__v16sf) __B,
3216                                                       (__v16sf) __C,
3217                                                       (__mmask16) __U,
3218                                                       _MM_FROUND_CUR_DIRECTION);
3219 }
3220
/* Alternating (A*B)+/-C under write-mask __U, _mask3 builtin variant. */
3221 static __inline__ __m512 __DEFAULT_FN_ATTRS
3222 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3223 {
3224   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3225                                                        (__v16sf) __B,
3226                                                        (__v16sf) __C,
3227                                                        (__mmask16) __U,
3228                                                        _MM_FROUND_CUR_DIRECTION);
3229 }
3230
/* Alternating (A*B)+/-C with zeroing mask __U. */
3231 static __inline__ __m512 __DEFAULT_FN_ATTRS
3232 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3233 {
3234   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3235                                                        (__v16sf) __B,
3236                                                        (__v16sf) __C,
3237                                                        (__mmask16) __U,
3238                                                        _MM_FROUND_CUR_DIRECTION);
3239 }
3240
/* Alternating (A*B)-/+C, unmasked (C negated). */
3241 static __inline__ __m512 __DEFAULT_FN_ATTRS
3242 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3243 {
3244   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3245                                                       (__v16sf) __B,
3246                                                       -(__v16sf) __C,
3247                                                       (__mmask16) -1,
3248                                                       _MM_FROUND_CUR_DIRECTION);
3249 }
3250
/* Alternating (A*B)-/+C under write-mask __U. */
3251 static __inline__ __m512 __DEFAULT_FN_ATTRS
3252 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3253 {
3254   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3255                                                       (__v16sf) __B,
3256                                                       -(__v16sf) __C,
3257                                                       (__mmask16) __U,
3258                                                       _MM_FROUND_CUR_DIRECTION);
3259 }
3260
/* Alternating (A*B)-/+C with zeroing mask __U. */
3261 static __inline__ __m512 __DEFAULT_FN_ATTRS
3262 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3263 {
3264   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3265                                                        (__v16sf) __B,
3266                                                        -(__v16sf) __C,
3267                                                        (__mmask16) __U,
3268                                                        _MM_FROUND_CUR_DIRECTION);
3269 }
3270
/* mask3 variants of FMSUB and FMSUBADD that go through dedicated
   vfmsub*/vfmsubadd* builtins rather than negating C. */
/* (A*B)-C under write-mask U with rounding R, doubles. */
3271 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3272   (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3273                                             (__v8df)(__m512d)(B), \
3274                                             (__v8df)(__m512d)(C), \
3275                                             (__mmask8)(U), (int)(R)); })
3276
3277
/* (A*B)-C under write-mask __U, current rounding mode, doubles. */
3278 static __inline__ __m512d __DEFAULT_FN_ATTRS
3279 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3280 {
3281   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3282                                                      (__v8df) __B,
3283                                                      (__v8df) __C,
3284                                                      (__mmask8) __U,
3285                                                      _MM_FROUND_CUR_DIRECTION);
3286 }
3287
/* (A*B)-C under write-mask U with rounding R, floats. */
3288 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3289   (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3290                                            (__v16sf)(__m512)(B), \
3291                                            (__v16sf)(__m512)(C), \
3292                                            (__mmask16)(U), (int)(R)); })
3293
3294
/* (A*B)-C under write-mask __U, current rounding mode, floats. */
3295 static __inline__ __m512 __DEFAULT_FN_ATTRS
3296 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3297 {
3298   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3299                                                     (__v16sf) __B,
3300                                                     (__v16sf) __C,
3301                                                     (__mmask16) __U,
3302                                                     _MM_FROUND_CUR_DIRECTION);
3303 }
3304
/* Alternating (A*B)-/+C under write-mask U with rounding R, doubles. */
3305 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
3306   (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3307                                                (__v8df)(__m512d)(B), \
3308                                                (__v8df)(__m512d)(C), \
3309                                                (__mmask8)(U), (int)(R)); })
3310
3311
/* Alternating (A*B)-/+C under write-mask __U, current rounding, doubles. */
3312 static __inline__ __m512d __DEFAULT_FN_ATTRS
3313 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3314 {
3315   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3316                                                         (__v8df) __B,
3317                                                         (__v8df) __C,
3318                                                         (__mmask8) __U,
3319                                                         _MM_FROUND_CUR_DIRECTION);
3320 }
3321
/* Alternating (A*B)-/+C under write-mask U with rounding R, floats. */
3322 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
3323   (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3324                                               (__v16sf)(__m512)(B), \
3325                                               (__v16sf)(__m512)(C), \
3326                                               (__mmask16)(U), (int)(R)); })
3327
3328
/* Alternating (A*B)-/+C under write-mask __U, current rounding, floats. */
3329 static __inline__ __m512 __DEFAULT_FN_ATTRS
3330 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3331 {
3332   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3333                                                        (__v16sf) __B,
3334                                                        (__v16sf) __C,
3335                                                        (__mmask16) __U,
3336                                                        _MM_FROUND_CUR_DIRECTION);
3337 }
3338
/* Write-masked FNMADD/FNMSUB variants that go through dedicated
   vfnmadd*/vfnmsub* builtins rather than negating A/C. */
/* -(A*B)+C under write-mask U with rounding R, doubles. */
3339 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
3340   (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
3341                                             (__v8df)(__m512d)(B), \
3342                                             (__v8df)(__m512d)(C), \
3343                                             (__mmask8)(U), (int)(R)); })
3344
3345
/* -(A*B)+C under write-mask __U, current rounding mode, doubles. */
3346 static __inline__ __m512d __DEFAULT_FN_ATTRS
3347 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3348 {
3349   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3350                                                      (__v8df) __B,
3351                                                      (__v8df) __C,
3352                                                      (__mmask8) __U,
3353                                                      _MM_FROUND_CUR_DIRECTION);
3354 }
3355
/* -(A*B)+C under write-mask U with rounding R, floats. */
3356 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
3357   (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
3358                                            (__v16sf)(__m512)(B), \
3359                                            (__v16sf)(__m512)(C), \
3360                                            (__mmask16)(U), (int)(R)); })
3361
3362
/* -(A*B)+C under write-mask __U, current rounding mode, floats. */
3363 static __inline__ __m512 __DEFAULT_FN_ATTRS
3364 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3365 {
3366   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3367                                                     (__v16sf) __B,
3368                                                     (__v16sf) __C,
3369                                                     (__mmask16) __U,
3370                                                     _MM_FROUND_CUR_DIRECTION);
3371 }
3372
/* -(A*B)-C under write-mask U with rounding R, doubles. */
3373 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
3374   (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
3375                                             (__v8df)(__m512d)(B), \
3376                                             (__v8df)(__m512d)(C), \
3377                                             (__mmask8)(U), (int)(R)); })
3378
3379
/* -(A*B)-C under write-mask U, _mask3 builtin variant, doubles. */
3380 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3381   (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
3382                                              (__v8df)(__m512d)(B), \
3383                                              (__v8df)(__m512d)(C), \
3384                                              (__mmask8)(U), (int)(R)); })
3385
3386
/* -(A*B)-C under write-mask __U, current rounding mode, doubles. */
3387 static __inline__ __m512d __DEFAULT_FN_ATTRS
3388 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3389 {
3390   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3391                                                      (__v8df) __B,
3392                                                      (__v8df) __C,
3393                                                      (__mmask8) __U,
3394                                                      _MM_FROUND_CUR_DIRECTION);
3395 }
3396
/* -(A*B)-C under write-mask __U, _mask3 builtin variant, doubles. */
3397 static __inline__ __m512d __DEFAULT_FN_ATTRS
3398 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3399 {
3400   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3401                                                       (__v8df) __B,
3402                                                       (__v8df) __C,
3403                                                       (__mmask8) __U,
3404                                                       _MM_FROUND_CUR_DIRECTION);
3405 }
3406
/* -(A*B)-C under write-mask U with rounding R, floats. */
3407 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
3408   (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
3409                                            (__v16sf)(__m512)(B), \
3410                                            (__v16sf)(__m512)(C), \
3411                                            (__mmask16)(U), (int)(R)); })
3412
3413
/* -(A*B)-C under write-mask U, _mask3 builtin variant, floats. */
3414 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3415   (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
3416                                             (__v16sf)(__m512)(B), \
3417                                             (__v16sf)(__m512)(C), \
3418                                             (__mmask16)(U), (int)(R)); })
3419
3420
/* -(A*B)-C under write-mask __U, current rounding mode, floats. */
3421 static __inline__ __m512 __DEFAULT_FN_ATTRS
3422 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3423 {
3424   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3425                                                     (__v16sf) __B,
3426                                                     (__v16sf) __C,
3427                                                     (__mmask16) __U,
3428                                                     _MM_FROUND_CUR_DIRECTION);
3429 }
3430
/* -(A*B)-C under write-mask __U, _mask3 builtin variant, floats. */
3431 static __inline__ __m512 __DEFAULT_FN_ATTRS
3432 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3433 {
3434   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3435                                                      (__v16sf) __B,
3436                                                      (__v16sf) __C,
3437                                                      (__mmask16) __U,
3438                                                      _MM_FROUND_CUR_DIRECTION);
3439 }
3440
3441
3442
3443 /* Vector permutations */
3444
/* Two-source full-lane permutes (VPERMT2D/VPERMT2Q): each index in __I
   selects an element from the concatenation of __A and __B.  Note the
   builtins take the index vector as their FIRST argument. */

/* 32-bit element permute, unmasked. */
3445 static __inline __m512i __DEFAULT_FN_ATTRS
3446 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3447 {
3448   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3449                                                        /* idx */ ,
3450                                                        (__v16si) __A,
3451                                                        (__v16si) __B,
3452                                                        (__mmask16) -1);
3453 }
3454
/* 32-bit element permute under write-mask __U. */
3455 static __inline__ __m512i __DEFAULT_FN_ATTRS
3456 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3457                                 __m512i __I, __m512i __B)
3458 {
3459   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3460                                                         /* idx */ ,
3461                                                         (__v16si) __A,
3462                                                         (__v16si) __B,
3463                                                         (__mmask16) __U);
3464 }
3465
/* 32-bit element permute with zeroing mask __U. */
3466 static __inline__ __m512i __DEFAULT_FN_ATTRS
3467 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3468                                  __m512i __I, __m512i __B)
3469 {
3470   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3471                                                         /* idx */ ,
3472                                                         (__v16si) __A,
3473                                                         (__v16si) __B,
3474                                                         (__mmask16) __U);
3475 }
3476
/* 64-bit element permute, unmasked. */
3477 static __inline __m512i __DEFAULT_FN_ATTRS
3478 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3479 {
3480   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3481                                                        /* idx */ ,
3482                                                        (__v8di) __A,
3483                                                        (__v8di) __B,
3484                                                        (__mmask8) -1);
3485 }
3486
/* 64-bit element permute under write-mask __U. */
3487 static __inline__ __m512i __DEFAULT_FN_ATTRS
3488 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3489                                 __m512i __B)
3490 {
3491   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3492                                                        /* idx */ ,
3493                                                        (__v8di) __A,
3494                                                        (__v8di) __B,
3495                                                        (__mmask8) __U);
3496 }
3497
3498
/* 64-bit element permute with zeroing mask __U. */
3499 static __inline__ __m512i __DEFAULT_FN_ATTRS
3500 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3501          __m512i __I, __m512i __B)
3502 {
3503   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3504                                                         /* idx */ ,
3505                                                         (__v8di) __A,
3506                                                         (__v8di) __B,
3507                                                         (__mmask8) __U);
3508 }
3509
/* VALIGNQ: concatenate B (low) and A (high) as a 16-element 64-bit array
   and pick 8 consecutive elements starting at offset I (masked to 0..7),
   expressed as a compile-time shufflevector. */
3510 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
3511   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
3512                                    (__v8di)(__m512i)(A), \
3513                                    ((int)(I) & 0x7) + 0, \
3514                                    ((int)(I) & 0x7) + 1, \
3515                                    ((int)(I) & 0x7) + 2, \
3516                                    ((int)(I) & 0x7) + 3, \
3517                                    ((int)(I) & 0x7) + 4, \
3518                                    ((int)(I) & 0x7) + 5, \
3519                                    ((int)(I) & 0x7) + 6, \
3520                                    ((int)(I) & 0x7) + 7); })
3521
/* Write-masked VALIGNQ: blend the alignr result into W under mask U. */
3522 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
3523   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3524                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3525                                  (__v8di)(__m512i)(W)); })
3526
/* Zero-masked VALIGNQ: lanes not selected by U become zero. */
3527 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
3528   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3529                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3530                                  (__v8di)_mm512_setzero_si512()); })
3531
/* 32-bit-element version of alignr: concatenate B (low) and A (high) into a
 * 32-element dword pool and extract 16 consecutive elements starting at
 * (I & 0xf).  Compile-time shufflevector; I must be a constant expression. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
                                   (__v16si)(__m512i)(A), \
                                   ((int)(I) & 0xf) + 0, \
                                   ((int)(I) & 0xf) + 1, \
                                   ((int)(I) & 0xf) + 2, \
                                   ((int)(I) & 0xf) + 3, \
                                   ((int)(I) & 0xf) + 4, \
                                   ((int)(I) & 0xf) + 5, \
                                   ((int)(I) & 0xf) + 6, \
                                   ((int)(I) & 0xf) + 7, \
                                   ((int)(I) & 0xf) + 8, \
                                   ((int)(I) & 0xf) + 9, \
                                   ((int)(I) & 0xf) + 10, \
                                   ((int)(I) & 0xf) + 11, \
                                   ((int)(I) & 0xf) + 12, \
                                   ((int)(I) & 0xf) + 13, \
                                   ((int)(I) & 0xf) + 14, \
                                   ((int)(I) & 0xf) + 15); })

/* Merge-masked: per-lane select (selectd) between the alignr result and W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W)); })

/* Zero-masked: select between the alignr result and zero. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512()); })
3561 /* Vector Extract */
3562
/* Extract the low (I==0) or high (I==1) 256-bit group of four doubles from
 * A.  Only bit 0 of I is consulted; the second shufflevector operand is
 * undefined because every chosen index falls in A. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0,             \
                                   ((I) & 1) ? 5 : 1,             \
                                   ((I) & 1) ? 6 : 2,             \
                                   ((I) & 1) ? 7 : 3); })

/* Merge-masked extract: select between the extracted quarter and W. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

/* Zero-masked extract: unselected lanes become 0.0. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })
3580
/* Extract the (I & 3)-th 128-bit group of four floats from A: lanes
 * 4*(I&3) .. 4*(I&3)+3.  The second shuffle operand is undefined since all
 * chosen indices come from A. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4,            \
                                  1 + ((I) & 0x3) * 4,            \
                                  2 + ((I) & 0x3) * 4,            \
                                  3 + ((I) & 0x3) * 4); })

/* Merge-masked extract: select between the extracted 128-bit group and W. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(W)); })

/* Zero-masked extract: unselected lanes become 0.0f. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })
3598
3599 /* Vector Blend */
3600
/* Blend doubles: per-lane select under __U between __W (selected lanes) and
 * __A (the fallback operand).  Note the argument order into the select
 * builtin: __W is the "true" source, __A the "false" source. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                 (__v8df) __W,
                 (__v8df) __A);
}

/* Blend floats: 16-lane select between __W (selected) and __A (fallback). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                (__v16sf) __W,
                (__v16sf) __A);
}

/* Blend 64-bit integers: 8-lane select between __W and __A. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                (__v8di) __W,
                (__v8di) __A);
}

/* Blend 32-bit integers: 16-lane select between __W and __A. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                (__v16si) __W,
                (__v16si) __A);
}
3632
3633 /* Compare */
3634
/* Compare all 16 float lanes of A and B with predicate P under rounding/SAE
 * control R; returns a 16-bit mask of per-lane results. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

/* Masked compare: U is forwarded as the builtin's zeroing mask on the
 * comparison result. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

/* Non-round variants use the current rounding direction. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate conveniences: each expands to _mm512_cmp_ps_mask with a
 * fixed _CMP_* predicate constant. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3689
/* Double-precision counterparts of the ps compares above: 8 lanes, 8-bit
 * result mask, same predicate and rounding/SAE plumbing. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

/* Masked compare: U is forwarded as the builtin's zeroing mask. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

/* Non-round variants use the current rounding direction. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate conveniences with fixed _CMP_* constants. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3744
3745 /* Conversion */
3746
/* Truncating float -> unsigned 32-bit conversion (vcvttps2udq) with explicit
 * rounding/SAE control R.  Passthrough operand: undefined (unmasked),
 * W (merge-masked), or zero (zero-masked). */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })


/* Function forms: same builtin with the current rounding direction. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: lanes disabled in __U keep the values from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes are taken from the zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
3790
/* Signed 32-bit int -> float conversion (vcvtdq2ps) with rounding control R;
 * unmasked, merge-masked (passthrough W), and zero-masked forms. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Unsigned 32-bit int -> float conversion (vcvtudq2ps), same three forms. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })
3820
/* Unsigned 32-bit int -> float, current rounding direction; passthrough is
 * an undefined vector since no lanes are masked off. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_undefined_ps (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
3847
/* Widening conversion: 8 signed 32-bit ints -> 8 doubles.  Exact (every
 * int32 is representable in double), so no rounding argument is needed and
 * a plain vector convert suffices. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
}

/* Merge-masked: select between the converted result and __W under __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked: select between the converted result and zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* "lo" variants convert only the low 256 bits (8 dwords) of a 512-bit
 * integer vector, via _mm512_castsi512_si256. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
3881
/* Signed 32-bit int -> float, current rounding direction; unmasked form
 * passes an undefined vector as the (unused) passthrough. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) __W,
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}
3908
/* Widening conversion: 8 unsigned 32-bit ints (__v8su) -> 8 doubles.
 * Exact, so a plain vector convert is used. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
}

/* Merge-masked: select between the converted result and __W under __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked: select between the converted result and zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* "lo" variants convert only the low 256 bits of a 512-bit vector. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
3942
/* Narrowing conversion: 8 doubles -> 8 floats (vcvtpd2ps) with explicit
 * rounding control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* Function forms: same builtin with the current rounding direction. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
3984
/* Convert 8 doubles to floats and place them in the low half of a 512-bit
 * vector; the shuffle appends 8 zeros (indices 8-15 select the zero
 * vector) as the upper half. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* Masked form: the low half merges with the low 256 bits of __W under __U
 * (via _mm512_mask_cvtpd_ps); the upper half is zeroed by the shuffle. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
4002
/* 16 floats -> 16 half-precision values (vcvtps2ph); I is the immediate
 * rounding-control operand of the instruction.
 * NOTE(review): in the masked variants the parameter named U is the
 * passthrough/destination vector and W is the mask — the names are swapped
 * relative to the usual (W, U) convention, but the expansion is consistent. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvt_roundps_ph(W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })

/* Non-round-named forms: identical builtin call shape; the unmasked form
 * uses a zero passthrough instead of an undefined one. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W)); })

#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W)); })
4032
/* 16 half-precision values -> 16 floats (vcvtph2ps) with rounding/SAE
 * control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })


/* Function forms: same builtin with the current rounding direction. */
static  __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4076
/* Truncating double -> signed 32-bit conversion (vcvttpd2dq): 8 doubles
 * narrow to a 256-bit integer result.  R is the rounding/SAE control. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })

/* Function forms: same builtin with the current rounding direction. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4118
/* Truncating float -> signed 32-bit conversion (vcvttps2dq) with
 * rounding/SAE control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })

/* Function forms: same builtin with the current rounding direction. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_setzero_si512 (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4160
/* Rounding float -> signed 32-bit conversion (vcvtps2dq, non-truncating)
 * with rounding control R; unmasked, merge-masked, and zero-masked forms. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R)); })

/* Function forms: same builtin with the current rounding direction; the
 * unmasked form passes an undefined passthrough vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: disabled lanes keep values from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: disabled lanes come from the zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4203
/* Convert 8 packed doubles to signed 32-bit ints (result narrows to 256
   bits) with explicit rounding R; all lanes live. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: masked-off lanes are taken from W. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: masked-off lanes come from a zero vector. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R)); })
4218
/* Convert 8 packed doubles to signed 32-bit ints using the current rounding
   mode; with the all-ones mask the undefined passthrough is never observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_undefined_si256 (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: masked-off lanes come from a zero vector. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4247
/* Convert 16 packed floats to unsigned 32-bit ints with explicit rounding R;
   all lanes live. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

/* Merge-masked variant: masked-off lanes are taken from W. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

/* Zero-masked variant: masked-off lanes come from a zero vector. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
4262
4263 static __inline__ __m512i __DEFAULT_FN_ATTRS
4264 _mm512_cvtps_epu32 ( __m512 __A)
4265 {
4266   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4267                   (__v16si)\
4268                   _mm512_undefined_epi32 (),\
4269                   (__mmask16) -1,\
4270                   _MM_FROUND_CUR_DIRECTION);\
4271 }
4272
/* Merge-masked float -> unsigned 32-bit conversion: lanes with a clear bit
   in __U are taken from __W; current rounding mode. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: masked-off lanes come from a zero vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U ,
                  _MM_FROUND_CUR_DIRECTION);
}
4291
/* Convert 8 packed doubles to unsigned 32-bit ints (256-bit result) with
   explicit rounding R; all lanes live. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })
4296
/* Merge-masked double -> unsigned 32-bit conversion with explicit rounding;
   masked-off lanes are taken from W.
   Fix: cast W through (__m256i) before the vector cast, matching every
   sibling macro here (e.g. _mm512_mask_cvt_roundpd_epi32 at the epi32
   variant above uses (__v8si)(__m256i)(W)).  Without the intermediate cast,
   a W argument that is not already __m256i bypasses the type check that all
   the other masked macros enforce.  Same fix was applied upstream in LLVM. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
4301
/* Zero-masked double -> unsigned 32-bit conversion with explicit rounding;
   masked-off lanes come from a zero vector. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4306
/* Convert 8 packed doubles to unsigned 32-bit ints using the current
   rounding mode; with the all-ones mask the undefined passthrough is never
   observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: masked-off lanes come from a zero vector. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4335
/* Extract the lowest double-precision element of the 512-bit vector. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}

/* Extract the lowest single-precision element of the 512-bit vector. */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}
4347
/* Unpack and Interleave */

/* Interleave the high double from each 128-bit lane of __a with the
   corresponding high double of __b (odd source indices per lane). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masked unpackhi: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masked unpackhi: masked-off lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}

/* Interleave the low double from each 128-bit lane of __a with the
   corresponding low double of __b (even source indices per lane). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masked unpacklo: lanes with a clear bit in __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masked unpacklo: masked-off lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}
4395
/* Interleave the two high floats of each 128-bit lane of __a with the
   corresponding high floats of __b. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

/* Merge-masked unpackhi: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masked unpackhi: masked-off lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

/* Interleave the two low floats of each 128-bit lane of __a with the
   corresponding low floats of __b. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

/* Merge-masked unpacklo: lanes with a clear bit in __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masked unpacklo: masked-off lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
4447
/* Interleave the two high 32-bit ints of each 128-bit lane of __A with the
   corresponding high ints of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Merge-masked unpackhi: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masked unpackhi: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}

/* Interleave the two low 32-bit ints of each 128-bit lane of __A with the
   corresponding low ints of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Merge-masked unpacklo: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masked unpacklo: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4499
/* Interleave the high 64-bit int of each 128-bit lane of __A with the
   corresponding high element of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Merge-masked unpackhi: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masked unpackhi: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}

/* Interleave the low 64-bit int of each 128-bit lane of __A with the
   corresponding low element of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Merge-masked unpacklo: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masked unpacklo: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4545
4546
/* SIMD load ops */

/* Unaligned 512-bit integer load; all-ones mask loads every element. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1);
}

/* Merge-masked unaligned 32-bit-element load: elements with a clear bit in
   __U keep the value from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}


/* Zero-masked unaligned 32-bit-element load: masked-off elements are zero. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

/* Merge-masked unaligned 64-bit-element load. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked unaligned 64-bit-element load. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}
4592
/* Merge-masked unaligned float load: elements with a clear bit in __U keep
   the value from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked unaligned float load: masked-off elements are zero. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Merge-masked unaligned double load. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked unaligned double load. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4626
/* Unaligned 512-bit double load: the packed, may_alias struct tells the
   compiler the access has alignment 1 and may alias any type, so a plain
   member read compiles to an unaligned vector load without UB. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

/* Unaligned 512-bit float load; same packed/may_alias technique as above. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512 __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}
4644
/* Aligned 512-bit float load; all-ones mask loads every element.  __p is
   assumed 64-byte aligned (builtin takes a __v16sf pointer). */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(void const *__p)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Merge-masked aligned float load: masked-off elements keep __W's value. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked aligned float load: masked-off elements are zero. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Aligned 512-bit double load; all lanes live. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(void const *__p)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

/* Merge-masked aligned double load. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Zero-masked aligned double load. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4696
/* Aligned 512-bit integer load via direct dereference; __P must be 64-byte
   aligned (the __m512i type carries that alignment). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Aligned load of 16 x 32-bit ints; identical codegen to load_si512. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Aligned load of 8 x 64-bit ints; identical codegen to load_si512. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}
4714
/* SIMD store ops */

/* Merge-masked unaligned store of 8 x 64-bit ints: only elements with a set
   bit in __U are written to memory. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store; all-ones mask writes every element. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
            (__mmask16) -1);
}

/* Merge-masked unaligned store of 16 x 32-bit ints. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

/* Merge-masked unaligned double store. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned 512-bit double store; all lanes written. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}

/* Merge-masked unaligned float store. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned 512-bit float store; all lanes written. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}
4762
/* Merge-masked aligned double store; __P must be 64-byte aligned. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned 512-bit double store via direct dereference; __P must be 64-byte
   aligned (the __m512d type carries that alignment). */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Merge-masked aligned float store. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned 512-bit float store via direct dereference. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer store via direct dereference. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 16 x 32-bit ints; identical codegen to store_si512. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned store of 8 x 64-bit ints; identical codegen to store_si512. */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4805
/* Mask ops */

/* Bitwise NOT of a 16-bit mask register value. */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
4813
/* Integer compare */

/* Named comparison predicates, all expressed through the generic
   _mm512_[mask_]cmp_*_mask macros with the matching _MM_CMPINT_* code.
   The mask_* forms AND the comparison result with the incoming mask k. */

/* Signed 32-bit element comparisons. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit element comparisons. */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit element comparisons. */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit element comparisons. */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4915
/* Sign-extend 16 x 8-bit ints to 16 x 32-bit ints. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}

/* Merge-masked variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}

/* Zero-masked variant: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}
4939
/* Sign-extend the low 8 x 8-bit ints of __A to 8 x 64-bit ints (the shuffle
   selects elements 0-7 before widening). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi64(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Merge-masked variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}

/* Zero-masked variant: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512 ());
}
4963
/* Sign-extend 8 x 32-bit ints to 8 x 64-bit ints (__v8si elements are
   signed, so convertvector sign-extends). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
}

/* Merge-masked variant: lanes with a clear bit in __U come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}

/* Zero-masked variant: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}
4985
4986 static __inline__ __m512i __DEFAULT_FN_ATTRS
4987 _mm512_cvtepi16_epi32(__m256i __A)
4988 {
4989   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4990 }
4991
4992 static __inline__ __m512i __DEFAULT_FN_ATTRS
4993 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4994 {
4995   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4996                                             (__v16si)_mm512_cvtepi16_epi32(__A),
4997                                             (__v16si)__W);
4998 }
4999
5000 static __inline__ __m512i __DEFAULT_FN_ATTRS
5001 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5002 {
5003   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5004                                             (__v16si)_mm512_cvtepi16_epi32(__A),
5005                                             (__v16si)_mm512_setzero_si512 ());
5006 }
5007
5008 static __inline__ __m512i __DEFAULT_FN_ATTRS
5009 _mm512_cvtepi16_epi64(__m128i __A)
5010 {
5011   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5012 }
5013
5014 static __inline__ __m512i __DEFAULT_FN_ATTRS
5015 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5016 {
5017   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5018                                              (__v8di)_mm512_cvtepi16_epi64(__A),
5019                                              (__v8di)__W);
5020 }
5021
5022 static __inline__ __m512i __DEFAULT_FN_ATTRS
5023 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5024 {
5025   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5026                                              (__v8di)_mm512_cvtepi16_epi64(__A),
5027                                              (__v8di)_mm512_setzero_si512());
5028 }
5029
5030 static __inline__ __m512i __DEFAULT_FN_ATTRS
5031 _mm512_cvtepu8_epi32(__m128i __A)
5032 {
5033   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5034 }
5035
5036 static __inline__ __m512i __DEFAULT_FN_ATTRS
5037 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5038 {
5039   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5040                                              (__v16si)_mm512_cvtepu8_epi32(__A),
5041                                              (__v16si)__W);
5042 }
5043
5044 static __inline__ __m512i __DEFAULT_FN_ATTRS
5045 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5046 {
5047   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5048                                              (__v16si)_mm512_cvtepu8_epi32(__A),
5049                                              (__v16si)_mm512_setzero_si512());
5050 }
5051
5052 static __inline__ __m512i __DEFAULT_FN_ATTRS
5053 _mm512_cvtepu8_epi64(__m128i __A)
5054 {
5055   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5056 }
5057
5058 static __inline__ __m512i __DEFAULT_FN_ATTRS
5059 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5060 {
5061   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5062                                              (__v8di)_mm512_cvtepu8_epi64(__A),
5063                                              (__v8di)__W);
5064 }
5065
5066 static __inline__ __m512i __DEFAULT_FN_ATTRS
5067 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5068 {
5069   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5070                                              (__v8di)_mm512_cvtepu8_epi64(__A),
5071                                              (__v8di)_mm512_setzero_si512());
5072 }
5073
5074 static __inline__ __m512i __DEFAULT_FN_ATTRS
5075 _mm512_cvtepu32_epi64(__m256i __X)
5076 {
5077   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5078 }
5079
5080 static __inline__ __m512i __DEFAULT_FN_ATTRS
5081 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5082 {
5083   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5084                                              (__v8di)_mm512_cvtepu32_epi64(__X),
5085                                              (__v8di)__W);
5086 }
5087
5088 static __inline__ __m512i __DEFAULT_FN_ATTRS
5089 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5090 {
5091   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5092                                              (__v8di)_mm512_cvtepu32_epi64(__X),
5093                                              (__v8di)_mm512_setzero_si512());
5094 }
5095
5096 static __inline__ __m512i __DEFAULT_FN_ATTRS
5097 _mm512_cvtepu16_epi32(__m256i __A)
5098 {
5099   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5100 }
5101
5102 static __inline__ __m512i __DEFAULT_FN_ATTRS
5103 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5104 {
5105   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5106                                             (__v16si)_mm512_cvtepu16_epi32(__A),
5107                                             (__v16si)__W);
5108 }
5109
5110 static __inline__ __m512i __DEFAULT_FN_ATTRS
5111 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5112 {
5113   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5114                                             (__v16si)_mm512_cvtepu16_epi32(__A),
5115                                             (__v16si)_mm512_setzero_si512());
5116 }
5117
5118 static __inline__ __m512i __DEFAULT_FN_ATTRS
5119 _mm512_cvtepu16_epi64(__m128i __A)
5120 {
5121   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5122 }
5123
5124 static __inline__ __m512i __DEFAULT_FN_ATTRS
5125 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5126 {
5127   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5128                                              (__v8di)_mm512_cvtepu16_epi64(__A),
5129                                              (__v8di)__W);
5130 }
5131
5132 static __inline__ __m512i __DEFAULT_FN_ATTRS
5133 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5134 {
5135   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5136                                              (__v8di)_mm512_cvtepu16_epi64(__A),
5137                                              (__v8di)_mm512_setzero_si512());
5138 }
5139
5140 static __inline__ __m512i __DEFAULT_FN_ATTRS
5141 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
5142 {
5143   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5144               (__v16si) __B,
5145               (__v16si)
5146               _mm512_setzero_si512 (),
5147               (__mmask16) -1);
5148 }
5149
5150 static __inline__ __m512i __DEFAULT_FN_ATTRS
5151 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5152 {
5153   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5154               (__v16si) __B,
5155               (__v16si) __W,
5156               (__mmask16) __U);
5157 }
5158
5159 static __inline__ __m512i __DEFAULT_FN_ATTRS
5160 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5161 {
5162   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5163               (__v16si) __B,
5164               (__v16si)
5165               _mm512_setzero_si512 (),
5166               (__mmask16) __U);
5167 }
5168
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS
5170 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
5171 {
5172   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5173               (__v8di) __B,
5174               (__v8di)
5175               _mm512_setzero_si512 (),
5176               (__mmask8) -1);
5177 }
5178
5179 static __inline__ __m512i __DEFAULT_FN_ATTRS
5180 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5181 {
5182   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5183               (__v8di) __B,
5184               (__v8di) __W,
5185               (__mmask8) __U);
5186 }
5187
5188 static __inline__ __m512i __DEFAULT_FN_ATTRS
5189 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5190 {
5191   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5192               (__v8di) __B,
5193               (__v8di)
5194               _mm512_setzero_si512 (),
5195               (__mmask8) __U);
5196 }
5197
5198
5199
/* Integer lane compares with an immediate predicate p (one of the
   _MM_CMPINT_* values).  Each macro yields a bitmask with one bit per lane;
   the _mask_ variants AND the result with the caller's mask m.  These must
   be macros because p feeds an immediate operand of the builtin. */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })
5239
/* Rotate lanes left by the immediate b (VPROLD / VPROLQ).  Macros because
   the rotate count must be an immediate operand. */
#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5268 static __inline__ __m512i __DEFAULT_FN_ATTRS
5269 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5270 {
5271   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5272               (__v16si) __B,
5273               (__v16si)
5274               _mm512_setzero_si512 (),
5275               (__mmask16) -1);
5276 }
5277
5278 static __inline__ __m512i __DEFAULT_FN_ATTRS
5279 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5280 {
5281   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5282               (__v16si) __B,
5283               (__v16si) __W,
5284               (__mmask16) __U);
5285 }
5286
5287 static __inline__ __m512i __DEFAULT_FN_ATTRS
5288 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5289 {
5290   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5291               (__v16si) __B,
5292               (__v16si)
5293               _mm512_setzero_si512 (),
5294               (__mmask16) __U);
5295 }
5296
5297 static __inline__ __m512i __DEFAULT_FN_ATTRS
5298 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5299 {
5300   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5301               (__v8di) __B,
5302               (__v8di)
5303               _mm512_setzero_si512 (),
5304               (__mmask8) -1);
5305 }
5306
5307 static __inline__ __m512i __DEFAULT_FN_ATTRS
5308 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5309 {
5310   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5311               (__v8di) __B,
5312               (__v8di) __W,
5313               (__mmask8) __U);
5314 }
5315
5316 static __inline__ __m512i __DEFAULT_FN_ATTRS
5317 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5318 {
5319   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5320               (__v8di) __B,
5321               (__v8di)
5322               _mm512_setzero_si512 (),
5323               (__mmask8) __U);
5324 }
5325
/* Rotate lanes right by the immediate B (VPRORD / VPRORQ).  Macros because
   the rotate count must be an immediate operand. */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5354
5355 static __inline__ __m512i __DEFAULT_FN_ATTRS
5356 _mm512_slli_epi32(__m512i __A, int __B)
5357 {
5358   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5359 }
5360
5361 static __inline__ __m512i __DEFAULT_FN_ATTRS
5362 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5363 {
5364   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5365                                          (__v16si)_mm512_slli_epi32(__A, __B),
5366                                          (__v16si)__W);
5367 }
5368
5369 static __inline__ __m512i __DEFAULT_FN_ATTRS
5370 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5371   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5372                                          (__v16si)_mm512_slli_epi32(__A, __B),
5373                                          (__v16si)_mm512_setzero_si512());
5374 }
5375
5376 static __inline__ __m512i __DEFAULT_FN_ATTRS
5377 _mm512_slli_epi64(__m512i __A, int __B)
5378 {
5379   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5380 }
5381
5382 static __inline__ __m512i __DEFAULT_FN_ATTRS
5383 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5384 {
5385   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5386                                           (__v8di)_mm512_slli_epi64(__A, __B),
5387                                           (__v8di)__W);
5388 }
5389
5390 static __inline__ __m512i __DEFAULT_FN_ATTRS
5391 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5392 {
5393   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5394                                           (__v8di)_mm512_slli_epi64(__A, __B),
5395                                           (__v8di)_mm512_setzero_si512());
5396 }
5397
5398 static __inline__ __m512i __DEFAULT_FN_ATTRS
5399 _mm512_srli_epi32(__m512i __A, int __B)
5400 {
5401   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5402 }
5403
5404 static __inline__ __m512i __DEFAULT_FN_ATTRS
5405 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5406 {
5407   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5408                                          (__v16si)_mm512_srli_epi32(__A, __B),
5409                                          (__v16si)__W);
5410 }
5411
5412 static __inline__ __m512i __DEFAULT_FN_ATTRS
5413 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5414   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5415                                          (__v16si)_mm512_srli_epi32(__A, __B),
5416                                          (__v16si)_mm512_setzero_si512());
5417 }
5418
5419 static __inline__ __m512i __DEFAULT_FN_ATTRS
5420 _mm512_srli_epi64(__m512i __A, int __B)
5421 {
5422   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5423 }
5424
5425 static __inline__ __m512i __DEFAULT_FN_ATTRS
5426 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5427 {
5428   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5429                                           (__v8di)_mm512_srli_epi64(__A, __B),
5430                                           (__v8di)__W);
5431 }
5432
5433 static __inline__ __m512i __DEFAULT_FN_ATTRS
5434 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5435 {
5436   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5437                                           (__v8di)_mm512_srli_epi64(__A, __B),
5438                                           (__v8di)_mm512_setzero_si512());
5439 }
5440
5441 static __inline__ __m512i __DEFAULT_FN_ATTRS
5442 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5443 {
5444   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5445               (__v16si) __W,
5446               (__mmask16) __U);
5447 }
5448
5449 static __inline__ __m512i __DEFAULT_FN_ATTRS
5450 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5451 {
5452   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5453               (__v16si)
5454               _mm512_setzero_si512 (),
5455               (__mmask16) __U);
5456 }
5457
5458 static __inline__ void __DEFAULT_FN_ATTRS
5459 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5460 {
5461   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5462           (__mmask16) __U);
5463 }
5464
5465 static __inline__ __m512i __DEFAULT_FN_ATTRS
5466 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5467 {
5468   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5469                  (__v16si) __A,
5470                  (__v16si) __W);
5471 }
5472
5473 static __inline__ __m512i __DEFAULT_FN_ATTRS
5474 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5475 {
5476   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5477                  (__v16si) __A,
5478                  (__v16si) _mm512_setzero_si512 ());
5479 }
5480
5481 static __inline__ __m512i __DEFAULT_FN_ATTRS
5482 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5483 {
5484   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5485                  (__v8di) __A,
5486                  (__v8di) __W);
5487 }
5488
5489 static __inline__ __m512i __DEFAULT_FN_ATTRS
5490 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5491 {
5492   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5493                  (__v8di) __A,
5494                  (__v8di) _mm512_setzero_si512 ());
5495 }
5496
5497 static __inline__ __m512i __DEFAULT_FN_ATTRS
5498 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5499 {
5500   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5501               (__v8di) __W,
5502               (__mmask8) __U);
5503 }
5504
5505 static __inline__ __m512i __DEFAULT_FN_ATTRS
5506 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5507 {
5508   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5509               (__v8di)
5510               _mm512_setzero_si512 (),
5511               (__mmask8) __U);
5512 }
5513
5514 static __inline__ void __DEFAULT_FN_ATTRS
5515 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5516 {
5517   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5518           (__mmask8) __U);
5519 }
5520
5521 static __inline__ __m512d __DEFAULT_FN_ATTRS
5522 _mm512_movedup_pd (__m512d __A)
5523 {
5524   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5525                                           0, 0, 2, 2, 4, 4, 6, 6);
5526 }
5527
5528 static __inline__ __m512d __DEFAULT_FN_ATTRS
5529 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5530 {
5531   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5532                                               (__v8df)_mm512_movedup_pd(__A),
5533                                               (__v8df)__W);
5534 }
5535
5536 static __inline__ __m512d __DEFAULT_FN_ATTRS
5537 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5538 {
5539   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5540                                               (__v8df)_mm512_movedup_pd(__A),
5541                                               (__v8df)_mm512_setzero_pd());
5542 }
5543
/* VFIXUPIMMPD / VFIXUPIMMPS: fix up special floating-point values in A/B
   according to the lookup table in C and the immediate imm.  The _round_
   variants take an explicit rounding/SAE mode R; the others use the current
   direction.  Macros because imm (and R) must be immediates. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })
5623
/* Scalar fixup (VFIXUPIMMSD / VFIXUPIMMSS): fix up the low element only;
   upper elements pass through from A.  Macros because imm/R must be
   immediates. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })
5694
5695 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
5696   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5697                                           (__v4sf)(__m128)(B), \
5698                                           (__v4si)(__m128i)(C), (int)(imm), \
5699                                           (__mmask8)(U), \
5700                                           _MM_FROUND_CUR_DIRECTION); })
5701
/* Scalar double-precision "getexp" intrinsics.  All forward to
   __builtin_ia32_getexpsd128_round_mask; the third builtin argument is the
   masked-off source (a zero vector for unmasked/zero-masked forms, __W for
   merge-masked forms). */

/* Unmasked with explicit rounding R. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })


/* Unmasked, current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: the masked-off low lane is taken from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding R. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R)); })

/* Zero-masked, current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding R. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R)); })
5747
/* Scalar single-precision "getexp" intrinsics, mirroring the _sd family
   via __builtin_ia32_getexpss128_round_mask. */

/* Unmasked with explicit rounding R. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R)); })

/* Unmasked, current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: the masked-off low lane is taken from __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding R. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(R)); })
5776
5777 static __inline__ __m128 __DEFAULT_FN_ATTRS
5778 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5779 {
5780  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5781           (__v4sf) __B,
5782           (__v4sf) _mm_setzero_pd (),
5783           (__mmask8) __U,
5784           _MM_FROUND_CUR_DIRECTION);
5785 }
5786
/* Zero-masked scalar getexp (single precision) with explicit rounding R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)); })
5792
/* Scalar double-precision "getmant" intrinsics.  The two immediate control
   arguments C and D are packed into a single immediate as ((D) << 2) | (C)
   before reaching __builtin_ia32_getmantsd_round_mask. */

/* Unmasked with explicit rounding R. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked under U (masked-off lane from W), current rounding. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })
5815
/* Merge-masked scalar getmant (double precision) with explicit rounding R.
   Fix: added the missing `__extension__` before the GNU statement
   expression, matching every other macro in this file and silencing the
   -pedantic warning about braced-group expressions. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
5822
/* Zero-masked scalar getmant (double precision), current rounding. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked scalar getmant (double precision) with explicit rounding R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)); })
5837
/* Scalar single-precision "getmant" intrinsics, mirroring the _sd family;
   C and D are packed as ((D) << 2) | (C). */

/* Unmasked with explicit rounding R. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked under U (masked-off lane from W), current rounding. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5860
/* Merge-masked scalar getmant (single precision) with explicit rounding R.
   Fix: added the missing `__extension__` before the GNU statement
   expression, matching every other macro in this file and silencing the
   -pedantic warning about braced-group expressions. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
5867
/* Zero-masked scalar getmant (single precision), current rounding.
   Fix: the zeroed pass-through source must be the single-precision
   _mm_setzero_ps(), not the double-precision _mm_setzero_pd() the original
   cast through __v4sf.  Bit pattern is unchanged (all zero), but the types
   now match the builtin and the rest of the _ss family. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5875
/* Zero-masked scalar getmant (single precision) with explicit rounding R. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)); })
5882
/* Copy (kmov) a 16-bit mask register value; at the C level this is simply
   the identity function on __mmask16. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  return  __A;
}
5888
/* Compare the low elements of A and B with predicate P and rounding/SAE
   control R, returning the result as an int (vcomisd / vcomiss). */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R)); })

#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R)); })

/* Convert the low double of A to a signed 64-bit integer with rounding
   control R (64-bit targets only). */
#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif
5901
/* Two-source permute: select 32-bit elements from __A and __B according to
   the index vector __I, under write mask __U (vpermi2d builtin).
   NOTE(review): in this "mask2" variant the masked-off result lanes
   presumably come from the index operand __I per vpermi2 semantics --
   confirm against the Intel SDM; the builtin signature alone does not
   show it. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
         __mmask16 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16si) __B,
                   (__mmask16) __U);
}
5912
/* Shift-left-logical of each 32-bit element of __A by the count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Shift-left-logical of each 64-bit element of __A by the count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5956
/* Variable shift-left-logical: each 32-bit element of __X is shifted left
   by the count in the corresponding element of __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable shift-left-logical of 64-bit elements (per-element counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6000
/* Arithmetic right shift of each 32-bit element of __A by the count
   in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Arithmetic right shift of each 64-bit element of __A by the count
   in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
6044
/* Variable arithmetic right shift: each 32-bit element of __X is shifted
   by the count in the corresponding element of __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable arithmetic right shift of 64-bit elements (per-element counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6088
/* Logical right shift of each 32-bit element of __A by the count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* Logical right shift of each 64-bit element of __A by the count in __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
6132
/* Variable logical right shift: each 32-bit element of __X is shifted by
   the count in the corresponding element of __Y. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* Variable logical right shift of 64-bit elements (per-element counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: lanes with a 0 bit in __U keep the value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: lanes with a 0 bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6176
/* Bitwise ternary logic (vpternlog): each result bit is produced by
   indexing the 8-bit truth table `imm` with the corresponding bits of
   A, B and C.  The epi32/epi64 split matters only for masking lane width. */

/* 32-bit lanes, unmasked (all-ones mask). */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1); })

/* 32-bit lanes, merge-masked under U. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U)); })

/* 32-bit lanes, zero-masked under U. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)); })

/* 64-bit lanes, unmasked (all-ones mask). */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1); })

/* 64-bit lanes, merge-masked under U. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

/* 64-bit lanes, zero-masked under U. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
6212
/* Conversions of the low double of a __m128d to scalar integers.  The
   _cvt_round* macros take an explicit rounding control R; the inline
   functions use _MM_FROUND_CUR_DIRECTION.  64-bit results are only
   available on x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif

/* Low double -> signed 32-bit int with rounding R. */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Identical to _mm_cvt_roundsd_si32; alternate _i32 naming. */
#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Low double -> unsigned 32-bit int with rounding R. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

/* Low double -> unsigned 32-bit int, current rounding direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low double -> unsigned 64-bit int with rounding R. */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R)); })

/* Low double -> unsigned 64-bit int, current rounding direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6247
/* Conversions of the low float of a __m128 to scalar integers, mirroring
   the _sd conversions above. */

/* Low float -> signed 32-bit int with rounding R. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Identical to _mm_cvt_roundss_si32; alternate _i32 naming. */
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#ifdef __x86_64__
/* Low float -> signed 64-bit int with rounding R (x86-64 only). */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#endif

/* Low float -> unsigned 32-bit int with rounding R. */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })

/* Low float -> unsigned 32-bit int, current rounding direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low float -> unsigned 64-bit int with rounding R (x86-64 only). */
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R)); })

/* Low float -> unsigned 64-bit int, current rounding direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6285
/* Truncating (vcvtt*) conversions of the low double to scalar integers;
   the R argument controls exception suppression rather than rounding,
   since the conversion itself truncates. */

#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Identical to _mm_cvtt_roundsd_i32; alternate _si32 naming. */
#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Truncate low double -> signed 32-bit int, current direction. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* 64-bit truncating variants (x86-64 only). */
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

/* Truncate low double -> signed 64-bit int, current direction. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Truncate low double -> unsigned 32-bit int with control R. */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

/* Truncate low double -> unsigned 32-bit int, current direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Truncate low double -> unsigned 64-bit int with control R. */
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)); })

/* Truncate low double -> unsigned 64-bit int, current direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6337
/* Truncating conversion: low float of A -> signed 32-bit integer, with an
   explicit rounding/SAE operand R. */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Alternate _si32 spelling of _mm_cvtt_roundss_i32; identical expansion. */
#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

/* Truncate the low float of __A to a signed 32-bit integer using the
   current rounding direction. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6350
#ifdef __x86_64__
/* Truncating conversion: low float of A -> signed 64-bit integer, with an
   explicit rounding/SAE operand R (64-bit targets only). */
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

/* Alternate _si64 spelling of _mm_cvtt_roundss_i64; identical expansion. */
#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

/* Truncate the low float of __A to a signed 64-bit integer using the
   current rounding direction. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif
6365
/* Truncating conversion: low float of A -> unsigned 32-bit integer, with an
   explicit rounding/SAE operand R. */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })

/* Truncate the low float of __A to an unsigned 32-bit integer using the
   current rounding direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6375
#ifdef __x86_64__
/* Truncating conversion: low float of A -> unsigned 64-bit integer, with an
   explicit rounding/SAE operand R (64-bit targets only). */
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)); })

/* Truncate the low float of __A to an unsigned 64-bit integer using the
   current rounding direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6389
6390 static __inline__ __m512d __DEFAULT_FN_ATTRS
6391 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6392             __m512d __B)
6393 {
6394   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6395               (__v8di) __I
6396               /* idx */ ,
6397               (__v8df) __B,
6398               (__mmask8) __U);
6399 }
6400
6401 static __inline__ __m512 __DEFAULT_FN_ATTRS
6402 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6403             __m512 __B)
6404 {
6405   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6406                    (__v16si) __I
6407                    /* idx */ ,
6408                    (__v16sf) __B,
6409                    (__mmask16) __U);
6410 }
6411
6412 static __inline__ __m512i __DEFAULT_FN_ATTRS
6413 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6414          __mmask8 __U, __m512i __B)
6415 {
6416   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6417                    (__v8di) __I
6418                    /* idx */ ,
6419                    (__v8di) __B,
6420                    (__mmask8) __U);
6421 }
6422
/* Immediate in-lane double permute: within each 128-bit lane, one bit of C
   selects which of the lane's two doubles goes to each result slot. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

/* Merge-masked variant: per __builtin_ia32_selectpd_512, elements not
   selected by U come from W. */
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked variant: elements not selected by U are zeroed. */
#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
6444
/* Immediate in-lane float permute: within each 128-bit lane, each 2-bit
   field of C selects one of the lane's four floats for a result slot; the
   same 8-bit pattern is applied to all four lanes. */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                   0  + (((C) >> 0) & 0x3), \
                                   0  + (((C) >> 2) & 0x3), \
                                   0  + (((C) >> 4) & 0x3), \
                                   0  + (((C) >> 6) & 0x3), \
                                   4  + (((C) >> 0) & 0x3), \
                                   4  + (((C) >> 2) & 0x3), \
                                   4  + (((C) >> 4) & 0x3), \
                                   4  + (((C) >> 6) & 0x3), \
                                   8  + (((C) >> 0) & 0x3), \
                                   8  + (((C) >> 2) & 0x3), \
                                   8  + (((C) >> 4) & 0x3), \
                                   8  + (((C) >> 6) & 0x3), \
                                   12 + (((C) >> 0) & 0x3), \
                                   12 + (((C) >> 2) & 0x3), \
                                   12 + (((C) >> 4) & 0x3), \
                                   12 + (((C) >> 6) & 0x3)); })

/* Merge-masked variant: elements not selected by U come from W. */
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masked variant: elements not selected by U are zeroed. */
#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })
6474
/* Variable in-lane double permute (vpermilvar): indices come from the
   vector __C rather than an immediate. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masked variant: elements not selected by __U come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

/* Zero-masked variant: elements not selected by __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}

/* Variable in-lane float permute (vpermilvar): indices come from the
   vector __C rather than an immediate. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masked variant: elements not selected by __U come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

/* Zero-masked variant: elements not selected by __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
6518
/* Two-source double permute (vpermt2 form): the index vector __I is the
   FIRST builtin operand; elements are drawn from __A/__B per __I. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) -1);
}

/* Merge-masked variant under write-mask __U. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) __U);
}

/* Zero-masked variant: uses the _maskz builtin so unselected elements
   are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
                                                         /* idx */ ,
                                                         (__v8df) __A,
                                                         (__v8df) __B,
                                                         (__mmask8) __U);
}
6549
/* Two-source float permute (vpermt2 form): the index vector __I is the
   FIRST builtin operand; elements are drawn from __A/__B per __I. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) -1);
}

/* Merge-masked variant under write-mask __U. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) __U);
}

/* Zero-masked variant: uses the _maskz builtin so unselected elements
   are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16sf) __A,
                                                        (__v16sf) __B,
                                                        (__mmask16) __U);
}
6580
6581
/* Truncating packed conversion: 8 doubles -> 8 unsigned 32-bit ints, with an
   explicit rounding/SAE operand R; unmasked (all-ones mask). */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R)); })

/* Merge-masked variant: unselected elements come from W. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: unselected elements are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)); })
6596
/* Truncating packed conversion: 8 doubles -> 8 unsigned 32-bit ints,
   current rounding direction, unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: unselected elements come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: unselected elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
6625
/* Scalar double round-to-scale family (VRNDSCALESD): imm/I encodes the
   rounding behavior; the _round_ forms also take an explicit rounding/SAE
   operand R, the others use the current rounding direction. */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: W supplies the element when the mask bit is clear. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked with explicit rounding operand R. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked with explicit rounding operand R. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })
6667
/* Scalar float round-to-scale family (VRNDSCALESS); mirrors the _sd family
   above: imm/I encodes rounding behavior, _round_ forms take R, others use
   the current rounding direction. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               (int)(R)); })

/* Unmasked, current rounding direction. */
#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: W supplies the element when the mask bit is clear. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked with explicit rounding operand R. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked with explicit rounding operand R. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })
6709
/* Packed double scalef (VSCALEFPD) with explicit rounding operand R;
   unmasked form. */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked: unselected elements come from W. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked: unselected elements are zeroed. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
6727
/* Packed double scalef (VSCALEFPD), current rounding direction, unmasked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: unselected elements come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: unselected elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
6759
/* Packed float scalef (VSCALEFPS) with explicit rounding operand R;
   unmasked form. */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

/* Merge-masked: unselected elements come from W. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

/* Zero-masked: unselected elements are zeroed. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
6777
/* Packed float scalef (VSCALEFPS), current rounding direction, unmasked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: unselected elements come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: unselected elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
6809
/* Scalar double scalef (VSCALEFSD) family.  The _round_ forms take an
   explicit rounding operand R; the functions use the current rounding
   direction. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)); })

/* Unmasked scalar scalef, current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_scalef_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
              (__mmask8) -1,
              _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: the low element falls back to __W when the mask
   bit is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding operand R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

/* Zero-masked variant, current rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked with explicit rounding operand R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
6856
/* Scalar float scalef (VSCALEFSS) family; mirrors the _sd family above. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)); })

/* Unmasked scalar scalef, current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_scalef_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
             (__mmask8) -1,
             _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant: the low element falls back to __W when the mask
   bit is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked with explicit rounding operand R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

/* Zero-masked variant, current rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
6897
/* Zero-masked scalar float scalef with an explicit rounding operand R.
   Fix: forward R to the builtin instead of hard-coding
   _MM_FROUND_CUR_DIRECTION, which silently ignored the caller's rounding
   mode — every sibling *_round_* macro in this file passes (int)(R). */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), \
                                             (int)(R)); })
6904
/* Arithmetic right shift of each 32-bit element of __A by the scalar
   count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srai_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
}

/* Merge-masked variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
                                         (__v16si)_mm512_srai_epi32(__A, __B), \
                                         (__v16si)__W);
}

/* Zero-masked variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
                                         (__v16si)_mm512_srai_epi32(__A, __B), \
                                         (__v16si)_mm512_setzero_si512());
}
6925
/* Arithmetic right shift of each 64-bit element of __A by the scalar
   count __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srai_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
}

/* Merge-masked variant: unselected elements come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
                                          (__v8di)_mm512_srai_epi64(__A, __B), \
                                          (__v8di)__W);
}

/* Zero-masked variant: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
                                          (__v8di)_mm512_srai_epi64(__A, __B), \
                                          (__v8di)_mm512_setzero_si512());
}
6947
/* Shuffle 128-bit (4-float) lanes: the low two results take lanes from A,
   the high two from B (indices 16+ address the second shufflevector
   operand); each 2-bit field of imm picks a source lane. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0 + ((((imm) >> 0) & 0x3) * 4), \
                                  1 + ((((imm) >> 0) & 0x3) * 4), \
                                  2 + ((((imm) >> 0) & 0x3) * 4), \
                                  3 + ((((imm) >> 0) & 0x3) * 4), \
                                  0 + ((((imm) >> 2) & 0x3) * 4), \
                                  1 + ((((imm) >> 2) & 0x3) * 4), \
                                  2 + ((((imm) >> 2) & 0x3) * 4), \
                                  3 + ((((imm) >> 2) & 0x3) * 4), \
                                  16 + ((((imm) >> 4) & 0x3) * 4), \
                                  17 + ((((imm) >> 4) & 0x3) * 4), \
                                  18 + ((((imm) >> 4) & 0x3) * 4), \
                                  19 + ((((imm) >> 4) & 0x3) * 4), \
                                  16 + ((((imm) >> 6) & 0x3) * 4), \
                                  17 + ((((imm) >> 6) & 0x3) * 4), \
                                  18 + ((((imm) >> 6) & 0x3) * 4), \
                                  19 + ((((imm) >> 6) & 0x3) * 4)); })

/* Merge-masked variant: unselected elements come from W. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masked variant: unselected elements are zeroed. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })
6977
/* Shuffle 128-bit (2-double) lanes: low two results from A, high two from
   B (indices 8+ address the second operand); each 2-bit field of imm picks
   a source lane. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

/* Merge-masked variant: unselected elements come from W. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked variant: unselected elements are zeroed. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })
6999
/* VSHUFI32X4: same lane-selection pattern as the f64x2 form, expressed as a
   shuffle of 64-bit pairs (each 128-bit lane = two __v8di elements); the
   result is still a 32-bit-element vector as far as masking is concerned. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

/* Merge-masked variant: per-32-bit-element select against W. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masked variant: unselected 32-bit elements are zeroed. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })
7021
/* VSHUFI64X2: select four 128-bit lanes (pairs of 64-bit integers); low two
   lanes from A, high two from B, each picked by a 2-bit field of imm. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)(__m512i)(B), \
                                   0 + ((((imm) >> 0) & 0x3) * 2), \
                                   1 + ((((imm) >> 0) & 0x3) * 2), \
                                   0 + ((((imm) >> 2) & 0x3) * 2), \
                                   1 + ((((imm) >> 2) & 0x3) * 2), \
                                   8 + ((((imm) >> 4) & 0x3) * 2), \
                                   9 + ((((imm) >> 4) & 0x3) * 2), \
                                   8 + ((((imm) >> 6) & 0x3) * 2), \
                                   9 + ((((imm) >> 6) & 0x3) * 2)); })

/* Merge-masked variant: unselected 64-bit elements come from W. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked variant: unselected 64-bit elements are zeroed. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
7043
/* VSHUFPD: within each 128-bit lane, the even result element is picked from
   A and the odd one from B, each by one bit of M (indices 8..15 address B in
   the concatenated 16-element shuffle). */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

/* Merge-masked variant: unselected elements come from W. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked variant: unselected elements are zeroed. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
7065
/* VSHUFPS: within each 128-bit lane, the two low result floats are selected
   from A (by M bits 1:0 and 3:2) and the two high result floats from B (by M
   bits 5:4 and 7:6); the same 8-bit pattern is applied to all four lanes.
   Fix: the shufflevector result was cast to (__m512d) — a double vector —
   even though this is the _ps intrinsic and both source operands are
   (__v16sf).  The mask/maskz wrappers below cast the result to (__v16sf), so
   the correct result type is (__m512). */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7085
/* Merge-masked VSHUFPS: unselected floats come from W. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masked VSHUFPS: unselected floats are zeroed. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
7095
/* Scalar double square root (VSQRTSD) with explicit rounding control R;
   unmasked ((__mmask8)-1 selects all elements). */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge-masked scalar double square root using the current rounding mode;
   __W supplies the value used when the mask bit is clear. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked scalar double square root with explicit rounding R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero-masked scalar double square root using the current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
                 (__v2df) __B,
                (__v2df) _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double square root with explicit rounding R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })
7133
/* Scalar float square root (VSQRTSS) with explicit rounding control R;
   unmasked ((__mmask8)-1 selects all elements). */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked scalar float square root using the current rounding mode;
   __W supplies the value used when the mask bit is clear. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked scalar float square root with explicit rounding R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)); })

/* Zero-masked scalar float square root using the current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
                 (__v4sf) __B,
                (__v4sf) _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar float square root with explicit rounding R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
7171
/* Replicate the four floats of __A into all four 128-bit lanes of a 512-bit
   vector (VBROADCASTF32X4). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: unselected elements come from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)__O);
}

/* Zero-masked broadcast: unselected elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)_mm512_setzero_ps());
}
7195
/* Replicate the four doubles of __A into both 256-bit halves of a 512-bit
   vector (VBROADCASTF64X4). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x4(__m256d __A)
{
  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: unselected elements come from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)__O);
}

/* Zero-masked broadcast: unselected elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)_mm512_setzero_pd());
}
7218
/* Replicate the four 32-bit integers of __A into all four 128-bit lanes
   (VBROADCASTI32X4). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: unselected elements come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)__O);
}

/* Zero-masked broadcast: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)_mm512_setzero_si512());
}
7242
/* Replicate the four 64-bit integers of __A into both 256-bit halves
   (VBROADCASTI64X4). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: unselected elements come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)__O);
}

/* Zero-masked broadcast: unselected elements are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)_mm512_setzero_si512());
}
7265
/* Merge-masked scalar-double broadcast: broadcast the low double of __A to
   all 8 elements, taking unselected elements from __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

/* Zero-masked scalar-double broadcast: unselected elements are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}
7281
/* Merge-masked scalar-float broadcast: broadcast the low float of __A to all
   16 elements, taking unselected elements from __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

/* Zero-masked scalar-float broadcast: unselected elements are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}
7297
/* VPMOVSDB: narrow 16 signed 32-bit elements to 8 bits with signed
   saturation; passthrough operand is undefined since the mask is all-ones. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

/* Merge-masked variant: bytes with a clear mask bit are taken from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

/* Zero-masked variant: bytes with a clear mask bit are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Store the narrowed bytes to unaligned memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7326
/* VPMOVSDW: narrow 16 signed 32-bit elements to 16 bits with signed
   saturation; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

/* Store the narrowed halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7355
/* VPMOVSQB: narrow 8 signed 64-bit elements to 8 bits with signed
   saturation (result occupies the low 8 bytes); unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masked variant: unselected bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

/* Zero-masked variant: unselected bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Store the narrowed bytes to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7384
/* VPMOVSQD: narrow 8 signed 64-bit elements to 32 bits with signed
   saturation; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

/* Merge-masked variant: unselected dwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

/* Zero-masked variant: unselected dwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

/* Store the narrowed dwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7413
/* VPMOVSQW: narrow 8 signed 64-bit elements to 16 bits with signed
   saturation; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

/* Store the narrowed halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7442
/* VPMOVUSDB: narrow 16 unsigned 32-bit elements to 8 bits with unsigned
   saturation; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

/* Merge-masked variant: unselected bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked variant: unselected bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Store the narrowed bytes to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7472
/* VPMOVUSDW: narrow 16 unsigned 32-bit elements to 16 bits with unsigned
   saturation; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

/* Store the narrowed halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7502
/* VPMOVUSQB: narrow 8 unsigned 64-bit elements to 8 bits with unsigned
   saturation; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked variant: unselected bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked variant: unselected bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Store the narrowed bytes to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7532
/* VPMOVUSQD: narrow 8 unsigned 64-bit elements to 32 bits with unsigned
   saturation; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

/* Merge-masked variant: unselected dwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

/* Zero-masked variant: unselected dwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

/* Store the narrowed dwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}
7561
/* VPMOVUSQW: narrow 8 unsigned 64-bit elements to 16 bits with unsigned
   saturation; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

/* Store the narrowed halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
7590
/* VPMOVDB: truncate 16 32-bit elements to their low 8 bits (no saturation);
   unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

/* Merge-masked variant: unselected bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked variant: unselected bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated bytes to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7619
/* VPMOVDW: truncate 16 32-bit elements to their low 16 bits; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

/* Store the truncated halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}
7648
/* VPMOVQB: truncate 8 64-bit elements to their low 8 bits; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked variant: unselected bytes come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked variant: unselected bytes are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated bytes to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7677
/* VPMOVQD: truncate 8 64-bit elements to their low 32 bits; unmasked. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

/* Merge-masked variant: unselected dwords come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

/* Zero-masked variant: unselected dwords are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

/* Store the truncated dwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7706
/* VPMOVQW: truncate 8 64-bit elements to their low 16 bits; unmasked. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked variant: unselected halfwords come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

/* Zero-masked variant: unselected halfwords are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated halfwords to memory at __P under write-mask __M. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7735
/* VEXTRACTI32X4: extract the 128-bit lane of A selected by imm bits 1:0
   (lane index scaled by 4 to a 32-bit element index). */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4,             \
                                   1 + ((imm) & 0x3) * 4,             \
                                   2 + ((imm) & 0x3) * 4,             \
                                   3 + ((imm) & 0x3) * 4); })

/* Merge-masked extract: unselected dwords come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(W)); })

/* Zero-masked extract: unselected dwords are zeroed. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })
7753
/* Extract the 256-bit half selected by imm[0] from a 512-bit integer vector
   (VEXTRACTI64X4); elements 4..7 for imm odd, 0..3 for imm even.  */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ? 4 : 0,              \
                                   ((imm) & 1) ? 5 : 1,              \
                                   ((imm) & 1) ? 6 : 2,              \
                                   ((imm) & 1) ? 7 : 3); })

/* Merge-masked form: result elements with a clear bit in U come from W.  */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(W)); })

/* Zero-masked form: result elements with a clear bit in U are zeroed.  */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
7771
/* Insert the 256-bit vector B into the half of A selected by imm[0]
   (VINSERTF64X4).  Shuffle indices 0..7 select from A, 8..11 from the
   widened copy of B; imm odd replaces the upper half, even the lower.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Merge-masked form: result elements with a clear bit in U come from W.  */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

/* Zero-masked form: result elements with a clear bit in U are zeroed.  */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })
7793
/* Insert the 256-bit integer vector B into the half of A selected by imm[0]
   (VINSERTI64X4); same index scheme as _mm512_insertf64x4.  */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Merge-masked form: result elements with a clear bit in U come from W.  */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(W)); })

/* Zero-masked form: result elements with a clear bit in U are zeroed.  */
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })
7815
/* Insert the 128-bit vector B into the 128-bit lane of A selected by
   imm[1:0] (VINSERTF32X4).  Indices 16..19 select from the widened copy of
   B; each group of four picks B exactly when its lane number equals imm.  */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0, \
                                  (((imm) & 0x3) == 0) ? 17 :  1, \
                                  (((imm) & 0x3) == 0) ? 18 :  2, \
                                  (((imm) & 0x3) == 0) ? 19 :  3, \
                                  (((imm) & 0x3) == 1) ? 16 :  4, \
                                  (((imm) & 0x3) == 1) ? 17 :  5, \
                                  (((imm) & 0x3) == 1) ? 18 :  6, \
                                  (((imm) & 0x3) == 1) ? 19 :  7, \
                                  (((imm) & 0x3) == 2) ? 16 :  8, \
                                  (((imm) & 0x3) == 2) ? 17 :  9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

/* Merge-masked form: result elements with a clear bit in U come from W.  */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(W)); })

/* Zero-masked form: result elements with a clear bit in U are zeroed.  */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })
7845
/* Insert the 128-bit integer vector B into the 128-bit lane of A selected
   by imm[1:0] (VINSERTI32X4); same index scheme as _mm512_insertf32x4.  */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ? 19 : 15); })

/* Merge-masked form: result elements with a clear bit in U come from W.  */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(W)); })

/* Zero-masked form: result elements with a clear bit in U are zeroed.  */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()); })
7875
/* VGETMANTPD: extract the mantissa of each double in A.  B selects the
   normalization interval and C the sign control; they are packed into the
   instruction immediate as (C<<2)|B (per the VGETMANT immediate layout —
   see Intel's intrinsics documentation).  The _round_ variants take an
   explicit rounding/SAE control R; the others use the current direction.
   NOTE(review): the unmasked no-round form passes _mm512_setzero_pd() as
   the (ignored, mask is -1) passthrough while the _round_ form passes
   _mm512_undefined_pd(); harmless, but inconsistent.  */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })
7914
/* VGETMANTPS: single-precision counterpart of the getmant_pd family above.
   B is the normalization interval, C the sign control, packed as (C<<2)|B
   into the instruction immediate.  Merge-masked forms take elements from W
   where U is clear; maskz forms zero them; _round_ forms take an explicit
   rounding/SAE control R.  */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
7953
/* VGETEXPPD with explicit rounding/SAE control R; plain, merge-masked and
   zero-masked forms.  */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
7968
/* VGETEXPPD with the current rounding direction: plain, merge-masked
   (elements from __W where __U is clear) and zero-masked forms.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
7995
/* VGETEXPPS with explicit rounding/SAE control R; plain, merge-masked and
   zero-masked forms.  */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
8010
/* VGETEXPPS with the current rounding direction: plain, merge-masked
   (elements from __W where __U is clear) and zero-masked forms.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
8037
/* VGATHERQPS: gather 8 floats from addr + index[i]*scale using 64-bit
   indices.  The masked form only loads elements whose bit in mask is set;
   the rest come from v1_old.  scale must be an immediate.  */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8049
/* VPGATHERQD: gather 8 32-bit integers from addr + index[i]*scale using
   64-bit indices.  The masked form only loads elements whose bit in mask is
   set; the rest come from v1_old.  scale must be an immediate.
   Fix: the undefined passthrough was previously spelled
   (__v8si)_mm256_undefined_ps(), bit-casting a float placeholder where an
   integer vector belongs; use _mm256_undefined_si256() to match the
   builtin's integer operand (as every other integer gather here does).  */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })
8061
/* VGATHERQPD: gather 8 doubles from addr + index[i]*scale using 64-bit
   indices; masked form merges unloaded elements from v1_old.  */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8073
/* VPGATHERQQ: gather 8 64-bit integers from addr + index[i]*scale using
   64-bit indices; masked form merges unloaded elements from v1_old.
   Fix: the undefined passthrough was previously spelled
   (__v8di)_mm512_undefined_pd(), bit-casting a double placeholder where an
   integer vector belongs; use _mm512_undefined_epi32() for consistency
   with the other integer gathers in this file.  */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8085
/* VGATHERDPS: gather 16 floats from addr + index[i]*scale using 32-bit
   indices; masked form merges unloaded elements from v1_old.
   Fix: the index operand was previously cast (__v16sf)(__m512)(index) — a
   float vector passed where the builtin takes a 32-bit-integer index
   vector, which only compiled via lax vector conversion.  Cast through
   (__m512i)/(__v16si) to match the builtin's signature, as all other
   i32gather macros here do.  */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16si)(__m512i)(index), \
                                       (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16si)(__m512i)(index), \
                                       (__mmask16)(mask), (int)(scale)); })
8097
/* VPGATHERDD: gather 16 32-bit integers from addr + index[i]*scale using
   32-bit indices; masked form merges unloaded elements from v1_old.  */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })
8109
/* VGATHERDPD: gather 8 doubles from addr + index[i]*scale using 32-bit
   indices (index is a 256-bit vector); masked form merges from v1_old.  */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8121
/* VPGATHERDQ: gather 8 64-bit integers from addr + index[i]*scale using
   32-bit indices (index is a 256-bit vector); masked form merges from
   v1_old.  */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8133
/* VSCATTERQPS / VPSCATTERQD: scatter the elements of v1 to
   addr + index[i]*scale using 64-bit indices.  Masked forms store only the
   elements whose bit in mask is set.  scale must be an immediate.  */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })
8153
/* VSCATTERQPD / VPSCATTERQQ: scatter 8 doubles / 8 64-bit integers to
   addr + index[i]*scale using 64-bit indices; masked forms store only
   elements whose bit in mask is set.  */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
8173
/* VSCATTERDPS / VPSCATTERDD: scatter 16 floats / 16 32-bit integers to
   addr + index[i]*scale using 32-bit indices; masked forms store only
   elements whose bit in mask is set.  */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale)); })
8193
/* VSCATTERDPD / VPSCATTERDQ: scatter 8 doubles / 8 64-bit integers to
   addr + index[i]*scale using 32-bit indices (index is a 256-bit vector);
   masked forms store only elements whose bit in mask is set.  */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale)); })

#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale)); })
8213
/* Scalar single-precision FMA, merge-masked: element 0 is __W*__A+__B when
   bit 0 of __U is set, else __W's element 0; upper elements pass through
   per the _mask builtin's first operand.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as _mm_mask_fmadd_ss but with explicit rounding control R.  */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8229
/* Scalar single-precision FMA, zero-masked: element 0 is __A*__B+__C when
   bit 0 of __U is set, else zero.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8239
/* Scalar single-precision FMA, zero-masked, with explicit rounding control
   R: element 0 is A*B+C when bit 0 of U is set, else zero.
   Fix: this macro previously passed _MM_FROUND_CUR_DIRECTION to the
   builtin, silently ignoring its R argument; every sibling _round_ macro
   in this family forwards (int)(R).  */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8245
/* Scalar single-precision FMA, mask3 form: element 0 is __W*__X+__Y when
   bit 0 of __U is set; unselected elements merge from the builtin's third
   operand (__Y).  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as _mm_mask3_fmadd_ss but with explicit rounding control R.  */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
8261
/* Scalar single-precision fused multiply-subtract, merge-masked: element 0
   is __W*__A-__B (note the negated addend) when bit 0 of __U is set, else
   __W's element 0.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8271
/* Scalar single-precision fused multiply-subtract, merge-masked, with
   explicit rounding control R: element 0 is W*A-B when bit 0 of U is set,
   else W's element 0.
   Fix: the negation of B was previously missing, so this macro computed
   W*A+B (an fmadd) instead of W*A-B; the non-rounding _mm_mask_fmsub_ss
   above correctly negates the addend.  */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8277
/* Scalar single-precision fused multiply-subtract, zero-masked: element 0
   is __A*__B-__C when bit 0 of __U is set, else zero.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as _mm_maskz_fmsub_ss but with explicit rounding control R.  */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8293
/* Scalar single-precision fused multiply-subtract, mask3 form: element 0 is
   __W*__X-__Y when bit 0 of __U is set; uses the dedicated vfmsubss3_mask3
   builtin so unselected elements merge from __Y (not -__Y).  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as _mm_mask3_fmsub_ss but with explicit rounding control R.  */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
8309
/* Scalar single-precision fused negate-multiply-add, merge-masked:
   element 0 is -(__A)*__W+__B — i.e. -(__W*__A)+__B — when bit 0 of __U is
   set, else __W's element 0.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as _mm_mask_fnmadd_ss but with explicit rounding control R.  */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8325
/* Zero-masked scalar fnmadd: fmadd builtin with __A negated; zero-masked
   under __U.  Current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8341
/* mask3-form scalar fnmadd: fmadd mask3 builtin with the first operand (__W)
   negated.  Current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
8357
/* Merge-masked scalar fnmsub: fmadd builtin with both __A and __B negated
   (-(a*b) - c form); __W merges under __U.  Current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8373
8374 static __inline__ __m128 __DEFAULT_FN_ATTRS
8375 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8376 {
8377  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8378           (__v4sf) __B,
8379           -(__v4sf) __C,
8380           (__mmask8) __U,
8381           _MM_FROUND_CUR_DIRECTION);
8382 }
8383
8384 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
8385   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8386                                          (__v4sf)(__m128)(B), \
8387                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
8388                                          _MM_FROUND_CUR_DIRECTION); })
8389
/* mask3-form scalar fnmsub: uses the dedicated vfnmsubss3_mask3 builtin.
   Current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })
8405
/* Merge-masked scalar double fmadd: __W is the accumulator/merge operand of
   the vfmaddsd3_mask builtin, masked by __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8421
8422 static __inline__ __m128d __DEFAULT_FN_ATTRS
8423 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8424 {
8425  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8426           (__v2df) __B,
8427           (__v2df) __C,
8428           (__mmask8) __U,
8429           _MM_FROUND_CUR_DIRECTION);
8430 }
8431
8432 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
8433   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8434                                           (__v2df)(__m128d)(B), \
8435                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
8436                                           _MM_FROUND_CUR_DIRECTION); })
8437
/* mask3-form scalar double fmadd: the third operand (__Y) is the merge
   source of the mask3 builtin.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })
8453
/* Merge-masked scalar double fmsub: fmadd builtin with __B (the addend)
   negated; __W merges under __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          -(__v2df) __B,          /* fmsub = fmadd with negated addend */
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8469
/* Zero-masked scalar double fmsub: fmadd builtin with __C negated;
   zero-masked under __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })
8485
/* mask3-form scalar double fmsub: dedicated vfmsubsd3_mask3 builtin.
   Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
8501
/* Merge-masked scalar double fnmadd: fmadd builtin with __A negated;
   __W merges under __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8517
/* Zero-masked scalar double fnmadd: fmadd builtin with __A negated;
   zero-masked under __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8533
/* mask3-form scalar double fnmadd: fmadd mask3 builtin with the first
   operand (__W) negated.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })
8549
/* Merge-masked scalar double fnmsub: fmadd builtin with __A and __B negated;
   __W merges under __U.  Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })
8565
8566 static __inline__ __m128d __DEFAULT_FN_ATTRS
8567 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8568 {
8569  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8570           (__v2df) __B,
8571           -(__v2df) __C,
8572           (__mmask8) __U,
8573           _MM_FROUND_CUR_DIRECTION);
8574 }
8575
8576 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
8577   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
8578                                           (__v2df)(__m128d)(B), \
8579                                           -(__v2df)(__m128d)(C), \
8580                                           (__mmask8)(U), \
8581                                           _MM_FROUND_CUR_DIRECTION); })
8582
/* mask3-form scalar double fnmsub: dedicated vfnmsubsd3_mask3 builtin.
   Current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
          (__v2df) __X,
          (__v2df) (__Y),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same operation with caller-supplied rounding/exception control R. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
8598
/* Permute the doubles within each 256-bit half of X: the same four 2-bit
   fields of the immediate C select elements 0-3 in the low half (indices
   0+sel) and 4-7 in the high half (indices 4+sel). */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked variant: inactive lanes (per U) come from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked variant: inactive lanes are zeroed. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })
8620
/* Permute the 64-bit integers within each 256-bit half of X using four
   2-bit fields of the immediate C (same pattern as _mm512_permutex_pd). */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked variant: inactive lanes (per U) come from W. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked variant: inactive lanes are zeroed. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
8642
/* Variable permute of doubles across the full 512-bit vector.  Note the
   builtin takes the data vector (__Y) first and the index vector (__X)
   second — the reverse of the intrinsic's parameter order. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_undefined_pd (),
                 (__mmask8) -1);          /* all-ones mask: no masking */
}

/* Merge-masked variant: inactive lanes (per __U) come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) __W,
                 (__mmask8) __U);
}

/* Zero-masked variant: inactive lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_setzero_pd (),
                 (__mmask8) __U);
}
8669
/* Zero-masked variable permute of 64-bit integers (data __Y, indices __X). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_setzero_si512 (),
                 __M);
}

/* Unmasked variable permute: all-ones mask, undefined passthrough. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_undefined_epi32 (),
                 (__mmask8) -1);
}

/* Merge-masked variant: inactive lanes (per __M) come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) __W,
                 __M);
}
8697
/* Variable permute of floats across the full 512-bit vector
   (data __Y first, indices __X second in the builtin call). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1);
}

/* Merge-masked variant: inactive lanes (per __U) come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) __W,
                (__mmask16) __U);
}

/* Zero-masked variant: inactive lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U);
}
8724
/* Zero-masked variable permute of 32-bit integers (data __Y, indices __X). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_setzero_si512 (),
                 __M);
}

/* Unmasked variable permute: all-ones mask, undefined passthrough. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

/* Merge-masked variant: inactive lanes (per __M) come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) __W,
                 __M);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8756
/* 16-bit opmask register operations, each a thin wrapper over the
   corresponding k-instruction builtin (KANDW/KANDNW/KORW/...). */

/* Bitwise AND of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* AND-NOT of two 16-bit masks (kandn builtin). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise OR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* kortest carry-flag result (kortestc builtin). */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* kortest zero-flag result (kortestz builtin). */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

/* Unpack/concatenate the low bytes of the two masks (kunpck builtin). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XNOR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XOR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
8804
/* Non-temporal 64-byte-aligned store of a 512-bit integer vector.
   The local aligned typedef tells the builtin the pointer is 64-aligned. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}

/* Non-temporal 64-byte-aligned load of a 512-bit integer vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void const *__P)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}

/* Non-temporal 64-byte-aligned store of 8 doubles. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}

/* Non-temporal 64-byte-aligned store of 16 floats. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
8832
/* Compress family: pack the elements of __A selected by __U toward the low
   end of the result; remaining elements come from __W (merge forms) or are
   zeroed (maskz forms), per the compress*_mask builtins. */

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
8900
/* Scalar compare-to-mask macros: compare the low element of X and Y with
   predicate P, producing a 1-bit result in an 8-bit mask.  The *_round_*
   forms take an explicit SAE/rounding argument R; the others pass
   _MM_FROUND_CUR_DIRECTION.  The mask_* forms AND the result with M. */

#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })
8944
/* Bit Test */

/* test: mask bit set where (__A & __B) != 0; testn: set where == 0.
   All variants are built from AND + compare-against-zero.  The 64-bit
   forms reuse _mm512_and_epi32 — bitwise AND is element-width agnostic,
   so the result is identical. */

static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
                                   _mm512_setzero_epi32());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_epi32());
}

static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                   _mm512_setzero_epi32());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_epi32());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_epi32());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_epi32());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_epi32());
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_epi32());
}
9002
/* Duplicate the odd-indexed floats into the even slots
   (shuffle indices 1,1,3,3,...). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}

/* Merge-masked movehdup: inactive lanes (per __U) come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked movehdup: inactive lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Duplicate the even-indexed floats into the odd slots
   (shuffle indices 0,0,2,2,...). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}

/* Merge-masked moveldup: inactive lanes (per __U) come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

/* Zero-masked moveldup: inactive lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
9048
9049 static __inline__ __m128 __DEFAULT_FN_ATTRS
9050 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9051 {
9052   __m128 res = __A;
9053   res[0] = (__U & 1) ? __B[0] : __W[0];
9054   return res;
9055 }
9056
9057 static __inline__ __m128 __DEFAULT_FN_ATTRS
9058 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9059 {
9060   __m128 res = __A;
9061   res[0] = (__U & 1) ? __B[0] : 0;
9062   return res;
9063 }
9064
9065 static __inline__ __m128d __DEFAULT_FN_ATTRS
9066 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9067 {
9068   __m128d res = __A;
9069   res[0] = (__U & 1) ? __B[0] : __W[0];
9070   return res;
9071 }
9072
9073 static __inline__ __m128d __DEFAULT_FN_ATTRS
9074 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9075 {
9076   __m128d res = __A;
9077   res[0] = (__U & 1) ? __B[0] : 0;
9078   return res;
9079 }
9080
/* Masked store of the low float: widens __A to 512 bits for the 512-bit
   masked-store builtin and limits the mask to bit 0 via `__U & 1`.
   NOTE(review): the float* is cast to __v16sf* for the builtin — the builtin
   presumably only touches the low element when the mask is bit 0. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                (__v16sf) _mm512_castps128_ps512(__A),
                (__mmask16) __U & (__mmask16)1);
}

/* Masked store of the low double; same widen-and-mask-bit-0 pattern. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                (__v8df) _mm512_castpd128_pd512(__A),
                (__mmask8) __U & 1);
}
9096
/* Masked scalar load: if bit 0 of __U is set, lane 0 comes from *__A,
   otherwise from __W's lane 0; lanes 1-3 of the result are zeroed (the
   merge source is built with upper lanes forced to 0.0).  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  /* src = { __W[0], 0, 0, 0 } — indices 4..6 select from the zero vector.  */
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
                                                0, 4, 4, 4);

  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_castps128_ps512(src),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}

/* Zero-masked scalar load: lane 0 is *__A when bit 0 of __U is set,
   otherwise 0; lanes 1-3 are zeroed.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_setzero_ps(),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}

/* Masked scalar double load: lane 0 is *__A when bit 0 of __U is set,
   otherwise __W's lane 0; lane 1 of the result is zeroed.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  /* src = { __W[0], 0 } — index 2 selects from the zero vector.  */
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df) {0.0, 0.0}, 0, 2);

  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_castpd128_pd512(src),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}

/* Zero-masked scalar double load: lane 0 is *__A when bit 0 of __U is
   set, otherwise 0; lane 1 is zeroed.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_setzero_pd(),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}
9143
/* _mm512_shuffle_epi32: permute the 32-bit elements within each 128-bit
   lane of A according to immediate I (vpshufd).  The same four 2-bit
   selectors are applied independently to all four lanes, hence the
   0/4/8/12 base offsets.  */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

/* Merge-masking form: unselected lanes come from W.  */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

/* Zero-masking form: unselected lanes are zeroed.  */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
9173
/* vexpandpd: scatter the contiguous low elements of __A into the
   destination positions whose mask bit is set; other lanes keep __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masking form of vexpandpd: unselected lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}

/* vpexpandq: as above but for 64-bit integer elements, merging with __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}
9197
9198 static __inline__ __m512i __DEFAULT_FN_ATTRS
9199 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9200 {
9201   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9202                 (__v8di) _mm512_setzero_pd (),
9203                 (__mmask8) __U);
9204 }
9205
/* vexpandpd from memory: load contiguous doubles from __P into the lanes
   selected by __U; unselected lanes keep __W.  __P need not be aligned.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}

/* Zero-masking form: unselected lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}

/* vpexpandq from memory: 64-bit integer variant, merging with __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}
9229
9230 static __inline__ __m512i __DEFAULT_FN_ATTRS
9231 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9232 {
9233   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9234               (__v8di) _mm512_setzero_pd(),
9235               (__mmask8) __U);
9236 }
9237
/* vexpandps from memory: load contiguous floats from __P into the lanes
   selected by __U; unselected lanes keep __W.  __P need not be aligned.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masking form: unselected lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}

/* vpexpandd from memory: 32-bit integer variant, merging with __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}
9261
9262 static __inline__ __m512i __DEFAULT_FN_ATTRS
9263 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9264 {
9265   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9266               (__v16si) _mm512_setzero_ps(),
9267               (__mmask16) __U);
9268 }
9269
/* vexpandps: scatter the contiguous low floats of __A into the lanes
   selected by __U; unselected lanes keep __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}

/* Zero-masking form of vexpandps: unselected lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}

/* vpexpandd: 32-bit integer variant, merging with __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}
9293
9294 static __inline__ __m512i __DEFAULT_FN_ATTRS
9295 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9296 {
9297   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9298                 (__v16si) _mm512_setzero_ps(),
9299                 (__mmask16) __U);
9300 }
9301
/* vcvtps2pd with explicit rounding/exception control R: widen 8 floats
   to 8 doubles.  Unmasked / merge-masked / zero-masked variants.  */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* vcvtps2pd with the current rounding mode: widen 8 floats to 8 doubles.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masking form: unselected lanes keep __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: unselected lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
9345
9346 static __inline__ __m512 __DEFAULT_FN_ATTRS
9347 _mm512_cvtpslo_pd (__m512 __A)
9348 {
9349   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9350 }
9351
9352 static __inline__ __m512 __DEFAULT_FN_ATTRS
9353 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9354 {
9355   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9356 }
9357
/* Per-lane select: result lane i is __A[i] when bit i of __U is set,
   otherwise __W[i] (masked vmovapd).  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}

/* Zero-masking form: unselected lanes are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}

/* Per-lane select for packed floats: __A where the mask bit is set,
   __W elsewhere (masked vmovaps).  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}

/* Zero-masking form: unselected lanes are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
9389
/* vcompresspd to memory: store the lanes of __A whose mask bit in __U is
   set contiguously at __P (unaligned); nothing is written for clear bits.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

/* vpcompressq to memory: 64-bit integer variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

/* vcompressps to memory: single-precision variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

/* vpcompressd to memory: 32-bit integer variant.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
9417
/* vcvtsd2ss with explicit rounding R: convert the low double of B to a
   float in lane 0; upper lanes come from A.  Unmasked / merge-masked /
   zero-masked forms.  */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })

/* Current-rounding-mode vcvtsd2ss, merge-masked: lane 0 is converted
   from __B when bit 0 of __U is set, otherwise taken from __W.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: lane 0 is zeroed when bit 0 of __U is clear.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9453
/* AVX-512 spelling aliases for the SSE scalar conversion intrinsics
   (Intel documents both the _i32/_i64 and _si32/_si64 names).  */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* vcvtsi2sd with explicit rounding R (64-bit integer source; the i64 and
   si64 spellings are equivalent).  */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

/* vcvtsi2ss with explicit rounding R (32-bit integer source).  */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

/* vcvtsi2ss with explicit rounding R (64-bit integer source).  */
#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif
9490
/* vcvtss2sd with explicit exception control R: convert the low float of
   B to a double in lane 0; lane 1 comes from A.  Unmasked /
   merge-masked / zero-masked forms.  */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })

/* Current-rounding-mode vcvtss2sd, merge-masked: lane 0 is converted
   from __B when bit 0 of __U is set, otherwise taken from __W.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)(__W),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masking form: lane 0 is zeroed when bit 0 of __U is clear.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)_mm_setzero_pd(),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9526
/* vcvtusi2sd: convert unsigned 32-bit __B to a double in lane 0 of __A.
   (No rounding argument: a 32-bit unsigned value is always exactly
   representable as a double.)  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}

/* vcvtusi2sd, 64-bit unsigned source, with explicit rounding R.  */
#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })

/* Same conversion using the current rounding mode.  */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif

/* vcvtusi2ss, 32-bit unsigned source, with explicit rounding R.  */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })

/* Same conversion using the current rounding mode.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}

/* vcvtusi2ss, 64-bit unsigned source.  */
#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })

/* Same conversion using the current rounding mode.  */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
#endif
9569
/* Broadcast __A into the lanes of __O selected by __M; other lanes keep
   __O (masked vpbroadcastd).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}

/* 64-bit variant (masked vpbroadcastq); 64-bit targets only in this
   header revision.  */
#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
#endif
9587
/* Build a 512-bit integer vector from 64 bytes.  Note the Intel "set"
   convention: the first argument (__e63) is the HIGHEST element, so the
   initializer below lists the arguments in reverse.  */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}

/* Build a 512-bit integer vector from 32 shorts (same high-to-low
   argument convention as above).  */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}

/* Build a 512-bit integer vector from 16 ints; __A is the highest lane.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

/* "setr" = set-reversed: e0 is the LOWEST lane.  */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit integer vector from 8 long longs; __A is the highest
   lane.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit double vector; __A is the highest lane.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

/* Build a 512-bit float vector; __A is the highest lane.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9680
/* |x| for packed floats: clear the sign bit of every lane by ANDing with
   0x7FFFFFFF.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* Merge-masking form: lanes with a clear bit in __K keep __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}

/* |x| for packed doubles: clear the sign bit of every lane by ANDing
   with 0x7FFFFFFFFFFFFFFF.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}

/* Merge-masking form: lanes with a clear bit in __K keep __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
9704
9705 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9706 // outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation is
// independent of the order of the input elements of V.
9709
9710 // Used bisection method. At each step, we partition the vector with previous
9711 // step in half, and the operation is performed on its two halves.
9712 // This takes log2(n) steps where n is the number of elements in the vector.
9713
9714 // Vec512 - Vector with size 512.
9715 // Operator - Can be one of following: +,*,&,|
9716 // T2  - Can get 'i' for int and 'f' for float.
9717 // T1 - Can get 'i' for int and 'd' for double.
9718
/* Horizontal reduction over 8 x 64-bit elements by repeated halving:
   512 -> 256 -> 128 -> scalar, applying Operator at each step.  The body
   is a GNU statement expression containing a `return`, so this macro may
   only be expanded directly inside a function body whose return type
   matches element type T1 (see the reduce_* wrappers below).  The -1
   shuffle indices mark don't-care lanes.  */
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
9746
/* Sum of the eight 64-bit integer elements of __W.  (The macro expands
   to a `return`, so these wrappers have no explicit return statement.)  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}

/* Product of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}

/* Bitwise AND of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}

/* Bitwise OR of the eight 64-bit integer elements of __W.  */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}

/* Sum of the eight doubles of __W (pairwise tree order, not strict
   left-to-right, so results can differ from a sequential sum).  */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}

/* Product of the eight doubles of __W (pairwise tree order).  */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
9770
9771 // Vec512 - Vector with size 512.
9772 // Vec512Neutral - All vector elements set to the identity element.
9773 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9774 // Operator - Can be one of following: +,*,&,|
9775 // Mask - Intrinsic Mask
9776 // T2  - Can get 'i' for int and 'f' for float.
9777 // T1 - Can get 'i' for int and 'd' for packed double-precision.
9778 // T3 - Can be Pd for packed double or q for q-word.
9779
/* Masked 64-bit reduction: first replace the unselected lanes of Vec512
   with Vec512Neutral (the identity element of Operator, so they cannot
   affect the result), then perform the unmasked reduction.  Like the
   unmasked macro, this expands to a `return` and must be used directly
   inside a matching wrapper function.  */
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)Mask,                                               \
                 (__v8d##T2)Vec512,                                            \
                 (__v8d##T2)Vec512Neutral);                                    \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9789
/* Masked sum of 64-bit elements: unselected lanes contribute the
   additive identity 0.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}

/* Masked product: unselected lanes contribute the multiplicative
   identity 1.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}

/* Masked AND: unselected lanes contribute all-ones.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}

/* Masked OR: unselected lanes contribute 0.  */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}

/* Masked double-precision sum: unselected lanes contribute +0.0.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}

/* Masked double-precision product: unselected lanes contribute 1.0.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}
9823
9824 // Vec512 - Vector with size 512.
9825 // Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' (empty) for packed single.
9828
// Bisection reduction over sixteen 32-bit lanes: 512 -> 256 -> 128 bits, then
// two in-register halvings (unused shuffle lanes are -1 / don't-care), applying
// Operator to the two halves at each step.  Expands inside a function body and
// returns lane 0 as the scalar result via the embedded 'return'.
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
9870
// Horizontal sum of the sixteen 32-bit integer lanes of __W.  The macro
// contains the 'return' for this function.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}

// Horizontal product of the sixteen 32-bit integer lanes of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}

// Horizontal bitwise-AND of the sixteen 32-bit lanes of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}

// Horizontal bitwise-OR of the sixteen 32-bit lanes of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}

// Horizontal sum of the sixteen float lanes of __W (T1 deliberately empty:
// pastes to the unsuffixed __m256/__m128 float vector types).
// NOTE(review): pairwise tree order, may differ from left-to-right FP sum.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}

// Horizontal product of the sixteen float lanes of __W (tree order).
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}
9900
9901 // Vec512 - Vector with size 512.
9902 // Vec512Neutral - All vector elements set to the identity element.
9903 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9904 // Operator - Can be one of following: +,*,&,|
9905 // Mask - Intrinsic Mask
9906 // T2  - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' (empty) for packed single.
// T3 - Can be 'ps' for packed single or 'd' for d-word.
9909
// Masked 32-bit reduction: lanes whose Mask bit is 0 are replaced with the
// identity element (Vec512Neutral) via a select, then the blended vector is
// reduced by the unmasked macro (which supplies the 'return').
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
9919
// Masked horizontal sum of 32-bit lanes; masked-off lanes contribute 0.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}

// Masked horizontal product of 32-bit lanes; masked-off lanes contribute 1.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}

// Masked horizontal AND; masked-off lanes contribute all-ones (AND identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}

// Masked horizontal OR; masked-off lanes contribute 0 (OR identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}

// Masked horizontal sum of float lanes; masked-off lanes contribute +0.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}

// Masked horizontal product of float lanes; masked-off lanes contribute 1.0f.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}
9950
9951 // Used bisection method. At each step, we partition the vector with previous
9952 // step in half, and the operation is performed on its two halves.
9953 // This takes log2(n) steps where n is the number of elements in the vector.
9954 // This macro uses only intrinsics from the AVX512F feature.
9955
9956 // Vec512 - Vector with size of 512.
9957 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
9958 //              __mm512_max_epi64
9959 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
9960 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
9961
// Bisection max/min over eight 64-bit lanes using only AVX512F intrinsics:
// at each of the 3 steps the vector is split in half via shuffles (unused
// lanes -1 / don't-care) and the two halves combined with the chosen
// _mm512_{max|min}_* intrinsic; lane 0 of the final vector is returned via
// the embedded 'return'.
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,     \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                1, -1, -1, -1, -1, -1, -1, -1))\
                                                ;                              \
    return Vec512[0];                                                          \
  })
9994
// Horizontal signed max over the eight 64-bit lanes of __V.  The macro
// contains the 'return' for this function.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}

// Horizontal unsigned max over the eight 64-bit lanes of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}

// Horizontal max over the eight double lanes of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}

// Horizontal signed min over the eight 64-bit lanes of __V.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}

// Horizontal unsigned min over the eight 64-bit lanes of __V.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}

// Horizontal min over the eight double lanes of __V.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
10024
10025 // Vec512 - Vector with size 512.
10026 // Vec512Neutral - A 512 length vector with elements set to the identity element
10027 // Identity element: {max_epi,0x8000000000000000}
10028 //                   {max_epu,0x0000000000000000}
10029 //                   {max_pd, 0xFFF0000000000000}
10030 //                   {min_epi,0x7FFFFFFFFFFFFFFF}
10031 //                   {min_epu,0xFFFFFFFFFFFFFFFF}
10032 //                   {min_pd, 0x7FF0000000000000}
10033 //
10034 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
10035 //              __mm512_max_epi64
10036 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
10037 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10038 // T3 - Can get 'q' q word and 'pd' for packed double.
10039 //      [__builtin_ia32_select{q|pd}_512]
10040 // Mask - Intrinsic Mask
10041
// Masked 64-bit max/min reduction: lanes whose Mask bit is 0 are replaced
// with the identity element (Vec512Neutral, see table above) via a select,
// then reduced by the unmasked macro (which supplies the 'return').
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10051
// Masked signed 64-bit max; masked-off lanes get INT64_MIN (max identity).
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}

// Masked unsigned 64-bit max; masked-off lanes get 0 (unsigned max identity).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}

// Masked double max; masked-off lanes get -infinity (max identity).
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}

// Masked signed 64-bit min; masked-off lanes get INT64_MAX (min identity).
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}

// Masked unsigned 64-bit min; masked-off lanes get UINT64_MAX (min identity).
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}

// Masked double min; masked-off lanes get +infinity (min identity).
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}
10087
10088 // Vec512 - Vector with size 512.
10089 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10090 //              __mm512_max_epi32
10091 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10092 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
10093
// Bisection max/min over sixteen 32-bit lanes using only AVX512F intrinsics:
// 4 halving steps, each combining two shuffled halves (unused lanes -1 /
// don't-care) with the chosen _mm512_{max|min}_* intrinsic; lane 0 of the
// final vector is returned via the embedded 'return'.
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0,  -1, -1, -1, -1, -1, -1, -1,              \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
10141
// Horizontal signed max over the sixteen 32-bit lanes of a.  The macro
// contains the 'return' for this function.
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
}

// Horizontal unsigned max over the sixteen 32-bit lanes of a.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
}

// Horizontal max over the sixteen float lanes of a (T1 empty -> __m512).
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
}

// Horizontal signed min over the sixteen 32-bit lanes of a.
static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
}

// Horizontal unsigned min over the sixteen 32-bit lanes of a.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
}

// Horizontal min over the sixteen float lanes of a.
static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
}
10167
10168 // Vec512 - Vector with size 512.
10169 // Vec512Neutral - A 512 length vector with elements set to the identity element
10170 // Identity element: {max_epi,0x80000000}
10171 //                   {max_epu,0x00000000}
10172 //                   {max_ps, 0xFF800000}
10173 //                   {min_epi,0x7FFFFFFF}
10174 //                   {min_epu,0xFFFFFFFF}
10175 //                   {min_ps, 0x7F800000}
10176 //
10177 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10178 //              __mm512_max_epi32
10179 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10180 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' for d-word and 'ps' for packed single.
//      [__builtin_ia32_select{d|ps}_512]
10183 // Mask - Intrinsic Mask
10184
// Masked 32-bit max/min reduction: lanes whose Mask bit is 0 are replaced
// with the identity element (Vec512Neutral, see table above) via a select,
// then reduced by the unmasked macro (which supplies the 'return').
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)Mask,                       \
                                        (__v16s##T2)Vec512,                    \
                                        (__v16s##T2)Vec512Neutral);            \
   _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   })
10194
// Masked signed 32-bit max; masked-off lanes get INT32_MIN (max identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
                                  i, i, d, __M);
}

// Masked unsigned 32-bit max; masked-off lanes get 0 (unsigned max identity).
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
                                  i, i, d, __M);
}

// Masked float max; masked-off lanes get -infinity (max identity).
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                  ps, __M);
}

// Masked signed 32-bit min; masked-off lanes get INT32_MAX (min identity).
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
                                  i, i, d, __M);
}

// Masked unsigned 32-bit min; masked-off lanes get UINT32_MAX (min identity).
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
                                  i, i, d, __M);
}

// Masked float min; masked-off lanes get +infinity (min identity).
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                  ps, __M);
}
10230
10231 #undef __DEFAULT_FN_ATTRS
10232
10233 #endif // __AVX512FINTRIN_H