1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29
30 typedef char __v64qi __attribute__((__vector_size__(64)));
31 typedef short __v32hi __attribute__((__vector_size__(64)));
32 typedef double __v8df __attribute__((__vector_size__(64)));
33 typedef float __v16sf __attribute__((__vector_size__(64)));
34 typedef long long __v8di __attribute__((__vector_size__(64)));
35 typedef int __v16si __attribute__((__vector_size__(64)));
36
37 /* Unsigned types */
38 typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
42
43 typedef float __m512 __attribute__((__vector_size__(64)));
44 typedef double __m512d __attribute__((__vector_size__(64)));
45 typedef long long __m512i __attribute__((__vector_size__(64)));
46
47 typedef unsigned char __mmask8;
48 typedef unsigned short __mmask16;
49
50 /* Rounding mode macros.  */
51 #define _MM_FROUND_TO_NEAREST_INT   0x00
52 #define _MM_FROUND_TO_NEG_INF       0x01
53 #define _MM_FROUND_TO_POS_INF       0x02
54 #define _MM_FROUND_TO_ZERO          0x03
55 #define _MM_FROUND_CUR_DIRECTION    0x04
56
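/*
 * These rounding-mode constants are the R argument of the *_round_*
 * intrinsics defined later in this file; _MM_FROUND_CUR_DIRECTION requests
 * the current MXCSR behaviour.  Minimal usage sketch (illustrative only,
 * assumes the translation unit is compiled with -mavx512f; the helper name
 * is made up):
 *
 *   #include <immintrin.h>
 *
 *   static inline __m512d max_cur_rounding(__m512d a, __m512d b)
 *   {
 *     // Same result as _mm512_max_pd(a, b).
 *     return _mm512_max_round_pd(a, b, _MM_FROUND_CUR_DIRECTION);
 *   }
 */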
57 /* Constants for integer comparison predicates */
58 typedef enum {
59     _MM_CMPINT_EQ,      /* Equal */
60     _MM_CMPINT_LT,      /* Less than */
61     _MM_CMPINT_LE,      /* Less than or Equal */
62     _MM_CMPINT_UNUSED,
63     _MM_CMPINT_NE,      /* Not Equal */
64     _MM_CMPINT_NLT,     /* Not Less than */
65 #define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
66     _MM_CMPINT_NLE      /* Not Less than or Equal */
67 #define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
68 } _MM_CMPINT_ENUM;
69
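/*
 * These predicate values are the P argument of the integer compare
 * intrinsics (e.g. _mm512_cmp_epi32_mask further down in this file), which
 * return one mask bit per element.  Illustrative sketch, assuming
 * -mavx512f; the helper name is made up:
 *
 *   #include <immintrin.h>
 *
 *   // Number of 32-bit lanes of a that are strictly less than those of b.
 *   static inline int count_lt_epi32(__m512i a, __m512i b)
 *   {
 *     __mmask16 m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
 *     return __builtin_popcount((unsigned)m);
 *   }
 */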
70 typedef enum
71 {
72   _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
73   _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
74   _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
75   _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
76   _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
77   _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
78   _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
79   _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
80   _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
81   _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
82   _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
83   _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
84   _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
85   _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
86   _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
87   _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
88   _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
89   _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
90   _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
91   _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
92   _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
93   _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
94   _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
95   _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
96   _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
97   _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
98   _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
99   _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
100   _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
101   _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
102   _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
103   _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
104   _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
105   _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
106   _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
107   _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
108   _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
109   _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
110   _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
111   _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
112   _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
113   _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
114   _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
115   _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
116   _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
117   _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
118   _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
119   _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
120   _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
121   _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
122   _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
123   _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
124   _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
125   _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
126   _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
127   _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
128   _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
129   _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
130   _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
131   _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
132   _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
133   _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
134   _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
135   _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
136   _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
137   _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
138   _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
139   _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
140   _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
141   _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
142   _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
143   _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
144   _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
145   _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
146   _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
147   _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
148   _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
149   _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
150   _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
151   _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
152   _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
153   _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
154   _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
155   _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
156   _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
157   _MM_PERM_DDDD = 0xFF
158 } _MM_PERM_ENUM;
159
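/*
 * Each letter selects one of the four source positions A..D (two bits per
 * element; the first letter names the source of the highest element in
 * each group, the last letter the lowest), so the enum simply names all
 * 256 possible shuffle immediates.  It is consumed by shuffles such as
 * _mm512_shuffle_epi32, defined later in this file.  Sketch (assumes
 * -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Reverse the four 32-bit elements inside each 128-bit lane.
 *   static inline __m512i reverse_dwords(__m512i v)
 *   {
 *     return _mm512_shuffle_epi32(v, _MM_PERM_ABCD);  // imm8 0x1B
 *   }
 */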
160 typedef enum
161 {
162   _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
163   _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
164   _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
165   _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
166 } _MM_MANTISSA_NORM_ENUM;
167
168 typedef enum
169 {
170   _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
171   _MM_MANT_SIGN_zero,   /* sign = 0             */
172   _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
173 } _MM_MANTISSA_SIGN_ENUM;
174
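/*
 * These two enums parameterize the getmant family (e.g. the
 * _mm512_getmant_pd macro defined later in this file): the first selects
 * the interval the returned mantissa is normalized to, the second how the
 * sign is treated.  Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Mantissas normalized to [1, 2), keeping the sign of the source.
 *   static inline __m512d mantissas(__m512d v)
 *   {
 *     return _mm512_getmant_pd(v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
 *   }
 */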
175 /* Define the default attributes for the functions in this file. */
176 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178 /* Create vectors with repeated elements */
179
180 static  __inline __m512i __DEFAULT_FN_ATTRS
181 _mm512_setzero_si512(void)
182 {
183   return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184 }
185
186 #define _mm512_setzero_epi32 _mm512_setzero_si512
187
188 static __inline__ __m512d __DEFAULT_FN_ATTRS
189 _mm512_undefined_pd(void)
190 {
191   return (__m512d)__builtin_ia32_undef512();
192 }
193
194 static __inline__ __m512 __DEFAULT_FN_ATTRS
195 _mm512_undefined(void)
196 {
197   return (__m512)__builtin_ia32_undef512();
198 }
199
200 static __inline__ __m512 __DEFAULT_FN_ATTRS
201 _mm512_undefined_ps(void)
202 {
203   return (__m512)__builtin_ia32_undef512();
204 }
205
206 static __inline__ __m512i __DEFAULT_FN_ATTRS
207 _mm512_undefined_epi32(void)
208 {
209   return (__m512i)__builtin_ia32_undef512();
210 }
211
212 static __inline__ __m512i __DEFAULT_FN_ATTRS
213 _mm512_broadcastd_epi32 (__m128i __A)
214 {
215   return (__m512i)__builtin_shufflevector((__v4si) __A,
216                                           (__v4si)_mm_undefined_si128(),
217                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
218 }
219
220 static __inline__ __m512i __DEFAULT_FN_ATTRS
221 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222 {
223   return (__m512i)__builtin_ia32_selectd_512(__M,
224                                              (__v16si) _mm512_broadcastd_epi32(__A),
225                                              (__v16si) __O);
226 }
227
228 static __inline__ __m512i __DEFAULT_FN_ATTRS
229 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230 {
231   return (__m512i)__builtin_ia32_selectd_512(__M,
232                                              (__v16si) _mm512_broadcastd_epi32(__A),
233                                              (__v16si) _mm512_setzero_si512());
234 }
235
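/*
 * The mask/maskz variants above show the two masking conventions used
 * throughout this file: merge-masking keeps the corresponding element of
 * the passthrough operand where a mask bit is clear, zero-masking writes
 * zero there.  Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   static inline __m512i broadcast_demo(void)
 *   {
 *     __m128i x    = _mm_set1_epi32(7);
 *     __m512i prev = _mm512_set1_epi32(-1);
 *     // Even lanes become 7; odd lanes keep -1 (merge) or become 0 (maskz).
 *     __m512i merged = _mm512_mask_broadcastd_epi32(prev, 0x5555, x);
 *     __m512i zeroed = _mm512_maskz_broadcastd_epi32(0x5555, x);
 *     return _mm512_add_epi32(merged, zeroed);
 *   }
 */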
236 static __inline__ __m512i __DEFAULT_FN_ATTRS
237 _mm512_broadcastq_epi64 (__m128i __A)
238 {
239   return (__m512i)__builtin_shufflevector((__v2di) __A,
240                                           (__v2di) _mm_undefined_si128(),
241                                           0, 0, 0, 0, 0, 0, 0, 0);
242 }
243
244 static __inline__ __m512i __DEFAULT_FN_ATTRS
245 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246 {
247   return (__m512i)__builtin_ia32_selectq_512(__M,
248                                              (__v8di) _mm512_broadcastq_epi64(__A),
249                                              (__v8di) __O);
250
251 }
252
253 static __inline__ __m512i __DEFAULT_FN_ATTRS
254 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255 {
256   return (__m512i)__builtin_ia32_selectq_512(__M,
257                                              (__v8di) _mm512_broadcastq_epi64(__A),
258                                              (__v8di) _mm512_setzero_si512());
259 }
260
261 static __inline __m512i __DEFAULT_FN_ATTRS
262 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
263 {
264   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
265                  (__v16si)
266                  _mm512_setzero_si512 (),
267                  __M);
268 }
269
270 static __inline __m512i __DEFAULT_FN_ATTRS
271 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
272 {
273 #ifdef __x86_64__
274   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
275                  (__v8di)
276                  _mm512_setzero_si512 (),
277                  __M);
278 #else
279   return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
280                  (__v8di)
281                  _mm512_setzero_si512 (),
282                  __M);
283 #endif
284 }
285
286 static __inline __m512 __DEFAULT_FN_ATTRS
287 _mm512_setzero_ps(void)
288 {
289   return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
290                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
291 }
292
293 #define _mm512_setzero _mm512_setzero_ps
294
295 static  __inline __m512d __DEFAULT_FN_ATTRS
296 _mm512_setzero_pd(void)
297 {
298   return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
299 }
300
301 static __inline __m512 __DEFAULT_FN_ATTRS
302 _mm512_set1_ps(float __w)
303 {
304   return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
305                    __w, __w, __w, __w, __w, __w, __w, __w  };
306 }
307
308 static __inline __m512d __DEFAULT_FN_ATTRS
309 _mm512_set1_pd(double __w)
310 {
311   return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
312 }
313
314 static __inline __m512i __DEFAULT_FN_ATTRS
315 _mm512_set1_epi8(char __w)
316 {
317   return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
318                              __w, __w, __w, __w, __w, __w, __w, __w,
319                              __w, __w, __w, __w, __w, __w, __w, __w,
320                              __w, __w, __w, __w, __w, __w, __w, __w,
321                              __w, __w, __w, __w, __w, __w, __w, __w,
322                              __w, __w, __w, __w, __w, __w, __w, __w,
323                              __w, __w, __w, __w, __w, __w, __w, __w,
324                              __w, __w, __w, __w, __w, __w, __w, __w  };
325 }
326
327 static __inline __m512i __DEFAULT_FN_ATTRS
328 _mm512_set1_epi16(short __w)
329 {
330   return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
331                              __w, __w, __w, __w, __w, __w, __w, __w,
332                              __w, __w, __w, __w, __w, __w, __w, __w,
333                              __w, __w, __w, __w, __w, __w, __w, __w };
334 }
335
336 static __inline __m512i __DEFAULT_FN_ATTRS
337 _mm512_set1_epi32(int __s)
338 {
339   return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
340                              __s, __s, __s, __s, __s, __s, __s, __s };
341 }
342
343 static __inline __m512i __DEFAULT_FN_ATTRS
344 _mm512_set1_epi64(long long __d)
345 {
346   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
347 }
348
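/*
 * The set1 forms replicate a scalar into every element; writing them as
 * vector literals lets the compiler choose the best broadcast sequence.
 * Sketch (assumes -mavx512f; helper name made up, the cast helpers used
 * here appear further down in this file):
 *
 *   #include <immintrin.h>
 *
 *   static inline int set1_demo(void)
 *   {
 *     __m512i v = _mm512_set1_epi32(42);
 *     // Element 0 is the low element of the low 128-bit subvector.
 *     return _mm_cvtsi128_si32(_mm512_castsi512_si128(v));  // 42
 *   }
 */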
349 static __inline__ __m512 __DEFAULT_FN_ATTRS
350 _mm512_broadcastss_ps(__m128 __A)
351 {
352   return (__m512)__builtin_shufflevector((__v4sf) __A,
353                                          (__v4sf)_mm_undefined_ps(),
354                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
355 }
356
357 static __inline __m512i __DEFAULT_FN_ATTRS
358 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
359 {
360   return  (__m512i)(__v16si)
361    { __D, __C, __B, __A, __D, __C, __B, __A,
362      __D, __C, __B, __A, __D, __C, __B, __A };
363 }
364
365 static __inline __m512i __DEFAULT_FN_ATTRS
366 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
367        long long __D)
368 {
369   return  (__m512i) (__v8di)
370    { __D, __C, __B, __A, __D, __C, __B, __A };
371 }
372
373 static __inline __m512d __DEFAULT_FN_ATTRS
374 _mm512_set4_pd (double __A, double __B, double __C, double __D)
375 {
376   return  (__m512d)
377    { __D, __C, __B, __A, __D, __C, __B, __A };
378 }
379
380 static __inline __m512 __DEFAULT_FN_ATTRS
381 _mm512_set4_ps (float __A, float __B, float __C, float __D)
382 {
383   return  (__m512)
384    { __D, __C, __B, __A, __D, __C, __B, __A,
385      __D, __C, __B, __A, __D, __C, __B, __A };
386 }
387
388 #define _mm512_setr4_epi32(e0,e1,e2,e3)               \
389   _mm512_set4_epi32((e3),(e2),(e1),(e0))
390
391 #define _mm512_setr4_epi64(e0,e1,e2,e3)               \
392   _mm512_set4_epi64((e3),(e2),(e1),(e0))
393
394 #define _mm512_setr4_pd(e0,e1,e2,e3)                \
395   _mm512_set4_pd((e3),(e2),(e1),(e0))
396
397 #define _mm512_setr4_ps(e0,e1,e2,e3)                \
398   _mm512_set4_ps((e3),(e2),(e1),(e0))
399
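/*
 * Note the argument order: _mm512_set4_epi32(a, b, c, d) places d in
 * element 0 and a in element 3 of each 128-bit group, while the setr4
 * macros take their arguments in element (memory) order.  Sketch (assumes
 * -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   static inline int setr4_demo(void)
 *   {
 *     __m512i v = _mm512_setr4_epi32(10, 20, 30, 40);  // 10,20,30,40 repeated
 *     return _mm_cvtsi128_si32(_mm512_castsi512_si128(v));  // 10
 *   }
 */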
400 static __inline__ __m512d __DEFAULT_FN_ATTRS
401 _mm512_broadcastsd_pd(__m128d __A)
402 {
403   return (__m512d)__builtin_shufflevector((__v2df) __A,
404                                           (__v2df) _mm_undefined_pd(),
405                                           0, 0, 0, 0, 0, 0, 0, 0);
406 }
407
408 /* Cast between vector types */
409
410 static __inline __m512d __DEFAULT_FN_ATTRS
411 _mm512_castpd256_pd512(__m256d __a)
412 {
413   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
414 }
415
416 static __inline __m512 __DEFAULT_FN_ATTRS
417 _mm512_castps256_ps512(__m256 __a)
418 {
419   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
420                                           -1, -1, -1, -1, -1, -1, -1, -1);
421 }
422
423 static __inline __m128d __DEFAULT_FN_ATTRS
424 _mm512_castpd512_pd128(__m512d __a)
425 {
426   return __builtin_shufflevector(__a, __a, 0, 1);
427 }
428
429 static __inline __m256d __DEFAULT_FN_ATTRS
430 _mm512_castpd512_pd256 (__m512d __A)
431 {
432   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
433 }
434
435 static __inline __m128 __DEFAULT_FN_ATTRS
436 _mm512_castps512_ps128(__m512 __a)
437 {
438   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
439 }
440
441 static __inline __m256 __DEFAULT_FN_ATTRS
442 _mm512_castps512_ps256 (__m512 __A)
443 {
444   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
445 }
446
447 static __inline __m512 __DEFAULT_FN_ATTRS
448 _mm512_castpd_ps (__m512d __A)
449 {
450   return (__m512) (__A);
451 }
452
453 static __inline __m512i __DEFAULT_FN_ATTRS
454 _mm512_castpd_si512 (__m512d __A)
455 {
456   return (__m512i) (__A);
457 }
458
459 static __inline__ __m512d __DEFAULT_FN_ATTRS
460 _mm512_castpd128_pd512 (__m128d __A)
461 {
462   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
463 }
464
465 static __inline __m512d __DEFAULT_FN_ATTRS
466 _mm512_castps_pd (__m512 __A)
467 {
468   return (__m512d) (__A);
469 }
470
471 static __inline __m512i __DEFAULT_FN_ATTRS
472 _mm512_castps_si512 (__m512 __A)
473 {
474   return (__m512i) (__A);
475 }
476
477 static __inline__ __m512 __DEFAULT_FN_ATTRS
478 _mm512_castps128_ps512 (__m128 __A)
479 {
480     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
481 }
482
483 static __inline__ __m512i __DEFAULT_FN_ATTRS
484 _mm512_castsi128_si512 (__m128i __A)
485 {
486    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
487 }
488
489 static __inline__ __m512i __DEFAULT_FN_ATTRS
490 _mm512_castsi256_si512 (__m256i __A)
491 {
492    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
493 }
494
495 static __inline __m512 __DEFAULT_FN_ATTRS
496 _mm512_castsi512_ps (__m512i __A)
497 {
498   return (__m512) (__A);
499 }
500
501 static __inline __m512d __DEFAULT_FN_ATTRS
502 _mm512_castsi512_pd (__m512i __A)
503 {
504   return (__m512d) (__A);
505 }
506
507 static __inline __m128i __DEFAULT_FN_ATTRS
508 _mm512_castsi512_si128 (__m512i __A)
509 {
510   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
511 }
512
513 static __inline __m256i __DEFAULT_FN_ATTRS
514 _mm512_castsi512_si256 (__m512i __A)
515 {
516   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
517 }
518
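/*
 * The widening casts (128/256-bit to 512-bit) use shufflevector indices of
 * -1, so the upper elements are undefined rather than zeroed; the narrowing
 * casts return the low 128/256 bits.  Either direction typically compiles
 * to no instruction.  Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Add full vectors, then keep only the low four doubles.
 *   static inline __m256d low_half_sum(__m512d a, __m512d b)
 *   {
 *     return _mm512_castpd512_pd256(_mm512_add_pd(a, b));
 *   }
 */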
519 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
520 _mm512_int2mask(int __a)
521 {
522   return (__mmask16)__a;
523 }
524
525 static __inline__ int __DEFAULT_FN_ATTRS
526 _mm512_mask2int(__mmask16 __a)
527 {
528   return (int)__a;
529 }
530
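/*
 * __mmask16 is an ordinary 16-bit integer type, so these helpers are plain
 * casts; they are handy when a mask is produced by scalar code.  Sketch
 * (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Keep lanes 0..7 of a, zero the rest.
 *   static inline __m512i keep_low_eight(__m512i a)
 *   {
 *     __mmask16 m = _mm512_int2mask(0x00FF);
 *     return _mm512_maskz_and_epi32(m, a, a);  // a & a == a
 *   }
 */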
531 /* Bitwise operators */
532 static __inline__ __m512i __DEFAULT_FN_ATTRS
533 _mm512_and_epi32(__m512i __a, __m512i __b)
534 {
535   return (__m512i)((__v16su)__a & (__v16su)__b);
536 }
537
538 static __inline__ __m512i __DEFAULT_FN_ATTRS
539 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
540 {
541   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
542                 (__v16si) _mm512_and_epi32(__a, __b),
543                 (__v16si) __src);
544 }
545
546 static __inline__ __m512i __DEFAULT_FN_ATTRS
547 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
548 {
549   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
550                                          __k, __a, __b);
551 }
552
553 static __inline__ __m512i __DEFAULT_FN_ATTRS
554 _mm512_and_epi64(__m512i __a, __m512i __b)
555 {
556   return (__m512i)((__v8du)__a & (__v8du)__b);
557 }
558
559 static __inline__ __m512i __DEFAULT_FN_ATTRS
560 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
561 {
562     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
563                 (__v8di) _mm512_and_epi64(__a, __b),
564                 (__v8di) __src);
565 }
566
567 static __inline__ __m512i __DEFAULT_FN_ATTRS
568 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
569 {
570   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
571                                          __k, __a, __b);
572 }
573
574 static __inline__ __m512i __DEFAULT_FN_ATTRS
575 _mm512_andnot_si512 (__m512i __A, __m512i __B)
576 {
577   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
578 }
579
580 static __inline__ __m512i __DEFAULT_FN_ATTRS
581 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
582 {
583   return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
584 }
585
586 static __inline__ __m512i __DEFAULT_FN_ATTRS
587 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
588 {
589   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
590                                          (__v16si)_mm512_andnot_epi32(__A, __B),
591                                          (__v16si)__W);
592 }
593
594 static __inline__ __m512i __DEFAULT_FN_ATTRS
595 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
596 {
597   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
598                                            __U, __A, __B);
599 }
600
601 static __inline__ __m512i __DEFAULT_FN_ATTRS
602 _mm512_andnot_epi64(__m512i __A, __m512i __B)
603 {
604   return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
605 }
606
607 static __inline__ __m512i __DEFAULT_FN_ATTRS
608 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
609 {
610   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
611                                           (__v8di)_mm512_andnot_epi64(__A, __B),
612                                           (__v8di)__W);
613 }
614
615 static __inline__ __m512i __DEFAULT_FN_ATTRS
616 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
617 {
618   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
619                                            __U, __A, __B);
620 }
621
622 static __inline__ __m512i __DEFAULT_FN_ATTRS
623 _mm512_or_epi32(__m512i __a, __m512i __b)
624 {
625   return (__m512i)((__v16su)__a | (__v16su)__b);
626 }
627
628 static __inline__ __m512i __DEFAULT_FN_ATTRS
629 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
630 {
631   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
632                                              (__v16si)_mm512_or_epi32(__a, __b),
633                                              (__v16si)__src);
634 }
635
636 static __inline__ __m512i __DEFAULT_FN_ATTRS
637 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
638 {
639   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
640 }
641
642 static __inline__ __m512i __DEFAULT_FN_ATTRS
643 _mm512_or_epi64(__m512i __a, __m512i __b)
644 {
645   return (__m512i)((__v8du)__a | (__v8du)__b);
646 }
647
648 static __inline__ __m512i __DEFAULT_FN_ATTRS
649 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
650 {
651   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
652                                              (__v8di)_mm512_or_epi64(__a, __b),
653                                              (__v8di)__src);
654 }
655
656 static __inline__ __m512i __DEFAULT_FN_ATTRS
657 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
658 {
659   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
660 }
661
662 static __inline__ __m512i __DEFAULT_FN_ATTRS
663 _mm512_xor_epi32(__m512i __a, __m512i __b)
664 {
665   return (__m512i)((__v16su)__a ^ (__v16su)__b);
666 }
667
668 static __inline__ __m512i __DEFAULT_FN_ATTRS
669 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
670 {
671   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
672                                             (__v16si)_mm512_xor_epi32(__a, __b),
673                                             (__v16si)__src);
674 }
675
676 static __inline__ __m512i __DEFAULT_FN_ATTRS
677 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
678 {
679   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
680 }
681
682 static __inline__ __m512i __DEFAULT_FN_ATTRS
683 _mm512_xor_epi64(__m512i __a, __m512i __b)
684 {
685   return (__m512i)((__v8du)__a ^ (__v8du)__b);
686 }
687
688 static __inline__ __m512i __DEFAULT_FN_ATTRS
689 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
690 {
691   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
692                                              (__v8di)_mm512_xor_epi64(__a, __b),
693                                              (__v8di)__src);
694 }
695
696 static __inline__ __m512i __DEFAULT_FN_ATTRS
697 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
698 {
699   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
700 }
701
702 static __inline__ __m512i __DEFAULT_FN_ATTRS
703 _mm512_and_si512(__m512i __a, __m512i __b)
704 {
705   return (__m512i)((__v8du)__a & (__v8du)__b);
706 }
707
708 static __inline__ __m512i __DEFAULT_FN_ATTRS
709 _mm512_or_si512(__m512i __a, __m512i __b)
710 {
711   return (__m512i)((__v8du)__a | (__v8du)__b);
712 }
713
714 static __inline__ __m512i __DEFAULT_FN_ATTRS
715 _mm512_xor_si512(__m512i __a, __m512i __b)
716 {
717   return (__m512i)((__v8du)__a ^ (__v8du)__b);
718 }
719
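/*
 * The epi32/epi64 suffix only matters for the masked forms, where it fixes
 * the mask granularity; the unmasked and/or/xor and the _si512 forms all
 * perform the same full-width bitwise operation.  andnot follows the
 * VPANDN convention: ~A & B.  Sketch (assumes -mavx512f; helper name made
 * up):
 *
 *   #include <immintrin.h>
 *
 *   // Clear, in v, every bit that is set in bits.
 *   static inline __m512i clear_bits(__m512i v, __m512i bits)
 *   {
 *     return _mm512_andnot_si512(bits, v);  // ~bits & v
 *   }
 */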
720 /* Arithmetic */
721
722 static __inline __m512d __DEFAULT_FN_ATTRS
723 _mm512_add_pd(__m512d __a, __m512d __b)
724 {
725   return (__m512d)((__v8df)__a + (__v8df)__b);
726 }
727
728 static __inline __m512 __DEFAULT_FN_ATTRS
729 _mm512_add_ps(__m512 __a, __m512 __b)
730 {
731   return (__m512)((__v16sf)__a + (__v16sf)__b);
732 }
733
734 static __inline __m512d __DEFAULT_FN_ATTRS
735 _mm512_mul_pd(__m512d __a, __m512d __b)
736 {
737   return (__m512d)((__v8df)__a * (__v8df)__b);
738 }
739
740 static __inline __m512 __DEFAULT_FN_ATTRS
741 _mm512_mul_ps(__m512 __a, __m512 __b)
742 {
743   return (__m512)((__v16sf)__a * (__v16sf)__b);
744 }
745
746 static __inline __m512d __DEFAULT_FN_ATTRS
747 _mm512_sub_pd(__m512d __a, __m512d __b)
748 {
749   return (__m512d)((__v8df)__a - (__v8df)__b);
750 }
751
752 static __inline __m512 __DEFAULT_FN_ATTRS
753 _mm512_sub_ps(__m512 __a, __m512 __b)
754 {
755   return (__m512)((__v16sf)__a - (__v16sf)__b);
756 }
757
758 static __inline__ __m512i __DEFAULT_FN_ATTRS
759 _mm512_add_epi64 (__m512i __A, __m512i __B)
760 {
761   return (__m512i) ((__v8du) __A + (__v8du) __B);
762 }
763
764 static __inline__ __m512i __DEFAULT_FN_ATTRS
765 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
766 {
767   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
768                                              (__v8di)_mm512_add_epi64(__A, __B),
769                                              (__v8di)__W);
770 }
771
772 static __inline__ __m512i __DEFAULT_FN_ATTRS
773 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
774 {
775   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
776                                              (__v8di)_mm512_add_epi64(__A, __B),
777                                              (__v8di)_mm512_setzero_si512());
778 }
779
780 static __inline__ __m512i __DEFAULT_FN_ATTRS
781 _mm512_sub_epi64 (__m512i __A, __m512i __B)
782 {
783   return (__m512i) ((__v8du) __A - (__v8du) __B);
784 }
785
786 static __inline__ __m512i __DEFAULT_FN_ATTRS
787 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
788 {
789   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
790                                              (__v8di)_mm512_sub_epi64(__A, __B),
791                                              (__v8di)__W);
792 }
793
794 static __inline__ __m512i __DEFAULT_FN_ATTRS
795 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
796 {
797   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
798                                              (__v8di)_mm512_sub_epi64(__A, __B),
799                                              (__v8di)_mm512_setzero_si512());
800 }
801
802 static __inline__ __m512i __DEFAULT_FN_ATTRS
803 _mm512_add_epi32 (__m512i __A, __m512i __B)
804 {
805   return (__m512i) ((__v16su) __A + (__v16su) __B);
806 }
807
808 static __inline__ __m512i __DEFAULT_FN_ATTRS
809 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
810 {
811   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
812                                              (__v16si)_mm512_add_epi32(__A, __B),
813                                              (__v16si)__W);
814 }
815
816 static __inline__ __m512i __DEFAULT_FN_ATTRS
817 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
818 {
819   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
820                                              (__v16si)_mm512_add_epi32(__A, __B),
821                                              (__v16si)_mm512_setzero_si512());
822 }
823
824 static __inline__ __m512i __DEFAULT_FN_ATTRS
825 _mm512_sub_epi32 (__m512i __A, __m512i __B)
826 {
827   return (__m512i) ((__v16su) __A - (__v16su) __B);
828 }
829
830 static __inline__ __m512i __DEFAULT_FN_ATTRS
831 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
832 {
833   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
834                                              (__v16si)_mm512_sub_epi32(__A, __B),
835                                              (__v16si)__W);
836 }
837
838 static __inline__ __m512i __DEFAULT_FN_ATTRS
839 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
840 {
841   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
842                                              (__v16si)_mm512_sub_epi32(__A, __B),
843                                              (__v16si)_mm512_setzero_si512());
844 }
845
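/*
 * Integer add/sub are expressed on the unsigned element types so that
 * wrap-around on overflow is well defined; the mask forms follow the usual
 * merge/zero convention.  Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Add b only into the lanes selected by m; the other lanes keep a.
 *   static inline __m512i masked_add(__m512i a, __m512i b, __mmask16 m)
 *   {
 *     return _mm512_mask_add_epi32(a, m, a, b);
 *   }
 */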
846 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
847   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
848                                         (__v8df)(__m512d)(B), \
849                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
850                                         (int)(R)); })
851
852 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
853   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
854                                         (__v8df)(__m512d)(B), \
855                                         (__v8df)_mm512_setzero_pd(), \
856                                         (__mmask8)(U), (int)(R)); })
857
858 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
859   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
860                                         (__v8df)(__m512d)(B), \
861                                         (__v8df)_mm512_undefined_pd(), \
862                                         (__mmask8)-1, (int)(R)); })
863
864 static  __inline__ __m512d __DEFAULT_FN_ATTRS
865 _mm512_max_pd(__m512d __A, __m512d __B)
866 {
867   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
868              (__v8df) __B,
869              (__v8df)
870              _mm512_setzero_pd (),
871              (__mmask8) -1,
872              _MM_FROUND_CUR_DIRECTION);
873 }
874
875 static __inline__ __m512d __DEFAULT_FN_ATTRS
876 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
877 {
878   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
879                   (__v8df) __B,
880                   (__v8df) __W,
881                   (__mmask8) __U,
882                   _MM_FROUND_CUR_DIRECTION);
883 }
884
885 static __inline__ __m512d __DEFAULT_FN_ATTRS
886 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
887 {
888   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
889                   (__v8df) __B,
890                   (__v8df)
891                   _mm512_setzero_pd (),
892                   (__mmask8) __U,
893                   _MM_FROUND_CUR_DIRECTION);
894 }
895
896 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
897   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
898                                        (__v16sf)(__m512)(B), \
899                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
900                                        (int)(R)); })
901
902 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
903   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
904                                        (__v16sf)(__m512)(B), \
905                                        (__v16sf)_mm512_setzero_ps(), \
906                                        (__mmask16)(U), (int)(R)); })
907
908 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \
909   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
910                                        (__v16sf)(__m512)(B), \
911                                        (__v16sf)_mm512_undefined_ps(), \
912                                        (__mmask16)-1, (int)(R)); })
913
914 static  __inline__ __m512 __DEFAULT_FN_ATTRS
915 _mm512_max_ps(__m512 __A, __m512 __B)
916 {
917   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
918             (__v16sf) __B,
919             (__v16sf)
920             _mm512_setzero_ps (),
921             (__mmask16) -1,
922             _MM_FROUND_CUR_DIRECTION);
923 }
924
925 static __inline__ __m512 __DEFAULT_FN_ATTRS
926 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
927 {
928   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
929                  (__v16sf) __B,
930                  (__v16sf) __W,
931                  (__mmask16) __U,
932                  _MM_FROUND_CUR_DIRECTION);
933 }
934
935 static __inline__ __m512 __DEFAULT_FN_ATTRS
936 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
937 {
938   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
939                  (__v16sf) __B,
940                  (__v16sf)
941                  _mm512_setzero_ps (),
942                  (__mmask16) __U,
943                  _MM_FROUND_CUR_DIRECTION);
944 }
945
946 static __inline__ __m128 __DEFAULT_FN_ATTRS
947 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
948   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
949                 (__v4sf) __B,
950                 (__v4sf) __W,
951                 (__mmask8) __U,
952                 _MM_FROUND_CUR_DIRECTION);
953 }
954
955 static __inline__ __m128 __DEFAULT_FN_ATTRS
956 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
957   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
958                 (__v4sf) __B,
959                 (__v4sf)  _mm_setzero_ps (),
960                 (__mmask8) __U,
961                 _MM_FROUND_CUR_DIRECTION);
962 }
963
964 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
965   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
966                                           (__v4sf)(__m128)(B), \
967                                           (__v4sf)_mm_setzero_ps(), \
968                                           (__mmask8)-1, (int)(R)); })
969
970 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
971   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
972                                           (__v4sf)(__m128)(B), \
973                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
974                                           (int)(R)); })
975
976 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
977   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
978                                           (__v4sf)(__m128)(B), \
979                                           (__v4sf)_mm_setzero_ps(), \
980                                           (__mmask8)(U), (int)(R)); })
981
982 static __inline__ __m128d __DEFAULT_FN_ATTRS
983 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
984   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
985                 (__v2df) __B,
986                 (__v2df) __W,
987                 (__mmask8) __U,
988                 _MM_FROUND_CUR_DIRECTION);
989 }
990
991 static __inline__ __m128d __DEFAULT_FN_ATTRS
992 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
993   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
994                 (__v2df) __B,
995                 (__v2df)  _mm_setzero_pd (),
996                 (__mmask8) __U,
997                 _MM_FROUND_CUR_DIRECTION);
998 }
999
1000 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
1001   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1002                                            (__v2df)(__m128d)(B), \
1003                                            (__v2df)_mm_setzero_pd(), \
1004                                            (__mmask8)-1, (int)(R)); })
1005
1006 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
1007   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1008                                            (__v2df)(__m128d)(B), \
1009                                            (__v2df)(__m128d)(W), \
1010                                            (__mmask8)(U), (int)(R)); })
1011
1012 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
1013   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1014                                            (__v2df)(__m128d)(B), \
1015                                            (__v2df)_mm_setzero_pd(), \
1016                                            (__mmask8)(U), (int)(R)); })
1017
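/*
 * For the scalar ss/sd forms only element 0 is computed and masked (one
 * mask bit); the remaining elements of the result are copied from the
 * first source operand.  Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Low float: max(a0, b0) if bit 0 of m is set, else fallback0.
 *   // The upper three floats come from a.
 *   static inline __m128 guarded_max(__m128 a, __m128 b, __m128 fallback,
 *                                    __mmask8 m)
 *   {
 *     return _mm_mask_max_ss(fallback, m, a, b);
 *   }
 */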
1018 static __inline __m512i
1019 __DEFAULT_FN_ATTRS
1020 _mm512_max_epi32(__m512i __A, __m512i __B)
1021 {
1022   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1023               (__v16si) __B,
1024               (__v16si)
1025               _mm512_setzero_si512 (),
1026               (__mmask16) -1);
1027 }
1028
1029 static __inline__ __m512i __DEFAULT_FN_ATTRS
1030 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1031 {
1032   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1033                    (__v16si) __B,
1034                    (__v16si) __W, __M);
1035 }
1036
1037 static __inline__ __m512i __DEFAULT_FN_ATTRS
1038 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1039 {
1040   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1041                    (__v16si) __B,
1042                    (__v16si)
1043                    _mm512_setzero_si512 (),
1044                    __M);
1045 }
1046
1047 static __inline __m512i __DEFAULT_FN_ATTRS
1048 _mm512_max_epu32(__m512i __A, __m512i __B)
1049 {
1050   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1051               (__v16si) __B,
1052               (__v16si)
1053               _mm512_setzero_si512 (),
1054               (__mmask16) -1);
1055 }
1056
1057 static __inline__ __m512i __DEFAULT_FN_ATTRS
1058 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1059 {
1060   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1061                    (__v16si) __B,
1062                    (__v16si) __W, __M);
1063 }
1064
1065 static __inline__ __m512i __DEFAULT_FN_ATTRS
1066 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1067 {
1068   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1069                    (__v16si) __B,
1070                    (__v16si)
1071                    _mm512_setzero_si512 (),
1072                    __M);
1073 }
1074
1075 static __inline __m512i __DEFAULT_FN_ATTRS
1076 _mm512_max_epi64(__m512i __A, __m512i __B)
1077 {
1078   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1079               (__v8di) __B,
1080               (__v8di)
1081               _mm512_setzero_si512 (),
1082               (__mmask8) -1);
1083 }
1084
1085 static __inline__ __m512i __DEFAULT_FN_ATTRS
1086 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1087 {
1088   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1089                    (__v8di) __B,
1090                    (__v8di) __W, __M);
1091 }
1092
1093 static __inline__ __m512i __DEFAULT_FN_ATTRS
1094 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1095 {
1096   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1097                    (__v8di) __B,
1098                    (__v8di)
1099                    _mm512_setzero_si512 (),
1100                    __M);
1101 }
1102
1103 static __inline __m512i __DEFAULT_FN_ATTRS
1104 _mm512_max_epu64(__m512i __A, __m512i __B)
1105 {
1106   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1107               (__v8di) __B,
1108               (__v8di)
1109               _mm512_setzero_si512 (),
1110               (__mmask8) -1);
1111 }
1112
1113 static __inline__ __m512i __DEFAULT_FN_ATTRS
1114 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1115 {
1116   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1117                    (__v8di) __B,
1118                    (__v8di) __W, __M);
1119 }
1120
1121 static __inline__ __m512i __DEFAULT_FN_ATTRS
1122 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1123 {
1124   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1125                    (__v8di) __B,
1126                    (__v8di)
1127                    _mm512_setzero_si512 (),
1128                    __M);
1129 }
1130
1131 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
1132   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1133                                         (__v8df)(__m512d)(B), \
1134                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
1135                                         (int)(R)); })
1136
1137 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
1138   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1139                                         (__v8df)(__m512d)(B), \
1140                                         (__v8df)_mm512_setzero_pd(), \
1141                                         (__mmask8)(U), (int)(R)); })
1142
1143 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \
1144   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1145                                         (__v8df)(__m512d)(B), \
1146                                         (__v8df)_mm512_undefined_pd(), \
1147                                         (__mmask8)-1, (int)(R)); })
1148
1149 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1150 _mm512_min_pd(__m512d __A, __m512d __B)
1151 {
1152   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1153              (__v8df) __B,
1154              (__v8df)
1155              _mm512_setzero_pd (),
1156              (__mmask8) -1,
1157              _MM_FROUND_CUR_DIRECTION);
1158 }
1159
1160 static __inline__ __m512d __DEFAULT_FN_ATTRS
1161 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1162 {
1163   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1164                   (__v8df) __B,
1165                   (__v8df) __W,
1166                   (__mmask8) __U,
1167                   _MM_FROUND_CUR_DIRECTION);
1168 }
1169
1170 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
1171   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1172                                        (__v16sf)(__m512)(B), \
1173                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
1174                                        (int)(R)); })
1175
1176 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
1177   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1178                                        (__v16sf)(__m512)(B), \
1179                                        (__v16sf)_mm512_setzero_ps(), \
1180                                        (__mmask16)(U), (int)(R)); })
1181
1182 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \
1183   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1184                                        (__v16sf)(__m512)(B), \
1185                                        (__v16sf)_mm512_undefined_ps(), \
1186                                        (__mmask16)-1, (int)(R)); })
1187
1188 static __inline__ __m512d __DEFAULT_FN_ATTRS
1189 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1190 {
1191   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1192                   (__v8df) __B,
1193                   (__v8df)
1194                   _mm512_setzero_pd (),
1195                   (__mmask8) __U,
1196                   _MM_FROUND_CUR_DIRECTION);
1197 }
1198
1199 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1200 _mm512_min_ps(__m512 __A, __m512 __B)
1201 {
1202   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1203             (__v16sf) __B,
1204             (__v16sf)
1205             _mm512_setzero_ps (),
1206             (__mmask16) -1,
1207             _MM_FROUND_CUR_DIRECTION);
1208 }
1209
1210 static __inline__ __m512 __DEFAULT_FN_ATTRS
1211 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1212 {
1213   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1214                  (__v16sf) __B,
1215                  (__v16sf) __W,
1216                  (__mmask16) __U,
1217                  _MM_FROUND_CUR_DIRECTION);
1218 }
1219
1220 static __inline__ __m512 __DEFAULT_FN_ATTRS
1221 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1222 {
1223   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1224                  (__v16sf) __B,
1225                  (__v16sf)
1226                  _mm512_setzero_ps (),
1227                  (__mmask16) __U,
1228                  _MM_FROUND_CUR_DIRECTION);
1229 }
1230
1231 static __inline__ __m128 __DEFAULT_FN_ATTRS
1232 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1233   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1234                 (__v4sf) __B,
1235                 (__v4sf) __W,
1236                 (__mmask8) __U,
1237                 _MM_FROUND_CUR_DIRECTION);
1238 }
1239
1240 static __inline__ __m128 __DEFAULT_FN_ATTRS
1241 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1242   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1243                 (__v4sf) __B,
1244                 (__v4sf)  _mm_setzero_ps (),
1245                 (__mmask8) __U,
1246                 _MM_FROUND_CUR_DIRECTION);
1247 }
1248
1249 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
1250   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1251                                           (__v4sf)(__m128)(B), \
1252                                           (__v4sf)_mm_setzero_ps(), \
1253                                           (__mmask8)-1, (int)(R)); })
1254
1255 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
1256   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1257                                           (__v4sf)(__m128)(B), \
1258                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
1259                                           (int)(R)); })
1260
1261 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
1262   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1263                                           (__v4sf)(__m128)(B), \
1264                                           (__v4sf)_mm_setzero_ps(), \
1265                                           (__mmask8)(U), (int)(R)); })
1266
1267 static __inline__ __m128d __DEFAULT_FN_ATTRS
1268 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1269   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1270                 (__v2df) __B,
1271                 (__v2df) __W,
1272                 (__mmask8) __U,
1273                 _MM_FROUND_CUR_DIRECTION);
1274 }
1275
1276 static __inline__ __m128d __DEFAULT_FN_ATTRS
1277 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1278   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1279                 (__v2df) __B,
1280                 (__v2df)  _mm_setzero_pd (),
1281                 (__mmask8) __U,
1282                 _MM_FROUND_CUR_DIRECTION);
1283 }
1284
1285 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
1286   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1287                                            (__v2df)(__m128d)(B), \
1288                                            (__v2df)_mm_setzero_pd(), \
1289                                            (__mmask8)-1, (int)(R)); })
1290
1291 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
1292   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1293                                            (__v2df)(__m128d)(B), \
1294                                            (__v2df)(__m128d)(W), \
1295                                            (__mmask8)(U), (int)(R)); })
1296
1297 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
1298   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1299                                            (__v2df)(__m128d)(B), \
1300                                            (__v2df)_mm_setzero_pd(), \
1301                                            (__mmask8)(U), (int)(R)); })
1302
1303 static __inline __m512i
1304 __DEFAULT_FN_ATTRS
1305 _mm512_min_epi32(__m512i __A, __m512i __B)
1306 {
1307   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1308               (__v16si) __B,
1309               (__v16si)
1310               _mm512_setzero_si512 (),
1311               (__mmask16) -1);
1312 }
1313
1314 static __inline__ __m512i __DEFAULT_FN_ATTRS
1315 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1316 {
1317   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1318                    (__v16si) __B,
1319                    (__v16si) __W, __M);
1320 }
1321
1322 static __inline__ __m512i __DEFAULT_FN_ATTRS
1323 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1324 {
1325   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1326                    (__v16si) __B,
1327                    (__v16si)
1328                    _mm512_setzero_si512 (),
1329                    __M);
1330 }
1331
1332 static __inline __m512i __DEFAULT_FN_ATTRS
1333 _mm512_min_epu32(__m512i __A, __m512i __B)
1334 {
1335   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1336               (__v16si) __B,
1337               (__v16si)
1338               _mm512_setzero_si512 (),
1339               (__mmask16) -1);
1340 }
1341
1342 static __inline__ __m512i __DEFAULT_FN_ATTRS
1343 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1344 {
1345   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1346                    (__v16si) __B,
1347                    (__v16si) __W, __M);
1348 }
1349
1350 static __inline__ __m512i __DEFAULT_FN_ATTRS
1351 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1352 {
1353   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1354                    (__v16si) __B,
1355                    (__v16si)
1356                    _mm512_setzero_si512 (),
1357                    __M);
1358 }
1359
1360 static __inline __m512i __DEFAULT_FN_ATTRS
1361 _mm512_min_epi64(__m512i __A, __m512i __B)
1362 {
1363   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1364               (__v8di) __B,
1365               (__v8di)
1366               _mm512_setzero_si512 (),
1367               (__mmask8) -1);
1368 }
1369
1370 static __inline__ __m512i __DEFAULT_FN_ATTRS
1371 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1372 {
1373   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1374                    (__v8di) __B,
1375                    (__v8di) __W, __M);
1376 }
1377
1378 static __inline__ __m512i __DEFAULT_FN_ATTRS
1379 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1380 {
1381   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1382                    (__v8di) __B,
1383                    (__v8di)
1384                    _mm512_setzero_si512 (),
1385                    __M);
1386 }
1387
1388 static __inline __m512i __DEFAULT_FN_ATTRS
1389 _mm512_min_epu64(__m512i __A, __m512i __B)
1390 {
1391   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1392               (__v8di) __B,
1393               (__v8di)
1394               _mm512_setzero_si512 (),
1395               (__mmask8) -1);
1396 }
1397
1398 static __inline__ __m512i __DEFAULT_FN_ATTRS
1399 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1400 {
1401   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1402                    (__v8di) __B,
1403                    (__v8di) __W, __M);
1404 }
1405
1406 static __inline__ __m512i __DEFAULT_FN_ATTRS
1407 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1408 {
1409   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1410                    (__v8di) __B,
1411                    (__v8di)
1412                    _mm512_setzero_si512 (),
1413                    __M);
1414 }
1415
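/*
 * The epi forms above compare elements as signed integers, the epu forms
 * as unsigned; unlike the select-based patterns used elsewhere in this
 * file, these builtins take the passthrough vector and the mask directly.
 * Sketch (assumes -mavx512f; helper name made up):
 *
 *   #include <immintrin.h>
 *
 *   // Clamp every unsigned 32-bit lane of v to at most cap.
 *   static inline __m512i clamp_epu32(__m512i v, unsigned cap)
 *   {
 *     return _mm512_min_epu32(v, _mm512_set1_epi32((int)cap));
 *   }
 */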
1416 static __inline __m512i __DEFAULT_FN_ATTRS
1417 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1418 {
1419   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1420 }
1421
1422 static __inline __m512i __DEFAULT_FN_ATTRS
1423 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1424 {
1425   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1426                                              (__v8di)_mm512_mul_epi32(__X, __Y),
1427                                              (__v8di)__W);
1428 }
1429
1430 static __inline __m512i __DEFAULT_FN_ATTRS
1431 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1432 {
1433   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1434                                              (__v8di)_mm512_mul_epi32(__X, __Y),
1435                                              (__v8di)_mm512_setzero_si512 ());
1436 }
1437
1438 static __inline __m512i __DEFAULT_FN_ATTRS
1439 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1440 {
1441   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1442 }
1443
1444 static __inline __m512i __DEFAULT_FN_ATTRS
1445 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1446 {
1447   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1448                                              (__v8di)_mm512_mul_epu32(__X, __Y),
1449                                              (__v8di)__W);
1450 }
1451
1452 static __inline __m512i __DEFAULT_FN_ATTRS
1453 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1454 {
1455   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1456                                              (__v8di)_mm512_mul_epu32(__X, __Y),
1457                                              (__v8di)_mm512_setzero_si512 ());
1458 }
1459
1460 static __inline __m512i __DEFAULT_FN_ATTRS
1461 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1462 {
1463   return (__m512i) ((__v16su) __A * (__v16su) __B);
1464 }
1465
1466 static __inline __m512i __DEFAULT_FN_ATTRS
1467 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1468 {
1469   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1470                                              (__v16si)_mm512_mullo_epi32(__A, __B),
1471                                              (__v16si)_mm512_setzero_si512());
1472 }
1473
1474 static __inline __m512i __DEFAULT_FN_ATTRS
1475 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1476 {
1477   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1478                                              (__v16si)_mm512_mullo_epi32(__A, __B),
1479                                              (__v16si)__W);
1480 }
1481
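/* Editorial note (not part of the upstream header): _mm512_mul_epi32 and
 * _mm512_mul_epu32 multiply only the even-indexed 32-bit elements (the low
 * half of each 64-bit lane) and return eight full 64-bit products, whereas
 * _mm512_mullo_epi32 multiplies all sixteen 32-bit elements and keeps only
 * the low 32 bits of each product.  Illustrative sketch:
 *
 *   __m512i x    = _mm512_set1_epi32(0x10000);   // 2^16 in every 32-bit lane
 *   __m512i wide = _mm512_mul_epu32(x, x);       // eight 64-bit lanes of 2^32
 *   __m512i low  = _mm512_mullo_epi32(x, x);     // sixteen 32-bit lanes of 0
 */
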
1482 #define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
1483   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
1484                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
1485                                          (int)(R)); })
1486
1487 #define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
1488   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
1489                                          (__v8df)_mm512_setzero_pd(), \
1490                                          (__mmask8)(U), (int)(R)); })
1491
1492 #define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
1493   (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
1494                                          (__v8df)_mm512_undefined_pd(), \
1495                                          (__mmask8)-1, (int)(R)); })
1496
1497 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1498 _mm512_sqrt_pd(__m512d __a)
1499 {
1500   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1501                                                 (__v8df) _mm512_setzero_pd (),
1502                                                 (__mmask8) -1,
1503                                                 _MM_FROUND_CUR_DIRECTION);
1504 }
1505
1506 static __inline__ __m512d __DEFAULT_FN_ATTRS
1507 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1508 {
1509   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1510                    (__v8df) __W,
1511                    (__mmask8) __U,
1512                    _MM_FROUND_CUR_DIRECTION);
1513 }
1514
1515 static __inline__ __m512d __DEFAULT_FN_ATTRS
1516 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517 {
1518   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1519                    (__v8df)
1520                    _mm512_setzero_pd (),
1521                    (__mmask8) __U,
1522                    _MM_FROUND_CUR_DIRECTION);
1523 }
1524
1525 #define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
1526   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
1527                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
1528                                         (int)(R)); })
1529
1530 #define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
1531   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
1532                                         (__v16sf)_mm512_setzero_ps(), \
1533                                         (__mmask16)(U), (int)(R)); })
1534
1535 #define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
1536   (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
1537                                         (__v16sf)_mm512_undefined_ps(), \
1538                                         (__mmask16)-1, (int)(R)); })
1539
1540 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1541 _mm512_sqrt_ps(__m512 __a)
1542 {
1543   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1544                                                (__v16sf) _mm512_setzero_ps (),
1545                                                (__mmask16) -1,
1546                                                _MM_FROUND_CUR_DIRECTION);
1547 }
1548
1549 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1550 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1551 {
1552   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1553                                                (__v16sf) __W,
1554                                                (__mmask16) __U,
1555                                                _MM_FROUND_CUR_DIRECTION);
1556 }
1557
1558 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1559 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1560 {
1561   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1562                                                (__v16sf) _mm512_setzero_ps (),
1563                                                (__mmask16) __U,
1564                                                _MM_FROUND_CUR_DIRECTION);
1565 }
1566
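/* Editorial sketch (not part of the upstream header): the *_round_* forms
 * above take an explicit rounding immediate instead of using MXCSR, e.g.
 *
 *   __m512d r = _mm512_sqrt_round_pd(v, _MM_FROUND_TO_NEAREST_INT |
 *                                       _MM_FROUND_NO_EXC);
 *
 * where _MM_FROUND_NO_EXC comes from <smmintrin.h> and v is a placeholder
 * __m512d value; passing _MM_FROUND_CUR_DIRECTION instead behaves like
 * _mm512_sqrt_pd() above.
 */
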
1567 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1568 _mm512_rsqrt14_pd(__m512d __A)
1569 {
1570   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571                  (__v8df)
1572                  _mm512_setzero_pd (),
1573                  (__mmask8) -1);
}
1574
1575 static __inline__ __m512d __DEFAULT_FN_ATTRS
1576 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1577 {
1578   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579                   (__v8df) __W,
1580                   (__mmask8) __U);
1581 }
1582
1583 static __inline__ __m512d __DEFAULT_FN_ATTRS
1584 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1585 {
1586   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1587                   (__v8df)
1588                   _mm512_setzero_pd (),
1589                   (__mmask8) __U);
1590 }
1591
1592 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1593 _mm512_rsqrt14_ps(__m512 __A)
1594 {
1595   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1596                 (__v16sf)
1597                 _mm512_setzero_ps (),
1598                 (__mmask16) -1);
1599 }
1600
1601 static __inline__ __m512 __DEFAULT_FN_ATTRS
1602 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1603 {
1604   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605                  (__v16sf) __W,
1606                  (__mmask16) __U);
1607 }
1608
1609 static __inline__ __m512 __DEFAULT_FN_ATTRS
1610 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1611 {
1612   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1613                  (__v16sf)
1614                  _mm512_setzero_ps (),
1615                  (__mmask16) __U);
1616 }
1617
1618 static  __inline__ __m128 __DEFAULT_FN_ATTRS
1619 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1620 {
1621   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1622              (__v4sf) __B,
1623              (__v4sf)
1624              _mm_setzero_ps (),
1625              (__mmask8) -1);
1626 }
1627
1628 static __inline__ __m128 __DEFAULT_FN_ATTRS
1629 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1630 {
1631  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1632           (__v4sf) __B,
1633           (__v4sf) __W,
1634           (__mmask8) __U);
1635 }
1636
1637 static __inline__ __m128 __DEFAULT_FN_ATTRS
1638 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1639 {
1640  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1641           (__v4sf) __B,
1642           (__v4sf) _mm_setzero_ps (),
1643           (__mmask8) __U);
1644 }
1645
1646 static  __inline__ __m128d __DEFAULT_FN_ATTRS
1647 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1648 {
1649   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1650               (__v2df) __B,
1651               (__v2df)
1652               _mm_setzero_pd (),
1653               (__mmask8) -1);
1654 }
1655
1656 static __inline__ __m128d __DEFAULT_FN_ATTRS
1657 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1658 {
1659  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1660           (__v2df) __B,
1661           (__v2df) __W,
1662           (__mmask8) __U);
1663 }
1664
1665 static __inline__ __m128d __DEFAULT_FN_ATTRS
1666 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1667 {
1668  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1669           (__v2df) __B,
1670           (__v2df) _mm_setzero_pd (),
1671           (__mmask8) __U);
1672 }
1673
1674 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1675 _mm512_rcp14_pd(__m512d __A)
1676 {
1677   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1678                (__v8df)
1679                _mm512_setzero_pd (),
1680                (__mmask8) -1);
1681 }
1682
1683 static __inline__ __m512d __DEFAULT_FN_ATTRS
1684 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1685 {
1686   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687                 (__v8df) __W,
1688                 (__mmask8) __U);
1689 }
1690
1691 static __inline__ __m512d __DEFAULT_FN_ATTRS
1692 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1693 {
1694   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1695                 (__v8df)
1696                 _mm512_setzero_pd (),
1697                 (__mmask8) __U);
1698 }
1699
1700 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1701 _mm512_rcp14_ps(__m512 __A)
1702 {
1703   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1704               (__v16sf)
1705               _mm512_setzero_ps (),
1706               (__mmask16) -1);
1707 }
1708
1709 static __inline__ __m512 __DEFAULT_FN_ATTRS
1710 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1711 {
1712   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713                    (__v16sf) __W,
1714                    (__mmask16) __U);
1715 }
1716
1717 static __inline__ __m512 __DEFAULT_FN_ATTRS
1718 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1719 {
1720   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1721                    (__v16sf)
1722                    _mm512_setzero_ps (),
1723                    (__mmask16) __U);
1724 }
1725
1726 static  __inline__ __m128 __DEFAULT_FN_ATTRS
1727 _mm_rcp14_ss(__m128 __A, __m128 __B)
1728 {
1729   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1730                  (__v4sf) __B,
1731                  (__v4sf)
1732                  _mm_setzero_ps (),
1733                  (__mmask8) -1);
1734 }
1735
1736 static __inline__ __m128 __DEFAULT_FN_ATTRS
1737 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1738 {
1739  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1740           (__v4sf) __B,
1741           (__v4sf) __W,
1742           (__mmask8) __U);
1743 }
1744
1745 static __inline__ __m128 __DEFAULT_FN_ATTRS
1746 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1747 {
1748  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1749           (__v4sf) __B,
1750           (__v4sf) _mm_setzero_ps (),
1751           (__mmask8) __U);
1752 }
1753
1754 static  __inline__ __m128d __DEFAULT_FN_ATTRS
1755 _mm_rcp14_sd(__m128d __A, __m128d __B)
1756 {
1757   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1758             (__v2df) __B,
1759             (__v2df)
1760             _mm_setzero_pd (),
1761             (__mmask8) -1);
1762 }
1763
1764 static __inline__ __m128d __DEFAULT_FN_ATTRS
1765 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1766 {
1767  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1768           (__v2df) __B,
1769           (__v2df) __W,
1770           (__mmask8) __U);
1771 }
1772
1773 static __inline__ __m128d __DEFAULT_FN_ATTRS
1774 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1775 {
1776  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1777           (__v2df) __B,
1778           (__v2df) _mm_setzero_pd (),
1779           (__mmask8) __U);
1780 }
1781
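/* Editorial sketch (not part of the upstream header): the rcp14/rsqrt14
 * families return approximations with relative error on the order of 2^-14.
 * Where more precision is needed, one Newton-Raphson step is a common
 * refinement, e.g. for the packed reciprocal:
 *
 *   __m512 x0 = _mm512_rcp14_ps(a);                        // ~14-bit 1/a
 *   __m512 x1 = _mm512_mul_ps(x0,
 *                 _mm512_sub_ps(_mm512_set1_ps(2.0f),
 *                               _mm512_mul_ps(a, x0)));    // x0 * (2 - a*x0)
 */
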
1782 static __inline __m512 __DEFAULT_FN_ATTRS
1783 _mm512_floor_ps(__m512 __A)
1784 {
1785   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1786                                                   _MM_FROUND_FLOOR,
1787                                                   (__v16sf) __A, -1,
1788                                                   _MM_FROUND_CUR_DIRECTION);
1789 }
1790
1791 static __inline__ __m512 __DEFAULT_FN_ATTRS
1792 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1793 {
1794   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1795                    _MM_FROUND_FLOOR,
1796                    (__v16sf) __W, __U,
1797                    _MM_FROUND_CUR_DIRECTION);
1798 }
1799
1800 static __inline __m512d __DEFAULT_FN_ATTRS
1801 _mm512_floor_pd(__m512d __A)
1802 {
1803   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1804                                                    _MM_FROUND_FLOOR,
1805                                                    (__v8df) __A, -1,
1806                                                    _MM_FROUND_CUR_DIRECTION);
1807 }
1808
1809 static __inline__ __m512d __DEFAULT_FN_ATTRS
1810 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1811 {
1812   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1813                 _MM_FROUND_FLOOR,
1814                 (__v8df) __W, __U,
1815                 _MM_FROUND_CUR_DIRECTION);
1816 }
1817
1818 static __inline__ __m512 __DEFAULT_FN_ATTRS
1819 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1820 {
1821   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1822                    _MM_FROUND_CEIL,
1823                    (__v16sf) __W, __U,
1824                    _MM_FROUND_CUR_DIRECTION);
1825 }
1826
1827 static __inline __m512 __DEFAULT_FN_ATTRS
1828 _mm512_ceil_ps(__m512 __A)
1829 {
1830   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1831                                                   _MM_FROUND_CEIL,
1832                                                   (__v16sf) __A, -1,
1833                                                   _MM_FROUND_CUR_DIRECTION);
1834 }
1835
1836 static __inline __m512d __DEFAULT_FN_ATTRS
1837 _mm512_ceil_pd(__m512d __A)
1838 {
1839   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1840                                                    _MM_FROUND_CEIL,
1841                                                    (__v8df) __A, -1,
1842                                                    _MM_FROUND_CUR_DIRECTION);
1843 }
1844
1845 static __inline__ __m512d __DEFAULT_FN_ATTRS
1846 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1847 {
1848   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1849                 _MM_FROUND_CEIL,
1850                 (__v8df) __W, __U,
1851                 _MM_FROUND_CUR_DIRECTION);
1852 }
1853
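/* Editorial note (not part of the upstream header): floor/ceil are thin
 * wrappers over the rndscale builtins with a scale factor of 0, so e.g.
 * _mm512_floor_pd(v) behaves like _mm512_roundscale_pd(v, _MM_FROUND_FLOOR)
 * defined further below (_MM_FROUND_FLOOR and _MM_FROUND_CEIL come from
 * <smmintrin.h>).
 */
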
1854 static __inline __m512i __DEFAULT_FN_ATTRS
1855 _mm512_abs_epi64(__m512i __A)
1856 {
1857   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1858              (__v8di)
1859              _mm512_setzero_si512 (),
1860              (__mmask8) -1);
1861 }
1862
1863 static __inline__ __m512i __DEFAULT_FN_ATTRS
1864 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1865 {
1866   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1867                   (__v8di) __W,
1868                   (__mmask8) __U);
1869 }
1870
1871 static __inline__ __m512i __DEFAULT_FN_ATTRS
1872 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1873 {
1874   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1875                   (__v8di)
1876                   _mm512_setzero_si512 (),
1877                   (__mmask8) __U);
1878 }
1879
1880 static __inline __m512i __DEFAULT_FN_ATTRS
1881 _mm512_abs_epi32(__m512i __A)
1882 {
1883   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1884              (__v16si)
1885              _mm512_setzero_si512 (),
1886              (__mmask16) -1);
1887 }
1888
1889 static __inline__ __m512i __DEFAULT_FN_ATTRS
1890 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1891 {
1892   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1893                   (__v16si) __W,
1894                   (__mmask16) __U);
1895 }
1896
1897 static __inline__ __m512i __DEFAULT_FN_ATTRS
1898 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1899 {
1900   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1901                   (__v16si)
1902                   _mm512_setzero_si512 (),
1903                   (__mmask16) __U);
1904 }
1905
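/* Editorial usage sketch (not part of the upstream header):
 *
 *   __m512i v = _mm512_set1_epi32(-5);
 *   __m512i a = _mm512_abs_epi32(v);                // every lane becomes 5
 *   __m512i z = _mm512_maskz_abs_epi32(0x00FF, v);  // lanes 0-7: 5, lanes 8-15: 0
 */
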
1906 static __inline__ __m128 __DEFAULT_FN_ATTRS
1907 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1908   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1909                 (__v4sf) __B,
1910                 (__v4sf) __W,
1911                 (__mmask8) __U,
1912                 _MM_FROUND_CUR_DIRECTION);
1913 }
1914
1915 static __inline__ __m128 __DEFAULT_FN_ATTRS
1916 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B) {
1917   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1918                 (__v4sf) __B,
1919                 (__v4sf)  _mm_setzero_ps (),
1920                 (__mmask8) __U,
1921                 _MM_FROUND_CUR_DIRECTION);
1922 }
1923
1924 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
1925   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1926                                           (__v4sf)(__m128)(B), \
1927                                           (__v4sf)_mm_setzero_ps(), \
1928                                           (__mmask8)-1, (int)(R)); })
1929
1930 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
1931   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1932                                           (__v4sf)(__m128)(B), \
1933                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
1934                                           (int)(R)); })
1935
1936 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
1937   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1938                                           (__v4sf)(__m128)(B), \
1939                                           (__v4sf)_mm_setzero_ps(), \
1940                                           (__mmask8)(U), (int)(R)); })
1941
1942 static __inline__ __m128d __DEFAULT_FN_ATTRS
1943 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1944   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1945                 (__v2df) __B,
1946                 (__v2df) __W,
1947                 (__mmask8) __U,
1948                 _MM_FROUND_CUR_DIRECTION);
1949 }
1950
1951 static __inline__ __m128d __DEFAULT_FN_ATTRS
1952 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
1953   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1954                 (__v2df) __B,
1955                 (__v2df)  _mm_setzero_pd (),
1956                 (__mmask8) __U,
1957                 _MM_FROUND_CUR_DIRECTION);
1958 }
1959 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
1960   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1961                                            (__v2df)(__m128d)(B), \
1962                                            (__v2df)_mm_setzero_pd(), \
1963                                            (__mmask8)-1, (int)(R)); })
1964
1965 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
1966   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1967                                            (__v2df)(__m128d)(B), \
1968                                            (__v2df)(__m128d)(W), \
1969                                            (__mmask8)(U), (int)(R)); })
1970
1971 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
1972   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1973                                            (__v2df)(__m128d)(B), \
1974                                            (__v2df)_mm_setzero_pd(), \
1975                                            (__mmask8)(U), (int)(R)); })
1976
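/* Editorial note (not part of the upstream header): the masked scalar forms
 * only operate on element 0; bits 127:32 of the result are copied from __A.
 * Element 0 is __A[0] + __B[0] when bit 0 of the mask is set, otherwise it
 * comes from __W (_mask_) or is zeroed (_maskz_).  Sketch:
 *
 *   __m128 r = _mm_maskz_add_ss(0x0, a, b);   // element 0 == 0.0f,
 *                                             // elements 1-3 copied from a
 */
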
1977 static __inline__ __m512d __DEFAULT_FN_ATTRS
1978 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1979   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1980                                               (__v8df)_mm512_add_pd(__A, __B),
1981                                               (__v8df)__W);
1982 }
1983
1984 static __inline__ __m512d __DEFAULT_FN_ATTRS
1985 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1986   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1987                                               (__v8df)_mm512_add_pd(__A, __B),
1988                                               (__v8df)_mm512_setzero_pd());
1989 }
1990
1991 static __inline__ __m512 __DEFAULT_FN_ATTRS
1992 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1993   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1994                                              (__v16sf)_mm512_add_ps(__A, __B),
1995                                              (__v16sf)__W);
1996 }
1997
1998 static __inline__ __m512 __DEFAULT_FN_ATTRS
1999 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2000   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2001                                              (__v16sf)_mm512_add_ps(__A, __B),
2002                                              (__v16sf)_mm512_setzero_ps());
2003 }
2004
2005 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \
2006   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2007                                         (__v8df)(__m512d)(B), \
2008                                         (__v8df)_mm512_setzero_pd(), \
2009                                         (__mmask8)-1, (int)(R)); })
2010
2011 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
2012   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2013                                         (__v8df)(__m512d)(B), \
2014                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2015                                         (int)(R)); })
2016
2017 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
2018   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2019                                         (__v8df)(__m512d)(B), \
2020                                         (__v8df)_mm512_setzero_pd(), \
2021                                         (__mmask8)(U), (int)(R)); })
2022
2023 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \
2024   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2025                                        (__v16sf)(__m512)(B), \
2026                                        (__v16sf)_mm512_setzero_ps(), \
2027                                        (__mmask16)-1, (int)(R)); })
2028
2029 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
2030   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2031                                        (__v16sf)(__m512)(B), \
2032                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
2033                                        (int)(R)); })
2034
2035 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
2036   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2037                                        (__v16sf)(__m512)(B), \
2038                                        (__v16sf)_mm512_setzero_ps(), \
2039                                        (__mmask16)(U), (int)(R)); })
2040
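/* Editorial note (not part of the upstream header): the packed masked forms
 * are written as an unmasked operation followed by a per-lane select; the
 * backend normally folds the pair into a single masked instruction.  Sketch:
 *
 *   __m512d r = _mm512_mask_add_pd(src, 0xF0, a, b);
 *     // lanes 4-7: a + b, lanes 0-3: copied from src
 */
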
2041 static __inline__ __m128 __DEFAULT_FN_ATTRS
2042 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2043   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2044                 (__v4sf) __B,
2045                 (__v4sf) __W,
2046                 (__mmask8) __U,
2047                 _MM_FROUND_CUR_DIRECTION);
2048 }
2049
2050 static __inline__ __m128 __DEFAULT_FN_ATTRS
2051 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2052   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2053                 (__v4sf) __B,
2054                 (__v4sf)  _mm_setzero_ps (),
2055                 (__mmask8) __U,
2056                 _MM_FROUND_CUR_DIRECTION);
2057 }
2058 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
2059   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2060                                           (__v4sf)(__m128)(B), \
2061                                           (__v4sf)_mm_setzero_ps(), \
2062                                           (__mmask8)-1, (int)(R)); })
2063
2064 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
2065   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2066                                           (__v4sf)(__m128)(B), \
2067                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
2068                                           (int)(R)); })
2069
2070 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
2071   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2072                                           (__v4sf)(__m128)(B), \
2073                                           (__v4sf)_mm_setzero_ps(), \
2074                                           (__mmask8)(U), (int)(R)); })
2075
2076 static __inline__ __m128d __DEFAULT_FN_ATTRS
2077 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2078   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2079                 (__v2df) __B,
2080                 (__v2df) __W,
2081                 (__mmask8) __U,
2082                 _MM_FROUND_CUR_DIRECTION);
2083 }
2084
2085 static __inline__ __m128d __DEFAULT_FN_ATTRS
2086 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2087   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2088                 (__v2df) __B,
2089                 (__v2df)  _mm_setzero_pd (),
2090                 (__mmask8) __U,
2091                 _MM_FROUND_CUR_DIRECTION);
2092 }
2093
2094 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
2095   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2096                                            (__v2df)(__m128d)(B), \
2097                                            (__v2df)_mm_setzero_pd(), \
2098                                            (__mmask8)-1, (int)(R)); })
2099
2100 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
2101   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2102                                            (__v2df)(__m128d)(B), \
2103                                            (__v2df)(__m128d)(W), \
2104                                            (__mmask8)(U), (int)(R)); })
2105
2106 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
2107   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2108                                            (__v2df)(__m128d)(B), \
2109                                            (__v2df)_mm_setzero_pd(), \
2110                                            (__mmask8)(U), (int)(R)); })
2111
2112 static __inline__ __m512d __DEFAULT_FN_ATTRS
2113 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2114   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2115                                               (__v8df)_mm512_sub_pd(__A, __B),
2116                                               (__v8df)__W);
2117 }
2118
2119 static __inline__ __m512d __DEFAULT_FN_ATTRS
2120 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2121   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2122                                               (__v8df)_mm512_sub_pd(__A, __B),
2123                                               (__v8df)_mm512_setzero_pd());
2124 }
2125
2126 static __inline__ __m512 __DEFAULT_FN_ATTRS
2127 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2128   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2129                                              (__v16sf)_mm512_sub_ps(__A, __B),
2130                                              (__v16sf)__W);
2131 }
2132
2133 static __inline__ __m512 __DEFAULT_FN_ATTRS
2134 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2135   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2136                                              (__v16sf)_mm512_sub_ps(__A, __B),
2137                                              (__v16sf)_mm512_setzero_ps());
2138 }
2139
2140 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
2141   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2142                                         (__v8df)(__m512d)(B), \
2143                                         (__v8df)_mm512_setzero_pd(), \
2144                                         (__mmask8)-1, (int)(R)); })
2145
2146 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
2147   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2148                                         (__v8df)(__m512d)(B), \
2149                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2150                                         (int)(R)); })
2151
2152 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
2153   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2154                                         (__v8df)(__m512d)(B), \
2155                                         (__v8df)_mm512_setzero_pd(), \
2156                                         (__mmask8)(U), (int)(R)); })
2157
2158 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
2159   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
2160                                        (__v16sf)(__m512)(B), \
2161                                        (__v16sf)_mm512_setzero_ps(), \
2162                                        (__mmask16)-1, (int)(R)); })
2163
2164 #define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
2165   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
2166                                        (__v16sf)(__m512)(B), \
2167                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
2168                                        (int)(R)); })
2169
2170 #define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
2171   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
2172                                        (__v16sf)(__m512)(B), \
2173                                        (__v16sf)_mm512_setzero_ps(), \
2174                                        (__mmask16)(U), (int)(R)); })
2175
2176 static __inline__ __m128 __DEFAULT_FN_ATTRS
2177 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2178   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2179                 (__v4sf) __B,
2180                 (__v4sf) __W,
2181                 (__mmask8) __U,
2182                 _MM_FROUND_CUR_DIRECTION);
2183 }
2184
2185 static __inline__ __m128 __DEFAULT_FN_ATTRS
2186 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2187   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2188                 (__v4sf) __B,
2189                 (__v4sf)  _mm_setzero_ps (),
2190                 (__mmask8) __U,
2191                 _MM_FROUND_CUR_DIRECTION);
2192 }
2193 #define _mm_mul_round_ss(A, B, R) __extension__ ({ \
2194   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2195                                           (__v4sf)(__m128)(B), \
2196                                           (__v4sf)_mm_setzero_ps(), \
2197                                           (__mmask8)-1, (int)(R)); })
2198
2199 #define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
2200   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2201                                           (__v4sf)(__m128)(B), \
2202                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
2203                                           (int)(R)); })
2204
2205 #define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
2206   (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2207                                           (__v4sf)(__m128)(B), \
2208                                           (__v4sf)_mm_setzero_ps(), \
2209                                           (__mmask8)(U), (int)(R)); })
2210
2211 static __inline__ __m128d __DEFAULT_FN_ATTRS
2212 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2213   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2214                 (__v2df) __B,
2215                 (__v2df) __W,
2216                 (__mmask8) __U,
2217                 _MM_FROUND_CUR_DIRECTION);
2218 }
2219
2220 static __inline__ __m128d __DEFAULT_FN_ATTRS
2221 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2222   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2223                 (__v2df) __B,
2224                 (__v2df)  _mm_setzero_pd (),
2225                 (__mmask8) __U,
2226                 _MM_FROUND_CUR_DIRECTION);
2227 }
2228
2229 #define _mm_mul_round_sd(A, B, R) __extension__ ({ \
2230   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2231                                            (__v2df)(__m128d)(B), \
2232                                            (__v2df)_mm_setzero_pd(), \
2233                                            (__mmask8)-1, (int)(R)); })
2234
2235 #define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
2236   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2237                                            (__v2df)(__m128d)(B), \
2238                                            (__v2df)(__m128d)(W), \
2239                                            (__mmask8)(U), (int)(R)); })
2240
2241 #define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
2242   (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2243                                            (__v2df)(__m128d)(B), \
2244                                            (__v2df)_mm_setzero_pd(), \
2245                                            (__mmask8)(U), (int)(R)); })
2246
2247 static __inline__ __m512d __DEFAULT_FN_ATTRS
2248 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2249   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2250                                               (__v8df)_mm512_mul_pd(__A, __B),
2251                                               (__v8df)__W);
2252 }
2253
2254 static __inline__ __m512d __DEFAULT_FN_ATTRS
2255 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2256   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2257                                               (__v8df)_mm512_mul_pd(__A, __B),
2258                                               (__v8df)_mm512_setzero_pd());
2259 }
2260
2261 static __inline__ __m512 __DEFAULT_FN_ATTRS
2262 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2263   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2264                                              (__v16sf)_mm512_mul_ps(__A, __B),
2265                                              (__v16sf)__W);
2266 }
2267
2268 static __inline__ __m512 __DEFAULT_FN_ATTRS
2269 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2270   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2271                                              (__v16sf)_mm512_mul_ps(__A, __B),
2272                                              (__v16sf)_mm512_setzero_ps());
2273 }
2274
2275 #define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
2276   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
2277                                         (__v8df)(__m512d)(B), \
2278                                         (__v8df)_mm512_setzero_pd(), \
2279                                         (__mmask8)-1, (int)(R)); })
2280
2281 #define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
2282   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
2283                                         (__v8df)(__m512d)(B), \
2284                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2285                                         (int)(R)); })
2286
2287 #define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
2288   (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
2289                                         (__v8df)(__m512d)(B), \
2290                                         (__v8df)_mm512_setzero_pd(), \
2291                                         (__mmask8)(U), (int)(R)); })
2292
2293 #define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
2294   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
2295                                        (__v16sf)(__m512)(B), \
2296                                        (__v16sf)_mm512_setzero_ps(), \
2297                                        (__mmask16)-1, (int)(R)); })
2298
2299 #define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
2300   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
2301                                        (__v16sf)(__m512)(B), \
2302                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
2303                                        (int)(R)); })
2304
2305 #define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
2306   (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
2307                                        (__v16sf)(__m512)(B), \
2308                                        (__v16sf)_mm512_setzero_ps(), \
2309                                        (__mmask16)(U), (int)(R)); })
2310
2311 static __inline__ __m128 __DEFAULT_FN_ATTRS
2312 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2313   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2314                 (__v4sf) __B,
2315                 (__v4sf) __W,
2316                 (__mmask8) __U,
2317                 _MM_FROUND_CUR_DIRECTION);
2318 }
2319
2320 static __inline__ __m128 __DEFAULT_FN_ATTRS
2321 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B) {
2322   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2323                 (__v4sf) __B,
2324                 (__v4sf)  _mm_setzero_ps (),
2325                 (__mmask8) __U,
2326                 _MM_FROUND_CUR_DIRECTION);
2327 }
2328
2329 #define _mm_div_round_ss(A, B, R) __extension__ ({ \
2330   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2331                                           (__v4sf)(__m128)(B), \
2332                                           (__v4sf)_mm_setzero_ps(), \
2333                                           (__mmask8)-1, (int)(R)); })
2334
2335 #define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
2336   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2337                                           (__v4sf)(__m128)(B), \
2338                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
2339                                           (int)(R)); })
2340
2341 #define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
2342   (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
2343                                           (__v4sf)(__m128)(B), \
2344                                           (__v4sf)_mm_setzero_ps(), \
2345                                           (__mmask8)(U), (int)(R)); })
2346
2347 static __inline__ __m128d __DEFAULT_FN_ATTRS
2348 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2349   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2350                 (__v2df) __B,
2351                 (__v2df) __W,
2352                 (__mmask8) __U,
2353                 _MM_FROUND_CUR_DIRECTION);
2354 }
2355
2356 static __inline__ __m128d __DEFAULT_FN_ATTRS
2357 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B) {
2358   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2359                 (__v2df) __B,
2360                 (__v2df)  _mm_setzero_pd (),
2361                 (__mmask8) __U,
2362                 _MM_FROUND_CUR_DIRECTION);
2363 }
2364
2365 #define _mm_div_round_sd(A, B, R) __extension__ ({ \
2366   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2367                                            (__v2df)(__m128d)(B), \
2368                                            (__v2df)_mm_setzero_pd(), \
2369                                            (__mmask8)-1, (int)(R)); })
2370
2371 #define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
2372   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2373                                            (__v2df)(__m128d)(B), \
2374                                            (__v2df)(__m128d)(W), \
2375                                            (__mmask8)(U), (int)(R)); })
2376
2377 #define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
2378   (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
2379                                            (__v2df)(__m128d)(B), \
2380                                            (__v2df)_mm_setzero_pd(), \
2381                                            (__mmask8)(U), (int)(R)); })
2382
2383 static __inline __m512d __DEFAULT_FN_ATTRS
2384 _mm512_div_pd(__m512d __a, __m512d __b)
2385 {
2386   return (__m512d)((__v8df)__a/(__v8df)__b);
2387 }
2388
2389 static __inline__ __m512d __DEFAULT_FN_ATTRS
2390 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2391   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2392                                               (__v8df)_mm512_div_pd(__A, __B),
2393                                               (__v8df)__W);
2394 }
2395
2396 static __inline__ __m512d __DEFAULT_FN_ATTRS
2397 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2398   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2399                                               (__v8df)_mm512_div_pd(__A, __B),
2400                                               (__v8df)_mm512_setzero_pd());
2401 }
2402
2403 static __inline __m512 __DEFAULT_FN_ATTRS
2404 _mm512_div_ps(__m512 __a, __m512 __b)
2405 {
2406   return (__m512)((__v16sf)__a/(__v16sf)__b);
2407 }
2408
2409 static __inline__ __m512 __DEFAULT_FN_ATTRS
2410 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2411   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2412                                              (__v16sf)_mm512_div_ps(__A, __B),
2413                                              (__v16sf)__W);
2414 }
2415
2416 static __inline__ __m512 __DEFAULT_FN_ATTRS
2417 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2418   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2419                                              (__v16sf)_mm512_div_ps(__A, __B),
2420                                              (__v16sf)_mm512_setzero_ps());
2421 }
2422
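/* Editorial note (not part of the upstream header): the unmasked packed
 * arithmetic here uses Clang's native vector operators, so writing a / b on
 * __m512d values directly should generate the same code as:
 *
 *   __m512d q = _mm512_div_pd(a, b);   // i.e. (__m512d)((__v8df)a / (__v8df)b)
 */
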
2423 #define _mm512_div_round_pd(A, B, R) __extension__ ({ \
2424   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
2425                                         (__v8df)(__m512d)(B), \
2426                                         (__v8df)_mm512_setzero_pd(), \
2427                                         (__mmask8)-1, (int)(R)); })
2428
2429 #define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
2430   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
2431                                         (__v8df)(__m512d)(B), \
2432                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2433                                         (int)(R)); })
2434
2435 #define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
2436   (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
2437                                         (__v8df)(__m512d)(B), \
2438                                         (__v8df)_mm512_setzero_pd(), \
2439                                         (__mmask8)(U), (int)(R)); })
2440
2441 #define _mm512_div_round_ps(A, B, R) __extension__ ({ \
2442   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
2443                                        (__v16sf)(__m512)(B), \
2444                                        (__v16sf)_mm512_setzero_ps(), \
2445                                        (__mmask16)-1, (int)(R)); })
2446
2447 #define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
2448   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
2449                                        (__v16sf)(__m512)(B), \
2450                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
2451                                        (int)(R)); })
2452
2453 #define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
2454   (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
2455                                        (__v16sf)(__m512)(B), \
2456                                        (__v16sf)_mm512_setzero_ps(), \
2457                                        (__mmask16)(U), (int)(R)); })
2458
2459 #define _mm512_roundscale_ps(A, B) __extension__ ({ \
2460   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
2461                                          (__v16sf)(__m512)(A), (__mmask16)-1, \
2462                                          _MM_FROUND_CUR_DIRECTION); })
2463
2464 #define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
2465   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2466                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
2467                                          _MM_FROUND_CUR_DIRECTION); })
2468
2469 #define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
2470   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2471                                          (__v16sf)_mm512_setzero_ps(), \
2472                                          (__mmask16)(A), \
2473                                          _MM_FROUND_CUR_DIRECTION); })
2474
2475 #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
2476   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
2477                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
2478                                          (int)(R)); })
2479
2480 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
2481   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
2482                                          (__v16sf)_mm512_setzero_ps(), \
2483                                          (__mmask16)(A), (int)(R)); })
2484
2485 #define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
2486   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
2487                                          (__v16sf)_mm512_undefined_ps(), \
2488                                          (__mmask16)-1, (int)(R)); })
2489
2490 #define _mm512_roundscale_pd(A, B) __extension__ ({ \
2491   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
2492                                           (__v8df)(__m512d)(A), (__mmask8)-1, \
2493                                           _MM_FROUND_CUR_DIRECTION); })
2494
2495 #define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
2496   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2497                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
2498                                           _MM_FROUND_CUR_DIRECTION); })
2499
2500 #define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
2501   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2502                                           (__v8df)_mm512_setzero_pd(), \
2503                                           (__mmask8)(A), \
2504                                           _MM_FROUND_CUR_DIRECTION); })
2505
2506 #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
2507   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
2508                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
2509                                           (int)(R)); })
2510
2511 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
2512   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
2513                                           (__v8df)_mm512_setzero_pd(), \
2514                                           (__mmask8)(A), (int)(R)); })
2515
2516 #define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
2517   (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
2518                                           (__v8df)_mm512_undefined_pd(), \
2519                                           (__mmask8)-1, (int)(R)); })
2520
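/* Editorial note (not part of the upstream header): per the instruction-set
 * reference, the roundscale immediate packs a scale M in bits [7:4] (round
 * to a multiple of 2^-M) and a rounding mode in bits [1:0].  Sketch:
 *
 *   __m512 r = _mm512_roundscale_ps(x, 0x11);   // M=1, toward -inf:
 *                                               // round down to multiples of 0.5
 */
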
2521 #define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
2522   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2523                                            (__v8df)(__m512d)(B), \
2524                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
2525                                            (int)(R)); })
2526
2527
2528 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
2529   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2530                                            (__v8df)(__m512d)(B), \
2531                                            (__v8df)(__m512d)(C), \
2532                                            (__mmask8)(U), (int)(R)); })
2533
2534
2535 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
2536   (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2537                                             (__v8df)(__m512d)(B), \
2538                                             (__v8df)(__m512d)(C), \
2539                                             (__mmask8)(U), (int)(R)); })
2540
2541
2542 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
2543   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2544                                             (__v8df)(__m512d)(B), \
2545                                             (__v8df)(__m512d)(C), \
2546                                             (__mmask8)(U), (int)(R)); })
2547
2548
2549 #define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
2550   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2551                                            (__v8df)(__m512d)(B), \
2552                                            -(__v8df)(__m512d)(C), \
2553                                            (__mmask8)-1, (int)(R)); })
2554
2555
2556 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
2557   (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2558                                            (__v8df)(__m512d)(B), \
2559                                            -(__v8df)(__m512d)(C), \
2560                                            (__mmask8)(U), (int)(R)); })
2561
2562
2563 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
2564   (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2565                                             (__v8df)(__m512d)(B), \
2566                                             -(__v8df)(__m512d)(C), \
2567                                             (__mmask8)(U), (int)(R)); })
2568
2569
2570 #define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
2571   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2572                                            (__v8df)(__m512d)(B), \
2573                                            (__v8df)(__m512d)(C), (__mmask8)-1, \
2574                                            (int)(R)); })
2575
2576
2577 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
2578   (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2579                                             (__v8df)(__m512d)(B), \
2580                                             (__v8df)(__m512d)(C), \
2581                                             (__mmask8)(U), (int)(R)); })
2582
2583
2584 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
2585   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2586                                             (__v8df)(__m512d)(B), \
2587                                             (__v8df)(__m512d)(C), \
2588                                             (__mmask8)(U), (int)(R)); })
2589
2590
2591 #define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
2592   (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2593                                            (__v8df)(__m512d)(B), \
2594                                            -(__v8df)(__m512d)(C), \
2595                                            (__mmask8)-1, (int)(R)); })
2596
2597
2598 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
2599   (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2600                                             (__v8df)(__m512d)(B), \
2601                                             -(__v8df)(__m512d)(C), \
2602                                             (__mmask8)(U), (int)(R)); })
2603
2604
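/* Editorial note (not part of the upstream header): all four FMA flavours
 * reuse the same vfmadd builtins with sign flips on the operands:
 *
 *   fmsub(a, b, c)  == fma(a, b, -c)    // a*b - c
 *   fnmadd(a, b, c) == fma(-a, b, c)    // -(a*b) + c
 *   fnmsub(a, b, c) == fma(-a, b, -c)   // -(a*b) - c
 */
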
2605 static __inline__ __m512d __DEFAULT_FN_ATTRS
2606 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2607 {
2608   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2609                                                     (__v8df) __B,
2610                                                     (__v8df) __C,
2611                                                     (__mmask8) -1,
2612                                                     _MM_FROUND_CUR_DIRECTION);
2613 }
2614
2615 static __inline__ __m512d __DEFAULT_FN_ATTRS
2616 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2617 {
2618   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2619                                                     (__v8df) __B,
2620                                                     (__v8df) __C,
2621                                                     (__mmask8) __U,
2622                                                     _MM_FROUND_CUR_DIRECTION);
2623 }
2624
2625 static __inline__ __m512d __DEFAULT_FN_ATTRS
2626 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2627 {
2628   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2629                                                      (__v8df) __B,
2630                                                      (__v8df) __C,
2631                                                      (__mmask8) __U,
2632                                                      _MM_FROUND_CUR_DIRECTION);
2633 }
2634
2635 static __inline__ __m512d __DEFAULT_FN_ATTRS
2636 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2637 {
2638   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2639                                                      (__v8df) __B,
2640                                                      (__v8df) __C,
2641                                                      (__mmask8) __U,
2642                                                      _MM_FROUND_CUR_DIRECTION);
2643 }
2644
2645 static __inline__ __m512d __DEFAULT_FN_ATTRS
2646 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2647 {
2648   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2649                                                     (__v8df) __B,
2650                                                     -(__v8df) __C,
2651                                                     (__mmask8) -1,
2652                                                     _MM_FROUND_CUR_DIRECTION);
2653 }
2654
2655 static __inline__ __m512d __DEFAULT_FN_ATTRS
2656 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2657 {
2658   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2659                                                     (__v8df) __B,
2660                                                     -(__v8df) __C,
2661                                                     (__mmask8) __U,
2662                                                     _MM_FROUND_CUR_DIRECTION);
2663 }
2664
2665 static __inline__ __m512d __DEFAULT_FN_ATTRS
2666 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2667 {
2668   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2669                                                      (__v8df) __B,
2670                                                      -(__v8df) __C,
2671                                                      (__mmask8) __U,
2672                                                      _MM_FROUND_CUR_DIRECTION);
2673 }
2674
2675 static __inline__ __m512d __DEFAULT_FN_ATTRS
2676 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2677 {
2678   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2679                                                     (__v8df) __B,
2680                                                     (__v8df) __C,
2681                                                     (__mmask8) -1,
2682                                                     _MM_FROUND_CUR_DIRECTION);
2683 }
2684
2685 static __inline__ __m512d __DEFAULT_FN_ATTRS
2686 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2687 {
2688   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2689                                                      (__v8df) __B,
2690                                                      (__v8df) __C,
2691                                                      (__mmask8) __U,
2692                                                      _MM_FROUND_CUR_DIRECTION);
2693 }
2694
2695 static __inline__ __m512d __DEFAULT_FN_ATTRS
2696 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2697 {
2698   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2699                                                      (__v8df) __B,
2700                                                      (__v8df) __C,
2701                                                      (__mmask8) __U,
2702                                                      _MM_FROUND_CUR_DIRECTION);
2703 }
2704
2705 static __inline__ __m512d __DEFAULT_FN_ATTRS
2706 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2707 {
2708   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2709                                                     (__v8df) __B,
2710                                                     -(__v8df) __C,
2711                                                     (__mmask8) -1,
2712                                                     _MM_FROUND_CUR_DIRECTION);
2713 }
2714
2715 static __inline__ __m512d __DEFAULT_FN_ATTRS
2716 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2717 {
2718   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2719                                                      (__v8df) __B,
2720                                                      -(__v8df) __C,
2721                                                      (__mmask8) __U,
2722                                                      _MM_FROUND_CUR_DIRECTION);
2723 }
2724
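/* Illustrative usage sketch for the packed-double FMA family above; variable
 * names and values are placeholders.  The mask form keeps lanes of the first
 * operand where the write-mask bit is clear, and the maskz form zeroes them.
 *
 *   __m512d a = _mm512_set1_pd(2.0);
 *   __m512d b = _mm512_set1_pd(3.0);
 *   __m512d c = _mm512_set1_pd(1.0);
 *   __mmask8 m = 0x0F;
 *   __m512d r0 = _mm512_fmadd_pd(a, b, c);          // every lane: 2*3 + 1 = 7
 *   __m512d r1 = _mm512_mask_fmadd_pd(a, m, b, c);  // lanes 0-3: 7, lanes 4-7 keep a
 *   __m512d r2 = _mm512_maskz_fmadd_pd(m, a, b, c); // lanes 0-3: 7, lanes 4-7 zeroed
 */
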
2725 #define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
2726   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2727                                           (__v16sf)(__m512)(B), \
2728                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
2729                                           (int)(R)); })
2730
2731
2732 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
2733   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2734                                           (__v16sf)(__m512)(B), \
2735                                           (__v16sf)(__m512)(C), \
2736                                           (__mmask16)(U), (int)(R)); })
2737
2738
2739 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
2740   (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2741                                            (__v16sf)(__m512)(B), \
2742                                            (__v16sf)(__m512)(C), \
2743                                            (__mmask16)(U), (int)(R)); })
2744
2745
2746 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
2747   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2748                                            (__v16sf)(__m512)(B), \
2749                                            (__v16sf)(__m512)(C), \
2750                                            (__mmask16)(U), (int)(R)); })
2751
2752
2753 #define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
2754   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2755                                           (__v16sf)(__m512)(B), \
2756                                           -(__v16sf)(__m512)(C), \
2757                                           (__mmask16)-1, (int)(R)); })
2758
2759
2760 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
2761   (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2762                                           (__v16sf)(__m512)(B), \
2763                                           -(__v16sf)(__m512)(C), \
2764                                           (__mmask16)(U), (int)(R)); })
2765
2766
2767 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
2768   (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2769                                            (__v16sf)(__m512)(B), \
2770                                            -(__v16sf)(__m512)(C), \
2771                                            (__mmask16)(U), (int)(R)); })
2772
2773
2774 #define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
2775   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
2776                                           (__v16sf)(__m512)(B), \
2777                                           (__v16sf)(__m512)(C), (__mmask16)-1, \
2778                                           (int)(R)); })
2779
2780
2781 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
2782   (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2783                                            (__v16sf)(__m512)(B), \
2784                                            (__v16sf)(__m512)(C), \
2785                                            (__mmask16)(U), (int)(R)); })
2786
2787
2788 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
2789   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2790                                            (__v16sf)(__m512)(B), \
2791                                            (__v16sf)(__m512)(C), \
2792                                            (__mmask16)(U), (int)(R)); })
2793
2794
2795 #define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
2796   (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
2797                                           (__v16sf)(__m512)(B), \
2798                                           -(__v16sf)(__m512)(C), \
2799                                           (__mmask16)-1, (int)(R)); })
2800
2801
2802 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
2803   (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2804                                            (__v16sf)(__m512)(B), \
2805                                            -(__v16sf)(__m512)(C), \
2806                                            (__mmask16)(U), (int)(R)); })
2807
2808
2809 static __inline__ __m512 __DEFAULT_FN_ATTRS
2810 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2811 {
2812   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2813                                                    (__v16sf) __B,
2814                                                    (__v16sf) __C,
2815                                                    (__mmask16) -1,
2816                                                    _MM_FROUND_CUR_DIRECTION);
2817 }
2818
2819 static __inline__ __m512 __DEFAULT_FN_ATTRS
2820 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2821 {
2822   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2823                                                    (__v16sf) __B,
2824                                                    (__v16sf) __C,
2825                                                    (__mmask16) __U,
2826                                                    _MM_FROUND_CUR_DIRECTION);
2827 }
2828
2829 static __inline__ __m512 __DEFAULT_FN_ATTRS
2830 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2831 {
2832   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2833                                                     (__v16sf) __B,
2834                                                     (__v16sf) __C,
2835                                                     (__mmask16) __U,
2836                                                     _MM_FROUND_CUR_DIRECTION);
2837 }
2838
2839 static __inline__ __m512 __DEFAULT_FN_ATTRS
2840 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2841 {
2842   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2843                                                     (__v16sf) __B,
2844                                                     (__v16sf) __C,
2845                                                     (__mmask16) __U,
2846                                                     _MM_FROUND_CUR_DIRECTION);
2847 }
2848
2849 static __inline__ __m512 __DEFAULT_FN_ATTRS
2850 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2851 {
2852   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2853                                                    (__v16sf) __B,
2854                                                    -(__v16sf) __C,
2855                                                    (__mmask16) -1,
2856                                                    _MM_FROUND_CUR_DIRECTION);
2857 }
2858
2859 static __inline__ __m512 __DEFAULT_FN_ATTRS
2860 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2861 {
2862   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2863                                                    (__v16sf) __B,
2864                                                    -(__v16sf) __C,
2865                                                    (__mmask16) __U,
2866                                                    _MM_FROUND_CUR_DIRECTION);
2867 }
2868
2869 static __inline__ __m512 __DEFAULT_FN_ATTRS
2870 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2871 {
2872   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2873                                                     (__v16sf) __B,
2874                                                     -(__v16sf) __C,
2875                                                     (__mmask16) __U,
2876                                                     _MM_FROUND_CUR_DIRECTION);
2877 }
2878
2879 static __inline__ __m512 __DEFAULT_FN_ATTRS
2880 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2881 {
2882   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2883                                                    (__v16sf) __B,
2884                                                    (__v16sf) __C,
2885                                                    (__mmask16) -1,
2886                                                    _MM_FROUND_CUR_DIRECTION);
2887 }
2888
2889 static __inline__ __m512 __DEFAULT_FN_ATTRS
2890 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2891 {
2892   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2893                                                     (__v16sf) __B,
2894                                                     (__v16sf) __C,
2895                                                     (__mmask16) __U,
2896                                                     _MM_FROUND_CUR_DIRECTION);
2897 }
2898
2899 static __inline__ __m512 __DEFAULT_FN_ATTRS
2900 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2901 {
2902   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2903                                                     (__v16sf) __B,
2904                                                     (__v16sf) __C,
2905                                                     (__mmask16) __U,
2906                                                     _MM_FROUND_CUR_DIRECTION);
2907 }
2908
2909 static __inline__ __m512 __DEFAULT_FN_ATTRS
2910 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2911 {
2912   return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2913                                                    (__v16sf) __B,
2914                                                    -(__v16sf) __C,
2915                                                    (__mmask16) -1,
2916                                                    _MM_FROUND_CUR_DIRECTION);
2917 }
2918
2919 static __inline__ __m512 __DEFAULT_FN_ATTRS
2920 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2921 {
2922   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2923                                                     (__v16sf) __B,
2924                                                     -(__v16sf) __C,
2925                                                     (__mmask16) __U,
2926                                                     _MM_FROUND_CUR_DIRECTION);
2927 }
2928
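/* Illustrative usage sketch: the *_round_* forms take an explicit rounding
 * immediate combined with _MM_FROUND_NO_EXC (provided by the SSE4.1 header
 * that <immintrin.h> pulls in), while the plain forms round according to the
 * current MXCSR direction (_MM_FROUND_CUR_DIRECTION).  Values are placeholders.
 *
 *   __m512 x = _mm512_set1_ps(1.5f);
 *   __m512 y = _mm512_set1_ps(2.0f);
 *   __m512 z = _mm512_set1_ps(0.5f);
 *   __m512 s  = _mm512_fmadd_ps(x, y, z);                 // x*y + z per lane
 *   __m512 sr = _mm512_fmadd_round_ps(x, y, z,
 *                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 */
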
2929 #define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
2930   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2931                                               (__v8df)(__m512d)(B), \
2932                                               (__v8df)(__m512d)(C), \
2933                                               (__mmask8)-1, (int)(R)); })
2934
2935
2936 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
2937   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2938                                               (__v8df)(__m512d)(B), \
2939                                               (__v8df)(__m512d)(C), \
2940                                               (__mmask8)(U), (int)(R)); })
2941
2942
2943 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
2944   (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2945                                                (__v8df)(__m512d)(B), \
2946                                                (__v8df)(__m512d)(C), \
2947                                                (__mmask8)(U), (int)(R)); })
2948
2949
2950 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
2951   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2952                                                (__v8df)(__m512d)(B), \
2953                                                (__v8df)(__m512d)(C), \
2954                                                (__mmask8)(U), (int)(R)); })
2955
2956
2957 #define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
2958   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2959                                               (__v8df)(__m512d)(B), \
2960                                               -(__v8df)(__m512d)(C), \
2961                                               (__mmask8)-1, (int)(R)); })
2962
2963
2964 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
2965   (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2966                                               (__v8df)(__m512d)(B), \
2967                                               -(__v8df)(__m512d)(C), \
2968                                               (__mmask8)(U), (int)(R)); })
2969
2970
2971 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
2972   (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2973                                                (__v8df)(__m512d)(B), \
2974                                                -(__v8df)(__m512d)(C), \
2975                                                (__mmask8)(U), (int)(R)); })
2976
2977
2978 static __inline__ __m512d __DEFAULT_FN_ATTRS
2979 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2980 {
2981   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2982                                                        (__v8df) __B,
2983                                                        (__v8df) __C,
2984                                                        (__mmask8) -1,
2985                                                        _MM_FROUND_CUR_DIRECTION);
2986 }
2987
2988 static __inline__ __m512d __DEFAULT_FN_ATTRS
2989 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2990 {
2991   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2992                                                        (__v8df) __B,
2993                                                        (__v8df) __C,
2994                                                        (__mmask8) __U,
2995                                                        _MM_FROUND_CUR_DIRECTION);
2996 }
2997
2998 static __inline__ __m512d __DEFAULT_FN_ATTRS
2999 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3000 {
3001   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3002                                                         (__v8df) __B,
3003                                                         (__v8df) __C,
3004                                                         (__mmask8) __U,
3005                                                         _MM_FROUND_CUR_DIRECTION);
3006 }
3007
3008 static __inline__ __m512d __DEFAULT_FN_ATTRS
3009 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3010 {
3011   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3012                                                         (__v8df) __B,
3013                                                         (__v8df) __C,
3014                                                         (__mmask8) __U,
3015                                                         _MM_FROUND_CUR_DIRECTION);
3016 }
3017
3018 static __inline__ __m512d __DEFAULT_FN_ATTRS
3019 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3020 {
3021   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3022                                                        (__v8df) __B,
3023                                                        -(__v8df) __C,
3024                                                        (__mmask8) -1,
3025                                                        _MM_FROUND_CUR_DIRECTION);
3026 }
3027
3028 static __inline__ __m512d __DEFAULT_FN_ATTRS
3029 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3030 {
3031   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3032                                                        (__v8df) __B,
3033                                                        -(__v8df) __C,
3034                                                        (__mmask8) __U,
3035                                                        _MM_FROUND_CUR_DIRECTION);
3036 }
3037
3038 static __inline__ __m512d __DEFAULT_FN_ATTRS
3039 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3040 {
3041   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3042                                                         (__v8df) __B,
3043                                                         -(__v8df) __C,
3044                                                         (__mmask8) __U,
3045                                                         _MM_FROUND_CUR_DIRECTION);
3046 }
3047
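/* Illustrative usage sketch: fmaddsub subtracts c in even-indexed lanes and
 * adds it in odd-indexed lanes; fmsubadd is the mirror image.  This is the
 * usual building block for interleaved (real, imaginary) data.  Values are
 * placeholders.
 *
 *   __m512d a = _mm512_set1_pd(2.0);
 *   __m512d b = _mm512_set1_pd(3.0);
 *   __m512d c = _mm512_set1_pd(1.0);
 *   __m512d r = _mm512_fmaddsub_pd(a, b, c);  // lanes 0,2,4,6: 5.0  lanes 1,3,5,7: 7.0
 */
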
3048 #define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
3049   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3050                                              (__v16sf)(__m512)(B), \
3051                                              (__v16sf)(__m512)(C), \
3052                                              (__mmask16)-1, (int)(R)); })
3053
3054
3055 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
3056   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3057                                              (__v16sf)(__m512)(B), \
3058                                              (__v16sf)(__m512)(C), \
3059                                              (__mmask16)(U), (int)(R)); })
3060
3061
3062 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
3063   (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
3064                                               (__v16sf)(__m512)(B), \
3065                                               (__v16sf)(__m512)(C), \
3066                                               (__mmask16)(U), (int)(R)); })
3067
3068
3069 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
3070   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3071                                               (__v16sf)(__m512)(B), \
3072                                               (__v16sf)(__m512)(C), \
3073                                               (__mmask16)(U), (int)(R)); })
3074
3075
3076 #define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
3077   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3078                                              (__v16sf)(__m512)(B), \
3079                                              -(__v16sf)(__m512)(C), \
3080                                              (__mmask16)-1, (int)(R)); })
3081
3082
3083 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
3084   (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
3085                                              (__v16sf)(__m512)(B), \
3086                                              -(__v16sf)(__m512)(C), \
3087                                              (__mmask16)(U), (int)(R)); })
3088
3089
3090 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
3091   (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
3092                                               (__v16sf)(__m512)(B), \
3093                                               -(__v16sf)(__m512)(C), \
3094                                               (__mmask16)(U), (int)(R)); })
3095
3096
3097 static __inline__ __m512 __DEFAULT_FN_ATTRS
3098 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3099 {
3100   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3101                                                       (__v16sf) __B,
3102                                                       (__v16sf) __C,
3103                                                       (__mmask16) -1,
3104                                                       _MM_FROUND_CUR_DIRECTION);
3105 }
3106
3107 static __inline__ __m512 __DEFAULT_FN_ATTRS
3108 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3109 {
3110   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3111                                                       (__v16sf) __B,
3112                                                       (__v16sf) __C,
3113                                                       (__mmask16) __U,
3114                                                       _MM_FROUND_CUR_DIRECTION);
3115 }
3116
3117 static __inline__ __m512 __DEFAULT_FN_ATTRS
3118 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3119 {
3120   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3121                                                        (__v16sf) __B,
3122                                                        (__v16sf) __C,
3123                                                        (__mmask16) __U,
3124                                                        _MM_FROUND_CUR_DIRECTION);
3125 }
3126
3127 static __inline__ __m512 __DEFAULT_FN_ATTRS
3128 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3129 {
3130   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3131                                                        (__v16sf) __B,
3132                                                        (__v16sf) __C,
3133                                                        (__mmask16) __U,
3134                                                        _MM_FROUND_CUR_DIRECTION);
3135 }
3136
3137 static __inline__ __m512 __DEFAULT_FN_ATTRS
3138 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3139 {
3140   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3141                                                       (__v16sf) __B,
3142                                                       -(__v16sf) __C,
3143                                                       (__mmask16) -1,
3144                                                       _MM_FROUND_CUR_DIRECTION);
3145 }
3146
3147 static __inline__ __m512 __DEFAULT_FN_ATTRS
3148 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3149 {
3150   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3151                                                       (__v16sf) __B,
3152                                                       -(__v16sf) __C,
3153                                                       (__mmask16) __U,
3154                                                       _MM_FROUND_CUR_DIRECTION);
3155 }
3156
3157 static __inline__ __m512 __DEFAULT_FN_ATTRS
3158 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3159 {
3160   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3161                                                        (__v16sf) __B,
3162                                                        -(__v16sf) __C,
3163                                                        (__mmask16) __U,
3164                                                        _MM_FROUND_CUR_DIRECTION);
3165 }
3166
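/* Illustrative usage sketch: the single-precision forms behave identically
 * across sixteen lanes, so the write mask widens to __mmask16.  Values are
 * placeholders.
 *
 *   __m512 a = _mm512_set1_ps(1.0f);
 *   __m512 b = _mm512_set1_ps(4.0f);
 *   __m512 c = _mm512_set1_ps(2.0f);
 *   __mmask16 m = 0x00FF;                            // update the low 8 lanes only
 *   __m512 r = _mm512_mask_fmaddsub_ps(a, m, b, c);
 *   // lanes 0-7 alternate 2.0f/6.0f; lanes 8-15 keep a
 */
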
3167 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3168   (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3169                                             (__v8df)(__m512d)(B), \
3170                                             (__v8df)(__m512d)(C), \
3171                                             (__mmask8)(U), (int)(R)); })
3172
3173
3174 static __inline__ __m512d __DEFAULT_FN_ATTRS
3175 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3176 {
3177   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3178                                                      (__v8df) __B,
3179                                                      (__v8df) __C,
3180                                                      (__mmask8) __U,
3181                                                      _MM_FROUND_CUR_DIRECTION);
3182 }
3183
3184 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3185   (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3186                                            (__v16sf)(__m512)(B), \
3187                                            (__v16sf)(__m512)(C), \
3188                                            (__mmask16)(U), (int)(R)); })
3189
3190
3191 static __inline__ __m512 __DEFAULT_FN_ATTRS
3192 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3193 {
3194   return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3195                                                     (__v16sf) __B,
3196                                                     (__v16sf) __C,
3197                                                     (__mmask16) __U,
3198                                                     _MM_FROUND_CUR_DIRECTION);
3199 }
3200
3201 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
3202   (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3203                                                (__v8df)(__m512d)(B), \
3204                                                (__v8df)(__m512d)(C), \
3205                                                (__mmask8)(U), (int)(R)); })
3206
3207
3208 static __inline__ __m512d __DEFAULT_FN_ATTRS
3209 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3210 {
3211   return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3212                                                         (__v8df) __B,
3213                                                         (__v8df) __C,
3214                                                         (__mmask8) __U,
3215                                                         _MM_FROUND_CUR_DIRECTION);
3216 }
3217
3218 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
3219   (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3220                                               (__v16sf)(__m512)(B), \
3221                                               (__v16sf)(__m512)(C), \
3222                                               (__mmask16)(U), (int)(R)); })
3223
3224
3225 static __inline__ __m512 __DEFAULT_FN_ATTRS
3226 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3227 {
3228   return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3229                                                        (__v16sf) __B,
3230                                                        (__v16sf) __C,
3231                                                        (__mmask16) __U,
3232                                                        _MM_FROUND_CUR_DIRECTION);
3233 }
3234
3235 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
3236   (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
3237                                             (__v8df)(__m512d)(B), \
3238                                             (__v8df)(__m512d)(C), \
3239                                             (__mmask8)(U), (int)(R)); })
3240
3241
3242 static __inline__ __m512d __DEFAULT_FN_ATTRS
3243 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3244 {
3245   return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3246                                                      (__v8df) __B,
3247                                                      (__v8df) __C,
3248                                                      (__mmask8) __U,
3249                                                      _MM_FROUND_CUR_DIRECTION);
3250 }
3251
3252 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
3253   (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
3254                                            (__v16sf)(__m512)(B), \
3255                                            (__v16sf)(__m512)(C), \
3256                                            (__mmask16)(U), (int)(R)); })
3257
3258
3259 static __inline__ __m512 __DEFAULT_FN_ATTRS
3260 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3261 {
3262   return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3263                                                     (__v16sf) __B,
3264                                                     (__v16sf) __C,
3265                                                     (__mmask16) __U,
3266                                                     _MM_FROUND_CUR_DIRECTION);
3267 }
3268
3269 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
3270   (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
3271                                             (__v8df)(__m512d)(B), \
3272                                             (__v8df)(__m512d)(C), \
3273                                             (__mmask8)(U), (int)(R)); })
3274
3275
3276 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
3277   (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
3278                                              (__v8df)(__m512d)(B), \
3279                                              (__v8df)(__m512d)(C), \
3280                                              (__mmask8)(U), (int)(R)); })
3281
3282
3283 static __inline__ __m512d __DEFAULT_FN_ATTRS
3284 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3285 {
3286   return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3287                                                      (__v8df) __B,
3288                                                      (__v8df) __C,
3289                                                      (__mmask8) __U,
3290                                                      _MM_FROUND_CUR_DIRECTION);
3291 }
3292
3293 static __inline__ __m512d __DEFAULT_FN_ATTRS
3294 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3295 {
3296   return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3297                                                       (__v8df) __B,
3298                                                       (__v8df) __C,
3299                                                       (__mmask8) __U,
3300                                                       _MM_FROUND_CUR_DIRECTION);
3301 }
3302
3303 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
3304   (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
3305                                            (__v16sf)(__m512)(B), \
3306                                            (__v16sf)(__m512)(C), \
3307                                            (__mmask16)(U), (int)(R)); })
3308
3309
3310 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
3311   (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
3312                                             (__v16sf)(__m512)(B), \
3313                                             (__v16sf)(__m512)(C), \
3314                                             (__mmask16)(U), (int)(R)); })
3315
3316
3317 static __inline__ __m512 __DEFAULT_FN_ATTRS
3318 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3319 {
3320   return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3321                                                     (__v16sf) __B,
3322                                                     (__v16sf) __C,
3323                                                     (__mmask16) __U,
3324                                                     _MM_FROUND_CUR_DIRECTION);
3325 }
3326
3327 static __inline__ __m512 __DEFAULT_FN_ATTRS
3328 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3329 {
3330   return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3331                                                      (__v16sf) __B,
3332                                                      (__v16sf) __C,
3333                                                      (__mmask16) __U,
3334                                                      _MM_FROUND_CUR_DIRECTION);
3335 }
3336
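/* Illustrative usage sketch: unlike the mask forms, the mask3 forms return C
 * (the addend) in lanes whose write-mask bit is clear, which suits loops that
 * accumulate into the third operand.  Values are placeholders.
 *
 *   __m512d acc = _mm512_set1_pd(10.0);
 *   __m512d a   = _mm512_set1_pd(2.0);
 *   __m512d b   = _mm512_set1_pd(3.0);
 *   __mmask8 m  = 0x03;
 *   acc = _mm512_mask3_fmadd_pd(a, b, acc, m);  // lanes 0-1: 16.0, lanes 2-7 keep 10.0
 */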
3337
3338
3339 /* Vector permutations */
3340
3341 static __inline __m512i __DEFAULT_FN_ATTRS
3342 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3343 {
3344   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3345                                                        /* idx */ ,
3346                                                        (__v16si) __A,
3347                                                        (__v16si) __B,
3348                                                        (__mmask16) -1);
3349 }
3350
3351 static __inline__ __m512i __DEFAULT_FN_ATTRS
3352 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3353                                 __m512i __I, __m512i __B)
3354 {
3355   return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3356                                                         /* idx */ ,
3357                                                         (__v16si) __A,
3358                                                         (__v16si) __B,
3359                                                         (__mmask16) __U);
3360 }
3361
3362 static __inline__ __m512i __DEFAULT_FN_ATTRS
3363 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3364                                  __m512i __I, __m512i __B)
3365 {
3366   return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3367                                                         /* idx */ ,
3368                                                         (__v16si) __A,
3369                                                         (__v16si) __B,
3370                                                         (__mmask16) __U);
3371 }
3372
3373 static __inline __m512i __DEFAULT_FN_ATTRS
3374 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3375 {
3376   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3377                                                        /* idx */ ,
3378                                                        (__v8di) __A,
3379                                                        (__v8di) __B,
3380                                                        (__mmask8) -1);
3381 }
3382
3383 static __inline__ __m512i __DEFAULT_FN_ATTRS
3384 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3385                                 __m512i __B)
3386 {
3387   return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3388                                                        /* idx */ ,
3389                                                        (__v8di) __A,
3390                                                        (__v8di) __B,
3391                                                        (__mmask8) __U);
3392 }
3393
3394
3395 static __inline__ __m512i __DEFAULT_FN_ATTRS
3396 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3397                                  __m512i __I, __m512i __B)
3398 {
3399   return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3400                                                         /* idx */ ,
3401                                                         (__v8di) __A,
3402                                                         (__v8di) __B,
3403                                                         (__mmask8) __U);
3404 }
3405
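/* Illustrative usage sketch: permutex2var gathers each destination element
 * from the concatenation of the two data operands, indexed by the matching
 * element of __I (bit 4 of an epi32 index, or bit 3 of an epi64 index, selects
 * the second table).  Values are placeholders.
 *
 *   __m512i lo  = _mm512_set1_epi32(1);
 *   __m512i hi  = _mm512_set1_epi32(2);
 *   __m512i idx = _mm512_set1_epi32(16);                  // 16-31 index the second table
 *   __m512i r   = _mm512_permutex2var_epi32(lo, idx, hi); // every lane: 2
 */
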
3406 #define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
3407   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
3408                                    (__v8di)(__m512i)(A), \
3409                                    ((int)(I) & 0x7) + 0, \
3410                                    ((int)(I) & 0x7) + 1, \
3411                                    ((int)(I) & 0x7) + 2, \
3412                                    ((int)(I) & 0x7) + 3, \
3413                                    ((int)(I) & 0x7) + 4, \
3414                                    ((int)(I) & 0x7) + 5, \
3415                                    ((int)(I) & 0x7) + 6, \
3416                                    ((int)(I) & 0x7) + 7); })
3417
3418 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
3419   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3420                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3421                                  (__v8di)(__m512i)(W)); })
3422
3423 #define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
3424   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3425                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3426                                  (__v8di)_mm512_setzero_si512()); })
3427
3428 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
3429   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
3430                                    (__v16si)(__m512i)(A), \
3431                                    ((int)(I) & 0xf) + 0, \
3432                                    ((int)(I) & 0xf) + 1, \
3433                                    ((int)(I) & 0xf) + 2, \
3434                                    ((int)(I) & 0xf) + 3, \
3435                                    ((int)(I) & 0xf) + 4, \
3436                                    ((int)(I) & 0xf) + 5, \
3437                                    ((int)(I) & 0xf) + 6, \
3438                                    ((int)(I) & 0xf) + 7, \
3439                                    ((int)(I) & 0xf) + 8, \
3440                                    ((int)(I) & 0xf) + 9, \
3441                                    ((int)(I) & 0xf) + 10, \
3442                                    ((int)(I) & 0xf) + 11, \
3443                                    ((int)(I) & 0xf) + 12, \
3444                                    ((int)(I) & 0xf) + 13, \
3445                                    ((int)(I) & 0xf) + 14, \
3446                                    ((int)(I) & 0xf) + 15); })
3447
3448 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
3449   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3450                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3451                                 (__v16si)(__m512i)(W)); })
3452
3453 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
3454   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3455                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3456                                 (__v16si)_mm512_setzero_si512()); })
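
/* Illustrative usage sketch: alignr treats B as the low half and A as the high
 * half of a double-width vector and shifts right by whole elements (64-bit for
 * epi64, 32-bit for epi32).  Values are placeholders.
 *
 *   __m512i a = _mm512_set1_epi32(7);
 *   __m512i b = _mm512_set1_epi32(9);
 *   __m512i r = _mm512_alignr_epi32(a, b, 4);  // lanes 0-11 from b, lanes 12-15 from a
 */
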
3457 /* Vector Extract */
3458
3459 #define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
3460   (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
3461                                    (__v8df)_mm512_undefined_pd(), \
3462                                    ((I) & 1) ? 4 : 0,             \
3463                                    ((I) & 1) ? 5 : 1,             \
3464                                    ((I) & 1) ? 6 : 2,             \
3465                                    ((I) & 1) ? 7 : 3); })
3466
3467 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
3468   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
3469                                    (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
3470                                    (__v4df)(W)); })
3471
3472 #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
3473   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
3474                                    (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
3475                                    (__v4df)_mm256_setzero_pd()); })
3476
3477 #define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
3478   (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
3479                                   (__v16sf)_mm512_undefined_ps(), \
3480                                   0 + ((I) & 0x3) * 4,            \
3481                                   1 + ((I) & 0x3) * 4,            \
3482                                   2 + ((I) & 0x3) * 4,            \
3483                                   3 + ((I) & 0x3) * 4); })
3484
3485 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
3486   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
3487                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
3488                                    (__v4sf)(W)); })
3489
3490 #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
3491   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
3492                                    (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
3493                                    (__v4sf)_mm_setzero_ps()); })
3494
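/* Illustrative usage sketch: the extract macros pull one 256-bit (f64x4) or
 * 128-bit (f32x4) slice out of a 512-bit vector; the immediate selects the
 * slice.  Values are placeholders.
 *
 *   __m512d v  = _mm512_set1_pd(3.5);
 *   __m256d hi = _mm512_extractf64x4_pd(v, 1);                    // upper four doubles
 *   __m128  q2 = _mm512_extractf32x4_ps(_mm512_set1_ps(1.0f), 2); // third 128-bit group
 */
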
3495 /* Vector Blend */
3496
3497 static __inline __m512d __DEFAULT_FN_ATTRS
3498 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3499 {
3500   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3501                  (__v8df) __W,
3502                  (__v8df) __A);
3503 }
3504
3505 static __inline __m512 __DEFAULT_FN_ATTRS
3506 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3507 {
3508   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3509                 (__v16sf) __W,
3510                 (__v16sf) __A);
3511 }
3512
3513 static __inline __m512i __DEFAULT_FN_ATTRS
3514 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3515 {
3516   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3517                 (__v8di) __W,
3518                 (__v8di) __A);
3519 }
3520
3521 static __inline __m512i __DEFAULT_FN_ATTRS
3522 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3523 {
3524   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3525                 (__v16si) __W,
3526                 (__v16si) __A);
3527 }
3528
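/* Illustrative usage sketch: blend is a per-lane select driven by a k-register,
 * returning __W where the mask bit is set and __A where it is clear.  Values
 * are placeholders.
 *
 *   __m512 a = _mm512_set1_ps(0.0f);
 *   __m512 w = _mm512_set1_ps(1.0f);
 *   __m512 r = _mm512_mask_blend_ps(0xFF00, a, w);  // lanes 0-7: 0.0f, lanes 8-15: 1.0f
 */
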
3529 /* Compare */
3530
3531 #define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
3532   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3533                                           (__v16sf)(__m512)(B), (int)(P), \
3534                                           (__mmask16)-1, (int)(R)); })
3535
3536 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
3537   (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
3538                                           (__v16sf)(__m512)(B), (int)(P), \
3539                                           (__mmask16)(U), (int)(R)); })
3540
3541 #define _mm512_cmp_ps_mask(A, B, P) \
3542   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3543 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
3544   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3545
3546 #define _mm512_cmpeq_ps_mask(A, B) \
3547     _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3548 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3549     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3550
3551 #define _mm512_cmplt_ps_mask(A, B) \
3552     _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3553 #define _mm512_mask_cmplt_ps_mask(k, A, B) \
3554     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3555
3556 #define _mm512_cmple_ps_mask(A, B) \
3557     _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3558 #define _mm512_mask_cmple_ps_mask(k, A, B) \
3559     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3560
3561 #define _mm512_cmpunord_ps_mask(A, B) \
3562     _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3563 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3564     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3565
3566 #define _mm512_cmpneq_ps_mask(A, B) \
3567     _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3568 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3569     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3570
3571 #define _mm512_cmpnlt_ps_mask(A, B) \
3572     _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3573 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3574     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3575
3576 #define _mm512_cmpnle_ps_mask(A, B) \
3577     _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3578 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3579     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3580
3581 #define _mm512_cmpord_ps_mask(A, B) \
3582     _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3583 #define _mm512_mask_cmpord_ps_mask(k, A, B) \
3584     _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3585
3586 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
3587   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3588                                          (__v8df)(__m512d)(B), (int)(P), \
3589                                          (__mmask8)-1, (int)(R)); })
3590
3591 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
3592   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3593                                          (__v8df)(__m512d)(B), (int)(P), \
3594                                          (__mmask8)(U), (int)(R)); })
3595
3596 #define _mm512_cmp_pd_mask(A, B, P) \
3597   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3598 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3599   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3600
3601 #define _mm512_cmpeq_pd_mask(A, B) \
3602     _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3603 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3604     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3605
3606 #define _mm512_cmplt_pd_mask(A, B) \
3607     _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3608 #define _mm512_mask_cmplt_pd_mask(k, A, B) \
3609     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3610
3611 #define _mm512_cmple_pd_mask(A, B) \
3612     _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3613 #define _mm512_mask_cmple_pd_mask(k, A, B) \
3614     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3615
3616 #define _mm512_cmpunord_pd_mask(A, B) \
3617     _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3618 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3619     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3620
3621 #define _mm512_cmpneq_pd_mask(A, B) \
3622     _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3623 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3624     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3625
3626 #define _mm512_cmpnlt_pd_mask(A, B) \
3627     _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3628 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3629     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3630
3631 #define _mm512_cmpnle_pd_mask(A, B) \
3632     _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3633 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3634     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3635
3636 #define _mm512_cmpord_pd_mask(A, B) \
3637     _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3638 #define _mm512_mask_cmpord_pd_mask(k, A, B) \
3639     _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3640
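/* Illustrative usage sketch: the comparison macros produce a k-mask with one
 * bit per lane, which can then drive any of the masked operations or blends
 * above.  Values are placeholders.
 *
 *   __m512d x = _mm512_set1_pd(1.0);
 *   __m512d y = _mm512_set1_pd(2.0);
 *   __mmask8 lt = _mm512_cmplt_pd_mask(x, y);      // 0xFF here: every lane compares true
 *   __m512d  r  = _mm512_mask_blend_pd(lt, y, x);  // take x where x < y, otherwise y
 */
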
3641 /* Conversion */
3642
3643 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
3644   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3645                                              (__v16si)_mm512_undefined_epi32(), \
3646                                              (__mmask16)-1, (int)(R)); })
3647
3648 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
3649   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3650                                              (__v16si)(__m512i)(W), \
3651                                              (__mmask16)(U), (int)(R)); })
3652
3653 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
3654   (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3655                                              (__v16si)_mm512_setzero_si512(), \
3656                                              (__mmask16)(U), (int)(R)); })
3657
3658
3659 static __inline __m512i __DEFAULT_FN_ATTRS
3660 _mm512_cvttps_epu32(__m512 __A)
3661 {
3662   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3663                   (__v16si)
3664                   _mm512_setzero_si512 (),
3665                   (__mmask16) -1,
3666                   _MM_FROUND_CUR_DIRECTION);
3667 }
3668
3669 static __inline__ __m512i __DEFAULT_FN_ATTRS
3670 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3671 {
3672   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3673                    (__v16si) __W,
3674                    (__mmask16) __U,
3675                    _MM_FROUND_CUR_DIRECTION);
3676 }
3677
3678 static __inline__ __m512i __DEFAULT_FN_ATTRS
3679 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3680 {
3681   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3682                    (__v16si) _mm512_setzero_si512 (),
3683                    (__mmask16) __U,
3684                    _MM_FROUND_CUR_DIRECTION);
3685 }
3686
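/* Illustrative usage sketch: cvttps_epu32 truncates toward zero while
 * converting to unsigned 32-bit integers; the *_round* macro forms add an
 * explicit rounding/SAE immediate.  Values are placeholders.
 *
 *   __m512  f = _mm512_set1_ps(3.9f);
 *   __m512i u = _mm512_cvttps_epu32(f);   // every lane: 3
 */
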
3687 #define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
3688   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3689                                           (__v16sf)_mm512_setzero_ps(), \
3690                                           (__mmask16)-1, (int)(R)); })
3691
3692 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
3693   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3694                                           (__v16sf)(__m512)(W), \
3695                                           (__mmask16)(U), (int)(R)); })
3696
3697 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
3698   (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3699                                           (__v16sf)_mm512_setzero_ps(), \
3700                                           (__mmask16)(U), (int)(R)); })
3701
3702 #define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
3703   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3704                                            (__v16sf)_mm512_setzero_ps(), \
3705                                            (__mmask16)-1, (int)(R)); })
3706
3707 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
3708   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3709                                            (__v16sf)(__m512)(W), \
3710                                            (__mmask16)(U), (int)(R)); })
3711
3712 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
3713   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3714                                            (__v16sf)_mm512_setzero_ps(), \
3715                                            (__mmask16)(U), (int)(R)); })
3716
3717 static __inline__ __m512 __DEFAULT_FN_ATTRS
3718 _mm512_cvtepu32_ps (__m512i __A)
3719 {
3720   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3721                  (__v16sf) _mm512_undefined_ps (),
3722                  (__mmask16) -1,
3723                  _MM_FROUND_CUR_DIRECTION);
3724 }
3725
3726 static __inline__ __m512 __DEFAULT_FN_ATTRS
3727 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3728 {
3729   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3730                  (__v16sf) __W,
3731                  (__mmask16) __U,
3732                  _MM_FROUND_CUR_DIRECTION);
3733 }
3734
3735 static __inline__ __m512 __DEFAULT_FN_ATTRS
3736 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3737 {
3738   return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3739                  (__v16sf) _mm512_setzero_ps (),
3740                  (__mmask16) __U,
3741                  _MM_FROUND_CUR_DIRECTION);
3742 }
3743
3744 static __inline __m512d __DEFAULT_FN_ATTRS
3745 _mm512_cvtepi32_pd(__m256i __A)
3746 {
3747   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3748 }
3749
3750 static __inline__ __m512d __DEFAULT_FN_ATTRS
3751 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3752 {
3753   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3754                                               (__v8df)_mm512_cvtepi32_pd(__A),
3755                                               (__v8df)__W);
3756 }
3757
3758 static __inline__ __m512d __DEFAULT_FN_ATTRS
3759 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3760 {
3761   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3762                                               (__v8df)_mm512_cvtepi32_pd(__A),
3763                                               (__v8df)_mm512_setzero_pd());
3764 }
3765
3766 static __inline__ __m512d __DEFAULT_FN_ATTRS
3767 _mm512_cvtepi32lo_pd(__m512i __A)
3768 {
3769   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3770 }
3771
3772 static __inline__ __m512d __DEFAULT_FN_ATTRS
3773 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
3774 {
3775   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3776 }
3777
3778 static __inline__ __m512 __DEFAULT_FN_ATTRS
3779 _mm512_cvtepi32_ps (__m512i __A)
3780 {
3781   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3782                 (__v16sf) _mm512_undefined_ps (),
3783                 (__mmask16) -1,
3784                 _MM_FROUND_CUR_DIRECTION);
3785 }
3786
3787 static __inline__ __m512 __DEFAULT_FN_ATTRS
3788 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3789 {
3790   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3791                 (__v16sf) __W,
3792                 (__mmask16) __U,
3793                 _MM_FROUND_CUR_DIRECTION);
3794 }
3795
3796 static __inline__ __m512 __DEFAULT_FN_ATTRS
3797 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3798 {
3799   return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3800                 (__v16sf) _mm512_setzero_ps (),
3801                 (__mmask16) __U,
3802                 _MM_FROUND_CUR_DIRECTION);
3803 }
3804
3805 static __inline __m512d __DEFAULT_FN_ATTRS
3806 _mm512_cvtepu32_pd(__m256i __A)
3807 {
3808   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3809 }
3810
3811 static __inline__ __m512d __DEFAULT_FN_ATTRS
3812 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3813 {
3814   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3815                                               (__v8df)_mm512_cvtepu32_pd(__A),
3816                                               (__v8df)__W);
3817 }
3818
3819 static __inline__ __m512d __DEFAULT_FN_ATTRS
3820 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3821 {
3822   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3823                                               (__v8df)_mm512_cvtepu32_pd(__A),
3824                                               (__v8df)_mm512_setzero_pd());
3825 }
3826
3827 static __inline__ __m512d __DEFAULT_FN_ATTRS
3828 _mm512_cvtepu32lo_pd(__m512i __A)
3829 {
3830   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3831 }
3832
3833 static __inline__ __m512d __DEFAULT_FN_ATTRS
3834 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
3835 {
3836   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3837 }
3838
3839 #define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
3840   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3841                                           (__v8sf)_mm256_setzero_ps(), \
3842                                           (__mmask8)-1, (int)(R)); })
3843
3844 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
3845   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3846                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
3847                                           (int)(R)); })
3848
3849 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
3850   (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3851                                           (__v8sf)_mm256_setzero_ps(), \
3852                                           (__mmask8)(U), (int)(R)); })
3853
3854 static __inline__ __m256 __DEFAULT_FN_ATTRS
3855 _mm512_cvtpd_ps (__m512d __A)
3856 {
3857   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3858                 (__v8sf) _mm256_undefined_ps (),
3859                 (__mmask8) -1,
3860                 _MM_FROUND_CUR_DIRECTION);
3861 }
3862
3863 static __inline__ __m256 __DEFAULT_FN_ATTRS
3864 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3865 {
3866   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3867                 (__v8sf) __W,
3868                 (__mmask8) __U,
3869                 _MM_FROUND_CUR_DIRECTION);
3870 }
3871
3872 static __inline__ __m256 __DEFAULT_FN_ATTRS
3873 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3874 {
3875   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3876                 (__v8sf) _mm256_setzero_ps (),
3877                 (__mmask8) __U,
3878                 _MM_FROUND_CUR_DIRECTION);
3879 }
3880
3881 static __inline__ __m512 __DEFAULT_FN_ATTRS
3882 _mm512_cvtpd_pslo (__m512d __A)
3883 {
3884   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3885                 (__v8sf) _mm256_setzero_ps (),
3886                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3887 }
3888
3889 static __inline__ __m512 __DEFAULT_FN_ATTRS
3890 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U, __m512d __A)
3891 {
3892   return (__m512) __builtin_shufflevector (
3893                 (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3894                                                __U, __A),
3895                 (__v8sf) _mm256_setzero_ps (),
3896                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3897 }
3898
3899 #define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
3900   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3901                                             (__v16hi)_mm256_undefined_si256(), \
3902                                             (__mmask16)-1); })
3903
3904 #define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
3905   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3906                                             (__v16hi)(__m256i)(W), \
3907                                             (__mmask16)(U)); })
3908
3909 #define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
3910   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3911                                             (__v16hi)_mm256_setzero_si256(), \
3912                                             (__mmask16)(U)); })
3913
3914 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
3915   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3916                                             (__v16hi)_mm256_setzero_si256(), \
3917                                             (__mmask16)-1); })
3918
3919 #define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
3920   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3921                                             (__v16hi)(__m256i)(W), \
3922                                             (__mmask16)(U)); })
3923
3924 #define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
3925   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3926                                             (__v16hi)_mm256_setzero_si256(), \
3927                                             (__mmask16)(U)); })
3928
3929 #define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
3930   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3931                                            (__v16sf)_mm512_undefined_ps(), \
3932                                            (__mmask16)-1, (int)(R)); })
3933
3934 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
3935   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3936                                            (__v16sf)(__m512)(W), \
3937                                            (__mmask16)(U), (int)(R)); })
3938
3939 #define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
3940   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3941                                            (__v16sf)_mm512_setzero_ps(), \
3942                                            (__mmask16)(U), (int)(R)); })
3943
3944
3945 static __inline __m512 __DEFAULT_FN_ATTRS
3946 _mm512_cvtph_ps(__m256i __A)
3947 {
3948   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3949                 (__v16sf)
3950                 _mm512_setzero_ps (),
3951                 (__mmask16) -1,
3952                 _MM_FROUND_CUR_DIRECTION);
3953 }
3954
3955 static __inline__ __m512 __DEFAULT_FN_ATTRS
3956 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3957 {
3958   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3959                  (__v16sf) __W,
3960                  (__mmask16) __U,
3961                  _MM_FROUND_CUR_DIRECTION);
3962 }
3963
3964 static __inline__ __m512 __DEFAULT_FN_ATTRS
3965 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3966 {
3967   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3968                  (__v16sf) _mm512_setzero_ps (),
3969                  (__mmask16) __U,
3970                  _MM_FROUND_CUR_DIRECTION);
3971 }
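
/* A minimal usage sketch (illustrative): converting to half precision and
 * back.  _mm512_cvtps_ph takes a rounding-control immediate for the
 * float-to-half narrowing; _MM_FROUND_TO_NEAREST_INT selects
 * round-to-nearest-even.  `src` is assumed to point to 16 floats.
 *
 *   __m512  f  = _mm512_loadu_ps(src);
 *   __m256i h  = _mm512_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT); // 16 halfs
 *   __m512  f2 = _mm512_cvtph_ps(h);                            // back to float
 */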
3972
3973 #define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
3974   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3975                                             (__v8si)_mm256_setzero_si256(), \
3976                                             (__mmask8)-1, (int)(R)); })
3977
3978 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
3979   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3980                                             (__v8si)(__m256i)(W), \
3981                                             (__mmask8)(U), (int)(R)); })
3982
3983 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
3984   (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3985                                             (__v8si)_mm256_setzero_si256(), \
3986                                             (__mmask8)(U), (int)(R)); })
3987
3988 static __inline __m256i __DEFAULT_FN_ATTRS
3989 _mm512_cvttpd_epi32(__m512d __a)
3990 {
3991   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3992                                                    (__v8si)_mm256_setzero_si256(),
3993                                                    (__mmask8) -1,
3994                                                     _MM_FROUND_CUR_DIRECTION);
3995 }
3996
3997 static __inline__ __m256i __DEFAULT_FN_ATTRS
3998 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3999 {
4000   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4001                   (__v8si) __W,
4002                   (__mmask8) __U,
4003                   _MM_FROUND_CUR_DIRECTION);
4004 }
4005
4006 static __inline__ __m256i __DEFAULT_FN_ATTRS
4007 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4008 {
4009   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4010                   (__v8si) _mm256_setzero_si256 (),
4011                   (__mmask8) __U,
4012                   _MM_FROUND_CUR_DIRECTION);
4013 }
4014
4015 #define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
4016   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4017                                             (__v16si)_mm512_setzero_si512(), \
4018                                             (__mmask16)-1, (int)(R)); })
4019
4020 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
4021   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4022                                             (__v16si)(__m512i)(W), \
4023                                             (__mmask16)(U), (int)(R)); })
4024
4025 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
4026   (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
4027                                             (__v16si)_mm512_setzero_si512(), \
4028                                             (__mmask16)(U), (int)(R)); })
4029
4030 static __inline __m512i __DEFAULT_FN_ATTRS
4031 _mm512_cvttps_epi32(__m512 __a)
4032 {
4033   return (__m512i)
4034     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4035                                      (__v16si) _mm512_setzero_si512 (),
4036                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4037 }
4038
4039 static __inline__ __m512i __DEFAULT_FN_ATTRS
4040 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4041 {
4042   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4043                   (__v16si) __W,
4044                   (__mmask16) __U,
4045                   _MM_FROUND_CUR_DIRECTION);
4046 }
4047
4048 static __inline__ __m512i __DEFAULT_FN_ATTRS
4049 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4050 {
4051   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4052                   (__v16si) _mm512_setzero_si512 (),
4053                   (__mmask16) __U,
4054                   _MM_FROUND_CUR_DIRECTION);
4055 }
4056
4057 #define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
4058   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4059                                            (__v16si)_mm512_setzero_si512(), \
4060                                            (__mmask16)-1, (int)(R)); })
4061
4062 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
4063   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4064                                            (__v16si)(__m512i)(W), \
4065                                            (__mmask16)(U), (int)(R)); })
4066
4067 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
4068   (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
4069                                            (__v16si)_mm512_setzero_si512(), \
4070                                            (__mmask16)(U), (int)(R)); })
4071
4072 static __inline__ __m512i __DEFAULT_FN_ATTRS
4073 _mm512_cvtps_epi32 (__m512 __A)
4074 {
4075   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4076                  (__v16si) _mm512_undefined_epi32 (),
4077                  (__mmask16) -1,
4078                  _MM_FROUND_CUR_DIRECTION);
4079 }
4080
4081 static __inline__ __m512i __DEFAULT_FN_ATTRS
4082 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4083 {
4084   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4085                  (__v16si) __W,
4086                  (__mmask16) __U,
4087                  _MM_FROUND_CUR_DIRECTION);
4088 }
4089
4090 static __inline__ __m512i __DEFAULT_FN_ATTRS
4091 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4092 {
4093   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4094                  (__v16si)
4095                  _mm512_setzero_si512 (),
4096                  (__mmask16) __U,
4097                  _MM_FROUND_CUR_DIRECTION);
4098 }
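
/* A minimal usage sketch (illustrative): the *_round_* macros take an explicit
 * rounding control instead of the current MXCSR setting.  A static rounding
 * mode is normally combined with _MM_FROUND_NO_EXC (defined in <smmintrin.h>,
 * visible through <immintrin.h>).  `src` is assumed to point to 16 floats.
 *
 *   __m512  f = _mm512_loadu_ps(src);
 *   __m512i r = _mm512_cvt_roundps_epi32(f, _MM_FROUND_TO_ZERO |
 *                                           _MM_FROUND_NO_EXC); // truncate
 */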
4099
4100 #define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
4101   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4102                                            (__v8si)_mm256_setzero_si256(), \
4103                                            (__mmask8)-1, (int)(R)); })
4104
4105 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
4106   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4107                                            (__v8si)(__m256i)(W), \
4108                                            (__mmask8)(U), (int)(R)); })
4109
4110 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
4111   (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
4112                                            (__v8si)_mm256_setzero_si256(), \
4113                                            (__mmask8)(U), (int)(R)); })
4114
4115 static __inline__ __m256i __DEFAULT_FN_ATTRS
4116 _mm512_cvtpd_epi32 (__m512d __A)
4117 {
4118   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4119                  (__v8si)
4120                  _mm256_undefined_si256 (),
4121                  (__mmask8) -1,
4122                  _MM_FROUND_CUR_DIRECTION);
4123 }
4124
4125 static __inline__ __m256i __DEFAULT_FN_ATTRS
4126 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4127 {
4128   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4129                  (__v8si) __W,
4130                  (__mmask8) __U,
4131                  _MM_FROUND_CUR_DIRECTION);
4132 }
4133
4134 static __inline__ __m256i __DEFAULT_FN_ATTRS
4135 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4136 {
4137   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4138                  (__v8si)
4139                  _mm256_setzero_si256 (),
4140                  (__mmask8) __U,
4141                  _MM_FROUND_CUR_DIRECTION);
4142 }
4143
4144 #define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
4145   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4146                                             (__v16si)_mm512_setzero_si512(), \
4147                                             (__mmask16)-1, (int)(R)); })
4148
4149 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
4150   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4151                                             (__v16si)(__m512i)(W), \
4152                                             (__mmask16)(U), (int)(R)); })
4153
4154 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
4155   (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4156                                             (__v16si)_mm512_setzero_si512(), \
4157                                             (__mmask16)(U), (int)(R)); })
4158
4159 static __inline__ __m512i __DEFAULT_FN_ATTRS
4160 _mm512_cvtps_epu32 (__m512 __A)
4161 {
4162   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4163                   (__v16si)
4164                   _mm512_undefined_epi32 (),
4165                   (__mmask16) -1,
4166                   _MM_FROUND_CUR_DIRECTION);
4167 }
4168
4169 static __inline__ __m512i __DEFAULT_FN_ATTRS
4170 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4171 {
4172   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4173                   (__v16si) __W,
4174                   (__mmask16) __U,
4175                   _MM_FROUND_CUR_DIRECTION);
4176 }
4177
4178 static __inline__ __m512i __DEFAULT_FN_ATTRS
4179 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
4180 {
4181   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4182                   (__v16si)
4183                   _mm512_setzero_si512 (),
4184                   (__mmask16) __U,
4185                   _MM_FROUND_CUR_DIRECTION);
4186 }
4187
4188 #define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
4189   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4190                                             (__v8si)_mm256_setzero_si256(), \
4191                                             (__mmask8)-1, (int)(R)); })
4192
4193 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
4194   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4195                                             (__v8si)(__m256i)(W), \
4196                                             (__mmask8)(U), (int)(R)); })
4197
4198 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
4199   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4200                                             (__v8si)_mm256_setzero_si256(), \
4201                                             (__mmask8)(U), (int)(R)); })
4202
4203 static __inline__ __m256i __DEFAULT_FN_ATTRS
4204 _mm512_cvtpd_epu32 (__m512d __A)
4205 {
4206   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4207                   (__v8si)
4208                   _mm256_undefined_si256 (),
4209                   (__mmask8) -1,
4210                   _MM_FROUND_CUR_DIRECTION);
4211 }
4212
4213 static __inline__ __m256i __DEFAULT_FN_ATTRS
4214 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4215 {
4216   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4217                   (__v8si) __W,
4218                   (__mmask8) __U,
4219                   _MM_FROUND_CUR_DIRECTION);
4220 }
4221
4222 static __inline__ __m256i __DEFAULT_FN_ATTRS
4223 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4224 {
4225   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4226                   (__v8si)
4227                   _mm256_setzero_si256 (),
4228                   (__mmask8) __U,
4229                   _MM_FROUND_CUR_DIRECTION);
4230 }
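
/* A minimal usage sketch (illustrative): the double-to-integer conversions
 * narrow eight 64-bit lanes to eight 32-bit lanes, so the result is a 256-bit
 * vector.  `src` is assumed to point to 8 doubles.
 *
 *   __m512d d = _mm512_loadu_pd(src);
 *   __m256i u = _mm512_cvtpd_epu32(d);   // 8 unsigned 32-bit results
 */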
4231
4232 /* Unpack and Interleave */
4233
4234 static __inline __m512d __DEFAULT_FN_ATTRS
4235 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4236 {
4237   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4238                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4239 }
4240
4241 static __inline__ __m512d __DEFAULT_FN_ATTRS
4242 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4243 {
4244   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4245                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4246                                            (__v8df)__W);
4247 }
4248
4249 static __inline__ __m512d __DEFAULT_FN_ATTRS
4250 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4251 {
4252   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4253                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4254                                            (__v8df)_mm512_setzero_pd());
4255 }
4256
4257 static __inline __m512d __DEFAULT_FN_ATTRS
4258 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4259 {
4260   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4261                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4262 }
4263
4264 static __inline__ __m512d __DEFAULT_FN_ATTRS
4265 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4266 {
4267   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4268                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4269                                            (__v8df)__W);
4270 }
4271
4272 static __inline__ __m512d __DEFAULT_FN_ATTRS
4273 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4274 {
4275   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4276                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4277                                            (__v8df)_mm512_setzero_pd());
4278 }
4279
4280 static __inline __m512 __DEFAULT_FN_ATTRS
4281 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4282 {
4283   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4284                                          2,    18,    3,    19,
4285                                          2+4,  18+4,  3+4,  19+4,
4286                                          2+8,  18+8,  3+8,  19+8,
4287                                          2+12, 18+12, 3+12, 19+12);
4288 }
4289
4290 static __inline__ __m512 __DEFAULT_FN_ATTRS
4291 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4292 {
4293   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4294                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4295                                           (__v16sf)__W);
4296 }
4297
4298 static __inline__ __m512 __DEFAULT_FN_ATTRS
4299 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4300 {
4301   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4302                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4303                                           (__v16sf)_mm512_setzero_ps());
4304 }
4305
4306 static __inline __m512 __DEFAULT_FN_ATTRS
4307 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4308 {
4309   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4310                                          0,    16,    1,    17,
4311                                          0+4,  16+4,  1+4,  17+4,
4312                                          0+8,  16+8,  1+8,  17+8,
4313                                          0+12, 16+12, 1+12, 17+12);
4314 }
4315
4316 static __inline__ __m512 __DEFAULT_FN_ATTRS
4317 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4318 {
4319   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4320                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4321                                           (__v16sf)__W);
4322 }
4323
4324 static __inline__ __m512 __DEFAULT_FN_ATTRS
4325 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4326 {
4327   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4328                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4329                                           (__v16sf)_mm512_setzero_ps());
4330 }
4331
4332 static __inline__ __m512i __DEFAULT_FN_ATTRS
4333 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4334 {
4335   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4336                                           2,    18,    3,    19,
4337                                           2+4,  18+4,  3+4,  19+4,
4338                                           2+8,  18+8,  3+8,  19+8,
4339                                           2+12, 18+12, 3+12, 19+12);
4340 }
4341
4342 static __inline__ __m512i __DEFAULT_FN_ATTRS
4343 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4344 {
4345   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4346                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
4347                                        (__v16si)__W);
4348 }
4349
4350 static __inline__ __m512i __DEFAULT_FN_ATTRS
4351 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4352 {
4353   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4354                                        (__v16si)_mm512_unpackhi_epi32(__A, __B),
4355                                        (__v16si)_mm512_setzero_si512());
4356 }
4357
4358 static __inline__ __m512i __DEFAULT_FN_ATTRS
4359 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4360 {
4361   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4362                                           0,    16,    1,    17,
4363                                           0+4,  16+4,  1+4,  17+4,
4364                                           0+8,  16+8,  1+8,  17+8,
4365                                           0+12, 16+12, 1+12, 17+12);
4366 }
4367
4368 static __inline__ __m512i __DEFAULT_FN_ATTRS
4369 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4370 {
4371   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4372                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
4373                                        (__v16si)__W);
4374 }
4375
4376 static __inline__ __m512i __DEFAULT_FN_ATTRS
4377 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4378 {
4379   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4380                                        (__v16si)_mm512_unpacklo_epi32(__A, __B),
4381                                        (__v16si)_mm512_setzero_si512());
4382 }
4383
4384 static __inline__ __m512i __DEFAULT_FN_ATTRS
4385 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4386 {
4387   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4388                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4389 }
4390
4391 static __inline__ __m512i __DEFAULT_FN_ATTRS
4392 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4393 {
4394   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4395                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
4396                                         (__v8di)__W);
4397 }
4398
4399 static __inline__ __m512i __DEFAULT_FN_ATTRS
4400 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4401 {
4402   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4403                                         (__v8di)_mm512_unpackhi_epi64(__A, __B),
4404                                         (__v8di)_mm512_setzero_si512());
4405 }
4406
4407 static __inline__ __m512i __DEFAULT_FN_ATTRS
4408 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4409 {
4410   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4411                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4412 }
4413
4414 static __inline__ __m512i __DEFAULT_FN_ATTRS
4415 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4416 {
4417   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4418                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
4419                                         (__v8di)__W);
4420 }
4421
4422 static __inline__ __m512i __DEFAULT_FN_ATTRS
4423 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4424 {
4425   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4426                                         (__v8di)_mm512_unpacklo_epi64(__A, __B),
4427                                         (__v8di)_mm512_setzero_si512());
4428 }
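
/* A minimal usage sketch (illustrative): like their SSE/AVX counterparts, the
 * 512-bit unpacks interleave within each 128-bit lane rather than across the
 * whole register.  For dword vectors a = {a0..a15} and b = {b0..b15}:
 *
 *   __m512i lo = _mm512_unpacklo_epi32(a, b);
 *   // lo == { a0,b0,a1,b1,  a4,b4,a5,b5,  a8,b8,a9,b9,  a12,b12,a13,b13 }
 */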
4429
4430 /* Bit Test */
4431
4432 static __inline __mmask16 __DEFAULT_FN_ATTRS
4433 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
4434 {
4435   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4436             (__v16si) __B,
4437             (__mmask16) -1);
4438 }
4439
4440 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4441 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
4442 {
4443   return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4444                  (__v16si) __B, __U);
4445 }
4446
4447 static __inline __mmask8 __DEFAULT_FN_ATTRS
4448 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
4449 {
4450   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
4451                  (__v8di) __B,
4452                  (__mmask8) -1);
4453 }
4454
4455 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4456 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
4457 {
4458   return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
4459 }
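
/* A minimal usage sketch (illustrative): the test intrinsics set mask bit i
 * when (__A[i] & __B[i]) != 0, which makes them convenient for per-element
 * flag checks.  `p` is assumed to point to 64 bytes of integer data.
 *
 *   __m512i   v = _mm512_loadu_si512(p);
 *   __mmask16 m = _mm512_test_epi32_mask(v, _mm512_set1_epi32(0x80)); // bit 7 set?
 */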
4460
4461
4462 /* SIMD load ops */
4463
4464 static __inline __m512i __DEFAULT_FN_ATTRS
4465 _mm512_loadu_si512 (void const *__P)
4466 {
4467   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4468                   (__v16si)
4469                   _mm512_setzero_si512 (),
4470                   (__mmask16) -1);
4471 }
4472
4473 static __inline __m512i __DEFAULT_FN_ATTRS
4474 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4475 {
4476   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4477                   (__v16si) __W,
4478                   (__mmask16) __U);
4479 }
4480
4481
4482 static __inline __m512i __DEFAULT_FN_ATTRS
4483 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4484 {
4485   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4486                                                      (__v16si)
4487                                                      _mm512_setzero_si512 (),
4488                                                      (__mmask16) __U);
4489 }
4490
4491 static __inline __m512i __DEFAULT_FN_ATTRS
4492 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4493 {
4494   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4495                   (__v8di) __W,
4496                   (__mmask8) __U);
4497 }
4498
4499 static __inline __m512i __DEFAULT_FN_ATTRS
4500 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4501 {
4502   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4503                                                      (__v8di)
4504                                                      _mm512_setzero_si512 (),
4505                                                      (__mmask8) __U);
4506 }
4507
4508 static __inline __m512 __DEFAULT_FN_ATTRS
4509 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4510 {
4511   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4512                    (__v16sf) __W,
4513                    (__mmask16) __U);
4514 }
4515
4516 static __inline __m512 __DEFAULT_FN_ATTRS
4517 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4518 {
4519   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4520                                                   (__v16sf)
4521                                                   _mm512_setzero_ps (),
4522                                                   (__mmask16) __U);
4523 }
4524
4525 static __inline __m512d __DEFAULT_FN_ATTRS
4526 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4527 {
4528   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4529                 (__v8df) __W,
4530                 (__mmask8) __U);
4531 }
4532
4533 static __inline __m512d __DEFAULT_FN_ATTRS
4534 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4535 {
4536   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4537                                                    (__v8df)
4538                                                    _mm512_setzero_pd (),
4539                                                    (__mmask8) __U);
4540 }
4541
4542 static __inline __m512d __DEFAULT_FN_ATTRS
4543 _mm512_loadu_pd(double const *__p)
4544 {
4545   struct __loadu_pd {
4546     __m512d __v;
4547   } __attribute__((__packed__, __may_alias__));
4548   return ((struct __loadu_pd*)__p)->__v;
4549 }
4550
4551 static __inline __m512 __DEFAULT_FN_ATTRS
4552 _mm512_loadu_ps(float const *__p)
4553 {
4554   struct __loadu_ps {
4555     __m512 __v;
4556   } __attribute__((__packed__, __may_alias__));
4557   return ((struct __loadu_ps*)__p)->__v;
4558 }
4559
4560 static __inline __m512 __DEFAULT_FN_ATTRS
4561 _mm512_load_ps(float const *__p)
4562 {
4563   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4564                                                   (__v16sf)
4565                                                   _mm512_setzero_ps (),
4566                                                   (__mmask16) -1);
4567 }
4568
4569 static __inline __m512 __DEFAULT_FN_ATTRS
4570 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4571 {
4572   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4573                    (__v16sf) __W,
4574                    (__mmask16) __U);
4575 }
4576
4577 static __inline __m512 __DEFAULT_FN_ATTRS
4578 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4579 {
4580   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4581                                                   (__v16sf)
4582                                                   _mm512_setzero_ps (),
4583                                                   (__mmask16) __U);
4584 }
4585
4586 static __inline __m512d __DEFAULT_FN_ATTRS
4587 _mm512_load_pd(double const *__p)
4588 {
4589   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4590                                                    (__v8df)
4591                                                    _mm512_setzero_pd (),
4592                                                    (__mmask8) -1);
4593 }
4594
4595 static __inline __m512d __DEFAULT_FN_ATTRS
4596 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4597 {
4598   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4599                           (__v8df) __W,
4600                           (__mmask8) __U);
4601 }
4602
4603 static __inline __m512d __DEFAULT_FN_ATTRS
4604 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4605 {
4606   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4607                                                    (__v8df)
4608                                                    _mm512_setzero_pd (),
4609                                                    (__mmask8) __U);
4610 }
4611
4612 static __inline __m512i __DEFAULT_FN_ATTRS
4613 _mm512_load_si512 (void const *__P)
4614 {
4615   return *(__m512i *) __P;
4616 }
4617
4618 static __inline __m512i __DEFAULT_FN_ATTRS
4619 _mm512_load_epi32 (void const *__P)
4620 {
4621   return *(__m512i *) __P;
4622 }
4623
4624 static __inline __m512i __DEFAULT_FN_ATTRS
4625 _mm512_load_epi64 (void const *__P)
4626 {
4627   return *(__m512i *) __P;
4628 }
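
/* A minimal usage sketch (illustrative): _mm512_load_* requires 64-byte
 * alignment, _mm512_loadu_* accepts any address, and the masked forms only
 * access the selected elements.  `aligned_src` and `src` are assumed float
 * pointers, the former 64-byte aligned.
 *
 *   __m512 a = _mm512_load_ps(aligned_src);
 *   __m512 u = _mm512_loadu_ps(src);
 *   __m512 t = _mm512_maskz_loadu_ps(0x000F, src);   // low 4 floats only
 */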
4629
4630 /* SIMD store ops */
4631
4632 static __inline void __DEFAULT_FN_ATTRS
4633 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4634 {
4635   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4636                                      (__mmask8) __U);
4637 }
4638
4639 static __inline void __DEFAULT_FN_ATTRS
4640 _mm512_storeu_si512 (void *__P, __m512i __A)
4641 {
4642   __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4643             (__mmask16) -1);
4644 }
4645
4646 static __inline void __DEFAULT_FN_ATTRS
4647 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4648 {
4649   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4650                                      (__mmask16) __U);
4651 }
4652
4653 static __inline void __DEFAULT_FN_ATTRS
4654 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4655 {
4656   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4657 }
4658
4659 static __inline void __DEFAULT_FN_ATTRS
4660 _mm512_storeu_pd(void *__P, __m512d __A)
4661 {
4662   __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4663 }
4664
4665 static __inline void __DEFAULT_FN_ATTRS
4666 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4667 {
4668   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4669                                    (__mmask16) __U);
4670 }
4671
4672 static __inline void __DEFAULT_FN_ATTRS
4673 _mm512_storeu_ps(void *__P, __m512 __A)
4674 {
4675   __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4676 }
4677
4678 static __inline void __DEFAULT_FN_ATTRS
4679 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4680 {
4681   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4682 }
4683
4684 static __inline void __DEFAULT_FN_ATTRS
4685 _mm512_store_pd(void *__P, __m512d __A)
4686 {
4687   *(__m512d*)__P = __A;
4688 }
4689
4690 static __inline void __DEFAULT_FN_ATTRS
4691 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4692 {
4693   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4694                                    (__mmask16) __U);
4695 }
4696
4697 static __inline void __DEFAULT_FN_ATTRS
4698 _mm512_store_ps(void *__P, __m512 __A)
4699 {
4700   *(__m512*)__P = __A;
4701 }
4702
4703 static __inline void __DEFAULT_FN_ATTRS
4704 _mm512_store_si512 (void *__P, __m512i __A)
4705 {
4706   *(__m512i *) __P = __A;
4707 }
4708
4709 static __inline void __DEFAULT_FN_ATTRS
4710 _mm512_store_epi32 (void *__P, __m512i __A)
4711 {
4712   *(__m512i *) __P = __A;
4713 }
4714
4715 static __inline void __DEFAULT_FN_ATTRS
4716 _mm512_store_epi64 (void *__P, __m512i __A)
4717 {
4718   *(__m512i *) __P = __A;
4719 }
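
/* A minimal usage sketch (illustrative): masked loads and stores make it easy
 * to handle a tail shorter than a full vector.  Assuming `n` (< 16) floats
 * remain at `src`/`dst`:
 *
 *   __mmask16 tail = (__mmask16)((1U << n) - 1);     // low n bits set
 *   __m512    v    = _mm512_maskz_loadu_ps(tail, src);
 *   _mm512_mask_storeu_ps(dst, tail, v);             // writes only n floats
 */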
4720
4721 /* Mask ops */
4722
4723 static __inline __mmask16 __DEFAULT_FN_ATTRS
4724 _mm512_knot(__mmask16 __M)
4725 {
4726   return __builtin_ia32_knothi(__M);
4727 }
4728
4729 /* Integer compare */
4730
4731 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4732 _mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
4733   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4734                                                    (__mmask16)-1);
4735 }
4736
4737 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4738 _mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4739   return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4740                                                    __u);
4741 }
4742
4743 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4744 _mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
4745   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4746                                                  (__mmask16)-1);
4747 }
4748
4749 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4750 _mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4751   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4752                                                  __u);
4753 }
4754
4755 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4756 _mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4757   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4758                                                   __u);
4759 }
4760
4761 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4762 _mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
4763   return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4764                                                   (__mmask8)-1);
4765 }
4766
4767 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4768 _mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
4769   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4770                                                 (__mmask8)-1);
4771 }
4772
4773 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4774 _mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4775   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4776                                                 __u);
4777 }
4778
4779 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4780 _mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
4781   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4782                                                 (__mmask16)-1);
4783 }
4784
4785 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4786 _mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4787   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4788                                                 __u);
4789 }
4790
4791 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4792 _mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
4793   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4794                                                  (__mmask16)-1);
4795 }
4796
4797 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4798 _mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4799   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4800                                                  __u);
4801 }
4802
4803 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4804 _mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
4805   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4806                                                (__mmask8)-1);
4807 }
4808
4809 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4810 _mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4811   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4812                                                __u);
4813 }
4814
4815 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4816 _mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
4817   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4818                                                 (__mmask8)-1);
4819 }
4820
4821 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4822 _mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4823   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4824                                                 __u);
4825 }
4826
4827 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4828 _mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
4829   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4830                                                    (__mmask16)-1);
4831 }
4832
4833 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4834 _mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4835   return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4836                                                    __u);
4837 }
4838
4839 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4840 _mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
4841   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4842                                                  (__mmask16)-1);
4843 }
4844
4845 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4846 _mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4847   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4848                                                  __u);
4849 }
4850
4851 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4852 _mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4853   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4854                                                   __u);
4855 }
4856
4857 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4858 _mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
4859   return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4860                                                   (__mmask8)-1);
4861 }
4862
4863 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4864 _mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
4865   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4866                                                 (__mmask8)-1);
4867 }
4868
4869 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4870 _mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4871   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4872                                                 __u);
4873 }
4874
4875 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4876 _mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
4877   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4878                                                 (__mmask16)-1);
4879 }
4880
4881 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4882 _mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4883   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4884                                                 __u);
4885 }
4886
4887 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4888 _mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
4889   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4890                                                  (__mmask16)-1);
4891 }
4892
4893 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4894 _mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4895   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4896                                                  __u);
4897 }
4898
4899 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4900 _mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
4901   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4902                                                (__mmask8)-1);
4903 }
4904
4905 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4906 _mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4907   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4908                                                __u);
4909 }
4910
4911 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4912 _mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
4913   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4914                                                 (__mmask8)-1);
4915 }
4916
4917 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4918 _mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4919   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4920                                                 __u);
4921 }
4922
4923 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4924 _mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
4925   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4926                                                 (__mmask16)-1);
4927 }
4928
4929 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4930 _mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4931   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4932                                                 __u);
4933 }
4934
4935 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4936 _mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
4937   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4938                                                  (__mmask16)-1);
4939 }
4940
4941 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4942 _mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4943   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4944                                                  __u);
4945 }
4946
4947 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4948 _mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
4949   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4950                                                (__mmask8)-1);
4951 }
4952
4953 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4954 _mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4955   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4956                                                __u);
4957 }
4958
4959 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4960 _mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
4961   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4962                                                 (__mmask8)-1);
4963 }
4964
4965 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4966 _mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4967   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4968                                                 __u);
4969 }
4970
4971 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4972 _mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
4973   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4974                                                 (__mmask16)-1);
4975 }
4976
4977 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4978 _mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4979   return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4980                                                 __u);
4981 }
4982
4983 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4984 _mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
4985   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4986                                                  (__mmask16)-1);
4987 }
4988
4989 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4990 _mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4991   return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4992                                                  __u);
4993 }
4994
4995 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4996 _mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
4997   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4998                                                (__mmask8)-1);
4999 }
5000
5001 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5002 _mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5003   return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5004                                                __u);
5005 }
5006
5007 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5008 _mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
5009   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5010                                                 (__mmask8)-1);
5011 }
5012
5013 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5014 _mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5015   return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5016                                                 __u);
5017 }
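
/* A minimal usage sketch (illustrative): the compare intrinsics return a bit
 * mask, so counting matches is a population count on the mask.  `p` is
 * assumed to point to 16 ints.
 *
 *   __m512i   v = _mm512_loadu_si512(p);
 *   __mmask16 m = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(42));
 *   int matches = __builtin_popcount((unsigned)m);   // number of equal lanes
 */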
5018
5019 static __inline__ __m512i __DEFAULT_FN_ATTRS
5020 _mm512_cvtepi8_epi32(__m128i __A)
5021 {
5022   /* This function always performs a signed extension, but __v16qi is a vector
5023      of plain char, which may be signed or unsigned, so use the signed __v16qs. */
5024   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
5025 }
5026
5027 static __inline__ __m512i __DEFAULT_FN_ATTRS
5028 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5029 {
5030   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5031                                              (__v16si)_mm512_cvtepi8_epi32(__A),
5032                                              (__v16si)__W);
5033 }
5034
5035 static __inline__ __m512i __DEFAULT_FN_ATTRS
5036 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
5037 {
5038   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5039                                              (__v16si)_mm512_cvtepi8_epi32(__A),
5040                                              (__v16si)_mm512_setzero_si512());
5041 }
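
/* A minimal usage sketch (illustrative): the epi8-to-epi32 conversion
 * sign-extends, so negative bytes stay negative.
 *
 *   __m128i b = _mm_set1_epi8(-1);         // sixteen 0xFF bytes
 *   __m512i w = _mm512_cvtepi8_epi32(b);   // sixteen lanes of -1
 */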
5042
5043 static __inline__ __m512i __DEFAULT_FN_ATTRS
5044 _mm512_cvtepi8_epi64(__m128i __A)
5045 {
5046   /* This function always performs a signed extension, but __v16qi is a vector
5047      of plain char, which may be signed or unsigned, so use the signed __v16qs. */
5048   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5049 }
5050
5051 static __inline__ __m512i __DEFAULT_FN_ATTRS
5052 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5053 {
5054   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5055                                              (__v8di)_mm512_cvtepi8_epi64(__A),
5056                                              (__v8di)__W);
5057 }
5058
5059 static __inline__ __m512i __DEFAULT_FN_ATTRS
5060 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
5061 {
5062   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5063                                              (__v8di)_mm512_cvtepi8_epi64(__A),
5064                                              (__v8di)_mm512_setzero_si512 ());
5065 }
5066
5067 static __inline__ __m512i __DEFAULT_FN_ATTRS
5068 _mm512_cvtepi32_epi64(__m256i __X)
5069 {
5070   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
5071 }
5072
5073 static __inline__ __m512i __DEFAULT_FN_ATTRS
5074 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5075 {
5076   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5077                                              (__v8di)_mm512_cvtepi32_epi64(__X),
5078                                              (__v8di)__W);
5079 }
5080
5081 static __inline__ __m512i __DEFAULT_FN_ATTRS
5082 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
5083 {
5084   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5085                                              (__v8di)_mm512_cvtepi32_epi64(__X),
5086                                              (__v8di)_mm512_setzero_si512());
5087 }
5088
5089 static __inline__ __m512i __DEFAULT_FN_ATTRS
5090 _mm512_cvtepi16_epi32(__m256i __A)
5091 {
5092   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
5093 }
5094
5095 static __inline__ __m512i __DEFAULT_FN_ATTRS
5096 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5097 {
5098   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5099                                             (__v16si)_mm512_cvtepi16_epi32(__A),
5100                                             (__v16si)__W);
5101 }
5102
5103 static __inline__ __m512i __DEFAULT_FN_ATTRS
5104 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5105 {
5106   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5107                                             (__v16si)_mm512_cvtepi16_epi32(__A),
5108                                             (__v16si)_mm512_setzero_si512 ());
5109 }
5110
5111 static __inline__ __m512i __DEFAULT_FN_ATTRS
5112 _mm512_cvtepi16_epi64(__m128i __A)
5113 {
5114   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5115 }
5116
5117 static __inline__ __m512i __DEFAULT_FN_ATTRS
5118 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5119 {
5120   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5121                                              (__v8di)_mm512_cvtepi16_epi64(__A),
5122                                              (__v8di)__W);
5123 }
5124
5125 static __inline__ __m512i __DEFAULT_FN_ATTRS
5126 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5127 {
5128   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5129                                              (__v8di)_mm512_cvtepi16_epi64(__A),
5130                                              (__v8di)_mm512_setzero_si512());
5131 }
5132
5133 static __inline__ __m512i __DEFAULT_FN_ATTRS
5134 _mm512_cvtepu8_epi32(__m128i __A)
5135 {
5136   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5137 }
5138
5139 static __inline__ __m512i __DEFAULT_FN_ATTRS
5140 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5141 {
5142   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5143                                              (__v16si)_mm512_cvtepu8_epi32(__A),
5144                                              (__v16si)__W);
5145 }
5146
5147 static __inline__ __m512i __DEFAULT_FN_ATTRS
5148 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5149 {
5150   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5151                                              (__v16si)_mm512_cvtepu8_epi32(__A),
5152                                              (__v16si)_mm512_setzero_si512());
5153 }
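
/* Illustrative sketch (added for exposition, not part of the upstream header):
   the epi8 widen sign-extends while the epu8 widen zero-extends, so the same
   source byte produces different 32-bit lanes.

     __m128i x = _mm_set1_epi8((char)0xF0);
     __m512i s = _mm512_cvtepi8_epi32(x);   // each lane == -16
     __m512i u = _mm512_cvtepu8_epi32(x);   // each lane == 240
*/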
5154
5155 static __inline__ __m512i __DEFAULT_FN_ATTRS
5156 _mm512_cvtepu8_epi64(__m128i __A)
5157 {
5158   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5159 }
5160
5161 static __inline__ __m512i __DEFAULT_FN_ATTRS
5162 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5163 {
5164   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5165                                              (__v8di)_mm512_cvtepu8_epi64(__A),
5166                                              (__v8di)__W);
5167 }
5168
5169 static __inline__ __m512i __DEFAULT_FN_ATTRS
5170 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5171 {
5172   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5173                                              (__v8di)_mm512_cvtepu8_epi64(__A),
5174                                              (__v8di)_mm512_setzero_si512());
5175 }
5176
5177 static __inline__ __m512i __DEFAULT_FN_ATTRS
5178 _mm512_cvtepu32_epi64(__m256i __X)
5179 {
5180   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5181 }
5182
5183 static __inline__ __m512i __DEFAULT_FN_ATTRS
5184 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5185 {
5186   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5187                                              (__v8di)_mm512_cvtepu32_epi64(__X),
5188                                              (__v8di)__W);
5189 }
5190
5191 static __inline__ __m512i __DEFAULT_FN_ATTRS
5192 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5193 {
5194   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5195                                              (__v8di)_mm512_cvtepu32_epi64(__X),
5196                                              (__v8di)_mm512_setzero_si512());
5197 }
5198
5199 static __inline__ __m512i __DEFAULT_FN_ATTRS
5200 _mm512_cvtepu16_epi32(__m256i __A)
5201 {
5202   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5203 }
5204
5205 static __inline__ __m512i __DEFAULT_FN_ATTRS
5206 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5207 {
5208   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5209                                             (__v16si)_mm512_cvtepu16_epi32(__A),
5210                                             (__v16si)__W);
5211 }
5212
5213 static __inline__ __m512i __DEFAULT_FN_ATTRS
5214 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5215 {
5216   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5217                                             (__v16si)_mm512_cvtepu16_epi32(__A),
5218                                             (__v16si)_mm512_setzero_si512());
5219 }
5220
5221 static __inline__ __m512i __DEFAULT_FN_ATTRS
5222 _mm512_cvtepu16_epi64(__m128i __A)
5223 {
5224   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5225 }
5226
5227 static __inline__ __m512i __DEFAULT_FN_ATTRS
5228 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5229 {
5230   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5231                                              (__v8di)_mm512_cvtepu16_epi64(__A),
5232                                              (__v8di)__W);
5233 }
5234
5235 static __inline__ __m512i __DEFAULT_FN_ATTRS
5236 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5237 {
5238   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5239                                              (__v8di)_mm512_cvtepu16_epi64(__A),
5240                                              (__v8di)_mm512_setzero_si512());
5241 }
5242
5243 static __inline__ __m512i __DEFAULT_FN_ATTRS
5244 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
5245 {
5246   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5247               (__v16si) __B,
5248               (__v16si)
5249               _mm512_setzero_si512 (),
5250               (__mmask16) -1);
5251 }
5252
5253 static __inline__ __m512i __DEFAULT_FN_ATTRS
5254 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5255 {
5256   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5257               (__v16si) __B,
5258               (__v16si) __W,
5259               (__mmask16) __U);
5260 }
5261
5262 static __inline__ __m512i __DEFAULT_FN_ATTRS
5263 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5264 {
5265   return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5266               (__v16si) __B,
5267               (__v16si)
5268               _mm512_setzero_si512 (),
5269               (__mmask16) __U);
5270 }
5271
5272 static __inline__ __m512i __DEFAULT_FN_ATTRS
5273 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
5274 {
5275   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5276               (__v8di) __B,
5277               (__v8di)
5278               _mm512_setzero_si512 (),
5279               (__mmask8) -1);
5280 }
5281
5282 static __inline__ __m512i __DEFAULT_FN_ATTRS
5283 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5284 {
5285   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5286               (__v8di) __B,
5287               (__v8di) __W,
5288               (__mmask8) __U);
5289 }
5290
5291 static __inline__ __m512i __DEFAULT_FN_ATTRS
5292 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5293 {
5294   return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5295               (__v8di) __B,
5296               (__v8di)
5297               _mm512_setzero_si512 (),
5298               (__mmask8) __U);
5299 }
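
/* Illustrative sketch (added for exposition, not part of the upstream header):
   variable rotate-right; each lane is rotated by the count held in the
   corresponding lane of the second operand.

     __m512i v   = _mm512_set1_epi32((int)0x80000001u);
     __m512i cnt = _mm512_set1_epi32(1);
     __m512i r   = _mm512_rorv_epi32(v, cnt);   // each lane == 0xC0000000
*/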
5300
5303 #define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
5304   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5305                                          (__v16si)(__m512i)(b), (int)(p), \
5306                                          (__mmask16)-1); })
5307
5308 #define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
5309   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5310                                           (__v16si)(__m512i)(b), (int)(p), \
5311                                           (__mmask16)-1); })
5312
5313 #define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
5314   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5315                                         (__v8di)(__m512i)(b), (int)(p), \
5316                                         (__mmask8)-1); })
5317
5318 #define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
5319   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5320                                          (__v8di)(__m512i)(b), (int)(p), \
5321                                          (__mmask8)-1); })
5322
5323 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
5324   (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
5325                                          (__v16si)(__m512i)(b), (int)(p), \
5326                                          (__mmask16)(m)); })
5327
5328 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
5329   (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
5330                                           (__v16si)(__m512i)(b), (int)(p), \
5331                                           (__mmask16)(m)); })
5332
5333 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
5334   (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
5335                                         (__v8di)(__m512i)(b), (int)(p), \
5336                                         (__mmask8)(m)); })
5337
5338 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
5339   (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
5340                                          (__v8di)(__m512i)(b), (int)(p), \
5341                                          (__mmask8)(m)); })
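
/* Illustrative sketch (added for exposition, not part of the upstream header):
   the generic compare macros take one of the _MM_CMPINT_* predicates declared
   at the top of this header.

     __m512i a = _mm512_set1_epi32(5);
     __m512i b = _mm512_set1_epi32(9);
     __mmask16 lt = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);  // 0xFFFF
     __mmask16 ge = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_GE);  // 0x0000
*/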
5342
5343 #define _mm512_rol_epi32(a, b) __extension__ ({ \
5344   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
5345                                         (__v16si)_mm512_setzero_si512(), \
5346                                         (__mmask16)-1); })
5347
5348 #define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
5349   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
5350                                         (__v16si)(__m512i)(W), \
5351                                         (__mmask16)(U)); })
5352
5353 #define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
5354   (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
5355                                         (__v16si)_mm512_setzero_si512(), \
5356                                         (__mmask16)(U)); })
5357
5358 #define _mm512_rol_epi64(a, b) __extension__ ({ \
5359   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
5360                                         (__v8di)_mm512_setzero_si512(), \
5361                                         (__mmask8)-1); })
5362
5363 #define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
5364   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
5365                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
5366
5367 #define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
5368   (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
5369                                         (__v8di)_mm512_setzero_si512(), \
5370                                         (__mmask8)(U)); })
5371 static __inline__ __m512i __DEFAULT_FN_ATTRS
5372 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
5373 {
5374   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5375               (__v16si) __B,
5376               (__v16si)
5377               _mm512_setzero_si512 (),
5378               (__mmask16) -1);
5379 }
5380
5381 static __inline__ __m512i __DEFAULT_FN_ATTRS
5382 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5383 {
5384   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5385               (__v16si) __B,
5386               (__v16si) __W,
5387               (__mmask16) __U);
5388 }
5389
5390 static __inline__ __m512i __DEFAULT_FN_ATTRS
5391 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5392 {
5393   return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5394               (__v16si) __B,
5395               (__v16si)
5396               _mm512_setzero_si512 (),
5397               (__mmask16) __U);
5398 }
5399
5400 static __inline__ __m512i __DEFAULT_FN_ATTRS
5401 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
5402 {
5403   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5404               (__v8di) __B,
5405               (__v8di)
5406               _mm512_setzero_si512 (),
5407               (__mmask8) -1);
5408 }
5409
5410 static __inline__ __m512i __DEFAULT_FN_ATTRS
5411 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5412 {
5413   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5414               (__v8di) __B,
5415               (__v8di) __W,
5416               (__mmask8) __U);
5417 }
5418
5419 static __inline__ __m512i __DEFAULT_FN_ATTRS
5420 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5421 {
5422   return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5423               (__v8di) __B,
5424               (__v8di)
5425               _mm512_setzero_si512 (),
5426               (__mmask8) __U);
5427 }
5428
5429 #define _mm512_ror_epi32(A, B) __extension__ ({ \
5430   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
5431                                         (__v16si)_mm512_setzero_si512(), \
5432                                         (__mmask16)-1); })
5433
5434 #define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
5435   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
5436                                         (__v16si)(__m512i)(W), \
5437                                         (__mmask16)(U)); })
5438
5439 #define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
5440   (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
5441                                         (__v16si)_mm512_setzero_si512(), \
5442                                         (__mmask16)(U)); })
5443
5444 #define _mm512_ror_epi64(A, B) __extension__ ({ \
5445   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
5446                                         (__v8di)_mm512_setzero_si512(), \
5447                                         (__mmask8)-1); })
5448
5449 #define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
5450   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
5451                                         (__v8di)(__m512i)(W), (__mmask8)(U)); })
5452
5453 #define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
5454   (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
5455                                         (__v8di)_mm512_setzero_si512(), \
5456                                         (__mmask8)(U)); })
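
/* Illustrative sketch (added for exposition, not part of the upstream header):
   rotate by an immediate; the count must be a compile-time constant because
   it is encoded in the instruction.

     __m512i v = _mm512_set1_epi32(0x12345678);
     __m512i r = _mm512_ror_epi32(v, 8);   // each lane == 0x78123456
*/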
5457
5458 static __inline__ __m512i __DEFAULT_FN_ATTRS
5459 _mm512_slli_epi32(__m512i __A, int __B)
5460 {
5461   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5462 }
5463
5464 static __inline__ __m512i __DEFAULT_FN_ATTRS
5465 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5466 {
5467   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5468                                          (__v16si)_mm512_slli_epi32(__A, __B),
5469                                          (__v16si)__W);
5470 }
5471
5472 static __inline__ __m512i __DEFAULT_FN_ATTRS
5473 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5474   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5475                                          (__v16si)_mm512_slli_epi32(__A, __B),
5476                                          (__v16si)_mm512_setzero_si512());
5477 }
5478
5479 static __inline__ __m512i __DEFAULT_FN_ATTRS
5480 _mm512_slli_epi64(__m512i __A, int __B)
5481 {
5482   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5483 }
5484
5485 static __inline__ __m512i __DEFAULT_FN_ATTRS
5486 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5487 {
5488   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5489                                           (__v8di)_mm512_slli_epi64(__A, __B),
5490                                           (__v8di)__W);
5491 }
5492
5493 static __inline__ __m512i __DEFAULT_FN_ATTRS
5494 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5495 {
5496   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5497                                           (__v8di)_mm512_slli_epi64(__A, __B),
5498                                           (__v8di)_mm512_setzero_si512());
5499 }
5500
5501 static __inline__ __m512i __DEFAULT_FN_ATTRS
5502 _mm512_srli_epi32(__m512i __A, int __B)
5503 {
5504   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5505 }
5506
5507 static __inline__ __m512i __DEFAULT_FN_ATTRS
5508 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5509 {
5510   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5511                                          (__v16si)_mm512_srli_epi32(__A, __B),
5512                                          (__v16si)__W);
5513 }
5514
5515 static __inline__ __m512i __DEFAULT_FN_ATTRS
5516 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5517   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5518                                          (__v16si)_mm512_srli_epi32(__A, __B),
5519                                          (__v16si)_mm512_setzero_si512());
5520 }
5521
5522 static __inline__ __m512i __DEFAULT_FN_ATTRS
5523 _mm512_srli_epi64(__m512i __A, int __B)
5524 {
5525   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5526 }
5527
5528 static __inline__ __m512i __DEFAULT_FN_ATTRS
5529 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5530 {
5531   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5532                                           (__v8di)_mm512_srli_epi64(__A, __B),
5533                                           (__v8di)__W);
5534 }
5535
5536 static __inline__ __m512i __DEFAULT_FN_ATTRS
5537 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5538 {
5539   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5540                                           (__v8di)_mm512_srli_epi64(__A, __B),
5541                                           (__v8di)_mm512_setzero_si512());
5542 }
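
/* Illustrative sketch (added for exposition, not part of the upstream header):
   shifting left and then right by the same immediate round-trips values whose
   top bits were already clear.

     __m512i v  = _mm512_set1_epi32(1);
     __m512i l4 = _mm512_slli_epi32(v, 4);    // each lane == 16
     __m512i r4 = _mm512_srli_epi32(l4, 4);   // each lane == 1 again
*/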
5543
5544 static __inline__ __m512i __DEFAULT_FN_ATTRS
5545 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5546 {
5547   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5548               (__v16si) __W,
5549               (__mmask16) __U);
5550 }
5551
5552 static __inline__ __m512i __DEFAULT_FN_ATTRS
5553 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5554 {
5555   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5556               (__v16si)
5557               _mm512_setzero_si512 (),
5558               (__mmask16) __U);
5559 }
5560
5561 static __inline__ void __DEFAULT_FN_ATTRS
5562 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5563 {
5564   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5565           (__mmask16) __U);
5566 }
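
/* Illustrative sketch (added for exposition, not part of the upstream header):
   these are the aligned (movdqa32) forms, so the pointer must be 64-byte
   aligned; only lanes whose mask bit is set are read or written.

     int buf[16] __attribute__((aligned(64))) = {0};
     __m512i v = _mm512_set1_epi32(42);
     _mm512_mask_store_epi32(buf, 0x000F, v);            // writes buf[0..3] only
     __m512i r = _mm512_maskz_load_epi32(0x00FF, buf);   // lanes 8..15 are zeroed
*/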
5567
5568 static __inline__ __m512i __DEFAULT_FN_ATTRS
5569 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5570 {
5571   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5572                  (__v16si) __A,
5573                  (__v16si) __W);
5574 }
5575
5576 static __inline__ __m512i __DEFAULT_FN_ATTRS
5577 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5578 {
5579   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5580                  (__v16si) __A,
5581                  (__v16si) _mm512_setzero_si512 ());
5582 }
5583
5584 static __inline__ __m512i __DEFAULT_FN_ATTRS
5585 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5586 {
5587   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5588                  (__v8di) __A,
5589                  (__v8di) __W);
5590 }
5591
5592 static __inline__ __m512i __DEFAULT_FN_ATTRS
5593 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5594 {
5595   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5596                  (__v8di) __A,
5597                  (__v8di) _mm512_setzero_si512 ());
5598 }
5599
5600 static __inline__ __m512i __DEFAULT_FN_ATTRS
5601 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5602 {
5603   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5604               (__v8di) __W,
5605               (__mmask8) __U);
5606 }
5607
5608 static __inline__ __m512i __DEFAULT_FN_ATTRS
5609 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5610 {
5611   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5612               (__v8di)
5613               _mm512_setzero_si512 (),
5614               (__mmask8) __U);
5615 }
5616
5617 static __inline__ void __DEFAULT_FN_ATTRS
5618 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5619 {
5620   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5621           (__mmask8) __U);
5622 }
5623
5624 static __inline__ __m512d __DEFAULT_FN_ATTRS
5625 _mm512_movedup_pd (__m512d __A)
5626 {
5627   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5628                                           0, 0, 2, 2, 4, 4, 6, 6);
5629 }
5630
5631 static __inline__ __m512d __DEFAULT_FN_ATTRS
5632 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5633 {
5634   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5635                                               (__v8df)_mm512_movedup_pd(__A),
5636                                               (__v8df)__W);
5637 }
5638
5639 static __inline__ __m512d __DEFAULT_FN_ATTRS
5640 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5641 {
5642   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5643                                               (__v8df)_mm512_movedup_pd(__A),
5644                                               (__v8df)_mm512_setzero_pd());
5645 }
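
/* Illustrative sketch (added for exposition, not part of the upstream header):
   movedup duplicates each even-indexed double into the odd lane above it.

     double in[8] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 };
     __m512d v = _mm512_loadu_pd(in);
     __m512d d = _mm512_movedup_pd(v);   // { 1, 1, 3, 3, 5, 5, 7, 7 }
*/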
5646
5647 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
5648   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5649                                              (__v8df)(__m512d)(B), \
5650                                              (__v8di)(__m512i)(C), (int)(imm), \
5651                                              (__mmask8)-1, (int)(R)); })
5652
5653 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
5654   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5655                                              (__v8df)(__m512d)(B), \
5656                                              (__v8di)(__m512i)(C), (int)(imm), \
5657                                              (__mmask8)(U), (int)(R)); })
5658
5659 #define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
5660   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5661                                              (__v8df)(__m512d)(B), \
5662                                              (__v8di)(__m512i)(C), (int)(imm), \
5663                                              (__mmask8)-1, \
5664                                              _MM_FROUND_CUR_DIRECTION); })
5665
5666 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
5667   (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
5668                                              (__v8df)(__m512d)(B), \
5669                                              (__v8di)(__m512i)(C), (int)(imm), \
5670                                              (__mmask8)(U), \
5671                                              _MM_FROUND_CUR_DIRECTION); })
5672
5673 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
5674   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5675                                               (__v8df)(__m512d)(B), \
5676                                               (__v8di)(__m512i)(C), \
5677                                               (int)(imm), (__mmask8)(U), \
5678                                               (int)(R)); })
5679
5680 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
5681   (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5682                                               (__v8df)(__m512d)(B), \
5683                                               (__v8di)(__m512i)(C), \
5684                                               (int)(imm), (__mmask8)(U), \
5685                                               _MM_FROUND_CUR_DIRECTION); })
5686
5687 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
5688   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5689                                             (__v16sf)(__m512)(B), \
5690                                             (__v16si)(__m512i)(C), (int)(imm), \
5691                                             (__mmask16)-1, (int)(R)); })
5692
5693 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
5694   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5695                                             (__v16sf)(__m512)(B), \
5696                                             (__v16si)(__m512i)(C), (int)(imm), \
5697                                             (__mmask16)(U), (int)(R)); })
5698
5699 #define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
5700   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5701                                             (__v16sf)(__m512)(B), \
5702                                             (__v16si)(__m512i)(C), (int)(imm), \
5703                                             (__mmask16)-1, \
5704                                             _MM_FROUND_CUR_DIRECTION); })
5705
5706 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
5707   (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5708                                             (__v16sf)(__m512)(B), \
5709                                             (__v16si)(__m512i)(C), (int)(imm), \
5710                                             (__mmask16)(U), \
5711                                             _MM_FROUND_CUR_DIRECTION); })
5712
5713 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
5714   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5715                                              (__v16sf)(__m512)(B), \
5716                                              (__v16si)(__m512i)(C), \
5717                                              (int)(imm), (__mmask16)(U), \
5718                                              (int)(R)); })
5719
5720 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
5721   (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5722                                              (__v16sf)(__m512)(B), \
5723                                              (__v16si)(__m512i)(C), \
5724                                              (int)(imm), (__mmask16)(U), \
5725                                              _MM_FROUND_CUR_DIRECTION); })
5726
5727 #define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
5728   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5729                                           (__v2df)(__m128d)(B), \
5730                                           (__v2di)(__m128i)(C), (int)(imm), \
5731                                           (__mmask8)-1, (int)(R)); })
5732
5733 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
5734   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5735                                           (__v2df)(__m128d)(B), \
5736                                           (__v2di)(__m128i)(C), (int)(imm), \
5737                                           (__mmask8)(U), (int)(R)); })
5738
5739 #define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
5740   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5741                                           (__v2df)(__m128d)(B), \
5742                                           (__v2di)(__m128i)(C), (int)(imm), \
5743                                           (__mmask8)-1, \
5744                                           _MM_FROUND_CUR_DIRECTION); })
5745
5746 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
5747   (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5748                                           (__v2df)(__m128d)(B), \
5749                                           (__v2di)(__m128i)(C), (int)(imm), \
5750                                           (__mmask8)(U), \
5751                                           _MM_FROUND_CUR_DIRECTION); })
5752
5753 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
5754   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5755                                            (__v2df)(__m128d)(B), \
5756                                            (__v2di)(__m128i)(C), (int)(imm), \
5757                                            (__mmask8)(U), (int)(R)); })
5758
5759 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
5760   (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5761                                            (__v2df)(__m128d)(B), \
5762                                            (__v2di)(__m128i)(C), (int)(imm), \
5763                                            (__mmask8)(U), \
5764                                            _MM_FROUND_CUR_DIRECTION); })
5765
5766 #define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
5767   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5768                                          (__v4sf)(__m128)(B), \
5769                                          (__v4si)(__m128i)(C), (int)(imm), \
5770                                          (__mmask8)-1, (int)(R)); })
5771
5772 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
5773   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5774                                          (__v4sf)(__m128)(B), \
5775                                          (__v4si)(__m128i)(C), (int)(imm), \
5776                                          (__mmask8)(U), (int)(R)); })
5777
5778 #define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
5779   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5780                                          (__v4sf)(__m128)(B), \
5781                                          (__v4si)(__m128i)(C), (int)(imm), \
5782                                          (__mmask8)-1, \
5783                                          _MM_FROUND_CUR_DIRECTION); })
5784
5785 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
5786   (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5787                                          (__v4sf)(__m128)(B), \
5788                                          (__v4si)(__m128i)(C), (int)(imm), \
5789                                          (__mmask8)(U), \
5790                                          _MM_FROUND_CUR_DIRECTION); })
5791
5792 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
5793   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5794                                           (__v4sf)(__m128)(B), \
5795                                           (__v4si)(__m128i)(C), (int)(imm), \
5796                                           (__mmask8)(U), (int)(R)); })
5797
5798 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
5799   (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5800                                           (__v4sf)(__m128)(B), \
5801                                           (__v4si)(__m128i)(C), (int)(imm), \
5802                                           (__mmask8)(U), \
5803                                           _MM_FROUND_CUR_DIRECTION); })
5804
5805 #define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
5806   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5807                                                  (__v2df)(__m128d)(B), \
5808                                                  (__v2df)_mm_setzero_pd(), \
5809                                                  (__mmask8)-1, (int)(R)); })
5810
5811
5812 static __inline__ __m128d __DEFAULT_FN_ATTRS
5813 _mm_getexp_sd (__m128d __A, __m128d __B)
5814 {
5815   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5816                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5817 }
5818
5819 static __inline__ __m128d __DEFAULT_FN_ATTRS
5820 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5821 {
5822  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5823           (__v2df) __B,
5824           (__v2df) __W,
5825           (__mmask8) __U,
5826           _MM_FROUND_CUR_DIRECTION);
5827 }
5828
5829 #define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
5830   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5831                                                  (__v2df)(__m128d)(B), \
5832                                                  (__v2df)(__m128d)(W), \
5833                                                  (__mmask8)(U), (int)(R)); })
5834
5835 static __inline__ __m128d __DEFAULT_FN_ATTRS
5836 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5837 {
5838  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5839           (__v2df) __B,
5840           (__v2df) _mm_setzero_pd (),
5841           (__mmask8) __U,
5842           _MM_FROUND_CUR_DIRECTION);
5843 }
5844
5845 #define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
5846   (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
5847                                                  (__v2df)(__m128d)(B), \
5848                                                  (__v2df)_mm_setzero_pd(), \
5849                                                  (__mmask8)(U), (int)(R)); })
5850
5851 #define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
5852   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5853                                                 (__v4sf)(__m128)(B), \
5854                                                 (__v4sf)_mm_setzero_ps(), \
5855                                                 (__mmask8)-1, (int)(R)); })
5856
5857 static __inline__ __m128 __DEFAULT_FN_ATTRS
5858 _mm_getexp_ss (__m128 __A, __m128 __B)
5859 {
5860   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5861                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5862 }
5863
5864 static __inline__ __m128 __DEFAULT_FN_ATTRS
5865 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5866 {
5867  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5868           (__v4sf) __B,
5869           (__v4sf) __W,
5870           (__mmask8) __U,
5871           _MM_FROUND_CUR_DIRECTION);
5872 }
5873
5874 #define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
5875   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5876                                                 (__v4sf)(__m128)(B), \
5877                                                 (__v4sf)(__m128)(W), \
5878                                                 (__mmask8)(U), (int)(R)); })
5879
5880 static __inline__ __m128 __DEFAULT_FN_ATTRS
5881 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5882 {
5883  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5884           (__v4sf) __B,
5885           (__v4sf) _mm_setzero_ps (),
5886           (__mmask8) __U,
5887           _MM_FROUND_CUR_DIRECTION);
5888 }
5889
5890 #define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
5891   (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
5892                                                 (__v4sf)(__m128)(B), \
5893                                                 (__v4sf)_mm_setzero_ps(), \
5894                                                 (__mmask8)(U), (int)(R)); })
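
/* Illustrative sketch (added for exposition, not part of the upstream header):
   getexp returns floor(log2(|x|)) of the low element as a float/double, with
   the upper elements copied from the first operand.

     __m128 x = _mm_set_ss(8.0f);
     __m128 e = _mm_getexp_ss(x, x);   // low element == 3.0f
*/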
5895
5896 #define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
5897   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5898                                                (__v2df)(__m128d)(B), \
5899                                                (int)(((D)<<2) | (C)), \
5900                                                (__v2df)_mm_setzero_pd(), \
5901                                                (__mmask8)-1, (int)(R)); })
5902
5903 #define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
5904   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5905                                                (__v2df)(__m128d)(B), \
5906                                                (int)(((D)<<2) | (C)), \
5907                                                (__v2df)_mm_setzero_pd(), \
5908                                                (__mmask8)-1, \
5909                                                _MM_FROUND_CUR_DIRECTION); })
5910
5911 #define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
5912   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5913                                                (__v2df)(__m128d)(B), \
5914                                                (int)(((D)<<2) | (C)), \
5915                                                (__v2df)(__m128d)(W), \
5916                                                (__mmask8)(U), \
5917                                                _MM_FROUND_CUR_DIRECTION); })
5918
5919 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
5920   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5921                                                (__v2df)(__m128d)(B), \
5922                                                (int)(((D)<<2) | (C)), \
5923                                                (__v2df)(__m128d)(W), \
5924                                                (__mmask8)(U), (int)(R)); })
5925
5926 #define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
5927   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5928                                                (__v2df)(__m128d)(B), \
5929                                                (int)(((D)<<2) | (C)), \
5930                                                (__v2df)_mm_setzero_pd(), \
5931                                                (__mmask8)(U), \
5932                                                _MM_FROUND_CUR_DIRECTION); })
5933
5934 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
5935   (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5936                                                (__v2df)(__m128d)(B), \
5937                                                (int)(((D)<<2) | (C)), \
5938                                                (__v2df)_mm_setzero_pd(), \
5939                                                (__mmask8)(U), (int)(R)); })
5940
5941 #define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
5942   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5943                                               (__v4sf)(__m128)(B), \
5944                                               (int)(((D)<<2) | (C)), \
5945                                               (__v4sf)_mm_setzero_ps(), \
5946                                               (__mmask8)-1, (int)(R)); })
5947
5948 #define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
5949   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5950                                               (__v4sf)(__m128)(B), \
5951                                               (int)(((D)<<2) | (C)), \
5952                                               (__v4sf)_mm_setzero_ps(), \
5953                                               (__mmask8)-1, \
5954                                               _MM_FROUND_CUR_DIRECTION); })
5955
5956 #define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
5957   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5958                                               (__v4sf)(__m128)(B), \
5959                                               (int)(((D)<<2) | (C)), \
5960                                               (__v4sf)(__m128)(W), \
5961                                               (__mmask8)(U), \
5962                                               _MM_FROUND_CUR_DIRECTION); })
5963
5964 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
5965   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5966                                               (__v4sf)(__m128)(B), \
5967                                               (int)(((D)<<2) | (C)), \
5968                                               (__v4sf)(__m128)(W), \
5969                                               (__mmask8)(U), (int)(R)); })
5970
5971 #define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
5972   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5973                                               (__v4sf)(__m128)(B), \
5974                                               (int)(((D)<<2) | (C)), \
5975                                               (__v4sf)_mm_setzero_ps(), \
5976                                               (__mmask8)(U), \
5977                                               _MM_FROUND_CUR_DIRECTION); })
5978
5979 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
5980   (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5981                                               (__v4sf)(__m128)(B), \
5982                                               (int)(((D)<<2) | (C)), \
5983                                               (__v4sf)_mm_setzero_ps(), \
5984                                               (__mmask8)(U), (int)(R)); })
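
/* Illustrative sketch (added for exposition, not part of the upstream header):
   getmant extracts the mantissa normalized into the chosen interval, using the
   _MM_MANT_NORM_* / _MM_MANT_SIGN_* controls declared earlier in this header.

     __m128d x = _mm_set_sd(12.0);
     __m128d m = _mm_getmant_sd(x, x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
     // low element == 1.5, since 12.0 == 1.5 * 2^3
*/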
5985
5986 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5987 _mm512_kmov (__mmask16 __A)
5988 {
5989   return  __A;
5990 }
5991
5992 #define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
5993   (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
5994                               (int)(P), (int)(R)); })
5995
5996 #define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
5997   (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
5998                               (int)(P), (int)(R)); })
5999
6000 #ifdef __x86_64__
6001 #define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
6002   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6003 #endif
6004
6005 static __inline__ __m512i __DEFAULT_FN_ATTRS
6006 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6007          __mmask16 __U, __m512i __B)
6008 {
6009   return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6010                    (__v16si) __I
6011                    /* idx */ ,
6012                    (__v16si) __B,
6013                    (__mmask16) __U);
6014 }
6015
6016 static __inline__ __m512i __DEFAULT_FN_ATTRS
6017 _mm512_sll_epi32(__m512i __A, __m128i __B)
6018 {
6019   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
6020 }
6021
6022 static __inline__ __m512i __DEFAULT_FN_ATTRS
6023 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6024 {
6025   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6026                                           (__v16si)_mm512_sll_epi32(__A, __B),
6027                                           (__v16si)__W);
6028 }
6029
6030 static __inline__ __m512i __DEFAULT_FN_ATTRS
6031 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6032 {
6033   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6034                                           (__v16si)_mm512_sll_epi32(__A, __B),
6035                                           (__v16si)_mm512_setzero_si512());
6036 }
6037
6038 static __inline__ __m512i __DEFAULT_FN_ATTRS
6039 _mm512_sll_epi64(__m512i __A, __m128i __B)
6040 {
6041   return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
6042 }
6043
6044 static __inline__ __m512i __DEFAULT_FN_ATTRS
6045 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6046 {
6047   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6048                                              (__v8di)_mm512_sll_epi64(__A, __B),
6049                                              (__v8di)__W);
6050 }
6051
6052 static __inline__ __m512i __DEFAULT_FN_ATTRS
6053 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6054 {
6055   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6056                                            (__v8di)_mm512_sll_epi64(__A, __B),
6057                                            (__v8di)_mm512_setzero_si512());
6058 }
6059
6060 static __inline__ __m512i __DEFAULT_FN_ATTRS
6061 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
6062 {
6063   return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
6064 }
6065
6066 static __inline__ __m512i __DEFAULT_FN_ATTRS
6067 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6068 {
6069   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6070                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
6071                                            (__v16si)__W);
6072 }
6073
6074 static __inline__ __m512i __DEFAULT_FN_ATTRS
6075 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6076 {
6077   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6078                                            (__v16si)_mm512_sllv_epi32(__X, __Y),
6079                                            (__v16si)_mm512_setzero_si512());
6080 }
6081
6082 static __inline__ __m512i __DEFAULT_FN_ATTRS
6083 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
6084 {
6085   return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
6086 }
6087
6088 static __inline__ __m512i __DEFAULT_FN_ATTRS
6089 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6090 {
6091   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6092                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
6093                                             (__v8di)__W);
6094 }
6095
6096 static __inline__ __m512i __DEFAULT_FN_ATTRS
6097 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6098 {
6099   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6100                                             (__v8di)_mm512_sllv_epi64(__X, __Y),
6101                                             (__v8di)_mm512_setzero_si512());
6102 }
6103
6104 static __inline__ __m512i __DEFAULT_FN_ATTRS
6105 _mm512_sra_epi32(__m512i __A, __m128i __B)
6106 {
6107   return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6108 }
6109
6110 static __inline__ __m512i __DEFAULT_FN_ATTRS
6111 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6112 {
6113   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6114                                           (__v16si)_mm512_sra_epi32(__A, __B),
6115                                           (__v16si)__W);
6116 }
6117
6118 static __inline__ __m512i __DEFAULT_FN_ATTRS
6119 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6120 {
6121   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6122                                           (__v16si)_mm512_sra_epi32(__A, __B),
6123                                           (__v16si)_mm512_setzero_si512());
6124 }
6125
6126 static __inline__ __m512i __DEFAULT_FN_ATTRS
6127 _mm512_sra_epi64(__m512i __A, __m128i __B)
6128 {
6129   return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6130 }
6131
6132 static __inline__ __m512i __DEFAULT_FN_ATTRS
6133 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6134 {
6135   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6136                                            (__v8di)_mm512_sra_epi64(__A, __B),
6137                                            (__v8di)__W);
6138 }
6139
6140 static __inline__ __m512i __DEFAULT_FN_ATTRS
6141 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6142 {
6143   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6144                                            (__v8di)_mm512_sra_epi64(__A, __B),
6145                                            (__v8di)_mm512_setzero_si512());
6146 }
6147
6148 static __inline__ __m512i __DEFAULT_FN_ATTRS
6149 _mm512_srav_epi32(__m512i __X, __m512i __Y)
6150 {
6151   return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6152 }
6153
6154 static __inline__ __m512i __DEFAULT_FN_ATTRS
6155 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6156 {
6157   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6158                                            (__v16si)_mm512_srav_epi32(__X, __Y),
6159                                            (__v16si)__W);
6160 }
6161
6162 static __inline__ __m512i __DEFAULT_FN_ATTRS
6163 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6164 {
6165   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6166                                            (__v16si)_mm512_srav_epi32(__X, __Y),
6167                                            (__v16si)_mm512_setzero_si512());
6168 }
6169
6170 static __inline__ __m512i __DEFAULT_FN_ATTRS
6171 _mm512_srav_epi64(__m512i __X, __m512i __Y)
6172 {
6173   return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6174 }
6175
6176 static __inline__ __m512i __DEFAULT_FN_ATTRS
6177 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6178 {
6179   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6180                                             (__v8di)_mm512_srav_epi64(__X, __Y),
6181                                             (__v8di)__W);
6182 }
6183
6184 static __inline__ __m512i __DEFAULT_FN_ATTRS
6185 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6186 {
6187   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6188                                             (__v8di)_mm512_srav_epi64(__X, __Y),
6189                                             (__v8di)_mm512_setzero_si512());
6190 }
6191
6192 static __inline__ __m512i __DEFAULT_FN_ATTRS
6193 _mm512_srl_epi32(__m512i __A, __m128i __B)
6194 {
6195   return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6196 }
6197
6198 static __inline__ __m512i __DEFAULT_FN_ATTRS
6199 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6200 {
6201   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6202                                           (__v16si)_mm512_srl_epi32(__A, __B),
6203                                           (__v16si)__W);
6204 }
6205
6206 static __inline__ __m512i __DEFAULT_FN_ATTRS
6207 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6208 {
6209   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6210                                           (__v16si)_mm512_srl_epi32(__A, __B),
6211                                           (__v16si)_mm512_setzero_si512());
6212 }
6213
6214 static __inline__ __m512i __DEFAULT_FN_ATTRS
6215 _mm512_srl_epi64(__m512i __A, __m128i __B)
6216 {
6217   return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6218 }
6219
6220 static __inline__ __m512i __DEFAULT_FN_ATTRS
6221 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6222 {
6223   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6224                                            (__v8di)_mm512_srl_epi64(__A, __B),
6225                                            (__v8di)__W);
6226 }
6227
6228 static __inline__ __m512i __DEFAULT_FN_ATTRS
6229 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6230 {
6231   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6232                                            (__v8di)_mm512_srl_epi64(__A, __B),
6233                                            (__v8di)_mm512_setzero_si512());
6234 }
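
/* Illustrative sketch (added for exposition, not part of the upstream header):
   sra propagates the sign bit while srl shifts in zeros; the count comes from
   the low 64 bits of the __m128i operand.

     __m128i cnt = _mm_cvtsi32_si128(2);
     __m512i v   = _mm512_set1_epi32(-8);
     __m512i a   = _mm512_sra_epi32(v, cnt);   // each lane == -2
     __m512i l   = _mm512_srl_epi32(v, cnt);   // each lane == 0x3FFFFFFE
*/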
6235
6236 static __inline__ __m512i __DEFAULT_FN_ATTRS
6237 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
6238 {
6239   return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6240 }
6241
6242 static __inline__ __m512i __DEFAULT_FN_ATTRS
6243 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6244 {
6245   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6246                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
6247                                            (__v16si)__W);
6248 }
6249
6250 static __inline__ __m512i __DEFAULT_FN_ATTRS
6251 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6252 {
6253   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6254                                            (__v16si)_mm512_srlv_epi32(__X, __Y),
6255                                            (__v16si)_mm512_setzero_si512());
6256 }
6257
6258 static __inline__ __m512i __DEFAULT_FN_ATTRS
6259 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6260 {
6261   return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6262 }
6263
6264 static __inline__ __m512i __DEFAULT_FN_ATTRS
6265 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6266 {
6267   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6268                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
6269                                             (__v8di)__W);
6270 }
6271
6272 static __inline__ __m512i __DEFAULT_FN_ATTRS
6273 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6274 {
6275   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6276                                             (__v8di)_mm512_srlv_epi64(__X, __Y),
6277                                             (__v8di)_mm512_setzero_si512());
6278 }
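
/*
 * A note on the masking convention used by the intrinsics in this file,
 * illustrated with the shift operations above: the _mask_ forms take a
 * pass-through vector __W and keep __W's element wherever the mask bit is
 * zero, while the _maskz_ forms zero that element instead.  A small
 * sketch with placeholder operands (w, a, cnt):
 *
 *   __m512i r1 = _mm512_mask_srl_epi32(w, 0x00FF, a, cnt);
 *                                // low 8 lanes shifted, high 8 from w
 *   __m512i r2 = _mm512_maskz_srl_epi32(0x00FF, a, cnt);
 *                                // low 8 lanes shifted, high 8 zeroed
 */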
6279
6280 #define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
6281   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6282                                             (__v16si)(__m512i)(B), \
6283                                             (__v16si)(__m512i)(C), (int)(imm), \
6284                                             (__mmask16)-1); })
6285
6286 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
6287   (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
6288                                             (__v16si)(__m512i)(B), \
6289                                             (__v16si)(__m512i)(C), (int)(imm), \
6290                                             (__mmask16)(U)); })
6291
6292 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
6293   (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
6294                                              (__v16si)(__m512i)(B), \
6295                                              (__v16si)(__m512i)(C), \
6296                                              (int)(imm), (__mmask16)(U)); })
6297
6298 #define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
6299   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6300                                             (__v8di)(__m512i)(B), \
6301                                             (__v8di)(__m512i)(C), (int)(imm), \
6302                                             (__mmask8)-1); })
6303
6304 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
6305   (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
6306                                             (__v8di)(__m512i)(B), \
6307                                             (__v8di)(__m512i)(C), (int)(imm), \
6308                                             (__mmask8)(U)); })
6309
6310 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
6311   (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
6312                                              (__v8di)(__m512i)(B), \
6313                                              (__v8di)(__m512i)(C), (int)(imm), \
6314                                              (__mmask8)(U)); })
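
/*
 * The ternarylogic macros evaluate an arbitrary three-input boolean
 * function on every bit: the three source bits (from A, B and C) form a
 * 3-bit index, and that bit of the 8-bit immediate supplies the result
 * bit.  Two commonly used immediates, shown here purely as illustration:
 *
 *   __m512i x = _mm512_ternarylogic_epi32(a, b, c, 0x96); // a ^ b ^ c
 *   __m512i m = _mm512_ternarylogic_epi32(a, b, c, 0xE8); // bitwise majority
 */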
6315
6316 #ifdef __x86_64__
6317 #define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
6318   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6319 #endif
6320
6321 #define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
6322   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6323
6324 #define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
6325   (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6326
6327 #define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
6328   (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6329
6330 static __inline__ unsigned __DEFAULT_FN_ATTRS
6331 _mm_cvtsd_u32 (__m128d __A)
6332 {
6333   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6334              _MM_FROUND_CUR_DIRECTION);
6335 }
6336
6337 #ifdef __x86_64__
6338 #define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
6339   (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6340                                                   (int)(R)); })
6341
6342 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6343 _mm_cvtsd_u64 (__m128d __A)
6344 {
6345   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6346                  __A,
6347                  _MM_FROUND_CUR_DIRECTION);
6348 }
6349 #endif
6350
6351 #define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
6352   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6353
6354 #define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
6355   (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
6356
6357 #ifdef __x86_64__
6358 #define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
6359   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6360
6361 #define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
6362   (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
6363 #endif
6364
6365 #define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
6366   (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6367
6368 static __inline__ unsigned __DEFAULT_FN_ATTRS
6369 _mm_cvtss_u32 (__m128 __A)
6370 {
6371   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6372              _MM_FROUND_CUR_DIRECTION);
6373 }
6374
6375 #ifdef __x86_64__
6376 #define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6377   (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6378                                                   (int)(R)); })
6379
6380 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6381 _mm_cvtss_u64 (__m128 __A)
6382 {
6383   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6384                  __A,
6385                  _MM_FROUND_CUR_DIRECTION);
6386 }
6387 #endif
6388
6389 #define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
6390   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6391
6392 #define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
6393   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
6394
6395 static __inline__ int __DEFAULT_FN_ATTRS
6396 _mm_cvttsd_i32 (__m128d __A)
6397 {
6398   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6399               _MM_FROUND_CUR_DIRECTION);
6400 }
6401
6402 #ifdef __x86_64__
6403 #define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6404   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6405
6406 #define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6407   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6408
6409 static __inline__ long long __DEFAULT_FN_ATTRS
6410 _mm_cvttsd_i64 (__m128d __A)
6411 {
6412   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6413               _MM_FROUND_CUR_DIRECTION);
6414 }
6415 #endif
6416
6417 #define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
6418   (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6419
6420 static __inline__ unsigned __DEFAULT_FN_ATTRS
6421 _mm_cvttsd_u32 (__m128d __A)
6422 {
6423   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6424               _MM_FROUND_CUR_DIRECTION);
6425 }
6426
6427 #ifdef __x86_64__
6428 #define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6429   (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6430                                                    (int)(R)); })
6431
6432 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6433 _mm_cvttsd_u64 (__m128d __A)
6434 {
6435   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6436                   __A,
6437                   _MM_FROUND_CUR_DIRECTION);
6438 }
6439 #endif
6440
6441 #define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
6442   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6443
6444 #define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
6445   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
6446
6447 static __inline__ int __DEFAULT_FN_ATTRS
6448 _mm_cvttss_i32 (__m128 __A)
6449 {
6450   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6451               _MM_FROUND_CUR_DIRECTION);
6452 }
6453
6454 #ifdef __x86_64__
6455 #define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6456   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6457
6458 #define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6459   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6460
6461 static __inline__ long long __DEFAULT_FN_ATTRS
6462 _mm_cvttss_i64 (__m128 __A)
6463 {
6464   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6465               _MM_FROUND_CUR_DIRECTION);
6466 }
6467 #endif
6468
6469 #define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
6470   (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6471
6472 static __inline__ unsigned __DEFAULT_FN_ATTRS
6473 _mm_cvttss_u32 (__m128 __A)
6474 {
6475   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6476               _MM_FROUND_CUR_DIRECTION);
6477 }
6478
6479 #ifdef __x86_64__
6480 #define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6481   (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6482                                                    (int)(R)); })
6483
6484 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6485 _mm_cvttss_u64 (__m128 __A)
6486 {
6487   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6488                   __A,
6489                   _MM_FROUND_CUR_DIRECTION);
6490 }
6491 #endif
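
/*
 * The scalar _mm_cvt_round* / _mm_cvtt_round* conversions above take an
 * explicit rounding operand R, which must be a compile-time constant such
 * as _MM_FROUND_CUR_DIRECTION (use whatever MXCSR currently selects); the
 * cvtt ("truncate") forms always convert toward zero.  The non-_round_
 * wrappers like _mm_cvtsd_u32 simply pass _MM_FROUND_CUR_DIRECTION.  For
 * example:
 *
 *   unsigned u = _mm_cvt_roundss_u32(x, _MM_FROUND_CUR_DIRECTION);
 *   int      i = _mm_cvtt_roundsd_i32(y, _MM_FROUND_CUR_DIRECTION);
 */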
6492
6493 static __inline__ __m512d __DEFAULT_FN_ATTRS
6494 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6495             __m512d __B)
6496 {
6497   return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6498               (__v8di) __I
6499               /* idx */ ,
6500               (__v8df) __B,
6501               (__mmask8) __U);
6502 }
6503
6504 static __inline__ __m512 __DEFAULT_FN_ATTRS
6505 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6506             __m512 __B)
6507 {
6508   return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6509                    (__v16si) __I
6510                    /* idx */ ,
6511                    (__v16sf) __B,
6512                    (__mmask16) __U);
6513 }
6514
6515 static __inline__ __m512i __DEFAULT_FN_ATTRS
6516 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6517          __mmask8 __U, __m512i __B)
6518 {
6519   return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6520                    (__v8di) __I
6521                    /* idx */ ,
6522                    (__v8di) __B,
6523                    (__mmask8) __U);
6524 }
6525
6526 #define _mm512_permute_pd(X, C) __extension__ ({ \
6527   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
6528                                    (__v8df)_mm512_undefined_pd(), \
6529                                    0 + (((C) >> 0) & 0x1), \
6530                                    0 + (((C) >> 1) & 0x1), \
6531                                    2 + (((C) >> 2) & 0x1), \
6532                                    2 + (((C) >> 3) & 0x1), \
6533                                    4 + (((C) >> 4) & 0x1), \
6534                                    4 + (((C) >> 5) & 0x1), \
6535                                    6 + (((C) >> 6) & 0x1), \
6536                                    6 + (((C) >> 7) & 0x1)); })
6537
6538 #define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
6539   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6540                                        (__v8df)_mm512_permute_pd((X), (C)), \
6541                                        (__v8df)(__m512d)(W)); })
6542
6543 #define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
6544   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6545                                        (__v8df)_mm512_permute_pd((X), (C)), \
6546                                        (__v8df)_mm512_setzero_pd()); })
6547
6548 #define _mm512_permute_ps(X, C) __extension__ ({ \
6549   (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
6550                                   (__v16sf)_mm512_undefined_ps(), \
6551                                    0  + (((C) >> 0) & 0x3), \
6552                                    0  + (((C) >> 2) & 0x3), \
6553                                    0  + (((C) >> 4) & 0x3), \
6554                                    0  + (((C) >> 6) & 0x3), \
6555                                    4  + (((C) >> 0) & 0x3), \
6556                                    4  + (((C) >> 2) & 0x3), \
6557                                    4  + (((C) >> 4) & 0x3), \
6558                                    4  + (((C) >> 6) & 0x3), \
6559                                    8  + (((C) >> 0) & 0x3), \
6560                                    8  + (((C) >> 2) & 0x3), \
6561                                    8  + (((C) >> 4) & 0x3), \
6562                                    8  + (((C) >> 6) & 0x3), \
6563                                    12 + (((C) >> 0) & 0x3), \
6564                                    12 + (((C) >> 2) & 0x3), \
6565                                    12 + (((C) >> 4) & 0x3), \
6566                                    12 + (((C) >> 6) & 0x3)); })
6567
6568 #define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
6569   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6570                                       (__v16sf)_mm512_permute_ps((X), (C)), \
6571                                       (__v16sf)(__m512)(W)); })
6572
6573 #define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
6574   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6575                                       (__v16sf)_mm512_permute_ps((X), (C)), \
6576                                       (__v16sf)_mm512_setzero_ps()); })
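
/*
 * permute_pd/permute_ps shuffle within each 128-bit lane of a single
 * source, controlled entirely by the immediate (one bit per double, and
 * the familiar 4x2-bit selector replicated to every lane for floats),
 * much like VPERMILPD/VPERMILPS with an immediate operand.  For example:
 *
 *   __m512d s = _mm512_permute_pd(x, 0x55);  // swap the two doubles in
 *                                            // every 128-bit lane
 */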
6577
6578 static __inline__ __m512d __DEFAULT_FN_ATTRS
6579 _mm512_permutevar_pd(__m512d __A, __m512i __C)
6580 {
6581   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6582 }
6583
6584 static __inline__ __m512d __DEFAULT_FN_ATTRS
6585 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6586 {
6587   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6588                                          (__v8df)_mm512_permutevar_pd(__A, __C),
6589                                          (__v8df)__W);
6590 }
6591
6592 static __inline__ __m512d __DEFAULT_FN_ATTRS
6593 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6594 {
6595   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6596                                          (__v8df)_mm512_permutevar_pd(__A, __C),
6597                                          (__v8df)_mm512_setzero_pd());
6598 }
6599
6600 static __inline__ __m512 __DEFAULT_FN_ATTRS
6601 _mm512_permutevar_ps(__m512 __A, __m512i __C)
6602 {
6603   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6604 }
6605
6606 static __inline__ __m512 __DEFAULT_FN_ATTRS
6607 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6608 {
6609   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6610                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
6611                                         (__v16sf)__W);
6612 }
6613
6614 static __inline__ __m512 __DEFAULT_FN_ATTRS
6615 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6616 {
6617   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6618                                         (__v16sf)_mm512_permutevar_ps(__A, __C),
6619                                         (__v16sf)_mm512_setzero_ps());
6620 }
6621
6622 static __inline __m512d __DEFAULT_FN_ATTRS
6623 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6624 {
6625   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6626                     /* idx */ ,
6627                     (__v8df) __A,
6628                     (__v8df) __B,
6629                     (__mmask8) -1);
6630 }
6631
6632 static __inline__ __m512d __DEFAULT_FN_ATTRS
6633 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6634 {
6635   return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6636                     /* idx */ ,
6637                     (__v8df) __A,
6638                     (__v8df) __B,
6639                     (__mmask8) __U);
6640 }
6641
6642 static __inline__ __m512d __DEFAULT_FN_ATTRS
6643 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6644             __m512d __B)
6645 {
6646   return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6647                                                          /* idx */ ,
6648                                                          (__v8df) __A,
6649                                                          (__v8df) __B,
6650                                                          (__mmask8) __U);
6651 }
6652
6653 static __inline __m512 __DEFAULT_FN_ATTRS
6654 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6655 {
6656   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6657                                                          /* idx */ ,
6658                                                          (__v16sf) __A,
6659                                                          (__v16sf) __B,
6660                                                          (__mmask16) -1);
6661 }
6662
6663 static __inline__ __m512 __DEFAULT_FN_ATTRS
6664 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6665 {
6666   return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6667                                                          /* idx */ ,
6668                                                          (__v16sf) __A,
6669                                                          (__v16sf) __B,
6670                                                          (__mmask16) __U);
6671 }
6672
6673 static __inline__ __m512 __DEFAULT_FN_ATTRS
6674 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6675             __m512 __B)
6676 {
6677   return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6678                                                         /* idx */ ,
6679                                                         (__v16sf) __A,
6680                                                         (__v16sf) __B,
6681                                                         (__mmask16) __U);
6682 }
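
/*
 * The permutex2var family selects every destination element out of the
 * concatenation of two sources: the low bits of each index element pick a
 * position and the next bit picks the source, so for 8 x 64-bit elements
 * index values 0..7 address __A and 8..15 address __B.  The mask2 forms
 * above differ from mask_permutex2var only in what a zero mask bit
 * preserves: mask2 keeps the index operand __I, mask_permutex2var keeps
 * the first data operand.  A sketch with placeholder inputs:
 *
 *   __m512i idx = _mm512_set_epi64(15, 7, 14, 6, 13, 5, 12, 4);
 *   __m512d hi  = _mm512_permutex2var_pd(a, idx, b);
 *                          // interleaves the upper four doubles of a and b
 */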
6683
6684 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6685 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
6686 {
6687   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6688              (__v16si) __B,
6689              (__mmask16) -1);
6690 }
6691
6692 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6693 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
6694 {
6695   return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6696              (__v16si) __B, __U);
6697 }
6698
6699 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6700 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
6701 {
6702   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6703             (__v8di) __B,
6704             (__mmask8) -1);
6705 }
6706
6707 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6708 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
6709 {
6710   return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6711             (__v8di) __B, __U);
6712 }
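
/*
 * The testn intrinsics build a mask from a bitwise AND: result bit i is
 * set when (__A[i] & __B[i]) == 0.  Passing the same vector twice
 * therefore flags the all-zero elements, for example:
 *
 *   __mmask16 zeros = _mm512_testn_epi32_mask(v, v);
 */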
6713
6714 #define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
6715   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6716                                              (__v8si)_mm256_undefined_si256(), \
6717                                              (__mmask8)-1, (int)(R)); })
6718
6719 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
6720   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6721                                              (__v8si)(__m256i)(W), \
6722                                              (__mmask8)(U), (int)(R)); })
6723
6724 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
6725   (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6726                                              (__v8si)_mm256_setzero_si256(), \
6727                                              (__mmask8)(U), (int)(R)); })
6728
6729 static __inline__ __m256i __DEFAULT_FN_ATTRS
6730 _mm512_cvttpd_epu32 (__m512d __A)
6731 {
6732   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6733                   (__v8si)
6734                   _mm256_undefined_si256 (),
6735                   (__mmask8) -1,
6736                   _MM_FROUND_CUR_DIRECTION);
6737 }
6738
6739 static __inline__ __m256i __DEFAULT_FN_ATTRS
6740 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6741 {
6742   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6743                   (__v8si) __W,
6744                   (__mmask8) __U,
6745                   _MM_FROUND_CUR_DIRECTION);
6746 }
6747
6748 static __inline__ __m256i __DEFAULT_FN_ATTRS
6749 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6750 {
6751   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6752                   (__v8si)
6753                   _mm256_setzero_si256 (),
6754                   (__mmask8) __U,
6755                   _MM_FROUND_CUR_DIRECTION);
6756 }
6757
6758 #define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
6759   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6760                                                 (__v2df)(__m128d)(B), \
6761                                                 (__v2df)_mm_setzero_pd(), \
6762                                                 (__mmask8)-1, (int)(imm), \
6763                                                 (int)(R)); })
6764
6765 #define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
6766   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6767                                                 (__v2df)(__m128d)(B), \
6768                                                 (__v2df)_mm_setzero_pd(), \
6769                                                 (__mmask8)-1, (int)(imm), \
6770                                                 _MM_FROUND_CUR_DIRECTION); })
6771
6772 #define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
6773   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6774                                                 (__v2df)(__m128d)(B), \
6775                                                 (__v2df)(__m128d)(W), \
6776                                                 (__mmask8)(U), (int)(imm), \
6777                                                 _MM_FROUND_CUR_DIRECTION); })
6778
6779 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
6780   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6781                                                 (__v2df)(__m128d)(B), \
6782                                                 (__v2df)(__m128d)(W), \
6783                                                 (__mmask8)(U), (int)(I), \
6784                                                 (int)(R)); })
6785
6786 #define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
6787   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6788                                                 (__v2df)(__m128d)(B), \
6789                                                 (__v2df)_mm_setzero_pd(), \
6790                                                 (__mmask8)(U), (int)(I), \
6791                                                 _MM_FROUND_CUR_DIRECTION); })
6792
6793 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
6794   (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6795                                                 (__v2df)(__m128d)(B), \
6796                                                 (__v2df)_mm_setzero_pd(), \
6797                                                 (__mmask8)(U), (int)(I), \
6798                                                 (int)(R)); })
6799
6800 #define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
6801   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6802                                                (__v4sf)(__m128)(B), \
6803                                                (__v4sf)_mm_setzero_ps(), \
6804                                                (__mmask8)-1, (int)(imm), \
6805                                                (int)(R)); })
6806
6807 #define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
6808   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6809                                                (__v4sf)(__m128)(B), \
6810                                                (__v4sf)_mm_setzero_ps(), \
6811                                                (__mmask8)-1, (int)(imm), \
6812                                                _MM_FROUND_CUR_DIRECTION); })
6813
6814 #define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
6815   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6816                                                (__v4sf)(__m128)(B), \
6817                                                (__v4sf)(__m128)(W), \
6818                                                (__mmask8)(U), (int)(I), \
6819                                                _MM_FROUND_CUR_DIRECTION); })
6820
6821 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
6822   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6823                                                (__v4sf)(__m128)(B), \
6824                                                (__v4sf)(__m128)(W), \
6825                                                (__mmask8)(U), (int)(I), \
6826                                                (int)(R)); })
6827
6828 #define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
6829   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6830                                                (__v4sf)(__m128)(B), \
6831                                                (__v4sf)_mm_setzero_ps(), \
6832                                                (__mmask8)(U), (int)(I), \
6833                                                _MM_FROUND_CUR_DIRECTION); })
6834
6835 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
6836   (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6837                                                (__v4sf)(__m128)(B), \
6838                                                (__v4sf)_mm_setzero_ps(), \
6839                                                (__mmask8)(U), (int)(I), \
6840                                                (int)(R)); })
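
/*
 * roundscale rounds the low scalar element of B to a fixed number of
 * binary fraction bits: with M taken from the upper four bits of the
 * immediate the result is, roughly, 2^-M * round(B[0] * 2^M), the low
 * immediate bits selecting how that rounding is done, and the upper
 * element is copied from A.  For example:
 *
 *   __m128d r = _mm_roundscale_sd(a, b, 0);  // round b[0] to an integer
 */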
6841
6842 #define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
6843   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6844                                            (__v8df)(__m512d)(B), \
6845                                            (__v8df)_mm512_undefined_pd(), \
6846                                            (__mmask8)-1, (int)(R)); })
6847
6848 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
6849   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6850                                            (__v8df)(__m512d)(B), \
6851                                            (__v8df)(__m512d)(W), \
6852                                            (__mmask8)(U), (int)(R)); })
6853
6854 #define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
6855   (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6856                                            (__v8df)(__m512d)(B), \
6857                                            (__v8df)_mm512_setzero_pd(), \
6858                                            (__mmask8)(U), (int)(R)); })
6859
6860 static __inline__ __m512d __DEFAULT_FN_ATTRS
6861 _mm512_scalef_pd (__m512d __A, __m512d __B)
6862 {
6863   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6864                 (__v8df) __B,
6865                 (__v8df)
6866                 _mm512_undefined_pd (),
6867                 (__mmask8) -1,
6868                 _MM_FROUND_CUR_DIRECTION);
6869 }
6870
6871 static __inline__ __m512d __DEFAULT_FN_ATTRS
6872 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6873 {
6874   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6875                 (__v8df) __B,
6876                 (__v8df) __W,
6877                 (__mmask8) __U,
6878                 _MM_FROUND_CUR_DIRECTION);
6879 }
6880
6881 static __inline__ __m512d __DEFAULT_FN_ATTRS
6882 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6883 {
6884   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6885                 (__v8df) __B,
6886                 (__v8df)
6887                 _mm512_setzero_pd (),
6888                 (__mmask8) __U,
6889                 _MM_FROUND_CUR_DIRECTION);
6890 }
6891
6892 #define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
6893   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6894                                           (__v16sf)(__m512)(B), \
6895                                           (__v16sf)_mm512_undefined_ps(), \
6896                                           (__mmask16)-1, (int)(R)); })
6897
6898 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
6899   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6900                                           (__v16sf)(__m512)(B), \
6901                                           (__v16sf)(__m512)(W), \
6902                                           (__mmask16)(U), (int)(R)); })
6903
6904 #define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
6905   (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6906                                           (__v16sf)(__m512)(B), \
6907                                           (__v16sf)_mm512_setzero_ps(), \
6908                                           (__mmask16)(U), (int)(R)); })
6909
6910 static __inline__ __m512 __DEFAULT_FN_ATTRS
6911 _mm512_scalef_ps (__m512 __A, __m512 __B)
6912 {
6913   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6914                (__v16sf) __B,
6915                (__v16sf)
6916                _mm512_undefined_ps (),
6917                (__mmask16) -1,
6918                _MM_FROUND_CUR_DIRECTION);
6919 }
6920
6921 static __inline__ __m512 __DEFAULT_FN_ATTRS
6922 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6923 {
6924   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6925                (__v16sf) __B,
6926                (__v16sf) __W,
6927                (__mmask16) __U,
6928                _MM_FROUND_CUR_DIRECTION);
6929 }
6930
6931 static __inline__ __m512 __DEFAULT_FN_ATTRS
6932 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6933 {
6934   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6935                (__v16sf) __B,
6936                (__v16sf)
6937                _mm512_setzero_ps (),
6938                (__mmask16) __U,
6939                _MM_FROUND_CUR_DIRECTION);
6940 }
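
/*
 * scalef multiplies each element of the first operand by two raised to
 * the floor of the matching element of the second, i.e. roughly
 * __A[i] * 2^floor(__B[i]), with the special cases (NaN, infinities,
 * denormals) handled by the instruction itself.  For example:
 *
 *   __m512d y = _mm512_scalef_pd(x, _mm512_set1_pd(3.0));  // x * 8.0
 */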
6941
6942 #define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
6943   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6944                                               (__v2df)(__m128d)(B), \
6945                                               (__v2df)_mm_setzero_pd(), \
6946                                               (__mmask8)-1, (int)(R)); })
6947
6948 static __inline__ __m128d __DEFAULT_FN_ATTRS
6949 _mm_scalef_sd (__m128d __A, __m128d __B)
6950 {
6951   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6952               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6953               (__mmask8) -1,
6954               _MM_FROUND_CUR_DIRECTION);
6955 }
6956
6957 static __inline__ __m128d __DEFAULT_FN_ATTRS
6958 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6959 {
6960  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6961                  (__v2df) __B,
6962                 (__v2df) __W,
6963                 (__mmask8) __U,
6964                 _MM_FROUND_CUR_DIRECTION);
6965 }
6966
6967 #define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
6968   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6969                                               (__v2df)(__m128d)(B), \
6970                                               (__v2df)(__m128d)(W), \
6971                                               (__mmask8)(U), (int)(R)); })
6972
6973 static __inline__ __m128d __DEFAULT_FN_ATTRS
6974 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6975 {
6976  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6977                  (__v2df) __B,
6978                 (__v2df) _mm_setzero_pd (),
6979                 (__mmask8) __U,
6980                 _MM_FROUND_CUR_DIRECTION);
6981 }
6982
6983 #define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
6984   (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6985                                               (__v2df)(__m128d)(B), \
6986                                               (__v2df)_mm_setzero_pd(), \
6987                                               (__mmask8)(U), (int)(R)); })
6988
6989 #define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
6990   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6991                                              (__v4sf)(__m128)(B), \
6992                                              (__v4sf)_mm_setzero_ps(), \
6993                                              (__mmask8)-1, (int)(R)); })
6994
6995 static __inline__ __m128 __DEFAULT_FN_ATTRS
6996 _mm_scalef_ss (__m128 __A, __m128 __B)
6997 {
6998   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6999              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
7000              (__mmask8) -1,
7001              _MM_FROUND_CUR_DIRECTION);
7002 }
7003
7004 static __inline__ __m128 __DEFAULT_FN_ATTRS
7005 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7006 {
7007  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7008                 (__v4sf) __B,
7009                 (__v4sf) __W,
7010                 (__mmask8) __U,
7011                 _MM_FROUND_CUR_DIRECTION);
7012 }
7013
7014 #define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
7015   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
7016                                              (__v4sf)(__m128)(B), \
7017                                              (__v4sf)(__m128)(W), \
7018                                              (__mmask8)(U), (int)(R)); })
7019
7020 static __inline__ __m128 __DEFAULT_FN_ATTRS
7021 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7022 {
7023  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7024                  (__v4sf) __B,
7025                 (__v4sf) _mm_setzero_ps (),
7026                 (__mmask8) __U,
7027                 _MM_FROUND_CUR_DIRECTION);
7028 }
7029
7030 #define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
7031   (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
7032                                              (__v4sf)(__m128)(B), \
7033                                              (__v4sf)_mm_setzero_ps(), \
7034                                              (__mmask8)(U), \
7035                                              (int)(R)); })
7036
7037 static __inline__ __m512i __DEFAULT_FN_ATTRS
7038 _mm512_srai_epi32(__m512i __A, int __B)
7039 {
7040   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
7041 }
7042
7043 static __inline__ __m512i __DEFAULT_FN_ATTRS
7044 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
7045 {
7046   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7047                                          (__v16si)_mm512_srai_epi32(__A, __B), \
7048                                          (__v16si)__W);
7049 }
7050
7051 static __inline__ __m512i __DEFAULT_FN_ATTRS
7052 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
7053   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7054                                          (__v16si)_mm512_srai_epi32(__A, __B), \
7055                                          (__v16si)_mm512_setzero_si512());
7056 }
7057
7058 static __inline__ __m512i __DEFAULT_FN_ATTRS
7059 _mm512_srai_epi64(__m512i __A, int __B)
7060 {
7061   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
7062 }
7063
7064 static __inline__ __m512i __DEFAULT_FN_ATTRS
7065 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
7066 {
7067   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7068                                           (__v8di)_mm512_srai_epi64(__A, __B), \
7069                                           (__v8di)__W);
7070 }
7071
7072 static __inline__ __m512i __DEFAULT_FN_ATTRS
7073 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
7074 {
7075   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7076                                           (__v8di)_mm512_srai_epi64(__A, __B), \
7077                                           (__v8di)_mm512_setzero_si512());
7078 }
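
/*
 * srai is an arithmetic right shift: vacated bit positions are filled
 * with copies of the sign bit, and counts of element-width or larger
 * leave only the sign bit replicated.  A common idiom:
 *
 *   __m512i sign = _mm512_srai_epi32(v, 31);  // 0 or -1 in each lane
 */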
7079
7080 #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
7081   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
7082                                          (__v16sf)(__m512)(B), (int)(imm), \
7083                                          (__v16sf)_mm512_undefined_ps(), \
7084                                          (__mmask16)-1); })
7085
7086 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
7087   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
7088                                          (__v16sf)(__m512)(B), (int)(imm), \
7089                                          (__v16sf)(__m512)(W), \
7090                                          (__mmask16)(U)); })
7091
7092 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
7093   (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
7094                                          (__v16sf)(__m512)(B), (int)(imm), \
7095                                          (__v16sf)_mm512_setzero_ps(), \
7096                                          (__mmask16)(U)); })
7097
7098 #define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
7099   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
7100                                           (__v8df)(__m512d)(B), (int)(imm), \
7101                                           (__v8df)_mm512_undefined_pd(), \
7102                                           (__mmask8)-1); })
7103
7104 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
7105   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
7106                                           (__v8df)(__m512d)(B), (int)(imm), \
7107                                           (__v8df)(__m512d)(W), \
7108                                           (__mmask8)(U)); })
7109
7110 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
7111   (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
7112                                           (__v8df)(__m512d)(B), (int)(imm), \
7113                                           (__v8df)_mm512_setzero_pd(), \
7114                                           (__mmask8)(U)); })
7115
7116 #define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
7117   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
7118                                           (__v16si)(__m512i)(B), (int)(imm), \
7119                                           (__v16si)_mm512_setzero_si512(), \
7120                                           (__mmask16)-1); })
7121
7122 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
7123   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
7124                                           (__v16si)(__m512i)(B), (int)(imm), \
7125                                           (__v16si)(__m512i)(W), \
7126                                           (__mmask16)(U)); })
7127
7128 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
7129   (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
7130                                           (__v16si)(__m512i)(B), (int)(imm), \
7131                                           (__v16si)_mm512_setzero_si512(), \
7132                                           (__mmask16)(U)); })
7133
7134 #define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
7135   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
7136                                           (__v8di)(__m512i)(B), (int)(imm), \
7137                                           (__v8di)_mm512_setzero_si512(), \
7138                                           (__mmask8)-1); })
7139
7140 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
7141   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
7142                                           (__v8di)(__m512i)(B), (int)(imm), \
7143                                           (__v8di)(__m512i)(W), \
7144                                           (__mmask8)(U)); })
7145
7146 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
7147   (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
7148                                           (__v8di)(__m512i)(B), (int)(imm), \
7149                                           (__v8di)_mm512_setzero_si512(), \
7150                                           (__mmask8)(U)); })
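
/*
 * The shuffle_{f,i}{32x4,64x2} macros rearrange whole 128-bit lanes: the
 * two low result lanes come from the first source and the two high lanes
 * from the second, each chosen by a 2-bit field of the immediate.  For
 * example:
 *
 *   __m512i lo = _mm512_shuffle_i64x2(a, b, 0x44);
 *                     // lanes 0,1 of a followed by lanes 0,1 of b
 */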
7151
7152 #define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
7153   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
7154                                    (__v8df)(__m512d)(B), \
7155                                    0  + (((M) >> 0) & 0x1), \
7156                                    8  + (((M) >> 1) & 0x1), \
7157                                    2  + (((M) >> 2) & 0x1), \
7158                                    10 + (((M) >> 3) & 0x1), \
7159                                    4  + (((M) >> 4) & 0x1), \
7160                                    12 + (((M) >> 5) & 0x1), \
7161                                    6  + (((M) >> 6) & 0x1), \
7162                                    14 + (((M) >> 7) & 0x1)); })
7163
7164 #define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
7165   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7166                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
7167                                        (__v8df)(__m512d)(W)); })
7168
7169 #define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
7170   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7171                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
7172                                        (__v8df)_mm512_setzero_pd()); })
7173
7174 #define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
7175   (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
7176                                    (__v16sf)(__m512)(B), \
7177                                    0  + (((M) >> 0) & 0x3), \
7178                                    0  + (((M) >> 2) & 0x3), \
7179                                    16 + (((M) >> 4) & 0x3), \
7180                                    16 + (((M) >> 6) & 0x3), \
7181                                    4  + (((M) >> 0) & 0x3), \
7182                                    4  + (((M) >> 2) & 0x3), \
7183                                    20 + (((M) >> 4) & 0x3), \
7184                                    20 + (((M) >> 6) & 0x3), \
7185                                    8  + (((M) >> 0) & 0x3), \
7186                                    8  + (((M) >> 2) & 0x3), \
7187                                    24 + (((M) >> 4) & 0x3), \
7188                                    24 + (((M) >> 6) & 0x3), \
7189                                    12 + (((M) >> 0) & 0x3), \
7190                                    12 + (((M) >> 2) & 0x3), \
7191                                    28 + (((M) >> 4) & 0x3), \
7192                                    28 + (((M) >> 6) & 0x3)); })
7193
7194 #define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
7195   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7196                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
7197                                       (__v16sf)(__m512)(W)); })
7198
7199 #define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
7200   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7201                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
7202                                       (__v16sf)_mm512_setzero_ps()); })
7203
7204 #define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
7205   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7206                                             (__v2df)(__m128d)(B), \
7207                                             (__v2df)_mm_setzero_pd(), \
7208                                             (__mmask8)-1, (int)(R)); })
7209
7210 static __inline__ __m128d __DEFAULT_FN_ATTRS
7211 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7212 {
7213  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7214                  (__v2df) __B,
7215                 (__v2df) __W,
7216                 (__mmask8) __U,
7217                 _MM_FROUND_CUR_DIRECTION);
7218 }
7219
7220 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
7221   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7222                                             (__v2df)(__m128d)(B), \
7223                                             (__v2df)(__m128d)(W), \
7224                                             (__mmask8)(U), (int)(R)); })
7225
7226 static __inline__ __m128d __DEFAULT_FN_ATTRS
7227 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7228 {
7229  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7230                  (__v2df) __B,
7231                 (__v2df) _mm_setzero_pd (),
7232                 (__mmask8) __U,
7233                 _MM_FROUND_CUR_DIRECTION);
7234 }
7235
7236 #define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
7237   (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
7238                                             (__v2df)(__m128d)(B), \
7239                                             (__v2df)_mm_setzero_pd(), \
7240                                             (__mmask8)(U), (int)(R)); })
7241
7242 #define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
7243   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7244                                            (__v4sf)(__m128)(B), \
7245                                            (__v4sf)_mm_setzero_ps(), \
7246                                            (__mmask8)-1, (int)(R)); })
7247
7248 static __inline__ __m128 __DEFAULT_FN_ATTRS
7249 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7250 {
7251  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7252                  (__v4sf) __B,
7253                 (__v4sf) __W,
7254                 (__mmask8) __U,
7255                 _MM_FROUND_CUR_DIRECTION);
7256 }
7257
7258 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
7259   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7260                                            (__v4sf)(__m128)(B), \
7261                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
7262                                            (int)(R)); })
7263
7264 static __inline__ __m128 __DEFAULT_FN_ATTRS
7265 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7266 {
7267  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7268                  (__v4sf) __B,
7269                 (__v4sf) _mm_setzero_ps (),
7270                 (__mmask8) __U,
7271                 _MM_FROUND_CUR_DIRECTION);
7272 }
7273
7274 #define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
7275   (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
7276                                            (__v4sf)(__m128)(B), \
7277                                            (__v4sf)_mm_setzero_ps(), \
7278                                            (__mmask8)(U), (int)(R)); })
7279
7280 static __inline__ __m512 __DEFAULT_FN_ATTRS
7281 _mm512_broadcast_f32x4 (__m128 __A)
7282 {
7283   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7284                  (__v16sf)
7285                  _mm512_undefined_ps (),
7286                  (__mmask16) -1);
7287 }
7288
7289 static __inline__ __m512 __DEFAULT_FN_ATTRS
7290 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
7291 {
7292   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7293                  (__v16sf) __O,
7294                  __M);
7295 }
7296
7297 static __inline__ __m512 __DEFAULT_FN_ATTRS
7298 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
7299 {
7300   return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7301                  (__v16sf)
7302                  _mm512_setzero_ps (),
7303                  __M);
7304 }
7305
7306 static __inline__ __m512d __DEFAULT_FN_ATTRS
7307 _mm512_broadcast_f64x4 (__m256d __A)
7308 {
7309   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7310                   (__v8df)
7311                   _mm512_undefined_pd (),
7312                   (__mmask8) -1);
7313 }
7314
7315 static __inline__ __m512d __DEFAULT_FN_ATTRS
7316 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
7317 {
7318   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7319                   (__v8df) __O,
7320                   __M);
7321 }
7322
7323 static __inline__ __m512d __DEFAULT_FN_ATTRS
7324 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
7325 {
7326   return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7327                   (__v8df)
7328                   _mm512_setzero_pd (),
7329                   __M);
7330 }
7331
7332 static __inline__ __m512i __DEFAULT_FN_ATTRS
7333 _mm512_broadcast_i32x4 (__m128i __A)
7334 {
7335   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7336                   (__v16si)
7337                   _mm512_undefined_epi32 (),
7338                   (__mmask16) -1);
7339 }
7340
7341 static __inline__ __m512i __DEFAULT_FN_ATTRS
7342 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
7343 {
7344   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7345                   (__v16si) __O,
7346                   __M);
7347 }
7348
7349 static __inline__ __m512i __DEFAULT_FN_ATTRS
7350 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
7351 {
7352   return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7353                   (__v16si)
7354                   _mm512_setzero_si512 (),
7355                   __M);
7356 }
7357
7358 static __inline__ __m512i __DEFAULT_FN_ATTRS
7359 _mm512_broadcast_i64x4 (__m256i __A)
7360 {
7361   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7362                   (__v8di)
7363                   _mm512_undefined_epi32 (),
7364                   (__mmask8) -1);
7365 }
7366
7367 static __inline__ __m512i __DEFAULT_FN_ATTRS
7368 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
7369 {
7370   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7371                   (__v8di) __O,
7372                   __M);
7373 }
7374
7375 static __inline__ __m512i __DEFAULT_FN_ATTRS
7376 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
7377 {
7378   return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7379                   (__v8di)
7380                   _mm512_setzero_si512 (),
7381                   __M);
7382 }
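
/*
 * The broadcast_{f,i}32x4 intrinsics repeat a 128-bit block four times
 * across the 512-bit destination, and the 64x4 forms repeat a 256-bit
 * block twice, each with the usual mask/maskz variants.  For example:
 *
 *   __m512 r = _mm512_broadcast_f32x4(quad);  // quad, quad, quad, quad
 */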
7383
7384 static __inline__ __m512d __DEFAULT_FN_ATTRS
7385 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7386 {
7387   return (__m512d)__builtin_ia32_selectpd_512(__M,
7388                                               (__v8df) _mm512_broadcastsd_pd(__A),
7389                                               (__v8df) __O);
7390 }
7391
7392 static __inline__ __m512d __DEFAULT_FN_ATTRS
7393 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7394 {
7395   return (__m512d)__builtin_ia32_selectpd_512(__M,
7396                                               (__v8df) _mm512_broadcastsd_pd(__A),
7397                                               (__v8df) _mm512_setzero_pd());
7398 }
7399
7400 static __inline__ __m512 __DEFAULT_FN_ATTRS
7401 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7402 {
7403   return (__m512)__builtin_ia32_selectps_512(__M,
7404                                              (__v16sf) _mm512_broadcastss_ps(__A),
7405                                              (__v16sf) __O);
7406 }
7407
7408 static __inline__ __m512 __DEFAULT_FN_ATTRS
7409 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7410 {
7411   return (__m512)__builtin_ia32_selectps_512(__M,
7412                                              (__v16sf) _mm512_broadcastss_ps(__A),
7413                                              (__v16sf) _mm512_setzero_ps());
7414 }
7415
7416 static __inline__ __m128i __DEFAULT_FN_ATTRS
7417 _mm512_cvtsepi32_epi8 (__m512i __A)
7418 {
7419   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7420                (__v16qi) _mm_undefined_si128 (),
7421                (__mmask16) -1);
7422 }
7423
7424 static __inline__ __m128i __DEFAULT_FN_ATTRS
7425 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7426 {
7427   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7428                (__v16qi) __O, __M);
7429 }
7430
7431 static __inline__ __m128i __DEFAULT_FN_ATTRS
7432 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7433 {
7434   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7435                (__v16qi) _mm_setzero_si128 (),
7436                __M);
7437 }
7438
7439 static __inline__ void __DEFAULT_FN_ATTRS
7440 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7441 {
7442   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7443 }
7444
7445 static __inline__ __m256i __DEFAULT_FN_ATTRS
7446 _mm512_cvtsepi32_epi16 (__m512i __A)
7447 {
7448   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7449                (__v16hi) _mm256_undefined_si256 (),
7450                (__mmask16) -1);
7451 }
7452
7453 static __inline__ __m256i __DEFAULT_FN_ATTRS
7454 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7455 {
7456   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7457                (__v16hi) __O, __M);
7458 }
7459
7460 static __inline__ __m256i __DEFAULT_FN_ATTRS
7461 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7462 {
7463   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7464                (__v16hi) _mm256_setzero_si256 (),
7465                __M);
7466 }
7467
7468 static __inline__ void __DEFAULT_FN_ATTRS
7469 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7470 {
7471   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7472 }
7473
7474 static __inline__ __m128i __DEFAULT_FN_ATTRS
7475 _mm512_cvtsepi64_epi8 (__m512i __A)
7476 {
7477   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7478                (__v16qi) _mm_undefined_si128 (),
7479                (__mmask8) -1);
7480 }
7481
7482 static __inline__ __m128i __DEFAULT_FN_ATTRS
7483 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7484 {
7485   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7486                (__v16qi) __O, __M);
7487 }
7488
7489 static __inline__ __m128i __DEFAULT_FN_ATTRS
7490 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7491 {
7492   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7493                (__v16qi) _mm_setzero_si128 (),
7494                __M);
7495 }
7496
7497 static __inline__ void __DEFAULT_FN_ATTRS
7498 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7499 {
7500   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7501 }
7502
7503 static __inline__ __m256i __DEFAULT_FN_ATTRS
7504 _mm512_cvtsepi64_epi32 (__m512i __A)
7505 {
7506   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7507                (__v8si) _mm256_undefined_si256 (),
7508                (__mmask8) -1);
7509 }
7510
7511 static __inline__ __m256i __DEFAULT_FN_ATTRS
7512 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7513 {
7514   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7515                (__v8si) __O, __M);
7516 }
7517
7518 static __inline__ __m256i __DEFAULT_FN_ATTRS
7519 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7520 {
7521   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7522                (__v8si) _mm256_setzero_si256 (),
7523                __M);
7524 }
7525
7526 static __inline__ void __DEFAULT_FN_ATTRS
7527 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7528 {
7529   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7530 }
7531
7532 static __inline__ __m128i __DEFAULT_FN_ATTRS
7533 _mm512_cvtsepi64_epi16 (__m512i __A)
7534 {
7535   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7536                (__v8hi) _mm_undefined_si128 (),
7537                (__mmask8) -1);
7538 }
7539
7540 static __inline__ __m128i __DEFAULT_FN_ATTRS
7541 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7542 {
7543   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7544                (__v8hi) __O, __M);
7545 }
7546
7547 static __inline__ __m128i __DEFAULT_FN_ATTRS
7548 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7549 {
7550   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7551                (__v8hi) _mm_setzero_si128 (),
7552                __M);
7553 }
7554
7555 static __inline__ void __DEFAULT_FN_ATTRS
7556 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7557 {
7558   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7559 }
7560
7561 static __inline__ __m128i __DEFAULT_FN_ATTRS
7562 _mm512_cvtusepi32_epi8 (__m512i __A)
7563 {
7564   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7565                 (__v16qi) _mm_undefined_si128 (),
7566                 (__mmask16) -1);
7567 }
7568
7569 static __inline__ __m128i __DEFAULT_FN_ATTRS
7570 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7571 {
7572   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7573                 (__v16qi) __O,
7574                 __M);
7575 }
7576
7577 static __inline__ __m128i __DEFAULT_FN_ATTRS
7578 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7579 {
7580   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7581                 (__v16qi) _mm_setzero_si128 (),
7582                 __M);
7583 }
7584
7585 static __inline__ void __DEFAULT_FN_ATTRS
7586 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7587 {
7588   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7589 }
7590
7591 static __inline__ __m256i __DEFAULT_FN_ATTRS
7592 _mm512_cvtusepi32_epi16 (__m512i __A)
7593 {
7594   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7595                 (__v16hi) _mm256_undefined_si256 (),
7596                 (__mmask16) -1);
7597 }
7598
7599 static __inline__ __m256i __DEFAULT_FN_ATTRS
7600 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7601 {
7602   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7603                 (__v16hi) __O,
7604                 __M);
7605 }
7606
7607 static __inline__ __m256i __DEFAULT_FN_ATTRS
7608 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7609 {
7610   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7611                 (__v16hi) _mm256_setzero_si256 (),
7612                 __M);
7613 }
7614
7615 static __inline__ void __DEFAULT_FN_ATTRS
7616 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7617 {
7618   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7619 }
7620
7621 static __inline__ __m128i __DEFAULT_FN_ATTRS
7622 _mm512_cvtusepi64_epi8 (__m512i __A)
7623 {
7624   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7625                 (__v16qi) _mm_undefined_si128 (),
7626                 (__mmask8) -1);
7627 }
7628
7629 static __inline__ __m128i __DEFAULT_FN_ATTRS
7630 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7631 {
7632   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7633                 (__v16qi) __O,
7634                 __M);
7635 }
7636
7637 static __inline__ __m128i __DEFAULT_FN_ATTRS
7638 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7639 {
7640   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7641                 (__v16qi) _mm_setzero_si128 (),
7642                 __M);
7643 }
7644
7645 static __inline__ void __DEFAULT_FN_ATTRS
7646 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7647 {
7648   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7649 }
7650
7651 static __inline__ __m256i __DEFAULT_FN_ATTRS
7652 _mm512_cvtusepi64_epi32 (__m512i __A)
7653 {
7654   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7655                 (__v8si) _mm256_undefined_si256 (),
7656                 (__mmask8) -1);
7657 }
7658
7659 static __inline__ __m256i __DEFAULT_FN_ATTRS
7660 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7661 {
7662   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7663                 (__v8si) __O, __M);
7664 }
7665
7666 static __inline__ __m256i __DEFAULT_FN_ATTRS
7667 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7668 {
7669   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7670                 (__v8si) _mm256_setzero_si256 (),
7671                 __M);
7672 }
7673
7674 static __inline__ void __DEFAULT_FN_ATTRS
7675 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7676 {
7677   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7678 }
7679
7680 static __inline__ __m128i __DEFAULT_FN_ATTRS
7681 _mm512_cvtusepi64_epi16 (__m512i __A)
7682 {
7683   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7684                 (__v8hi) _mm_undefined_si128 (),
7685                 (__mmask8) -1);
7686 }
7687
7688 static __inline__ __m128i __DEFAULT_FN_ATTRS
7689 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7690 {
7691   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7692                 (__v8hi) __O, __M);
7693 }
7694
7695 static __inline__ __m128i __DEFAULT_FN_ATTRS
7696 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7697 {
7698   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7699                 (__v8hi) _mm_setzero_si128 (),
7700                 __M);
7701 }
7702
7703 static __inline__ void __DEFAULT_FN_ATTRS
7704 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7705 {
7706   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7707 }
7708
7709 static __inline__ __m128i __DEFAULT_FN_ATTRS
7710 _mm512_cvtepi32_epi8 (__m512i __A)
7711 {
7712   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7713               (__v16qi) _mm_undefined_si128 (),
7714               (__mmask16) -1);
7715 }
7716
7717 static __inline__ __m128i __DEFAULT_FN_ATTRS
7718 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7719 {
7720   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7721               (__v16qi) __O, __M);
7722 }
7723
7724 static __inline__ __m128i __DEFAULT_FN_ATTRS
7725 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7726 {
7727   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7728               (__v16qi) _mm_setzero_si128 (),
7729               __M);
7730 }
7731
7732 static __inline__ void __DEFAULT_FN_ATTRS
7733 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7734 {
7735   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7736 }
7737
7738 static __inline__ __m256i __DEFAULT_FN_ATTRS
7739 _mm512_cvtepi32_epi16 (__m512i __A)
7740 {
7741   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7742               (__v16hi) _mm256_undefined_si256 (),
7743               (__mmask16) -1);
7744 }
7745
7746 static __inline__ __m256i __DEFAULT_FN_ATTRS
7747 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7748 {
7749   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7750               (__v16hi) __O, __M);
7751 }
7752
7753 static __inline__ __m256i __DEFAULT_FN_ATTRS
7754 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7755 {
7756   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7757               (__v16hi) _mm256_setzero_si256 (),
7758               __M);
7759 }
7760
7761 static __inline__ void __DEFAULT_FN_ATTRS
7762 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7763 {
7764   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7765 }
7766
7767 static __inline__ __m128i __DEFAULT_FN_ATTRS
7768 _mm512_cvtepi64_epi8 (__m512i __A)
7769 {
7770   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7771               (__v16qi) _mm_undefined_si128 (),
7772               (__mmask8) -1);
7773 }
7774
7775 static __inline__ __m128i __DEFAULT_FN_ATTRS
7776 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7777 {
7778   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7779               (__v16qi) __O, __M);
7780 }
7781
7782 static __inline__ __m128i __DEFAULT_FN_ATTRS
7783 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7784 {
7785   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7786               (__v16qi) _mm_setzero_si128 (),
7787               __M);
7788 }
7789
7790 static __inline__ void __DEFAULT_FN_ATTRS
7791 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7792 {
7793   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7794 }
7795
7796 static __inline__ __m256i __DEFAULT_FN_ATTRS
7797 _mm512_cvtepi64_epi32 (__m512i __A)
7798 {
7799   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7800               (__v8si) _mm256_undefined_si256 (),
7801               (__mmask8) -1);
7802 }
7803
7804 static __inline__ __m256i __DEFAULT_FN_ATTRS
7805 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7806 {
7807   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7808               (__v8si) __O, __M);
7809 }
7810
7811 static __inline__ __m256i __DEFAULT_FN_ATTRS
7812 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7813 {
7814   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7815               (__v8si) _mm256_setzero_si256 (),
7816               __M);
7817 }
7818
7819 static __inline__ void __DEFAULT_FN_ATTRS
7820 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7821 {
7822   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7823 }
7824
7825 static __inline__ __m128i __DEFAULT_FN_ATTRS
7826 _mm512_cvtepi64_epi16 (__m512i __A)
7827 {
7828   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7829               (__v8hi) _mm_undefined_si128 (),
7830               (__mmask8) -1);
7831 }
7832
7833 static __inline__ __m128i __DEFAULT_FN_ATTRS
7834 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7835 {
7836   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7837               (__v8hi) __O, __M);
7838 }
7839
7840 static __inline__ __m128i __DEFAULT_FN_ATTRS
7841 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7842 {
7843   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7844               (__v8hi) _mm_setzero_si128 (),
7845               __M);
7846 }
7847
7848 static __inline__ void __DEFAULT_FN_ATTRS
7849 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7850 {
7851   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7852 }
7853
7854 #define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
7855   (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
7856                                    (__v16si)_mm512_undefined_epi32(), \
7857                                    0 + ((imm) & 0x3) * 4,             \
7858                                    1 + ((imm) & 0x3) * 4,             \
7859                                    2 + ((imm) & 0x3) * 4,             \
7860                                    3 + ((imm) & 0x3) * 4); })
7861
7862 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
7863   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7864                                 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
7865                                 (__v4si)(W)); })
7866
7867 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
7868   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7869                                 (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
7870                                 (__v4si)_mm_setzero_si128()); })
7871
7872 #define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
7873   (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
7874                                    (__v8di)_mm512_undefined_epi32(), \
7875                                    ((imm) & 1) ? 4 : 0,              \
7876                                    ((imm) & 1) ? 5 : 1,              \
7877                                    ((imm) & 1) ? 6 : 2,              \
7878                                    ((imm) & 1) ? 7 : 3); })
7879
7880 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
7881   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U),      \
7882                                 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
7883                                 (__v4di)(W)); })
7884
7885 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
7886   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U),      \
7887                                 (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
7888                                 (__v4di)_mm256_setzero_si256()); })
7889
7890 #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
7891   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
7892                                  (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
7893                                  ((imm) & 0x1) ?  0 :  8, \
7894                                  ((imm) & 0x1) ?  1 :  9, \
7895                                  ((imm) & 0x1) ?  2 : 10, \
7896                                  ((imm) & 0x1) ?  3 : 11, \
7897                                  ((imm) & 0x1) ?  8 :  4, \
7898                                  ((imm) & 0x1) ?  9 :  5, \
7899                                  ((imm) & 0x1) ? 10 :  6, \
7900                                  ((imm) & 0x1) ? 11 :  7); })
7901
7902 #define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
7903   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7904                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7905                                   (__v8df)(W)); })
7906
7907 #define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
7908   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7909                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7910                                   (__v8df)_mm512_setzero_pd()); })
7911
7912 #define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
7913   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
7914                                  (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
7915                                  ((imm) & 0x1) ?  0 :  8, \
7916                                  ((imm) & 0x1) ?  1 :  9, \
7917                                  ((imm) & 0x1) ?  2 : 10, \
7918                                  ((imm) & 0x1) ?  3 : 11, \
7919                                  ((imm) & 0x1) ?  8 :  4, \
7920                                  ((imm) & 0x1) ?  9 :  5, \
7921                                  ((imm) & 0x1) ? 10 :  6, \
7922                                  ((imm) & 0x1) ? 11 :  7); })
7923
7924 #define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
7925   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7926                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7927                                   (__v8di)(W)); })
7928
7929 #define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
7930   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7931                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7932                                   (__v8di)_mm512_setzero_si512()); })
7933
7934 #define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
7935   (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
7936                                   (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
7937                                   (((imm) & 0x3) == 0) ? 16 :  0, \
7938                                   (((imm) & 0x3) == 0) ? 17 :  1, \
7939                                   (((imm) & 0x3) == 0) ? 18 :  2, \
7940                                   (((imm) & 0x3) == 0) ? 19 :  3, \
7941                                   (((imm) & 0x3) == 1) ? 16 :  4, \
7942                                   (((imm) & 0x3) == 1) ? 17 :  5, \
7943                                   (((imm) & 0x3) == 1) ? 18 :  6, \
7944                                   (((imm) & 0x3) == 1) ? 19 :  7, \
7945                                   (((imm) & 0x3) == 2) ? 16 :  8, \
7946                                   (((imm) & 0x3) == 2) ? 17 :  9, \
7947                                   (((imm) & 0x3) == 2) ? 18 : 10, \
7948                                   (((imm) & 0x3) == 2) ? 19 : 11, \
7949                                   (((imm) & 0x3) == 3) ? 16 : 12, \
7950                                   (((imm) & 0x3) == 3) ? 17 : 13, \
7951                                   (((imm) & 0x3) == 3) ? 18 : 14, \
7952                                   (((imm) & 0x3) == 3) ? 19 : 15); })
7953
7954 #define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
7955   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7956                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7957                                  (__v16sf)(W)); })
7958
7959 #define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
7960   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7961                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7962                                  (__v16sf)_mm512_setzero_ps()); })
7963
7964 #define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
7965   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
7966                                  (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
7967                                  (((imm) & 0x3) == 0) ? 16 :  0, \
7968                                  (((imm) & 0x3) == 0) ? 17 :  1, \
7969                                  (((imm) & 0x3) == 0) ? 18 :  2, \
7970                                  (((imm) & 0x3) == 0) ? 19 :  3, \
7971                                  (((imm) & 0x3) == 1) ? 16 :  4, \
7972                                  (((imm) & 0x3) == 1) ? 17 :  5, \
7973                                  (((imm) & 0x3) == 1) ? 18 :  6, \
7974                                  (((imm) & 0x3) == 1) ? 19 :  7, \
7975                                  (((imm) & 0x3) == 2) ? 16 :  8, \
7976                                  (((imm) & 0x3) == 2) ? 17 :  9, \
7977                                  (((imm) & 0x3) == 2) ? 18 : 10, \
7978                                  (((imm) & 0x3) == 2) ? 19 : 11, \
7979                                  (((imm) & 0x3) == 3) ? 16 : 12, \
7980                                  (((imm) & 0x3) == 3) ? 17 : 13, \
7981                                  (((imm) & 0x3) == 3) ? 18 : 14, \
7982                                  (((imm) & 0x3) == 3) ? 19 : 15); })
7983
7984 #define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
7985   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7986                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7987                                  (__v16si)(W)); })
7988
7989 #define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
7990   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7991                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7992                                  (__v16si)_mm512_setzero_si512()); })
7993
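/*
 * The extract/insert macros above operate on whole 128-bit or 256-bit lanes
 * selected by the immediate.  A minimal sketch, assuming caller-provided
 * values `v` (__m512i) and `chunk` (__m128i):
 *
 *   __m128i lane2 = _mm512_extracti32x4_epi32(v, 2);    // dwords 8..11
 *   __m256i hi    = _mm512_extracti64x4_epi64(v, 1);    // qwords 4..7
 *   __m512i upd   = _mm512_inserti32x4(v, chunk, 3);    // replace dwords 12..15
 */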
7994 #define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
7995   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7996                                             (int)(((C)<<2) | (B)), \
7997                                             (__v8df)_mm512_undefined_pd(), \
7998                                             (__mmask8)-1, (int)(R)); })
7999
8000 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
8001   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8002                                             (int)(((C)<<2) | (B)), \
8003                                             (__v8df)(__m512d)(W), \
8004                                             (__mmask8)(U), (int)(R)); })
8005
8006 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
8007   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8008                                             (int)(((C)<<2) | (B)), \
8009                                             (__v8df)_mm512_setzero_pd(), \
8010                                             (__mmask8)(U), (int)(R)); })
8011
8012 #define _mm512_getmant_pd(A, B, C) __extension__ ({ \
8013   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8014                                             (int)(((C)<<2) | (B)), \
8015                                             (__v8df)_mm512_setzero_pd(), \
8016                                             (__mmask8)-1, \
8017                                             _MM_FROUND_CUR_DIRECTION); })
8018
8019 #define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
8020   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8021                                             (int)(((C)<<2) | (B)), \
8022                                             (__v8df)(__m512d)(W), \
8023                                             (__mmask8)(U), \
8024                                             _MM_FROUND_CUR_DIRECTION); })
8025
8026 #define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
8027   (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8028                                             (int)(((C)<<2) | (B)), \
8029                                             (__v8df)_mm512_setzero_pd(), \
8030                                             (__mmask8)(U), \
8031                                             _MM_FROUND_CUR_DIRECTION); })
8032
8033 #define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
8034   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8035                                            (int)(((C)<<2) | (B)), \
8036                                            (__v16sf)_mm512_undefined_ps(), \
8037                                            (__mmask16)-1, (int)(R)); })
8038
8039 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
8040   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8041                                            (int)(((C)<<2) | (B)), \
8042                                            (__v16sf)(__m512)(W), \
8043                                            (__mmask16)(U), (int)(R)); })
8044
8045 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
8046   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8047                                            (int)(((C)<<2) | (B)), \
8048                                            (__v16sf)_mm512_setzero_ps(), \
8049                                            (__mmask16)(U), (int)(R)); })
8050
8051 #define _mm512_getmant_ps(A, B, C) __extension__ ({ \
8052   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8053                                            (int)(((C)<<2)|(B)), \
8054                                            (__v16sf)_mm512_undefined_ps(), \
8055                                            (__mmask16)-1, \
8056                                            _MM_FROUND_CUR_DIRECTION); })
8057
8058 #define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8059   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8060                                            (int)(((C)<<2)|(B)), \
8061                                            (__v16sf)(__m512)(W), \
8062                                            (__mmask16)(U), \
8063                                            _MM_FROUND_CUR_DIRECTION); })
8064
8065 #define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8066   (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8067                                            (int)(((C)<<2)|(B)), \
8068                                            (__v16sf)_mm512_setzero_ps(), \
8069                                            (__mmask16)(U), \
8070                                            _MM_FROUND_CUR_DIRECTION); })
8071
8072 #define _mm512_getexp_round_pd(A, R) __extension__ ({ \
8073   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8074                                            (__v8df)_mm512_undefined_pd(), \
8075                                            (__mmask8)-1, (int)(R)); })
8076
8077 #define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
8078   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8079                                            (__v8df)(__m512d)(W), \
8080                                            (__mmask8)(U), (int)(R)); })
8081
8082 #define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
8083   (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8084                                            (__v8df)_mm512_setzero_pd(), \
8085                                            (__mmask8)(U), (int)(R)); })
8086
8087 static __inline__ __m512d __DEFAULT_FN_ATTRS
8088 _mm512_getexp_pd (__m512d __A)
8089 {
8090   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8091                 (__v8df) _mm512_undefined_pd (),
8092                 (__mmask8) -1,
8093                 _MM_FROUND_CUR_DIRECTION);
8094 }
8095
8096 static __inline__ __m512d __DEFAULT_FN_ATTRS
8097 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
8098 {
8099   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8100                 (__v8df) __W,
8101                 (__mmask8) __U,
8102                 _MM_FROUND_CUR_DIRECTION);
8103 }
8104
8105 static __inline__ __m512d __DEFAULT_FN_ATTRS
8106 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
8107 {
8108   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8109                 (__v8df) _mm512_setzero_pd (),
8110                 (__mmask8) __U,
8111                 _MM_FROUND_CUR_DIRECTION);
8112 }
8113
8114 #define _mm512_getexp_round_ps(A, R) __extension__ ({ \
8115   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8116                                           (__v16sf)_mm512_undefined_ps(), \
8117                                           (__mmask16)-1, (int)(R)); })
8118
8119 #define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
8120   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8121                                           (__v16sf)(__m512)(W), \
8122                                           (__mmask16)(U), (int)(R)); })
8123
8124 #define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
8125   (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8126                                           (__v16sf)_mm512_setzero_ps(), \
8127                                           (__mmask16)(U), (int)(R)); })
8128
8129 static __inline__ __m512 __DEFAULT_FN_ATTRS
8130 _mm512_getexp_ps (__m512 __A)
8131 {
8132   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8133                (__v16sf) _mm512_undefined_ps (),
8134                (__mmask16) -1,
8135                _MM_FROUND_CUR_DIRECTION);
8136 }
8137
8138 static __inline__ __m512 __DEFAULT_FN_ATTRS
8139 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8140 {
8141   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8142                (__v16sf) __W,
8143                (__mmask16) __U,
8144                _MM_FROUND_CUR_DIRECTION);
8145 }
8146
8147 static __inline__ __m512 __DEFAULT_FN_ATTRS
8148 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8149 {
8150   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8151                (__v16sf) _mm512_setzero_ps (),
8152                (__mmask16) __U,
8153                _MM_FROUND_CUR_DIRECTION);
8154 }
8155
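/*
 * The getexp/getmant family above decomposes each element: getexp returns the
 * unbiased exponent as a floating-point value (effectively floor(log2(|x|))),
 * and getmant returns the normalized mantissa, with B selecting the target
 * interval and C the sign treatment (the _MM_MANT_NORM_* and _MM_MANT_SIGN_*
 * enumerators).  A small sketch with illustrative values:
 *
 *   __m512d x = _mm512_set1_pd(24.0);                   // 24 == 1.5 * 2^4
 *   __m512d e = _mm512_getexp_pd(x);                    // every lane == 4.0
 *   __m512d m = _mm512_getmant_pd(x, _MM_MANT_NORM_1_2,
 *                                 _MM_MANT_SIGN_src);   // every lane == 1.5
 */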
8156 #define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
8157   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
8158                                        (float const *)(addr), \
8159                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
8160                                        (int)(scale)); })
8161
8162 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8163   (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
8164                                        (float const *)(addr), \
8165                                        (__v8di)(__m512i)(index), \
8166                                        (__mmask8)(mask), (int)(scale)); })
8167
8168 #define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
8169   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
8170                                         (int const *)(addr), \
8171                                         (__v8di)(__m512i)(index), \
8172                                         (__mmask8)-1, (int)(scale)); })
8173
8174 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8175   (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
8176                                         (int const *)(addr), \
8177                                         (__v8di)(__m512i)(index), \
8178                                         (__mmask8)(mask), (int)(scale)); })
8179
8180 #define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
8181   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
8182                                        (double const *)(addr), \
8183                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
8184                                        (int)(scale)); })
8185
8186 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8187   (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
8188                                        (double const *)(addr), \
8189                                        (__v8di)(__m512i)(index), \
8190                                        (__mmask8)(mask), (int)(scale)); })
8191
8192 #define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
8193   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
8194                                        (long long const *)(addr), \
8195                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
8196                                        (int)(scale)); })
8197
8198 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8199   (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
8200                                        (long long const *)(addr), \
8201                                        (__v8di)(__m512i)(index), \
8202                                        (__mmask8)(mask), (int)(scale)); })
8203
8204 #define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
8205   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
8206                                        (float const *)(addr), \
8207                                        (__v16si)(__m512i)(index), \
8208                                        (__mmask16)-1, (int)(scale)); })
8209
8210 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8211   (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
8212                                        (float const *)(addr), \
8213                                        (__v16si)(__m512i)(index), \
8214                                        (__mmask16)(mask), (int)(scale)); })
8215
8216 #define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
8217   (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
8218                                         (int const *)(addr), \
8219                                         (__v16si)(__m512i)(index), \
8220                                         (__mmask16)-1, (int)(scale)); })
8221
8222 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8223   (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
8224                                         (int const *)(addr), \
8225                                         (__v16si)(__m512i)(index), \
8226                                         (__mmask16)(mask), (int)(scale)); })
8227
8228 #define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
8229   (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
8230                                        (double const *)(addr), \
8231                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
8232                                        (int)(scale)); })
8233
8234 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8235   (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
8236                                        (double const *)(addr), \
8237                                        (__v8si)(__m256i)(index), \
8238                                        (__mmask8)(mask), (int)(scale)); })
8239
8240 #define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
8241   (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
8242                                        (long long const *)(addr), \
8243                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
8244                                        (int)(scale)); })
8245
8246 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8247   (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
8248                                        (long long const *)(addr), \
8249                                        (__v8si)(__m256i)(index), \
8250                                        (__mmask8)(mask), (int)(scale)); })
8251
8252 #define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
8253   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
8254                                 (__v8di)(__m512i)(index), \
8255                                 (__v8sf)(__m256)(v1), (int)(scale)); })
8256
8257 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
8258   __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
8259                                 (__v8di)(__m512i)(index), \
8260                                 (__v8sf)(__m256)(v1), (int)(scale)); })
8261
8262 #define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
8263   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
8264                                 (__v8di)(__m512i)(index), \
8265                                 (__v8si)(__m256i)(v1), (int)(scale)); })
8266
8267 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
8268   __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
8269                                 (__v8di)(__m512i)(index), \
8270                                 (__v8si)(__m256i)(v1), (int)(scale)); })
8271
8272 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
8273   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
8274                                (__v8di)(__m512i)(index), \
8275                                (__v8df)(__m512d)(v1), (int)(scale)); })
8276
8277 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8278   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
8279                                (__v8di)(__m512i)(index), \
8280                                (__v8df)(__m512d)(v1), (int)(scale)); })
8281
8282 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
8283   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
8284                                (__v8di)(__m512i)(index), \
8285                                (__v8di)(__m512i)(v1), (int)(scale)); })
8286
8287 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8288   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
8289                                (__v8di)(__m512i)(index), \
8290                                (__v8di)(__m512i)(v1), (int)(scale)); })
8291
8292 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
8293   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
8294                                 (__v16si)(__m512i)(index), \
8295                                 (__v16sf)(__m512)(v1), (int)(scale)); })
8296
8297 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
8298   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
8299                                 (__v16si)(__m512i)(index), \
8300                                 (__v16sf)(__m512)(v1), (int)(scale)); })
8301
8302 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
8303   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
8304                                 (__v16si)(__m512i)(index), \
8305                                 (__v16si)(__m512i)(v1), (int)(scale)); })
8306
8307 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
8308   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
8309                                 (__v16si)(__m512i)(index), \
8310                                 (__v16si)(__m512i)(v1), (int)(scale)); })
8311
8312 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
8313   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
8314                                (__v8si)(__m256i)(index), \
8315                                (__v8df)(__m512d)(v1), (int)(scale)); })
8316
8317 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8318   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
8319                                (__v8si)(__m256i)(index), \
8320                                (__v8df)(__m512d)(v1), (int)(scale)); })
8321
8322 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
8323   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
8324                                (__v8si)(__m256i)(index), \
8325                                (__v8di)(__m512i)(v1), (int)(scale)); })
8326
8327 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8328   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
8329                                (__v8si)(__m256i)(index), \
8330                                (__v8di)(__m512i)(v1), (int)(scale)); })
8331
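/*
 * The gather/scatter macros above take a base address, a vector of indices
 * and a byte scale of 1, 2, 4 or 8; the masked forms touch only lanes whose
 * mask bit is set.  A minimal sketch, assuming a caller-provided float array
 * `tbl` and a __m512i vector of 32-bit indices `idx`:
 *
 *   __m512 g = _mm512_i32gather_ps(idx, tbl, 4);        // g[i] = tbl[idx[i]]
 *   _mm512_mask_i32scatter_ps(tbl, 0xFF00, idx, g, 4);  // write lanes 8..15 only
 */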
8332 static __inline__ __m128 __DEFAULT_FN_ATTRS
8333 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8334 {
8335  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8336           (__v4sf) __A,
8337           (__v4sf) __B,
8338           (__mmask8) __U,
8339           _MM_FROUND_CUR_DIRECTION);
8340 }
8341
8342 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
8343   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8344                                         (__v4sf)(__m128)(A), \
8345                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
8346                                         (int)(R)); })
8347
8348 static __inline__ __m128 __DEFAULT_FN_ATTRS
8349 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8350 {
8351  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8352           (__v4sf) __B,
8353           (__v4sf) __C,
8354           (__mmask8) __U,
8355           _MM_FROUND_CUR_DIRECTION);
8356 }
8357
8358 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
8359   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8360                                          (__v4sf)(__m128)(B), \
8361                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
8362                                          (int)(R)); })
8363
8364 static __inline__ __m128 __DEFAULT_FN_ATTRS
8365 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8366 {
8367  return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8368           (__v4sf) __X,
8369           (__v4sf) __Y,
8370           (__mmask8) __U,
8371           _MM_FROUND_CUR_DIRECTION);
8372 }
8373
8374 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
8375   (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
8376                                          (__v4sf)(__m128)(X), \
8377                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
8378                                          (int)(R)); })
8379
8380 static __inline__ __m128 __DEFAULT_FN_ATTRS
8381 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8382 {
8383  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8384           (__v4sf) __A,
8385           -(__v4sf) __B,
8386           (__mmask8) __U,
8387           _MM_FROUND_CUR_DIRECTION);
8388 }
8389
8390 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
8391   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8392                                         (__v4sf)(__m128)(A), \
8393                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
8394                                         (int)(R)); })
8395
8396 static __inline__ __m128 __DEFAULT_FN_ATTRS
8397 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8398 {
8399  return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8400           (__v4sf) __B,
8401           -(__v4sf) __C,
8402           (__mmask8) __U,
8403           _MM_FROUND_CUR_DIRECTION);
8404 }
8405
8406 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
8407   (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8408                                          (__v4sf)(__m128)(B), \
8409                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
8410                                          (int)(R)); })
8411
8412 static __inline__ __m128 __DEFAULT_FN_ATTRS
8413 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8414 {
8415  return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8416           (__v4sf) __X,
8417           (__v4sf) __Y,
8418           (__mmask8) __U,
8419           _MM_FROUND_CUR_DIRECTION);
8420 }
8421
8422 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
8423   (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8424                                          (__v4sf)(__m128)(X), \
8425                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
8426                                          (int)(R)); })
8427
8428 static __inline__ __m128 __DEFAULT_FN_ATTRS
8429 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8430 {
8431  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8432           -(__v4sf) __A,
8433           (__v4sf) __B,
8434           (__mmask8) __U,
8435           _MM_FROUND_CUR_DIRECTION);
8436 }
8437
8438 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
8439   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8440                                         -(__v4sf)(__m128)(A), \
8441                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
8442                                         (int)(R)); })
8443
8444 static __inline__ __m128 __DEFAULT_FN_ATTRS
8445 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8446 {
8447  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8448           (__v4sf) __B,
8449           (__v4sf) __C,
8450           (__mmask8) __U,
8451           _MM_FROUND_CUR_DIRECTION);
8452 }
8453
8454 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
8455   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8456                                          (__v4sf)(__m128)(B), \
8457                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
8458                                          (int)(R)); })
8459
8460 static __inline__ __m128 __DEFAULT_FN_ATTRS
8461 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8462 {
8463  return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8464           (__v4sf) __X,
8465           (__v4sf) __Y,
8466           (__mmask8) __U,
8467           _MM_FROUND_CUR_DIRECTION);
8468 }
8469
8470 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
8471   (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
8472                                          (__v4sf)(__m128)(X), \
8473                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
8474                                          (int)(R)); })
8475
8476 static __inline__ __m128 __DEFAULT_FN_ATTRS
8477 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8478 {
8479  return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8480           -(__v4sf) __A,
8481           -(__v4sf) __B,
8482           (__mmask8) __U,
8483           _MM_FROUND_CUR_DIRECTION);
8484 }
8485
8486 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
8487   (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8488                                         -(__v4sf)(__m128)(A), \
8489                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
8490                                         (int)(R)); })
8491
8492 static __inline__ __m128 __DEFAULT_FN_ATTRS
8493 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8494 {
8495  return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8496           (__v4sf) __B,
8497           -(__v4sf) __C,
8498           (__mmask8) __U,
8499           _MM_FROUND_CUR_DIRECTION);
8500 }
8501
8502 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
8503   (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
8504                                          (__v4sf)(__m128)(B), \
8505                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
8506                                          (int)(R)); })
8507
8508 static __inline__ __m128 __DEFAULT_FN_ATTRS
8509 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8510 {
8511  return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8512           (__v4sf) __X,
8513           (__v4sf) __Y,
8514           (__mmask8) __U,
8515           _MM_FROUND_CUR_DIRECTION);
8516 }
8517
8518 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
8519   (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
8520                                          (__v4sf)(__m128)(X), \
8521                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
8522                                          (int)(R)); })
8523
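/*
 * In the masked scalar FMA forms above only element 0 participates: when bit
 * 0 of the mask is set, the low element becomes the fused result (a*b+c for
 * fmadd, with the appropriate negations for fmsub/fnmadd/fnmsub); otherwise
 * it is taken from the merge source (_mask/_mask3) or zeroed (_maskz).  The
 * upper elements are passed through from the first vector operand.  A small
 * sketch with illustrative values:
 *
 *   __m128 w = _mm_set_ss(1.0f), a = _mm_set_ss(2.0f), b = _mm_set_ss(3.0f);
 *   __m128 r0 = _mm_mask_fmadd_ss(w, 0x0, a, b);   // low lane stays 1.0f
 *   __m128 r1 = _mm_mask_fmadd_ss(w, 0x1, a, b);   // low lane = 1*2+3 = 5.0f
 */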
8524 static __inline__ __m128d __DEFAULT_FN_ATTRS
8525 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8526 {
8527  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8528           (__v2df) __A,
8529           (__v2df) __B,
8530           (__mmask8) __U,
8531           _MM_FROUND_CUR_DIRECTION);
8532 }
8533
8534 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
8535   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8536                                          (__v2df)(__m128d)(A), \
8537                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
8538                                          (int)(R)); })
8539
8540 static __inline__ __m128d __DEFAULT_FN_ATTRS
8541 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8542 {
8543  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8544           (__v2df) __B,
8545           (__v2df) __C,
8546           (__mmask8) __U,
8547           _MM_FROUND_CUR_DIRECTION);
8548 }
8549
8550 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
8551   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8552                                           (__v2df)(__m128d)(B), \
8553                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
8554                                           (int)(R)); })
8555
8556 static __inline__ __m128d __DEFAULT_FN_ATTRS
8557 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8558 {
8559  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8560           (__v2df) __X,
8561           (__v2df) __Y,
8562           (__mmask8) __U,
8563           _MM_FROUND_CUR_DIRECTION);
8564 }
8565
8566 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
8567   (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8568                                           (__v2df)(__m128d)(X), \
8569                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
8570                                           (int)(R)); })
8571
8572 static __inline__ __m128d __DEFAULT_FN_ATTRS
8573 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8574 {
8575  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8576           (__v2df) __A,
8577           -(__v2df) __B,
8578           (__mmask8) __U,
8579           _MM_FROUND_CUR_DIRECTION);
8580 }
8581
8582 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
8583   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8584                                          (__v2df)(__m128d)(A), \
8585                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
8586                                          (int)(R)); })
8587
8588 static __inline__ __m128d __DEFAULT_FN_ATTRS
8589 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8590 {
8591  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8592           (__v2df) __B,
8593           -(__v2df) __C,
8594           (__mmask8) __U,
8595           _MM_FROUND_CUR_DIRECTION);
8596 }
8597
8598 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
8599   (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8600                                           (__v2df)(__m128d)(B), \
8601                                           -(__v2df)(__m128d)(C), \
8602                                           (__mmask8)(U), (int)(R)); })
8603
8604 static __inline__ __m128d __DEFAULT_FN_ATTRS
8605 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8606 {
8607  return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8608           (__v2df) __X,
8609           (__v2df) __Y,
8610           (__mmask8) __U,
8611           _MM_FROUND_CUR_DIRECTION);
8612 }
8613
8614 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
8615   (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8616                                           (__v2df)(__m128d)(X), \
8617                                           (__v2df)(__m128d)(Y), \
8618                                           (__mmask8)(U), (int)(R)); })
8619
8620 static __inline__ __m128d __DEFAULT_FN_ATTRS
8621 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8622 {
8623  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8624           -(__v2df) __A,
8625           (__v2df) __B,
8626           (__mmask8) __U,
8627           _MM_FROUND_CUR_DIRECTION);
8628 }
8629
8630 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
8631   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8632                                          -(__v2df)(__m128d)(A), \
8633                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
8634                                          (int)(R)); })
8635
8636 static __inline__ __m128d __DEFAULT_FN_ATTRS
8637 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8638 {
8639  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8640           (__v2df) __B,
8641           (__v2df) __C,
8642           (__mmask8) __U,
8643           _MM_FROUND_CUR_DIRECTION);
8644 }
8645
8646 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
8647   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
8648                                           (__v2df)(__m128d)(B), \
8649                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
8650                                           (int)(R)); })
8651
8652 static __inline__ __m128d __DEFAULT_FN_ATTRS
8653 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8654 {
8655  return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8656           (__v2df) __X,
8657           (__v2df) __Y,
8658           (__mmask8) __U,
8659           _MM_FROUND_CUR_DIRECTION);
8660 }
8661
8662 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
8663   (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
8664                                           (__v2df)(__m128d)(X), \
8665                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
8666                                           (int)(R)); })
8667
8668 static __inline__ __m128d __DEFAULT_FN_ATTRS
8669 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8670 {
8671  return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8672           -(__v2df) __A,
8673           -(__v2df) __B,
8674           (__mmask8) __U,
8675           _MM_FROUND_CUR_DIRECTION);
8676 }
8677
8678 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
8679   (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8680                                          -(__v2df)(__m128d)(A), \
8681                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
8682                                          (int)(R)); })
8683
8684 static __inline__ __m128d __DEFAULT_FN_ATTRS
8685 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8686 {
8687  return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8688           (__v2df) __B,
8689           -(__v2df) __C,
8690           (__mmask8) __U,
8691           _MM_FROUND_CUR_DIRECTION);
8692 }
8693
8694 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
8695   (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
8696                                           (__v2df)(__m128d)(B), \
8697                                           -(__v2df)(__m128d)(C), \
8698                                           (__mmask8)(U), \
8699                                           _MM_FROUND_CUR_DIRECTION); })
8700
8701 static __inline__ __m128d __DEFAULT_FN_ATTRS
8702 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8703 {
8704  return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8705           (__v2df) __X,
8706           (__v2df) (__Y),
8707           (__mmask8) __U,
8708           _MM_FROUND_CUR_DIRECTION);
8709 }
8710
8711 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
8712   (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
8713                                           (__v2df)(__m128d)(X), \
8714                                           (__v2df)(__m128d)(Y), \
8715                                           (__mmask8)(U), (int)(R)); })
8716
8717 #define _mm512_permutex_pd(X, C) __extension__ ({ \
8718   (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
8719                                    (__v8df)_mm512_undefined_pd(), \
8720                                    0 + (((C) >> 0) & 0x3), \
8721                                    0 + (((C) >> 2) & 0x3), \
8722                                    0 + (((C) >> 4) & 0x3), \
8723                                    0 + (((C) >> 6) & 0x3), \
8724                                    4 + (((C) >> 0) & 0x3), \
8725                                    4 + (((C) >> 2) & 0x3), \
8726                                    4 + (((C) >> 4) & 0x3), \
8727                                    4 + (((C) >> 6) & 0x3)); })
8728
8729 #define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
8730   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8731                                        (__v8df)_mm512_permutex_pd((X), (C)), \
8732                                        (__v8df)(__m512d)(W)); })
8733
8734 #define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
8735   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8736                                        (__v8df)_mm512_permutex_pd((X), (C)), \
8737                                        (__v8df)_mm512_setzero_pd()); })
8738
8739 #define _mm512_permutex_epi64(X, C) __extension__ ({ \
8740   (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
8741                                    (__v8di)_mm512_undefined_epi32(), \
8742                                    0 + (((C) >> 0) & 0x3), \
8743                                    0 + (((C) >> 2) & 0x3), \
8744                                    0 + (((C) >> 4) & 0x3), \
8745                                    0 + (((C) >> 6) & 0x3), \
8746                                    4 + (((C) >> 0) & 0x3), \
8747                                    4 + (((C) >> 2) & 0x3), \
8748                                    4 + (((C) >> 4) & 0x3), \
8749                                    4 + (((C) >> 6) & 0x3)); })
8750
8751 #define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
8752   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8753                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
8754                                       (__v8di)(__m512i)(W)); })
8755
8756 #define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
8757   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8758                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
8759                                       (__v8di)_mm512_setzero_si512()); })
8760
8761 static __inline__ __m512d __DEFAULT_FN_ATTRS
8762 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8763 {
8764   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8765                  (__v8di) __X,
8766                  (__v8df) _mm512_undefined_pd (),
8767                  (__mmask8) -1);
8768 }
8769
8770 static __inline__ __m512d __DEFAULT_FN_ATTRS
8771 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8772 {
8773   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8774                  (__v8di) __X,
8775                  (__v8df) __W,
8776                  (__mmask8) __U);
8777 }
8778
8779 static __inline__ __m512d __DEFAULT_FN_ATTRS
8780 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8781 {
8782   return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8783                  (__v8di) __X,
8784                  (__v8df) _mm512_setzero_pd (),
8785                  (__mmask8) __U);
8786 }
8787
8788 static __inline__ __m512i __DEFAULT_FN_ATTRS
8789 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8790 {
8791   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8792                  (__v8di) __X,
8793                  (__v8di) _mm512_setzero_si512 (),
8794                  __M);
8795 }
8796
8797 static __inline__ __m512i __DEFAULT_FN_ATTRS
8798 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8799 {
8800   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8801                  (__v8di) __X,
8802                  (__v8di) _mm512_undefined_epi32 (),
8803                  (__mmask8) -1);
8804 }
8805
8806 static __inline__ __m512i __DEFAULT_FN_ATTRS
8807 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8808              __m512i __Y)
8809 {
8810   return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8811                  (__v8di) __X,
8812                  (__v8di) __W,
8813                  __M);
8814 }
8815
8816 static __inline__ __m512 __DEFAULT_FN_ATTRS
8817 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8818 {
8819   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8820                 (__v16si) __X,
8821                 (__v16sf) _mm512_undefined_ps (),
8822                 (__mmask16) -1);
8823 }
8824
8825 static __inline__ __m512 __DEFAULT_FN_ATTRS
8826 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8827 {
8828   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8829                 (__v16si) __X,
8830                 (__v16sf) __W,
8831                 (__mmask16) __U);
8832 }
8833
8834 static __inline__ __m512 __DEFAULT_FN_ATTRS
8835 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8836 {
8837   return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8838                 (__v16si) __X,
8839                 (__v16sf) _mm512_setzero_ps (),
8840                 (__mmask16) __U);
8841 }
8842
8843 static __inline__ __m512i __DEFAULT_FN_ATTRS
8844 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8845 {
8846   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8847                  (__v16si) __X,
8848                  (__v16si) _mm512_setzero_si512 (),
8849                  __M);
8850 }
8851
8852 static __inline__ __m512i __DEFAULT_FN_ATTRS
8853 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8854 {
8855   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8856                  (__v16si) __X,
8857                  (__v16si) _mm512_undefined_epi32 (),
8858                  (__mmask16) -1);
8859 }
8860
8861 static __inline__ __m512i __DEFAULT_FN_ATTRS
8862 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8863              __m512i __Y)
8864 {
8865   return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8866                  (__v16si) __X,
8867                  (__v16si) __W,
8868                  __M);
8869 }
8870
8871 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8872 _mm512_kand (__mmask16 __A, __mmask16 __B)
8873 {
8874   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8875 }
8876
8877 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8878 _mm512_kandn (__mmask16 __A, __mmask16 __B)
8879 {
8880   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8881 }
8882
8883 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8884 _mm512_kor (__mmask16 __A, __mmask16 __B)
8885 {
8886   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8887 }
8888
8889 static __inline__ int __DEFAULT_FN_ATTRS
8890 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
8891 {
8892   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8893 }
8894
8895 static __inline__ int __DEFAULT_FN_ATTRS
8896 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
8897 {
8898   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8899 }
8900
8901 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8902 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8903 {
8904   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8905 }
8906
8907 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8908 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
8909 {
8910   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8911 }
8912
8913 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8914 _mm512_kxor (__mmask16 __A, __mmask16 __B)
8915 {
8916   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8917 }
8918
8919 static __inline__ void __DEFAULT_FN_ATTRS
8920 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8921 {
8922   __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
8923 }
8924
8925 static __inline__ __m512i __DEFAULT_FN_ATTRS
8926 _mm512_stream_load_si512 (void *__P)
8927 {
8928   return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8929 }
8930
8931 static __inline__ void __DEFAULT_FN_ATTRS
8932 _mm512_stream_pd (double *__P, __m512d __A)
8933 {
8934   __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
8935 }
8936
8937 static __inline__ void __DEFAULT_FN_ATTRS
8938 _mm512_stream_ps (float *__P, __m512 __A)
8939 {
8940   __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
8941 }
8942
8943 static __inline__ __m512d __DEFAULT_FN_ATTRS
8944 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8945 {
8946   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8947                   (__v8df) __W,
8948                   (__mmask8) __U);
8949 }
8950
8951 static __inline__ __m512d __DEFAULT_FN_ATTRS
8952 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
8953 {
8954   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8955                   (__v8df)
8956                   _mm512_setzero_pd (),
8957                   (__mmask8) __U);
8958 }
8959
8960 static __inline__ __m512i __DEFAULT_FN_ATTRS
8961 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8962 {
8963   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8964                   (__v8di) __W,
8965                   (__mmask8) __U);
8966 }
8967
8968 static __inline__ __m512i __DEFAULT_FN_ATTRS
8969 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
8970 {
8971   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8972                   (__v8di)
8973                   _mm512_setzero_si512 (),
8974                   (__mmask8) __U);
8975 }
8976
8977 static __inline__ __m512 __DEFAULT_FN_ATTRS
8978 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8979 {
8980   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8981                  (__v16sf) __W,
8982                  (__mmask16) __U);
8983 }
8984
8985 static __inline__ __m512 __DEFAULT_FN_ATTRS
8986 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
8987 {
8988   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8989                  (__v16sf)
8990                  _mm512_setzero_ps (),
8991                  (__mmask16) __U);
8992 }
8993
8994 static __inline__ __m512i __DEFAULT_FN_ATTRS
8995 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8996 {
8997   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8998                   (__v16si) __W,
8999                   (__mmask16) __U);
9000 }
9001
9002 static __inline__ __m512i __DEFAULT_FN_ATTRS
9003 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9004 {
9005   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9006                   (__v16si)
9007                   _mm512_setzero_si512 (),
9008                   (__mmask16) __U);
9009 }
9010
9011 #define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
9012   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
9013                                       (__v4sf)(__m128)(Y), (int)(P), \
9014                                       (__mmask8)-1, (int)(R)); })
9015
9016 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
9017   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
9018                                       (__v4sf)(__m128)(Y), (int)(P), \
9019                                       (__mmask8)(M), (int)(R)); })
9020
9021 #define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
9022   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
9023                                       (__v4sf)(__m128)(Y), (int)(P), \
9024                                       (__mmask8)-1, \
9025                                       _MM_FROUND_CUR_DIRECTION); })
9026
9027 #define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
9028   (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
9029                                       (__v4sf)(__m128)(Y), (int)(P), \
9030                                       (__mmask8)(M), \
9031                                       _MM_FROUND_CUR_DIRECTION); })
9032
9033 #define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
9034   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
9035                                       (__v2df)(__m128d)(Y), (int)(P), \
9036                                       (__mmask8)-1, (int)(R)); })
9037
9038 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
9039   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
9040                                       (__v2df)(__m128d)(Y), (int)(P), \
9041                                       (__mmask8)(M), (int)(R)); })
9042
9043 #define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
9044   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
9045                                       (__v2df)(__m128d)(Y), (int)(P), \
9046                                       (__mmask8)-1, \
9047                                       _MM_FROUND_CUR_DIRECTION); })
9048
9049 #define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
9050   (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
9051                                       (__v2df)(__m128d)(Y), (int)(P), \
9052                                       (__mmask8)(M), \
9053                                       _MM_FROUND_CUR_DIRECTION); })
9054
9055 static __inline__ __m512 __DEFAULT_FN_ATTRS
9056 _mm512_movehdup_ps (__m512 __A)
9057 {
9058   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9059                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9060 }
9061
9062 static __inline__ __m512 __DEFAULT_FN_ATTRS
9063 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9064 {
9065   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9066                                              (__v16sf)_mm512_movehdup_ps(__A),
9067                                              (__v16sf)__W);
9068 }
9069
9070 static __inline__ __m512 __DEFAULT_FN_ATTRS
9071 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9072 {
9073   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9074                                              (__v16sf)_mm512_movehdup_ps(__A),
9075                                              (__v16sf)_mm512_setzero_ps());
9076 }
9077
9078 static __inline__ __m512 __DEFAULT_FN_ATTRS
9079 _mm512_moveldup_ps (__m512 __A)
9080 {
9081   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9082                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9083 }
9084
9085 static __inline__ __m512 __DEFAULT_FN_ATTRS
9086 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9087 {
9088   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9089                                              (__v16sf)_mm512_moveldup_ps(__A),
9090                                              (__v16sf)__W);
9091 }
9092
9093 static __inline__ __m512 __DEFAULT_FN_ATTRS
9094 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9095 {
9096   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9097                                              (__v16sf)_mm512_moveldup_ps(__A),
9098                                              (__v16sf)_mm512_setzero_ps());
9099 }
9100
9101 static __inline__ __m128 __DEFAULT_FN_ATTRS
9102 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9103 {
9104   __m128 res = __A; 
9105   res[0] = (__U & 1) ? __B[0] : __W[0];
9106   return res; 
9107 }
9108
9109 static __inline__ __m128 __DEFAULT_FN_ATTRS
9110 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9111 {
9112   __m128 res = __A; 
9113   res[0] = (__U & 1) ? __B[0] : 0; 
9114   return res; 
9115 }
9116
9117 static __inline__ __m128d __DEFAULT_FN_ATTRS
9118 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9119 {
9120   __m128d res = __A; 
9121   res[0] = (__U & 1) ? __B[0] : __W[0];
9122   return res; 
9123 }
9124
9125 static __inline__ __m128d __DEFAULT_FN_ATTRS
9126 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9127 {
9128   __m128d res = __A; 
9129   res[0] = (__U & 1) ? __B[0] : 0; 
9130   return res; 
9131 }
9132
9133 static __inline__ void __DEFAULT_FN_ATTRS
9134 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9135 {
9136   __builtin_ia32_storess128_mask ((__v16sf *)__W, 
9137                 (__v16sf) _mm512_castps128_ps512(__A),
9138                 (__mmask16) __U & (__mmask16)1);
9139 }
9140
9141 static __inline__ void __DEFAULT_FN_ATTRS
9142 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9143 {
9144   __builtin_ia32_storesd128_mask ((__v8df *)__W, 
9145                 (__v8df) _mm512_castpd128_pd512(__A),
9146                 (__mmask8) __U & 1);
9147 }
9148
9149 static __inline__ __m128 __DEFAULT_FN_ATTRS
9150 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9151 {
9152   __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9153                                                 (__v4sf) {0.0, 0.0, 0.0, 0.0},
9154                                                 0, 4, 4, 4);
9155
9156   return (__m128) __builtin_shufflevector(
9157                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9158                                       (__v16sf) _mm512_castps128_ps512(src),
9159                                       (__mmask16) __U & 1),
9160                            _mm512_undefined_ps(), 0, 1, 2, 3);
9161 }
9162
9163 static __inline__ __m128 __DEFAULT_FN_ATTRS
9164 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
9165 {
9166   return (__m128) __builtin_shufflevector(
9167                            __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9168                                       (__v16sf) _mm512_setzero_ps(),
9169                                       (__mmask16) __U & 1),
9170                            _mm512_undefined_ps(), 0, 1, 2, 3);
9171 }
9172
9173 static __inline__ __m128d __DEFAULT_FN_ATTRS
9174 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9175 {
9176   __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9177                                                  (__v2df) {0.0, 0.0}, 0, 2);
9178
9179   return (__m128d) __builtin_shufflevector(
9180                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9181                                       (__v8df) _mm512_castpd128_pd512(src),
9182                                       (__mmask8) __U & 1),
9183                             _mm512_undefined_pd(), 0, 1);
9184 }
9185
9186 static __inline__ __m128d __DEFAULT_FN_ATTRS
9187 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
9188 {
9189   return (__m128d) __builtin_shufflevector(
9190                             __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9191                                       (__v8df) _mm512_setzero_pd(),
9192                                       (__mmask8) __U & 1),
9193                             _mm512_undefined_pd(), 0, 1);
9194 }
9195
9196 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \
9197   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
9198                                    (__v16si)_mm512_undefined_epi32(), \
9199                                    0  + (((I) >> 0) & 0x3), \
9200                                    0  + (((I) >> 2) & 0x3), \
9201                                    0  + (((I) >> 4) & 0x3), \
9202                                    0  + (((I) >> 6) & 0x3), \
9203                                    4  + (((I) >> 0) & 0x3), \
9204                                    4  + (((I) >> 2) & 0x3), \
9205                                    4  + (((I) >> 4) & 0x3), \
9206                                    4  + (((I) >> 6) & 0x3), \
9207                                    8  + (((I) >> 0) & 0x3), \
9208                                    8  + (((I) >> 2) & 0x3), \
9209                                    8  + (((I) >> 4) & 0x3), \
9210                                    8  + (((I) >> 6) & 0x3), \
9211                                    12 + (((I) >> 0) & 0x3), \
9212                                    12 + (((I) >> 2) & 0x3), \
9213                                    12 + (((I) >> 4) & 0x3), \
9214                                    12 + (((I) >> 6) & 0x3)); })
9215
9216 #define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
9217   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
9218                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
9219                                       (__v16si)(__m512i)(W)); })
9220
9221 #define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
9222   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
9223                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
9224                                       (__v16si)_mm512_setzero_si512()); })
9225
9226 static __inline__ __m512d __DEFAULT_FN_ATTRS
9227 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9228 {
9229   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9230                 (__v8df) __W,
9231                 (__mmask8) __U);
9232 }
9233
9234 static __inline__ __m512d __DEFAULT_FN_ATTRS
9235 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9236 {
9237   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9238                 (__v8df) _mm512_setzero_pd (),
9239                 (__mmask8) __U);
9240 }
9241
9242 static __inline__ __m512i __DEFAULT_FN_ATTRS
9243 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9244 {
9245   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9246                 (__v8di) __W,
9247                 (__mmask8) __U);
9248 }
9249
9250 static __inline__ __m512i __DEFAULT_FN_ATTRS
9251 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9252 {
9253   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9254                 (__v8di) _mm512_setzero_si512 (),
9255                 (__mmask8) __U);
9256 }
9257
9258 static __inline__ __m512d __DEFAULT_FN_ATTRS
9259 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9260 {
9261   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9262               (__v8df) __W,
9263               (__mmask8) __U);
9264 }
9265
9266 static __inline__ __m512d __DEFAULT_FN_ATTRS
9267 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9268 {
9269   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9270               (__v8df) _mm512_setzero_pd(),
9271               (__mmask8) __U);
9272 }
9273
9274 static __inline__ __m512i __DEFAULT_FN_ATTRS
9275 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9276 {
9277   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9278               (__v8di) __W,
9279               (__mmask8) __U);
9280 }
9281
9282 static __inline__ __m512i __DEFAULT_FN_ATTRS
9283 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9284 {
9285   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9286               (__v8di) _mm512_setzero_si512(),
9287               (__mmask8) __U);
9288 }
9289
9290 static __inline__ __m512 __DEFAULT_FN_ATTRS
9291 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9292 {
9293   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9294                    (__v16sf) __W,
9295                    (__mmask16) __U);
9296 }
9297
9298 static __inline__ __m512 __DEFAULT_FN_ATTRS
9299 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9300 {
9301   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9302                    (__v16sf) _mm512_setzero_ps(),
9303                    (__mmask16) __U);
9304 }
9305
9306 static __inline__ __m512i __DEFAULT_FN_ATTRS
9307 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9308 {
9309   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9310               (__v16si) __W,
9311               (__mmask16) __U);
9312 }
9313
9314 static __inline__ __m512i __DEFAULT_FN_ATTRS
9315 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9316 {
9317   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9318               (__v16si) _mm512_setzero_si512(),
9319               (__mmask16) __U);
9320 }
9321
9322 static __inline__ __m512 __DEFAULT_FN_ATTRS
9323 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9324 {
9325   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9326                (__v16sf) __W,
9327                (__mmask16) __U);
9328 }
9329
9330 static __inline__ __m512 __DEFAULT_FN_ATTRS
9331 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9332 {
9333   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9334                (__v16sf) _mm512_setzero_ps(),
9335                (__mmask16) __U);
9336 }
9337
9338 static __inline__ __m512i __DEFAULT_FN_ATTRS
9339 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9340 {
9341   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9342                 (__v16si) __W,
9343                 (__mmask16) __U);
9344 }
9345
9346 static __inline__ __m512i __DEFAULT_FN_ATTRS
9347 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9348 {
9349   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9350                 (__v16si) _mm512_setzero_si512(),
9351                 (__mmask16) __U);
9352 }
9353
9354 #define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
9355   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
9356                                            (__v8df)_mm512_undefined_pd(), \
9357                                            (__mmask8)-1, (int)(R)); })
9358
9359 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
9360   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
9361                                            (__v8df)(__m512d)(W), \
9362                                            (__mmask8)(U), (int)(R)); })
9363
9364 #define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
9365   (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
9366                                            (__v8df)_mm512_setzero_pd(), \
9367                                            (__mmask8)(U), (int)(R)); })
9368
9369 static __inline__ __m512d __DEFAULT_FN_ATTRS
9370 _mm512_cvtps_pd (__m256 __A)
9371 {
9372   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9373                 (__v8df)
9374                 _mm512_undefined_pd (),
9375                 (__mmask8) -1,
9376                 _MM_FROUND_CUR_DIRECTION);
9377 }
9378
9379 static __inline__ __m512d __DEFAULT_FN_ATTRS
9380 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9381 {
9382   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9383                 (__v8df) __W,
9384                 (__mmask8) __U,
9385                 _MM_FROUND_CUR_DIRECTION);
9386 }
9387
9388 static __inline__ __m512d __DEFAULT_FN_ATTRS
9389 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9390 {
9391   return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9392                 (__v8df)
9393                 _mm512_setzero_pd (),
9394                 (__mmask8) __U,
9395                 _MM_FROUND_CUR_DIRECTION);
9396 }
9397
9398 static __inline__ __m512d __DEFAULT_FN_ATTRS
9399 _mm512_cvtpslo_pd (__m512 __A)
9400 {
9401   return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9402 }
9403
9404 static __inline__ __m512d __DEFAULT_FN_ATTRS
9405 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9406 {
9407   return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9408 }
9409
9410 static __inline__ __m512d __DEFAULT_FN_ATTRS
9411 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9412 {
9413   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9414               (__v8df) __A,
9415               (__v8df) __W);
9416 }
9417
9418 static __inline__ __m512d __DEFAULT_FN_ATTRS
9419 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9420 {
9421   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9422               (__v8df) __A,
9423               (__v8df) _mm512_setzero_pd ());
9424 }
9425
9426 static __inline__ __m512 __DEFAULT_FN_ATTRS
9427 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9428 {
9429   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9430              (__v16sf) __A,
9431              (__v16sf) __W);
9432 }
9433
9434 static __inline__ __m512 __DEFAULT_FN_ATTRS
9435 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9436 {
9437   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9438              (__v16sf) __A,
9439              (__v16sf) _mm512_setzero_ps ());
9440 }
9441
9442 static __inline__ void __DEFAULT_FN_ATTRS
9443 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9444 {
9445   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9446             (__mmask8) __U);
9447 }
9448
9449 static __inline__ void __DEFAULT_FN_ATTRS
9450 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9451 {
9452   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9453             (__mmask8) __U);
9454 }
9455
9456 static __inline__ void __DEFAULT_FN_ATTRS
9457 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9458 {
9459   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9460             (__mmask16) __U);
9461 }
9462
9463 static __inline__ void __DEFAULT_FN_ATTRS
9464 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9465 {
9466   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9467             (__mmask16) __U);
9468 }
9469
9470 #define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
9471   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9472                                              (__v2df)(__m128d)(B), \
9473                                              (__v4sf)_mm_undefined_ps(), \
9474                                              (__mmask8)-1, (int)(R)); })
9475
9476 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
9477   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9478                                              (__v2df)(__m128d)(B), \
9479                                              (__v4sf)(__m128)(W), \
9480                                              (__mmask8)(U), (int)(R)); })
9481
9482 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
9483   (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
9484                                              (__v2df)(__m128d)(B), \
9485                                              (__v4sf)_mm_setzero_ps(), \
9486                                              (__mmask8)(U), (int)(R)); })
9487
9488 static __inline__ __m128 __DEFAULT_FN_ATTRS
9489 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9490 {
9491   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9492                                              (__v2df)(__B),
9493                                              (__v4sf)(__W), 
9494                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9495 }
9496
9497 static __inline__ __m128 __DEFAULT_FN_ATTRS
9498 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9499 {
9500   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9501                                              (__v2df)(__B),
9502                                              (__v4sf)_mm_setzero_ps(), 
9503                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9504 }
9505
9506 #define _mm_cvtss_i32 _mm_cvtss_si32
9507 #define _mm_cvtsd_i32 _mm_cvtsd_si32
9508 #define _mm_cvti32_sd _mm_cvtsi32_sd
9509 #define _mm_cvti32_ss _mm_cvtsi32_ss
9510 #ifdef __x86_64__
9511 #define _mm_cvtss_i64 _mm_cvtss_si64
9512 #define _mm_cvtsd_i64 _mm_cvtsd_si64
9513 #define _mm_cvti64_sd _mm_cvtsi64_sd
9514 #define _mm_cvti64_ss _mm_cvtsi64_ss
9515 #endif
9516
9517 #ifdef __x86_64__
9518 #define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
9519   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9520                                      (int)(R)); })
9521
9522 #define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
9523   (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
9524                                      (int)(R)); })
9525 #endif
9526
9527 #define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
9528   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9529
9530 #define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
9531   (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9532
9533 #ifdef __x86_64__
9534 #define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
9535   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9536                                     (int)(R)); })
9537
9538 #define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
9539   (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
9540                                     (int)(R)); })
9541 #endif
9542
9543 #define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
9544   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9545                                               (__v4sf)(__m128)(B), \
9546                                               (__v2df)_mm_undefined_pd(), \
9547                                               (__mmask8)-1, (int)(R)); })
9548
9549 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
9550   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9551                                               (__v4sf)(__m128)(B), \
9552                                               (__v2df)(__m128d)(W), \
9553                                               (__mmask8)(U), (int)(R)); })
9554
9555 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
9556   (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
9557                                               (__v4sf)(__m128)(B), \
9558                                               (__v2df)_mm_setzero_pd(), \
9559                                               (__mmask8)(U), (int)(R)); })
9560
9561 static __inline__ __m128d __DEFAULT_FN_ATTRS
9562 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9563 {
9564   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9565                                               (__v4sf)(__B),
9566                                               (__v2df)(__W),
9567                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 
9568 }
9569
9570 static __inline__ __m128d __DEFAULT_FN_ATTRS
9571 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9572 {
9573   return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9574                                               (__v4sf)(__B),
9575                                               (__v2df)_mm_setzero_pd(), 
9576                                               (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION); 
9577 }
9578
9579 static __inline__ __m128d __DEFAULT_FN_ATTRS
9580 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9581 {
9582   return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9583 }
9584
9585 #ifdef __x86_64__
9586 #define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
9587   (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9588                                       (unsigned long long)(B), (int)(R)); })
9589
9590 static __inline__ __m128d __DEFAULT_FN_ATTRS
9591 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9592 {
9593   return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9594                  _MM_FROUND_CUR_DIRECTION);
9595 }
9596 #endif
9597
9598 #define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
9599   (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9600                                      (int)(R)); })
9601
9602 static __inline__ __m128 __DEFAULT_FN_ATTRS
9603 _mm_cvtu32_ss (__m128 __A, unsigned __B)
9604 {
9605   return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9606                 _MM_FROUND_CUR_DIRECTION);
9607 }
9608
9609 #ifdef __x86_64__
9610 #define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
9611   (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9612                                      (unsigned long long)(B), (int)(R)); })
9613
9614 static __inline__ __m128 __DEFAULT_FN_ATTRS
9615 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9616 {
9617   return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9618                 _MM_FROUND_CUR_DIRECTION);
9619 }
9620 #endif
9621
9622 static __inline__ __m512i __DEFAULT_FN_ATTRS
9623 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9624 {
9625   return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
9626                  __M);
9627 }
9628
9629 #ifdef __x86_64__
9630 static __inline__ __m512i __DEFAULT_FN_ATTRS
9631 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9632 {
9633   return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
9634                  __M);
9635 }
9636 #endif
9637
9638 static __inline __m512i __DEFAULT_FN_ATTRS
9639 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
9640      int __E, int __F, int __G, int __H,
9641      int __I, int __J, int __K, int __L,
9642      int __M, int __N, int __O, int __P)
9643 {
9644   return __extension__ (__m512i)(__v16si)
9645   { __P, __O, __N, __M, __L, __K, __J, __I,
9646     __H, __G, __F, __E, __D, __C, __B, __A };
9647 }
9648
9649 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
9650        e8,e9,e10,e11,e12,e13,e14,e15)          \
9651   _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9652                    (e5),(e4),(e3),(e2),(e1),(e0))
9653
9654 static __inline__ __m512i __DEFAULT_FN_ATTRS
9655 _mm512_set_epi64 (long long __A, long long __B, long long __C,
9656      long long __D, long long __E, long long __F,
9657      long long __G, long long __H)
9658 {
9659   return __extension__ (__m512i) (__v8di)
9660   { __H, __G, __F, __E, __D, __C, __B, __A };
9661 }
9662
9663 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
9664   _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9665
9666 static __inline__ __m512d __DEFAULT_FN_ATTRS
9667 _mm512_set_pd (double __A, double __B, double __C, double __D,
9668         double __E, double __F, double __G, double __H)
9669 {
9670   return __extension__ (__m512d)
9671   { __H, __G, __F, __E, __D, __C, __B, __A };
9672 }
9673
9674 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
9675   _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9676
9677 static __inline__ __m512 __DEFAULT_FN_ATTRS
9678 _mm512_set_ps (float __A, float __B, float __C, float __D,
9679         float __E, float __F, float __G, float __H,
9680         float __I, float __J, float __K, float __L,
9681         float __M, float __N, float __O, float __P)
9682 {
9683   return __extension__ (__m512)
9684   { __P, __O, __N, __M, __L, __K, __J, __I,
9685     __H, __G, __F, __E, __D, __C, __B, __A };
9686 }
9687
9688 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9689   _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9690                 (e4),(e3),(e2),(e1),(e0))
9691
9692 static __inline__ __m512 __DEFAULT_FN_ATTRS
9693 _mm512_abs_ps(__m512 __A)
9694 {
9695   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF), (__m512i)__A);
9696 }
9697
9698 static __inline__ __m512 __DEFAULT_FN_ATTRS
9699 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9700 {
9701   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF), (__m512i)__A);
9702 }
9703
9704 static __inline__ __m512d __DEFAULT_FN_ATTRS
9705 _mm512_abs_pd(__m512d __A)
9706 {
9707   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)__A);
9708 }
9709
9710 static __inline__ __m512d __DEFAULT_FN_ATTRS
9711 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9712 {
9713   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF), (__v8di)__A);
9714 }
9715
9716 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9717 // outputs. This class of vector operation forms the basis of many scientific
9718 // computations. In vector-reduction arithmetic, the result is independent of
9719 // the order in which the input elements of V are combined.
9720
9721 // We use a bisection method: at each step the vector from the previous step is
9722 // split in half, and the operation is applied to its two halves.
9723 // This takes log2(n) steps, where n is the number of elements in the vector.
9724
9725 // Vec512 - Vector of size 512 bits.
9726 // Operator - Can be one of the following: +,*,&,|
9727 // T2 - Gets 'i' for int and 'f' for float.
9728 // T1 - Gets 'i' for int and 'd' for double.
9729
9730 #define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
9731   __extension__({                                                      \
9732     __m256##T1 Vec256 = __builtin_shufflevector(                       \
9733                             (__v8d##T2)Vec512,                         \
9734                             (__v8d##T2)Vec512,                         \
9735                             0, 1, 2, 3)                                \
9736                         Operator                                       \
9737                         __builtin_shufflevector(                       \
9738                             (__v8d##T2)Vec512,                         \
9739                             (__v8d##T2)Vec512,                         \
9740                             4, 5, 6, 7);                               \
9741     __m128##T1 Vec128 = __builtin_shufflevector(                       \
9742                             (__v4d##T2)Vec256,                         \
9743                             (__v4d##T2)Vec256,                         \
9744                             0, 1)                                      \
9745                         Operator                                       \
9746                         __builtin_shufflevector(                       \
9747                             (__v4d##T2)Vec256,                         \
9748                             (__v4d##T2)Vec256,                         \
9749                             2, 3);                                     \
9750     Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
9751                                      (__v2d##T2)Vec128, 0, -1)         \
9752              Operator                                                  \
9753              __builtin_shufflevector((__v2d##T2)Vec128,                \
9754                                      (__v2d##T2)Vec128, 1, -1);        \
9755     return Vec128[0];                                                  \
9756   })
9757
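// Illustrative sketch, not part of the upstream header (assumes a translation
// unit that includes <immintrin.h>): reducing the eight 64-bit lanes with '+'
// takes three bisection steps, e.g. for {1,2,3,4,5,6,7,8}:
//   step 1: {1,2,3,4} + {5,6,7,8} -> {6,8,10,12}
//   step 2: {6,8} + {10,12}       -> {16,20}
//   step 3: {16} + {20}           -> {36}
// so _mm512_reduce_add_epi64(_mm512_setr_epi64(1,2,3,4,5,6,7,8)) yields 36.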
9758 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9759   _mm512_reduce_operator_64bit(__W, +, i, i);
9760 }
9761
9762 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9763   _mm512_reduce_operator_64bit(__W, *, i, i);
9764 }
9765
9766 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9767   _mm512_reduce_operator_64bit(__W, &, i, i);
9768 }
9769
9770 static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9771   _mm512_reduce_operator_64bit(__W, |, i, i);
9772 }
9773
9774 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9775   _mm512_reduce_operator_64bit(__W, +, f, d);
9776 }
9777
9778 static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9779   _mm512_reduce_operator_64bit(__W, *, f, d);
9780 }
9781
9782 // Vec512 - Vector of size 512 bits.
9783 // Vec512Neutral - All vector elements set to the identity element.
9784 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9785 // Operator - Can be one of the following: +,*,&,|
9786 // Mask - Intrinsic mask
9787 // T2 - Gets 'i' for int and 'f' for float.
9788 // T1 - Gets 'i' for int and 'd' for packed double-precision.
9789 // T3 - Gets 'pd' for packed double or 'q' for quad-word.
9790
9791 #define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
9792                                           Mask, T2, T1, T3)                    \
9793   __extension__({                                                              \
9794     Vec512 = __builtin_ia32_select##T3##_512(                                  \
9795                  (__mmask8)Mask,                                               \
9796                  (__v8d##T2)Vec512,                                            \
9797                  (__v8d##T2)Vec512Neutral);                                    \
9798     _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
9799   })
9800
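// Illustrative sketch, not part of the upstream header: lanes cleared in the
// mask are first replaced by the identity element, so they do not affect the
// reduction.  For example,
//   long long s = _mm512_mask_reduce_add_epi64(
//       (__mmask8)0x0F, _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8));
// keeps lanes 0-3; the upper lanes contribute the additive identity 0, so s
// evaluates to 1 + 2 + 3 + 4 = 10.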
9801 static __inline__ long long __DEFAULT_FN_ATTRS
9802 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9803   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
9804 }
9805
9806 static __inline__ long long __DEFAULT_FN_ATTRS
9807 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9808   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
9809 }
9810
9811 static __inline__ long long __DEFAULT_FN_ATTRS
9812 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9813   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF), 
9814                                     &, __M,  i, i, q);
9815 }
9816
9817 static __inline__ long long __DEFAULT_FN_ATTRS
9818 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9819   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M, 
9820                                     i, i, q);
9821 }
9822
9823 static __inline__ double __DEFAULT_FN_ATTRS
9824 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9825   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M, 
9826                                     f, d, pd);
9827 }
9828
9829 static __inline__ double __DEFAULT_FN_ATTRS
9830 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9831   _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
9832                                     f, d, pd);
9833 }
9834
9835 // Vec512 - Vector of size 512 bits.
9836 // Operator - Can be one of the following: +,*,&,|
9837 // T2 - Gets 'i' for int and 'f' for float.
9838 // T1 - Gets 'i' for int and '' (empty) for packed single.
9839
9840 #define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
9841     __m256##T1 Vec256 =                                                        \
9842             (__m256##T1)(__builtin_shufflevector(                              \
9843                                     (__v16s##T2)Vec512,                        \
9844                                     (__v16s##T2)Vec512,                        \
9845                                     0, 1, 2, 3, 4, 5, 6, 7)                    \
9846                                 Operator                                       \
9847                          __builtin_shufflevector(                              \
9848                                     (__v16s##T2)Vec512,                        \
9849                                     (__v16s##T2)Vec512,                        \
9850                                     8, 9, 10, 11, 12, 13, 14, 15));            \
9851     __m128##T1 Vec128 =                                                        \
9852              (__m128##T1)(__builtin_shufflevector(                             \
9853                                     (__v8s##T2)Vec256,                         \
9854                                     (__v8s##T2)Vec256,                         \
9855                                     0, 1, 2, 3)                                \
9856                                 Operator                                       \
9857                           __builtin_shufflevector(                             \
9858                                     (__v8s##T2)Vec256,                         \
9859                                     (__v8s##T2)Vec256,                         \
9860                                     4, 5, 6, 7));                              \
9861     Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
9862                                     (__v4s##T2)Vec128,                         \
9863                                     (__v4s##T2)Vec128,                         \
9864                                     0, 1, -1, -1)                              \
9865                                 Operator                                       \
9866                           __builtin_shufflevector(                             \
9867                                     (__v4s##T2)Vec128,                         \
9868                                     (__v4s##T2)Vec128,                         \
9869                                     2, 3, -1, -1));                            \
9870     Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
9871                                     (__v4s##T2)Vec128,                         \
9872                                     (__v4s##T2)Vec128,                         \
9873                                     0, -1, -1, -1)                             \
9874                                 Operator                                       \
9875                           __builtin_shufflevector(                             \
9876                                     (__v4s##T2)Vec128,                         \
9877                                     (__v4s##T2)Vec128,                         \
9878                                     1, -1, -1, -1));                           \
9879     return Vec128[0];                                                          \
9880   })
9881
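// Illustrative sketch, not part of the upstream header: the same bisection
// scheme over sixteen 32-bit lanes takes four steps, e.g.
//   int r = _mm512_reduce_add_epi32(_mm512_set1_epi32(1));
// sums sixteen ones, so r evaluates to 16.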
9882 static __inline__ int __DEFAULT_FN_ATTRS
9883 _mm512_reduce_add_epi32(__m512i __W) {
9884   _mm512_reduce_operator_32bit(__W, +, i, i);
9885 }
9886
9887 static __inline__ int __DEFAULT_FN_ATTRS 
9888 _mm512_reduce_mul_epi32(__m512i __W) {
9889   _mm512_reduce_operator_32bit(__W, *, i, i);
9890 }
9891
9892 static __inline__ int __DEFAULT_FN_ATTRS 
9893 _mm512_reduce_and_epi32(__m512i __W) {
9894   _mm512_reduce_operator_32bit(__W, &, i, i);
9895 }
9896
9897 static __inline__ int __DEFAULT_FN_ATTRS 
9898 _mm512_reduce_or_epi32(__m512i __W) {
9899   _mm512_reduce_operator_32bit(__W, |, i, i);
9900 }
9901
9902 static __inline__ float __DEFAULT_FN_ATTRS
9903 _mm512_reduce_add_ps(__m512 __W) {
9904   _mm512_reduce_operator_32bit(__W, +, f, );
9905 }
9906
9907 static __inline__ float __DEFAULT_FN_ATTRS
9908 _mm512_reduce_mul_ps(__m512 __W) {
9909   _mm512_reduce_operator_32bit(__W, *, f, );
9910 }
9911
9912 // Vec512 - Vector of size 512 bits.
9913 // Vec512Neutral - All vector elements set to the identity element.
9914 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9915 // Operator - Can be one of the following: +,*,&,|
9916 // Mask - Intrinsic mask
9917 // T2 - Gets 'i' for int and 'f' for float.
9918 // T1 - Gets 'i' for int and '' (empty) for packed single.
9919 // T3 - Gets 'ps' for packed single or 'd' for double-word.
9920
9921 #define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
9922                                           Mask, T2, T1, T3)                    \
9923   __extension__({                                                              \
9924     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
9925                              (__mmask16)Mask,                                  \
9926                              (__v16s##T2)Vec512,                               \
9927                              (__v16s##T2)Vec512Neutral);                       \
9928     _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
9929   })
9930
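// Illustrative sketch, not part of the upstream header: with a multiplicative
// reduction the masked-off lanes contribute the identity 1, e.g.
//   __m512i v = _mm512_set1_epi32(2);
//   int p = _mm512_mask_reduce_mul_epi32((__mmask16)0x00FF, v);
// multiplies eight lanes of 2 (the other eight lanes become 1), so p
// evaluates to 2^8 = 256.
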
9931 static __inline__ int __DEFAULT_FN_ATTRS
9932 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9933   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
9934 }
9935
9936 static __inline__ int __DEFAULT_FN_ATTRS
9937 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9938   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
9939 }
9940
9941 static __inline__ int __DEFAULT_FN_ATTRS
9942 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9943   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M, 
9944                                     i, i, d);
9945 }
9946
9947 static __inline__ int __DEFAULT_FN_ATTRS
9948 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9949   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
9950 }
9951
9952 static __inline__ float __DEFAULT_FN_ATTRS
9953 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9954   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
9955 }
9956
9957 static __inline__ float __DEFAULT_FN_ATTRS
9958 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9959   _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
9960 }
9961
9962 // We use a bisection method: at each step the vector from the previous step is
9963 // split in half, and the operation is performed on its two halves.
9964 // This takes log2(n) steps, where n is the number of elements in the vector.
9965 // This macro uses only intrinsics from the AVX512F feature.
9966
9967 // Vec512 - Vector of size 512 bits.
9968 // IntrinName - Can be one of the following: {max|min}_{epi64|epu64|pd}, for
9969 //              example: _mm512_max_epi64
9970 // T1 - Gets 'i' for int and 'd' for double. [__m512{i|d}]
9971 // T2 - Gets 'i' for int and 'f' for float.  [__v8d{i|f}]
9972
9973 #define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
9974         Vec512 = _mm512_##IntrinName(                                          \
9975                                 (__m512##T1)__builtin_shufflevector(           \
9976                                                 (__v8d##T2)Vec512,             \
9977                                                 (__v8d##T2)Vec512,             \
9978                                                  0, 1, 2, 3, -1, -1, -1, -1),  \
9979                                 (__m512##T1)__builtin_shufflevector(           \
9980                                                 (__v8d##T2)Vec512,             \
9981                                                 (__v8d##T2)Vec512,             \
9982                                                  4, 5, 6, 7, -1, -1, -1, -1)); \
9983         Vec512 = _mm512_##IntrinName(                                          \
9984                                 (__m512##T1)__builtin_shufflevector(           \
9985                                                 (__v8d##T2)Vec512,             \
9986                                                 (__v8d##T2)Vec512,             \
9987                                                  0, 1, -1, -1, -1, -1, -1, -1),\
9988                                 (__m512##T1)__builtin_shufflevector(           \
9989                                                 (__v8d##T2)Vec512,             \
9990                                                 (__v8d##T2)Vec512,             \
9991                                                  2, 3, -1, -1, -1, -1, -1,     \
9992                                                  -1));                         \
9993         Vec512 = _mm512_##IntrinName(                                          \
9994                                 (__m512##T1)__builtin_shufflevector(           \
9995                                                 (__v8d##T2)Vec512,             \
9996                                                 (__v8d##T2)Vec512,             \
9997                                                 0, -1, -1, -1, -1, -1, -1, -1),\
9998                                 (__m512##T1)__builtin_shufflevector(           \
9999                                                 (__v8d##T2)Vec512,             \
10000                                                 (__v8d##T2)Vec512,             \
10001                                                 1, -1, -1, -1, -1, -1, -1, -1))\
10002                                                 ;                              \
10003     return Vec512[0];                                                          \
10004   })
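
// Scalar sketch of the bisection reduction used by the macro above, under the
// assumption of 8 elements (not part of the upstream header; the helper name
// __example_bisection_max is hypothetical and the block is guarded by "#if 0").
// Each pass combines element i with element i + half and halves the active
// width, so 8 elements take log2(8) = 3 passes.  The array is reduced in place.
#if 0
static __inline__ long long
__example_bisection_max(long long __v[8]) {
  for (int __half = 4; __half >= 1; __half /= 2)
    for (int __i = 0; __i < __half; ++__i)
      if (__v[__i + __half] > __v[__i])
        __v[__i] = __v[__i + __half];
  return __v[0];                                 /* maximum of the 8 elements */
}
#endif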
10005
10006 static __inline__ long long __DEFAULT_FN_ATTRS 
10007 _mm512_reduce_max_epi64(__m512i __V) {
10008   _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
10009 }
10010
10011 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10012 _mm512_reduce_max_epu64(__m512i __V) {
10013   _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
10014 }
10015
10016 static __inline__ double __DEFAULT_FN_ATTRS 
10017 _mm512_reduce_max_pd(__m512d __V) {
10018   _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
10019 }
10020
10021 static __inline__ long long __DEFAULT_FN_ATTRS
10022 _mm512_reduce_min_epi64(__m512i __V) {
10023   _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
10024 }
10025
10026 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10027 _mm512_reduce_min_epu64(__m512i __V) {
10028   _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
10029 }
10030
10031 static __inline__ double __DEFAULT_FN_ATTRS 
10032 _mm512_reduce_min_pd(__m512d __V) {
10033   _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
10034 }
10035
10036 // Vec512 - Vector with size 512.
10037 // Vec512Neutral - A 512-bit vector with all elements set to the identity element.
10038 // Identity element: {max_epi,0x8000000000000000}
10039 //                   {max_epu,0x0000000000000000}
10040 //                   {max_pd, 0xFFF0000000000000}
10041 //                   {min_epi,0x7FFFFFFFFFFFFFFF}
10042 //                   {min_epu,0xFFFFFFFFFFFFFFFF}
10043 //                   {min_pd, 0x7FF0000000000000}
10044 //
10045 // IntrinName - Can be one of the following: {max|min}_{epi64|epu64|pd}, for example:
10046 //              _mm512_max_epi64
10047 // T1 - Can get 'i' for int and 'd' for double. [__m512{i|d}]
10048 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10049 // T3 - Can get 'q' for q-word and 'pd' for packed double.
10050 //      [__builtin_ia32_select{q|pd}_512]
10051 // Mask - Intrinsic Mask
10052
10053 #define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
10054                                         T2, T3, Mask)                          \
10055   __extension__({                                                              \
10056     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
10057                              (__mmask8)Mask,                                   \
10058                              (__v8d##T2)Vec512,                                \
10059                              (__v8d##T2)Vec512Neutral);                        \
10060     _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
10061   })
10062
10063 static __inline__ long long __DEFAULT_FN_ATTRS
10064 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10065   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10066                                   max_epi64, i, i, q, __M);
10067 }
10068
10069 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10070 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10071   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10072                                   max_epu64, i, i, q, __M);
10073 }
10074
10075 static __inline__ double __DEFAULT_FN_ATTRS
10076 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
10077   _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
10078                                   max_pd, d, f, pd, __M);
10079 }
10080
10081 static __inline__ long long __DEFAULT_FN_ATTRS
10082 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10083   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10084                                   min_epi64, i, i, q, __M);
10085 }
10086
10087 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10088 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10089   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10090                                   min_epu64, i, i, q, __M);
10091 }
10092
10093 static __inline__ double __DEFAULT_FN_ATTRS
10094 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
10095   _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10096                                   min_pd, d, f, pd, __M);
10097 }
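
// Illustrative sketch (not part of the upstream header): the masked max/min
// reductions above first overwrite the lanes cleared in the mask with the
// identity element listed in the comment above (-infinity for max_pd), so
// only the selected lanes can win; an all-zero mask therefore yields the
// identity itself.  The helper name __example_masked_max_pd is hypothetical
// and the block is guarded by "#if 0" so it is never compiled.
#if 0
static __inline__ double __DEFAULT_FN_ATTRS
__example_masked_max_pd(void) {
  __m512d __v = _mm512_set1_pd(3.0);             /* eight lanes, each 3.0     */
  /* Only lane 0 is kept; the other lanes become -infinity, so the result
   * is 3.0.  With mask 0 the result would be -infinity. */
  return _mm512_mask_reduce_max_pd((__mmask8)0x01, __v);
}
#endif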
10098
10099 // Vec512 - Vector with size 512.
10100 // IntrinName - Can be one of the following: {max|min}_{epi32|epu32|ps}, for example:
10101 //              _mm512_max_epi32
10102 // T1 - Can get 'i' for int and '' (empty) for float. [__m512{i|}]
10103 // T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
10104
10105 #define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
10106     Vec512 = _mm512_##IntrinName(                                              \
10107                   (__m512##T1)__builtin_shufflevector(                         \
10108                                   (__v16s##T2)Vec512,                          \
10109                                   (__v16s##T2)Vec512,                          \
10110                                   0, 1, 2, 3, 4, 5, 6, 7,                      \
10111                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
10112                   (__m512##T1)__builtin_shufflevector(                         \
10113                                   (__v16s##T2)Vec512,                          \
10114                                   (__v16s##T2)Vec512,                          \
10115                                   8, 9, 10, 11, 12, 13, 14, 15,                \
10116                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
10117     Vec512 = _mm512_##IntrinName(                                              \
10118                   (__m512##T1)__builtin_shufflevector(                         \
10119                                   (__v16s##T2)Vec512,                          \
10120                                   (__v16s##T2)Vec512,                          \
10121                                   0, 1, 2, 3, -1, -1, -1, -1,                  \
10122                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
10123                   (__m512##T1)__builtin_shufflevector(                         \
10124                                   (__v16s##T2)Vec512,                          \
10125                                   (__v16s##T2)Vec512,                          \
10126                                   4, 5, 6, 7, -1, -1, -1, -1,                  \
10127                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
10128     Vec512 = _mm512_##IntrinName(                                              \
10129                   (__m512##T1)__builtin_shufflevector(                         \
10130                                   (__v16s##T2)Vec512,                          \
10131                                   (__v16s##T2)Vec512,                          \
10132                                   0, 1, -1, -1, -1, -1, -1, -1,                \
10133                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
10134                   (__m512##T1)__builtin_shufflevector(                         \
10135                                   (__v16s##T2)Vec512,                          \
10136                                   (__v16s##T2)Vec512,                          \
10137                                   2, 3, -1, -1, -1, -1, -1, -1,                \
10138                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
10139     Vec512 = _mm512_##IntrinName(                                              \
10140                   (__m512##T1)__builtin_shufflevector(                         \
10141                                   (__v16s##T2)Vec512,                          \
10142                                   (__v16s##T2)Vec512,                          \
10143                                   0,  -1, -1, -1, -1, -1, -1, -1,              \
10144                                   -1, -1, -1, -1, -1, -1, -1, -1),             \
10145                   (__m512##T1)__builtin_shufflevector(                         \
10146                                   (__v16s##T2)Vec512,                          \
10147                                   (__v16s##T2)Vec512,                          \
10148                                   1, -1, -1, -1, -1, -1, -1, -1,               \
10149                                   -1, -1, -1, -1, -1, -1, -1, -1));            \
10150     return Vec512[0];                                                          \
10151   })
10152
10153 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i __a) {
10154   _mm512_reduce_maxMin_32bit(__a, max_epi32, i, i);
10155 }
10156
10157 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10158 _mm512_reduce_max_epu32(__m512i __a) {
10159   _mm512_reduce_maxMin_32bit(__a, max_epu32, i, i);
10160 }
10161
10162 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 __a) {
10163   _mm512_reduce_maxMin_32bit(__a, max_ps, , f);
10164 }
10165
10166 static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i __a) {
10167   _mm512_reduce_maxMin_32bit(__a, min_epi32, i, i);
10168 }
10169
10170 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10171 _mm512_reduce_min_epu32(__m512i __a) {
10172   _mm512_reduce_maxMin_32bit(__a, min_epu32, i, i);
10173 }
10174
10175 static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 __a) {
10176   _mm512_reduce_maxMin_32bit(__a, min_ps, , f);
10177 }
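
// Illustrative sketch (not part of the upstream header): the 32-bit reduction
// above should agree with a plain scalar loop over the same sixteen elements.
// The helper name __example_check_max_epi32 is hypothetical; it assumes the
// _mm512_loadu_si512 load defined earlier in this header, and the block is
// guarded by "#if 0" so it is never compiled.
#if 0
static __inline__ int __DEFAULT_FN_ATTRS
__example_check_max_epi32(const int __data[16]) {
  int __scalar = __data[0];
  for (int __i = 1; __i < 16; ++__i)             /* scalar reference maximum  */
    if (__data[__i] > __scalar)
      __scalar = __data[__i];
  /* Returns 1 when the vector reduction matches the scalar loop. */
  return __scalar == _mm512_reduce_max_epi32(_mm512_loadu_si512(__data));
}
#endif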
10178
10179 // Vec512 - Vector with size 512.
10180 // Vec512Neutral - A 512-bit vector with all elements set to the identity element.
10181 // Identity element: {max_epi,0x80000000}
10182 //                   {max_epu,0x00000000}
10183 //                   {max_ps, 0xFF800000}
10184 //                   {min_epi,0x7FFFFFFF}
10185 //                   {min_epu,0xFFFFFFFF}
10186 //                   {min_ps, 0x7F800000}
10187 //
10188 // IntrinName - Can be one of the following: {max|min}_{epi32|epu32|ps}, for example:
10189 //              _mm512_max_epi32
10190 // T1 - Can get 'i' for int and '' (empty) for float. [__m512{i|}]
10191 // T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
10192 // T3 - Can get 'd' for d-word and 'ps' for packed single.
10193 //      [__builtin_ia32_select{d|ps}_512]
10194 // Mask - Intrinsic Mask
10195
10196 #define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
10197                                         T2, T3, Mask)                          \
10198   __extension__({                                                              \
10199     Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
10200                                         (__mmask16)Mask,                       \
10201                                         (__v16s##T2)Vec512,                    \
10202                                         (__v16s##T2)Vec512Neutral);            \
10203     _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
10204   })
10205
10206 static __inline__ int __DEFAULT_FN_ATTRS
10207 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10208   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10209                                   i, i, d, __M);
10210 }
10211
10212 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10213 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10214   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10215                                   i, i, d, __M);
10216 }
10217
10218 static __inline__ float __DEFAULT_FN_ATTRS
10219 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
10220   _mm512_mask_reduce_maxMin_32bit(__V, -_mm512_set1_ps(__builtin_inff()),
10221                                   max_ps, , f, ps, __M);
10222 }
10223
10224 static __inline__ int __DEFAULT_FN_ATTRS
10225 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10226   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10227                                   i, i, d, __M);
10228 }
10229
10230 static __inline__ unsigned int __DEFAULT_FN_ATTRS
10231 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10232   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10233                                   i, i, d, __M);
10234 }
10235
10236 static __inline__ float __DEFAULT_FN_ATTRS
10237 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
10238   _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()),
10239                                   min_ps, , f, ps, __M);
10240 }
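
// Illustrative sketch (not part of the upstream header): for the masked
// 32-bit min/max reductions, lanes cleared in the mask are replaced by the
// identity element (+infinity for min_ps), so an all-zero mask returns
// +infinity and any selected lane competes as usual.  The helper name
// __example_masked_min_ps is hypothetical and the block is guarded by "#if 0"
// so it is never compiled.
#if 0
static __inline__ float __DEFAULT_FN_ATTRS
__example_masked_min_ps(void) {
  __m512 __v = _mm512_set1_ps(7.0f);             /* sixteen lanes, each 7.0f  */
  /* Lanes 0 and 1 are kept; the rest become +infinity, so the result is 7.0f. */
  return _mm512_mask_reduce_min_ps((__mmask16)0x0003, __v);
}
#endif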
10241
10242 #undef __DEFAULT_FN_ATTRS
10243
10244 #endif // __AVX512FINTRIN_H