/*
 * Provenance: FreeBSD vendored copy of
 * contrib/llvm/tools/clang/lib/Headers/avx512fintrin.h,
 * from the upgrade of clang/llvm/lld/lldb to r309439.
 * (Web-viewer navigation residue replaced with this comment.)
 */
1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to deal
5  * in the Software without restriction, including without limitation the rights
6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7  * copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19  * THE SOFTWARE.
20  *
21  *===-----------------------------------------------------------------------===
22  */
23 #ifndef __IMMINTRIN_H
24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25 #endif
26
27 #ifndef __AVX512FINTRIN_H
28 #define __AVX512FINTRIN_H
29
/* Internal 512-bit (64-byte) vector types; they differ only in element
   type and count. */
typedef char __v64qi __attribute__((__vector_size__(64)));      /* 64 x i8  */
typedef short __v32hi __attribute__((__vector_size__(64)));     /* 32 x i16 */
typedef double __v8df __attribute__((__vector_size__(64)));     /*  8 x f64 */
typedef float __v16sf __attribute__((__vector_size__(64)));     /* 16 x f32 */
typedef long long __v8di __attribute__((__vector_size__(64)));  /*  8 x i64 */
typedef int __v16si __attribute__((__vector_size__(64)));       /* 16 x i32 */

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types exposed to users of <immintrin.h>. */
typedef float __m512 __attribute__((__vector_size__(64)));
typedef double __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Write-mask types: one bit per destination lane (8 lanes for 64-bit
   elements, 16 lanes for 32-bit elements). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding mode macros.  Passed as the rounding-control argument of the
   *_round_* intrinsics below. */
#define _MM_FROUND_TO_NEAREST_INT   0x00  /* round to nearest even */
#define _MM_FROUND_TO_NEG_INF       0x01  /* round toward -inf */
#define _MM_FROUND_TO_POS_INF       0x02  /* round toward +inf */
#define _MM_FROUND_TO_ZERO          0x03  /* truncate */
#define _MM_FROUND_CUR_DIRECTION    0x04  /* use the current MXCSR rounding mode */
56
/* Constants for integer comparison predicates.
   NOTE(review): the enumerator order is the hardware predicate encoding
   (0..7); _MM_CMPINT_UNUSED holds slot 3 so NE..NLE keep their encodings.
   GE/GT are aliases because the hardware only encodes the negated forms. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,  /* reserved encoding, never passed */
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
69
/* 4-element shuffle selectors.  Each letter of the name picks one source
   element (A=0, B=1, C=2, D=3); reading the name left to right gives the
   selector for the highest down to the lowest destination position, packed
   as four 2-bit fields.  E.g. _MM_PERM_DCBA == 0xE4 == 0b11'10'01'00, the
   identity permutation, and _MM_PERM_AAAA == 0x00 broadcasts element 0. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Normalization interval selectors for the getmant intrinsics: the
   interval into which the extracted mantissa is mapped. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
167
/* Sign-control selectors for the getmant intrinsics: what sign the
   extracted mantissa receives. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Define the default attributes for the functions in this file: force
   inlining, keep them out of debug info, and require the AVX-512F target
   feature on each definition. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178 /* Create vectors with repeated elements */
179
/* Return a 512-bit integer vector with all bits zero. */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_setzero_si512(void)
{
  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* Bitwise-identical alias: zeroing epi32 lanes is the same as zeroing
   the whole 512-bit register. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
187
/* _mm512_undefined*: return a 512-bit vector with unspecified contents.
   __builtin_ia32_undef512 produces an "undef" value, so no instruction is
   emitted; callers must not rely on the result's value. */

/* Unspecified vector of [8 x double]. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/* Unspecified vector of [16 x float] (legacy spelling). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Unspecified vector of [16 x float]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Unspecified 512-bit integer vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
211
/* Broadcast the low 32-bit element of __A to all 16 lanes of the result.
   The shuffle's second operand is undefined and never selected (all
   indices are 0, i.e. element 0 of __A). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A,
                                          (__v4si)_mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masking variant: lane i is the broadcast value when bit i of __M
   is set, otherwise the corresponding lane of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/* Zero-masking variant: unselected lanes are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}
235
/* Broadcast the low 64-bit element of __A to all 8 lanes of the result;
   the undefined second shuffle operand is never selected. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A,
                                          (__v2di) _mm_undefined_si128(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masking variant: masked-off lanes come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

/* Zero-masking variant: masked-off lanes are zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
260
/* Broadcast the scalar __A into every 32-bit lane selected by __M;
   unselected lanes are zero (zero-masking, passthrough is setzero). */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 __M);
}

/* Broadcast the 64-bit scalar __A into every lane selected by __M,
   zeroing the rest.  On 32-bit x86 the GPR-source builtin is unavailable
   (presumably because a 64-bit general register does not exist there --
   note the #ifdef), so a memory-source builtin is used instead. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
#ifdef __x86_64__
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
                 (__v8di)
                 _mm512_setzero_si512 (),
                 __M);
#else
  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
                 (__v8di)
                 _mm512_setzero_si512 (),
                 __M);
#endif
}
285
/* Return a vector of [16 x float] with all elements 0.0. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_setzero_ps(void)
{
  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Legacy alias for _mm512_setzero_ps. */
#define _mm512_setzero _mm512_setzero_ps

/* Return a vector of [8 x double] with all elements 0.0. */
static  __inline __m512d __DEFAULT_FN_ATTRS
_mm512_setzero_pd(void)
{
  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return a vector of [16 x float] with every element equal to __w. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set1_ps(float __w)
{
  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                   __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Return a vector of [8 x double] with every element equal to __w. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set1_pd(double __w)
{
  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}
313
/* Return a 512-bit vector with all 64 byte lanes equal to __w. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi8(char __w)
{
  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Return a 512-bit vector with all 32 16-bit lanes equal to __w. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi16(short __w)
{
  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w,
                             __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Return a 512-bit vector with all 16 32-bit lanes equal to __s. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi32(int __s)
{
  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
                             __s, __s, __s, __s, __s, __s, __s, __s };
}

/* Return a 512-bit vector with all 8 64-bit lanes equal to __d. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set1_epi64(long long __d)
{
  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}
348
/* Broadcast the low float of __A to all 16 lanes; the undefined second
   shuffle operand is never selected (all indices are 0). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A,
                                         (__v4sf)_mm_undefined_ps(),
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
356
/* _mm512_set4_*: build a 512-bit vector by repeating the 4-element group
   (__A,__B,__C,__D).  Arguments are given highest-lane first, so __D
   lands in lane 0 -- hence the reversed initializer order.  The
   _mm512_setr4_* macros below take arguments lowest-lane first. */

/* Repeat (__A,__B,__C,__D) across the 16 32-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return  (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat (__A,__B,__C,__D) across the 8 64-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return  (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat (__A,__B,__C,__D) across the 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return  (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeat (__A,__B,__C,__D) across the 16 float lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return  (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* "r" (reversed) variants: arguments in memory/lane order, implemented
   by swapping the argument order of the set4 forms. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
399
/* Broadcast the low double of __A to all 8 lanes; the undefined second
   shuffle operand is never selected. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A,
                                          (__v2df) _mm_undefined_pd(),
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
407
/* Cast between vector types */

/* Widening casts are built with __builtin_shufflevector; a -1 index
   leaves that element unspecified, so the upper part of the result is
   undefined (use the _mm512_zext* intrinsics below for zeroed uppers).
   Same-width casts are pure bit reinterpretations and cost nothing. */

/* [4 x double] -> [8 x double]; upper 4 elements undefined. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* [8 x float] -> [16 x float]; upper 8 elements undefined. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                          -1, -1, -1, -1, -1, -1, -1, -1);
}

/* Truncate to the low [2 x double]. */
static __inline __m128d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/* Truncate to the low [4 x double]. */
static __inline __m256d __DEFAULT_FN_ATTRS
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/* Truncate to the low [4 x float]. */
static __inline __m128 __DEFAULT_FN_ATTRS
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Truncate to the low [8 x float]. */
static __inline __m256 __DEFAULT_FN_ATTRS
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Reinterpret the 512 bits of [8 x double] as [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

/* Reinterpret [8 x double] as a 512-bit integer vector. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/* [2 x double] -> [8 x double]; upper 6 elements undefined. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_castpd128_pd512 (__m128d __A)
{
  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* Reinterpret [16 x float] as [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

/* Reinterpret [16 x float] as a 512-bit integer vector. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/* [4 x float] -> [16 x float]; upper 12 elements undefined. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_castps128_ps512 (__m128 __A)
{
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
}

/* 128-bit integer -> 512-bit integer; upper 384 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi128_si512 (__m128i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
}

/* 256-bit integer -> 512-bit integer; upper 256 bits undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
}

/* Reinterpret a 512-bit integer vector as [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

/* Reinterpret a 512-bit integer vector as [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/* Truncate to the low 128 bits (indices are 64-bit lanes of __m512i). */
static __inline __m128i __DEFAULT_FN_ATTRS
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/* Truncate to the low 256 bits. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
518
/* Convert the low 16 bits of an int to a 16-bit write mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}

/* Convert a 16-bit write mask to an int (zero-extended). */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask2int(__mmask16 __a)
{
  return (int)__a;
}
530
/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    128-bit floating-point vector of [2 x double]. The lower 128 bits
///    contain the value of the source vector. The upper 384 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [2 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices 2..3 select elements of the zero vector (the second shuffle
     operand), zeroing the upper lanes. */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
///    256-bit floating-point vector of [4 x double]. The lower 256 bits
///    contain the value of the source vector. The upper 256 bits are set
///    to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [4 x double].
/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 4..7 select the zero vector's elements. */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}

/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
///    the value of the source vector. The upper 384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit vector of [4 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices 4..7 (repeated) select the zero vector's elements. */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}

/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
///    the value of the source vector. The upper 256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit vector of [8 x float].
/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 8..15 select the zero vector's elements. */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
///    The lower 128 bits contain the value of the source vector. The upper
///    384 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 128-bit integer vector.
/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices 2..3 (repeated) select the zero vector's 64-bit elements. */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}

/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
///    The lower 256 bits contain the value of the source vector. The upper
///    256 bits are set to zero.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic has no corresponding instruction.
///
/// \param __a
///    A 256-bit integer vector.
/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 4..7 select the zero vector's 64-bit elements. */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
640
/* Bitwise operators */

/* Bitwise AND of 32-bit lanes (bitwise identical for any lane width). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* Masked AND: lane i is (__a & __b) when bit i of __k is set, else the
   corresponding lane of __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si) _mm512_and_epi32(__a, __b),
                (__v16si) __src);
}

/* Zero-masked AND: delegates to the merge form with a zero passthrough. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* Bitwise AND of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Masked AND over 64-bit lanes; masked-off lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}

/* Zero-masked AND over 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* ANDNOT computes (~__A) & __B -- note the complement applies to the
   FIRST operand, matching the hardware's vpandn semantics. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* (~__A) & __B over 32-bit lanes (bitwise identical to the si512 form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
}

/* Masked ANDNOT: masked-off 32-bit lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked ANDNOT over 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/* (~__A) & __B over 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
}

/* Masked ANDNOT: masked-off 64-bit lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked ANDNOT over 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}
731
/* Bitwise OR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Masked OR: masked-off 32-bit lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/* Zero-masked OR over 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise OR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Masked OR: masked-off 64-bit lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masked OR over 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR of 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Masked XOR: masked-off 32-bit lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}

/* Zero-masked XOR over 32-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Bitwise XOR of 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Masked XOR: masked-off 64-bit lanes come from __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masked XOR over 64-bit lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
811
812 static __inline__ __m512i __DEFAULT_FN_ATTRS
813 _mm512_and_si512(__m512i __a, __m512i __b)
814 {
815   return (__m512i)((__v8du)__a & (__v8du)__b);
816 }
817
818 static __inline__ __m512i __DEFAULT_FN_ATTRS
819 _mm512_or_si512(__m512i __a, __m512i __b)
820 {
821   return (__m512i)((__v8du)__a | (__v8du)__b);
822 }
823
824 static __inline__ __m512i __DEFAULT_FN_ATTRS
825 _mm512_xor_si512(__m512i __a, __m512i __b)
826 {
827   return (__m512i)((__v8du)__a ^ (__v8du)__b);
828 }
829
830 /* Arithmetic */
831
832 static __inline __m512d __DEFAULT_FN_ATTRS
833 _mm512_add_pd(__m512d __a, __m512d __b)
834 {
835   return (__m512d)((__v8df)__a + (__v8df)__b);
836 }
837
838 static __inline __m512 __DEFAULT_FN_ATTRS
839 _mm512_add_ps(__m512 __a, __m512 __b)
840 {
841   return (__m512)((__v16sf)__a + (__v16sf)__b);
842 }
843
844 static __inline __m512d __DEFAULT_FN_ATTRS
845 _mm512_mul_pd(__m512d __a, __m512d __b)
846 {
847   return (__m512d)((__v8df)__a * (__v8df)__b);
848 }
849
850 static __inline __m512 __DEFAULT_FN_ATTRS
851 _mm512_mul_ps(__m512 __a, __m512 __b)
852 {
853   return (__m512)((__v16sf)__a * (__v16sf)__b);
854 }
855
856 static __inline __m512d __DEFAULT_FN_ATTRS
857 _mm512_sub_pd(__m512d __a, __m512d __b)
858 {
859   return (__m512d)((__v8df)__a - (__v8df)__b);
860 }
861
862 static __inline __m512 __DEFAULT_FN_ATTRS
863 _mm512_sub_ps(__m512 __a, __m512 __b)
864 {
865   return (__m512)((__v16sf)__a - (__v16sf)__b);
866 }
867
868 static __inline__ __m512i __DEFAULT_FN_ATTRS
869 _mm512_add_epi64 (__m512i __A, __m512i __B)
870 {
871   return (__m512i) ((__v8du) __A + (__v8du) __B);
872 }
873
874 static __inline__ __m512i __DEFAULT_FN_ATTRS
875 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
876 {
877   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
878                                              (__v8di)_mm512_add_epi64(__A, __B),
879                                              (__v8di)__W);
880 }
881
882 static __inline__ __m512i __DEFAULT_FN_ATTRS
883 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
884 {
885   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
886                                              (__v8di)_mm512_add_epi64(__A, __B),
887                                              (__v8di)_mm512_setzero_si512());
888 }
889
890 static __inline__ __m512i __DEFAULT_FN_ATTRS
891 _mm512_sub_epi64 (__m512i __A, __m512i __B)
892 {
893   return (__m512i) ((__v8du) __A - (__v8du) __B);
894 }
895
896 static __inline__ __m512i __DEFAULT_FN_ATTRS
897 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
898 {
899   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
900                                              (__v8di)_mm512_sub_epi64(__A, __B),
901                                              (__v8di)__W);
902 }
903
904 static __inline__ __m512i __DEFAULT_FN_ATTRS
905 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
906 {
907   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
908                                              (__v8di)_mm512_sub_epi64(__A, __B),
909                                              (__v8di)_mm512_setzero_si512());
910 }
911
912 static __inline__ __m512i __DEFAULT_FN_ATTRS
913 _mm512_add_epi32 (__m512i __A, __m512i __B)
914 {
915   return (__m512i) ((__v16su) __A + (__v16su) __B);
916 }
917
918 static __inline__ __m512i __DEFAULT_FN_ATTRS
919 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
920 {
921   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
922                                              (__v16si)_mm512_add_epi32(__A, __B),
923                                              (__v16si)__W);
924 }
925
926 static __inline__ __m512i __DEFAULT_FN_ATTRS
927 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
928 {
929   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
930                                              (__v16si)_mm512_add_epi32(__A, __B),
931                                              (__v16si)_mm512_setzero_si512());
932 }
933
934 static __inline__ __m512i __DEFAULT_FN_ATTRS
935 _mm512_sub_epi32 (__m512i __A, __m512i __B)
936 {
937   return (__m512i) ((__v16su) __A - (__v16su) __B);
938 }
939
940 static __inline__ __m512i __DEFAULT_FN_ATTRS
941 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
942 {
943   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
944                                              (__v16si)_mm512_sub_epi32(__A, __B),
945                                              (__v16si)__W);
946 }
947
948 static __inline__ __m512i __DEFAULT_FN_ATTRS
949 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
950 {
951   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
952                                              (__v16si)_mm512_sub_epi32(__A, __B),
953                                              (__v16si)_mm512_setzero_si512());
954 }
955
956 #define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
957   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
958                                         (__v8df)(__m512d)(B), \
959                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
960                                         (int)(R)); })
961
962 #define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
963   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
964                                         (__v8df)(__m512d)(B), \
965                                         (__v8df)_mm512_setzero_pd(), \
966                                         (__mmask8)(U), (int)(R)); })
967
968 #define _mm512_max_round_pd(A, B, R) __extension__ ({ \
969   (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
970                                         (__v8df)(__m512d)(B), \
971                                         (__v8df)_mm512_undefined_pd(), \
972                                         (__mmask8)-1, (int)(R)); })
973
974 static  __inline__ __m512d __DEFAULT_FN_ATTRS
975 _mm512_max_pd(__m512d __A, __m512d __B)
976 {
977   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
978              (__v8df) __B,
979              (__v8df)
980              _mm512_setzero_pd (),
981              (__mmask8) -1,
982              _MM_FROUND_CUR_DIRECTION);
983 }
984
985 static __inline__ __m512d __DEFAULT_FN_ATTRS
986 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
987 {
988   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
989                   (__v8df) __B,
990                   (__v8df) __W,
991                   (__mmask8) __U,
992                   _MM_FROUND_CUR_DIRECTION);
993 }
994
995 static __inline__ __m512d __DEFAULT_FN_ATTRS
996 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
997 {
998   return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
999                   (__v8df) __B,
1000                   (__v8df)
1001                   _mm512_setzero_pd (),
1002                   (__mmask8) __U,
1003                   _MM_FROUND_CUR_DIRECTION);
1004 }
1005
1006 #define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
1007   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
1008                                        (__v16sf)(__m512)(B), \
1009                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
1010                                        (int)(R)); })
1011
1012 #define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
1013   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
1014                                        (__v16sf)(__m512)(B), \
1015                                        (__v16sf)_mm512_setzero_ps(), \
1016                                        (__mmask16)(U), (int)(R)); })
1017
1018 #define _mm512_max_round_ps(A, B, R) __extension__ ({ \
1019   (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
1020                                        (__v16sf)(__m512)(B), \
1021                                        (__v16sf)_mm512_undefined_ps(), \
1022                                        (__mmask16)-1, (int)(R)); })
1023
1024 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1025 _mm512_max_ps(__m512 __A, __m512 __B)
1026 {
1027   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1028             (__v16sf) __B,
1029             (__v16sf)
1030             _mm512_setzero_ps (),
1031             (__mmask16) -1,
1032             _MM_FROUND_CUR_DIRECTION);
1033 }
1034
1035 static __inline__ __m512 __DEFAULT_FN_ATTRS
1036 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1037 {
1038   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1039                  (__v16sf) __B,
1040                  (__v16sf) __W,
1041                  (__mmask16) __U,
1042                  _MM_FROUND_CUR_DIRECTION);
1043 }
1044
1045 static __inline__ __m512 __DEFAULT_FN_ATTRS
1046 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1047 {
1048   return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1049                  (__v16sf) __B,
1050                  (__v16sf)
1051                  _mm512_setzero_ps (),
1052                  (__mmask16) __U,
1053                  _MM_FROUND_CUR_DIRECTION);
1054 }
1055
1056 static __inline__ __m128 __DEFAULT_FN_ATTRS
1057 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1058   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1059                 (__v4sf) __B,
1060                 (__v4sf) __W,
1061                 (__mmask8) __U,
1062                 _MM_FROUND_CUR_DIRECTION);
1063 }
1064
1065 static __inline__ __m128 __DEFAULT_FN_ATTRS
1066 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1067   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1068                 (__v4sf) __B,
1069                 (__v4sf)  _mm_setzero_ps (),
1070                 (__mmask8) __U,
1071                 _MM_FROUND_CUR_DIRECTION);
1072 }
1073
1074 #define _mm_max_round_ss(A, B, R) __extension__ ({ \
1075   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1076                                           (__v4sf)(__m128)(B), \
1077                                           (__v4sf)_mm_setzero_ps(), \
1078                                           (__mmask8)-1, (int)(R)); })
1079
1080 #define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
1081   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1082                                           (__v4sf)(__m128)(B), \
1083                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
1084                                           (int)(R)); })
1085
1086 #define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
1087   (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1088                                           (__v4sf)(__m128)(B), \
1089                                           (__v4sf)_mm_setzero_ps(), \
1090                                           (__mmask8)(U), (int)(R)); })
1091
1092 static __inline__ __m128d __DEFAULT_FN_ATTRS
1093 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1094   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1095                 (__v2df) __B,
1096                 (__v2df) __W,
1097                 (__mmask8) __U,
1098                 _MM_FROUND_CUR_DIRECTION);
1099 }
1100
1101 static __inline__ __m128d __DEFAULT_FN_ATTRS
1102 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1103   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1104                 (__v2df) __B,
1105                 (__v2df)  _mm_setzero_pd (),
1106                 (__mmask8) __U,
1107                 _MM_FROUND_CUR_DIRECTION);
1108 }
1109
1110 #define _mm_max_round_sd(A, B, R) __extension__ ({ \
1111   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1112                                            (__v2df)(__m128d)(B), \
1113                                            (__v2df)_mm_setzero_pd(), \
1114                                            (__mmask8)-1, (int)(R)); })
1115
1116 #define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
1117   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1118                                            (__v2df)(__m128d)(B), \
1119                                            (__v2df)(__m128d)(W), \
1120                                            (__mmask8)(U), (int)(R)); })
1121
1122 #define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
1123   (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1124                                            (__v2df)(__m128d)(B), \
1125                                            (__v2df)_mm_setzero_pd(), \
1126                                            (__mmask8)(U), (int)(R)); })
1127
1128 static __inline __m512i
1129 __DEFAULT_FN_ATTRS
1130 _mm512_max_epi32(__m512i __A, __m512i __B)
1131 {
1132   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1133               (__v16si) __B,
1134               (__v16si)
1135               _mm512_setzero_si512 (),
1136               (__mmask16) -1);
1137 }
1138
1139 static __inline__ __m512i __DEFAULT_FN_ATTRS
1140 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1141 {
1142   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1143                    (__v16si) __B,
1144                    (__v16si) __W, __M);
1145 }
1146
1147 static __inline__ __m512i __DEFAULT_FN_ATTRS
1148 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1149 {
1150   return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1151                    (__v16si) __B,
1152                    (__v16si)
1153                    _mm512_setzero_si512 (),
1154                    __M);
1155 }
1156
1157 static __inline __m512i __DEFAULT_FN_ATTRS
1158 _mm512_max_epu32(__m512i __A, __m512i __B)
1159 {
1160   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1161               (__v16si) __B,
1162               (__v16si)
1163               _mm512_setzero_si512 (),
1164               (__mmask16) -1);
1165 }
1166
1167 static __inline__ __m512i __DEFAULT_FN_ATTRS
1168 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1169 {
1170   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1171                    (__v16si) __B,
1172                    (__v16si) __W, __M);
1173 }
1174
1175 static __inline__ __m512i __DEFAULT_FN_ATTRS
1176 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1177 {
1178   return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1179                    (__v16si) __B,
1180                    (__v16si)
1181                    _mm512_setzero_si512 (),
1182                    __M);
1183 }
1184
1185 static __inline __m512i __DEFAULT_FN_ATTRS
1186 _mm512_max_epi64(__m512i __A, __m512i __B)
1187 {
1188   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1189               (__v8di) __B,
1190               (__v8di)
1191               _mm512_setzero_si512 (),
1192               (__mmask8) -1);
1193 }
1194
1195 static __inline__ __m512i __DEFAULT_FN_ATTRS
1196 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1197 {
1198   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1199                    (__v8di) __B,
1200                    (__v8di) __W, __M);
1201 }
1202
1203 static __inline__ __m512i __DEFAULT_FN_ATTRS
1204 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1205 {
1206   return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1207                    (__v8di) __B,
1208                    (__v8di)
1209                    _mm512_setzero_si512 (),
1210                    __M);
1211 }
1212
1213 static __inline __m512i __DEFAULT_FN_ATTRS
1214 _mm512_max_epu64(__m512i __A, __m512i __B)
1215 {
1216   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1217               (__v8di) __B,
1218               (__v8di)
1219               _mm512_setzero_si512 (),
1220               (__mmask8) -1);
1221 }
1222
1223 static __inline__ __m512i __DEFAULT_FN_ATTRS
1224 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1225 {
1226   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1227                    (__v8di) __B,
1228                    (__v8di) __W, __M);
1229 }
1230
1231 static __inline__ __m512i __DEFAULT_FN_ATTRS
1232 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1233 {
1234   return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1235                    (__v8di) __B,
1236                    (__v8di)
1237                    _mm512_setzero_si512 (),
1238                    __M);
1239 }
1240
1241 #define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
1242   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1243                                         (__v8df)(__m512d)(B), \
1244                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
1245                                         (int)(R)); })
1246
1247 #define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
1248   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1249                                         (__v8df)(__m512d)(B), \
1250                                         (__v8df)_mm512_setzero_pd(), \
1251                                         (__mmask8)(U), (int)(R)); })
1252
1253 #define _mm512_min_round_pd(A, B, R) __extension__ ({ \
1254   (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
1255                                         (__v8df)(__m512d)(B), \
1256                                         (__v8df)_mm512_undefined_pd(), \
1257                                         (__mmask8)-1, (int)(R)); })
1258
1259 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1260 _mm512_min_pd(__m512d __A, __m512d __B)
1261 {
1262   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1263              (__v8df) __B,
1264              (__v8df)
1265              _mm512_setzero_pd (),
1266              (__mmask8) -1,
1267              _MM_FROUND_CUR_DIRECTION);
1268 }
1269
1270 static __inline__ __m512d __DEFAULT_FN_ATTRS
1271 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1272 {
1273   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1274                   (__v8df) __B,
1275                   (__v8df) __W,
1276                   (__mmask8) __U,
1277                   _MM_FROUND_CUR_DIRECTION);
1278 }
1279
1280 #define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
1281   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1282                                        (__v16sf)(__m512)(B), \
1283                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
1284                                        (int)(R)); })
1285
1286 #define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
1287   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1288                                        (__v16sf)(__m512)(B), \
1289                                        (__v16sf)_mm512_setzero_ps(), \
1290                                        (__mmask16)(U), (int)(R)); })
1291
1292 #define _mm512_min_round_ps(A, B, R) __extension__ ({ \
1293   (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
1294                                        (__v16sf)(__m512)(B), \
1295                                        (__v16sf)_mm512_undefined_ps(), \
1296                                        (__mmask16)-1, (int)(R)); })
1297
1298 static __inline__ __m512d __DEFAULT_FN_ATTRS
1299 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1300 {
1301   return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1302                   (__v8df) __B,
1303                   (__v8df)
1304                   _mm512_setzero_pd (),
1305                   (__mmask8) __U,
1306                   _MM_FROUND_CUR_DIRECTION);
1307 }
1308
1309 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1310 _mm512_min_ps(__m512 __A, __m512 __B)
1311 {
1312   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1313             (__v16sf) __B,
1314             (__v16sf)
1315             _mm512_setzero_ps (),
1316             (__mmask16) -1,
1317             _MM_FROUND_CUR_DIRECTION);
1318 }
1319
1320 static __inline__ __m512 __DEFAULT_FN_ATTRS
1321 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1322 {
1323   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1324                  (__v16sf) __B,
1325                  (__v16sf) __W,
1326                  (__mmask16) __U,
1327                  _MM_FROUND_CUR_DIRECTION);
1328 }
1329
1330 static __inline__ __m512 __DEFAULT_FN_ATTRS
1331 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1332 {
1333   return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1334                  (__v16sf) __B,
1335                  (__v16sf)
1336                  _mm512_setzero_ps (),
1337                  (__mmask16) __U,
1338                  _MM_FROUND_CUR_DIRECTION);
1339 }
1340
1341 static __inline__ __m128 __DEFAULT_FN_ATTRS
1342 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1343   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1344                 (__v4sf) __B,
1345                 (__v4sf) __W,
1346                 (__mmask8) __U,
1347                 _MM_FROUND_CUR_DIRECTION);
1348 }
1349
1350 static __inline__ __m128 __DEFAULT_FN_ATTRS
1351 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1352   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1353                 (__v4sf) __B,
1354                 (__v4sf)  _mm_setzero_ps (),
1355                 (__mmask8) __U,
1356                 _MM_FROUND_CUR_DIRECTION);
1357 }
1358
1359 #define _mm_min_round_ss(A, B, R) __extension__ ({ \
1360   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1361                                           (__v4sf)(__m128)(B), \
1362                                           (__v4sf)_mm_setzero_ps(), \
1363                                           (__mmask8)-1, (int)(R)); })
1364
1365 #define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
1366   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1367                                           (__v4sf)(__m128)(B), \
1368                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
1369                                           (int)(R)); })
1370
1371 #define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
1372   (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1373                                           (__v4sf)(__m128)(B), \
1374                                           (__v4sf)_mm_setzero_ps(), \
1375                                           (__mmask8)(U), (int)(R)); })
1376
1377 static __inline__ __m128d __DEFAULT_FN_ATTRS
1378 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1379   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1380                 (__v2df) __B,
1381                 (__v2df) __W,
1382                 (__mmask8) __U,
1383                 _MM_FROUND_CUR_DIRECTION);
1384 }
1385
1386 static __inline__ __m128d __DEFAULT_FN_ATTRS
1387 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1388   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1389                 (__v2df) __B,
1390                 (__v2df)  _mm_setzero_pd (),
1391                 (__mmask8) __U,
1392                 _MM_FROUND_CUR_DIRECTION);
1393 }
1394
1395 #define _mm_min_round_sd(A, B, R) __extension__ ({ \
1396   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1397                                            (__v2df)(__m128d)(B), \
1398                                            (__v2df)_mm_setzero_pd(), \
1399                                            (__mmask8)-1, (int)(R)); })
1400
1401 #define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
1402   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1403                                            (__v2df)(__m128d)(B), \
1404                                            (__v2df)(__m128d)(W), \
1405                                            (__mmask8)(U), (int)(R)); })
1406
1407 #define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
1408   (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1409                                            (__v2df)(__m128d)(B), \
1410                                            (__v2df)_mm_setzero_pd(), \
1411                                            (__mmask8)(U), (int)(R)); })
1412
1413 static __inline __m512i
1414 __DEFAULT_FN_ATTRS
1415 _mm512_min_epi32(__m512i __A, __m512i __B)
1416 {
1417   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1418               (__v16si) __B,
1419               (__v16si)
1420               _mm512_setzero_si512 (),
1421               (__mmask16) -1);
1422 }
1423
1424 static __inline__ __m512i __DEFAULT_FN_ATTRS
1425 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1426 {
1427   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1428                    (__v16si) __B,
1429                    (__v16si) __W, __M);
1430 }
1431
1432 static __inline__ __m512i __DEFAULT_FN_ATTRS
1433 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1434 {
1435   return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1436                    (__v16si) __B,
1437                    (__v16si)
1438                    _mm512_setzero_si512 (),
1439                    __M);
1440 }
1441
1442 static __inline __m512i __DEFAULT_FN_ATTRS
1443 _mm512_min_epu32(__m512i __A, __m512i __B)
1444 {
1445   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1446               (__v16si) __B,
1447               (__v16si)
1448               _mm512_setzero_si512 (),
1449               (__mmask16) -1);
1450 }
1451
1452 static __inline__ __m512i __DEFAULT_FN_ATTRS
1453 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1454 {
1455   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1456                    (__v16si) __B,
1457                    (__v16si) __W, __M);
1458 }
1459
1460 static __inline__ __m512i __DEFAULT_FN_ATTRS
1461 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1462 {
1463   return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1464                    (__v16si) __B,
1465                    (__v16si)
1466                    _mm512_setzero_si512 (),
1467                    __M);
1468 }
1469
1470 static __inline __m512i __DEFAULT_FN_ATTRS
1471 _mm512_min_epi64(__m512i __A, __m512i __B)
1472 {
1473   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1474               (__v8di) __B,
1475               (__v8di)
1476               _mm512_setzero_si512 (),
1477               (__mmask8) -1);
1478 }
1479
1480 static __inline__ __m512i __DEFAULT_FN_ATTRS
1481 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1482 {
1483   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1484                    (__v8di) __B,
1485                    (__v8di) __W, __M);
1486 }
1487
1488 static __inline__ __m512i __DEFAULT_FN_ATTRS
1489 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1490 {
1491   return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1492                    (__v8di) __B,
1493                    (__v8di)
1494                    _mm512_setzero_si512 (),
1495                    __M);
1496 }
1497
1498 static __inline __m512i __DEFAULT_FN_ATTRS
1499 _mm512_min_epu64(__m512i __A, __m512i __B)
1500 {
1501   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1502               (__v8di) __B,
1503               (__v8di)
1504               _mm512_setzero_si512 (),
1505               (__mmask8) -1);
1506 }
1507
1508 static __inline__ __m512i __DEFAULT_FN_ATTRS
1509 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1510 {
1511   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1512                    (__v8di) __B,
1513                    (__v8di) __W, __M);
1514 }
1515
1516 static __inline__ __m512i __DEFAULT_FN_ATTRS
1517 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1518 {
1519   return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1520                    (__v8di) __B,
1521                    (__v8di)
1522                    _mm512_setzero_si512 (),
1523                    __M);
1524 }
1525
1526 static __inline __m512i __DEFAULT_FN_ATTRS
1527 _mm512_mul_epi32(__m512i __X, __m512i __Y)
1528 {
1529   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1530 }
1531
1532 static __inline __m512i __DEFAULT_FN_ATTRS
1533 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1534 {
1535   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1536                                              (__v8di)_mm512_mul_epi32(__X, __Y),
1537                                              (__v8di)__W);
1538 }
1539
1540 static __inline __m512i __DEFAULT_FN_ATTRS
1541 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1542 {
1543   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1544                                              (__v8di)_mm512_mul_epi32(__X, __Y),
1545                                              (__v8di)_mm512_setzero_si512 ());
1546 }
1547
1548 static __inline __m512i __DEFAULT_FN_ATTRS
1549 _mm512_mul_epu32(__m512i __X, __m512i __Y)
1550 {
1551   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1552 }
1553
1554 static __inline __m512i __DEFAULT_FN_ATTRS
1555 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1556 {
1557   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1558                                              (__v8di)_mm512_mul_epu32(__X, __Y),
1559                                              (__v8di)__W);
1560 }
1561
1562 static __inline __m512i __DEFAULT_FN_ATTRS
1563 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1564 {
1565   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1566                                              (__v8di)_mm512_mul_epu32(__X, __Y),
1567                                              (__v8di)_mm512_setzero_si512 ());
1568 }
1569
1570 static __inline __m512i __DEFAULT_FN_ATTRS
1571 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
1572 {
1573   return (__m512i) ((__v16su) __A * (__v16su) __B);
1574 }
1575
1576 static __inline __m512i __DEFAULT_FN_ATTRS
1577 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1578 {
1579   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1580                                              (__v16si)_mm512_mullo_epi32(__A, __B),
1581                                              (__v16si)_mm512_setzero_si512());
1582 }
1583
1584 static __inline __m512i __DEFAULT_FN_ATTRS
1585 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1586 {
1587   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1588                                              (__v16si)_mm512_mullo_epi32(__A, __B),
1589                                              (__v16si)__W);
1590 }
1591
/* Packed double sqrt with an explicit rounding-mode argument R. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })

#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
1606
1607 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1608 _mm512_sqrt_pd(__m512d __a)
1609 {
1610   return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1611                                                 (__v8df) _mm512_setzero_pd (),
1612                                                 (__mmask8) -1,
1613                                                 _MM_FROUND_CUR_DIRECTION);
1614 }
1615
1616 static __inline__ __m512d __DEFAULT_FN_ATTRS
1617 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1618 {
1619   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1620                    (__v8df) __W,
1621                    (__mmask8) __U,
1622                    _MM_FROUND_CUR_DIRECTION);
1623 }
1624
1625 static __inline__ __m512d __DEFAULT_FN_ATTRS
1626 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1627 {
1628   return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1629                    (__v8df)
1630                    _mm512_setzero_pd (),
1631                    (__mmask8) __U,
1632                    _MM_FROUND_CUR_DIRECTION);
1633 }
1634
/* Packed float sqrt with an explicit rounding-mode argument R. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
                                        (int)(R)); })

#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(U), (int)(R)); })

#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_undefined_ps(), \
                                        (__mmask16)-1, (int)(R)); })
1649
1650 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1651 _mm512_sqrt_ps(__m512 __a)
1652 {
1653   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1654                                                (__v16sf) _mm512_setzero_ps (),
1655                                                (__mmask16) -1,
1656                                                _MM_FROUND_CUR_DIRECTION);
1657 }
1658
1659 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1660 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1661 {
1662   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1663                                                (__v16sf) __W,
1664                                                (__mmask16) __U,
1665                                                _MM_FROUND_CUR_DIRECTION);
1666 }
1667
1668 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1669 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1670 {
1671   return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1672                                                (__v16sf) _mm512_setzero_ps (),
1673                                                (__mmask16) __U,
1674                                                _MM_FROUND_CUR_DIRECTION);
1675 }
1676
1677 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1678 _mm512_rsqrt14_pd(__m512d __A)
1679 {
1680   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1681                  (__v8df)
1682                  _mm512_setzero_pd (),
1683                  (__mmask8) -1);}
1684
1685 static __inline__ __m512d __DEFAULT_FN_ATTRS
1686 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1687 {
1688   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1689                   (__v8df) __W,
1690                   (__mmask8) __U);
1691 }
1692
1693 static __inline__ __m512d __DEFAULT_FN_ATTRS
1694 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1695 {
1696   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1697                   (__v8df)
1698                   _mm512_setzero_pd (),
1699                   (__mmask8) __U);
1700 }
1701
1702 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1703 _mm512_rsqrt14_ps(__m512 __A)
1704 {
1705   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1706                 (__v16sf)
1707                 _mm512_setzero_ps (),
1708                 (__mmask16) -1);
1709 }
1710
1711 static __inline__ __m512 __DEFAULT_FN_ATTRS
1712 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1713 {
1714   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1715                  (__v16sf) __W,
1716                  (__mmask16) __U);
1717 }
1718
1719 static __inline__ __m512 __DEFAULT_FN_ATTRS
1720 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1721 {
1722   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1723                  (__v16sf)
1724                  _mm512_setzero_ps (),
1725                  (__mmask16) __U);
1726 }
1727
1728 static  __inline__ __m128 __DEFAULT_FN_ATTRS
1729 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1730 {
1731   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1732              (__v4sf) __B,
1733              (__v4sf)
1734              _mm_setzero_ps (),
1735              (__mmask8) -1);
1736 }
1737
1738 static __inline__ __m128 __DEFAULT_FN_ATTRS
1739 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1740 {
1741  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1742           (__v4sf) __B,
1743           (__v4sf) __W,
1744           (__mmask8) __U);
1745 }
1746
1747 static __inline__ __m128 __DEFAULT_FN_ATTRS
1748 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1749 {
1750  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1751           (__v4sf) __B,
1752           (__v4sf) _mm_setzero_ps (),
1753           (__mmask8) __U);
1754 }
1755
1756 static  __inline__ __m128d __DEFAULT_FN_ATTRS
1757 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1758 {
1759   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1760               (__v2df) __B,
1761               (__v2df)
1762               _mm_setzero_pd (),
1763               (__mmask8) -1);
1764 }
1765
1766 static __inline__ __m128d __DEFAULT_FN_ATTRS
1767 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1768 {
1769  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1770           (__v2df) __B,
1771           (__v2df) __W,
1772           (__mmask8) __U);
1773 }
1774
1775 static __inline__ __m128d __DEFAULT_FN_ATTRS
1776 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1777 {
1778  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1779           (__v2df) __B,
1780           (__v2df) _mm_setzero_pd (),
1781           (__mmask8) __U);
1782 }
1783
1784 static  __inline__ __m512d __DEFAULT_FN_ATTRS
1785 _mm512_rcp14_pd(__m512d __A)
1786 {
1787   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1788                (__v8df)
1789                _mm512_setzero_pd (),
1790                (__mmask8) -1);
1791 }
1792
1793 static __inline__ __m512d __DEFAULT_FN_ATTRS
1794 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1795 {
1796   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1797                 (__v8df) __W,
1798                 (__mmask8) __U);
1799 }
1800
1801 static __inline__ __m512d __DEFAULT_FN_ATTRS
1802 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1803 {
1804   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1805                 (__v8df)
1806                 _mm512_setzero_pd (),
1807                 (__mmask8) __U);
1808 }
1809
1810 static  __inline__ __m512 __DEFAULT_FN_ATTRS
1811 _mm512_rcp14_ps(__m512 __A)
1812 {
1813   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1814               (__v16sf)
1815               _mm512_setzero_ps (),
1816               (__mmask16) -1);
1817 }
1818
1819 static __inline__ __m512 __DEFAULT_FN_ATTRS
1820 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1821 {
1822   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1823                    (__v16sf) __W,
1824                    (__mmask16) __U);
1825 }
1826
1827 static __inline__ __m512 __DEFAULT_FN_ATTRS
1828 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1829 {
1830   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1831                    (__v16sf)
1832                    _mm512_setzero_ps (),
1833                    (__mmask16) __U);
1834 }
1835
1836 static  __inline__ __m128 __DEFAULT_FN_ATTRS
1837 _mm_rcp14_ss(__m128 __A, __m128 __B)
1838 {
1839   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1840                  (__v4sf) __B,
1841                  (__v4sf)
1842                  _mm_setzero_ps (),
1843                  (__mmask8) -1);
1844 }
1845
1846 static __inline__ __m128 __DEFAULT_FN_ATTRS
1847 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1848 {
1849  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1850           (__v4sf) __B,
1851           (__v4sf) __W,
1852           (__mmask8) __U);
1853 }
1854
1855 static __inline__ __m128 __DEFAULT_FN_ATTRS
1856 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1857 {
1858  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1859           (__v4sf) __B,
1860           (__v4sf) _mm_setzero_ps (),
1861           (__mmask8) __U);
1862 }
1863
1864 static  __inline__ __m128d __DEFAULT_FN_ATTRS
1865 _mm_rcp14_sd(__m128d __A, __m128d __B)
1866 {
1867   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1868             (__v2df) __B,
1869             (__v2df)
1870             _mm_setzero_pd (),
1871             (__mmask8) -1);
1872 }
1873
1874 static __inline__ __m128d __DEFAULT_FN_ATTRS
1875 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1876 {
1877  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1878           (__v2df) __B,
1879           (__v2df) __W,
1880           (__mmask8) __U);
1881 }
1882
1883 static __inline__ __m128d __DEFAULT_FN_ATTRS
1884 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1885 {
1886  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1887           (__v2df) __B,
1888           (__v2df) _mm_setzero_pd (),
1889           (__mmask8) __U);
1890 }
1891
1892 static __inline __m512 __DEFAULT_FN_ATTRS
1893 _mm512_floor_ps(__m512 __A)
1894 {
1895   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1896                                                   _MM_FROUND_FLOOR,
1897                                                   (__v16sf) __A, -1,
1898                                                   _MM_FROUND_CUR_DIRECTION);
1899 }
1900
1901 static __inline__ __m512 __DEFAULT_FN_ATTRS
1902 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1903 {
1904   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1905                    _MM_FROUND_FLOOR,
1906                    (__v16sf) __W, __U,
1907                    _MM_FROUND_CUR_DIRECTION);
1908 }
1909
1910 static __inline __m512d __DEFAULT_FN_ATTRS
1911 _mm512_floor_pd(__m512d __A)
1912 {
1913   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1914                                                    _MM_FROUND_FLOOR,
1915                                                    (__v8df) __A, -1,
1916                                                    _MM_FROUND_CUR_DIRECTION);
1917 }
1918
1919 static __inline__ __m512d __DEFAULT_FN_ATTRS
1920 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1921 {
1922   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1923                 _MM_FROUND_FLOOR,
1924                 (__v8df) __W, __U,
1925                 _MM_FROUND_CUR_DIRECTION);
1926 }
1927
1928 static __inline__ __m512 __DEFAULT_FN_ATTRS
1929 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1930 {
1931   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1932                    _MM_FROUND_CEIL,
1933                    (__v16sf) __W, __U,
1934                    _MM_FROUND_CUR_DIRECTION);
1935 }
1936
1937 static __inline __m512 __DEFAULT_FN_ATTRS
1938 _mm512_ceil_ps(__m512 __A)
1939 {
1940   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1941                                                   _MM_FROUND_CEIL,
1942                                                   (__v16sf) __A, -1,
1943                                                   _MM_FROUND_CUR_DIRECTION);
1944 }
1945
1946 static __inline __m512d __DEFAULT_FN_ATTRS
1947 _mm512_ceil_pd(__m512d __A)
1948 {
1949   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1950                                                    _MM_FROUND_CEIL,
1951                                                    (__v8df) __A, -1,
1952                                                    _MM_FROUND_CUR_DIRECTION);
1953 }
1954
1955 static __inline__ __m512d __DEFAULT_FN_ATTRS
1956 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1957 {
1958   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1959                 _MM_FROUND_CEIL,
1960                 (__v8df) __W, __U,
1961                 _MM_FROUND_CUR_DIRECTION);
1962 }
1963
1964 static __inline __m512i __DEFAULT_FN_ATTRS
1965 _mm512_abs_epi64(__m512i __A)
1966 {
1967   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1968              (__v8di)
1969              _mm512_setzero_si512 (),
1970              (__mmask8) -1);
1971 }
1972
1973 static __inline__ __m512i __DEFAULT_FN_ATTRS
1974 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1975 {
1976   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1977                   (__v8di) __W,
1978                   (__mmask8) __U);
1979 }
1980
1981 static __inline__ __m512i __DEFAULT_FN_ATTRS
1982 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1983 {
1984   return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1985                   (__v8di)
1986                   _mm512_setzero_si512 (),
1987                   (__mmask8) __U);
1988 }
1989
1990 static __inline __m512i __DEFAULT_FN_ATTRS
1991 _mm512_abs_epi32(__m512i __A)
1992 {
1993   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1994              (__v16si)
1995              _mm512_setzero_si512 (),
1996              (__mmask16) -1);
1997 }
1998
1999 static __inline__ __m512i __DEFAULT_FN_ATTRS
2000 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
2001 {
2002   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2003                   (__v16si) __W,
2004                   (__mmask16) __U);
2005 }
2006
2007 static __inline__ __m512i __DEFAULT_FN_ATTRS
2008 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
2009 {
2010   return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2011                   (__v16si)
2012                   _mm512_setzero_si512 (),
2013                   (__mmask16) __U);
2014 }
2015
2016 static __inline__ __m128 __DEFAULT_FN_ATTRS
2017 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2018   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2019                 (__v4sf) __B,
2020                 (__v4sf) __W,
2021                 (__mmask8) __U,
2022                 _MM_FROUND_CUR_DIRECTION);
2023 }
2024
2025 static __inline__ __m128 __DEFAULT_FN_ATTRS
2026 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2027   return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2028                 (__v4sf) __B,
2029                 (__v4sf)  _mm_setzero_ps (),
2030                 (__mmask8) __U,
2031                 _MM_FROUND_CUR_DIRECTION);
2032 }
2033
2034 #define _mm_add_round_ss(A, B, R) __extension__ ({ \
2035   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2036                                           (__v4sf)(__m128)(B), \
2037                                           (__v4sf)_mm_setzero_ps(), \
2038                                           (__mmask8)-1, (int)(R)); })
2039
2040 #define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
2041   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2042                                           (__v4sf)(__m128)(B), \
2043                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
2044                                           (int)(R)); })
2045
2046 #define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
2047   (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
2048                                           (__v4sf)(__m128)(B), \
2049                                           (__v4sf)_mm_setzero_ps(), \
2050                                           (__mmask8)(U), (int)(R)); })
2051
2052 static __inline__ __m128d __DEFAULT_FN_ATTRS
2053 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2054   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2055                 (__v2df) __B,
2056                 (__v2df) __W,
2057                 (__mmask8) __U,
2058                 _MM_FROUND_CUR_DIRECTION);
2059 }
2060
2061 static __inline__ __m128d __DEFAULT_FN_ATTRS
2062 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2063   return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2064                 (__v2df) __B,
2065                 (__v2df)  _mm_setzero_pd (),
2066                 (__mmask8) __U,
2067                 _MM_FROUND_CUR_DIRECTION);
2068 }
2069 #define _mm_add_round_sd(A, B, R) __extension__ ({ \
2070   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2071                                            (__v2df)(__m128d)(B), \
2072                                            (__v2df)_mm_setzero_pd(), \
2073                                            (__mmask8)-1, (int)(R)); })
2074
2075 #define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
2076   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2077                                            (__v2df)(__m128d)(B), \
2078                                            (__v2df)(__m128d)(W), \
2079                                            (__mmask8)(U), (int)(R)); })
2080
2081 #define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
2082   (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
2083                                            (__v2df)(__m128d)(B), \
2084                                            (__v2df)_mm_setzero_pd(), \
2085                                            (__mmask8)(U), (int)(R)); })
2086
2087 static __inline__ __m512d __DEFAULT_FN_ATTRS
2088 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2089   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2090                                               (__v8df)_mm512_add_pd(__A, __B),
2091                                               (__v8df)__W);
2092 }
2093
2094 static __inline__ __m512d __DEFAULT_FN_ATTRS
2095 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2096   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2097                                               (__v8df)_mm512_add_pd(__A, __B),
2098                                               (__v8df)_mm512_setzero_pd());
2099 }
2100
2101 static __inline__ __m512 __DEFAULT_FN_ATTRS
2102 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2103   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2104                                              (__v16sf)_mm512_add_ps(__A, __B),
2105                                              (__v16sf)__W);
2106 }
2107
2108 static __inline__ __m512 __DEFAULT_FN_ATTRS
2109 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2110   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2111                                              (__v16sf)_mm512_add_ps(__A, __B),
2112                                              (__v16sf)_mm512_setzero_ps());
2113 }
2114
2115 #define _mm512_add_round_pd(A, B, R) __extension__ ({ \
2116   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2117                                         (__v8df)(__m512d)(B), \
2118                                         (__v8df)_mm512_setzero_pd(), \
2119                                         (__mmask8)-1, (int)(R)); })
2120
2121 #define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
2122   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2123                                         (__v8df)(__m512d)(B), \
2124                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2125                                         (int)(R)); })
2126
2127 #define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
2128   (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
2129                                         (__v8df)(__m512d)(B), \
2130                                         (__v8df)_mm512_setzero_pd(), \
2131                                         (__mmask8)(U), (int)(R)); })
2132
2133 #define _mm512_add_round_ps(A, B, R) __extension__ ({ \
2134   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2135                                        (__v16sf)(__m512)(B), \
2136                                        (__v16sf)_mm512_setzero_ps(), \
2137                                        (__mmask16)-1, (int)(R)); })
2138
2139 #define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
2140   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2141                                        (__v16sf)(__m512)(B), \
2142                                        (__v16sf)(__m512)(W), (__mmask16)(U), \
2143                                        (int)(R)); })
2144
2145 #define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
2146   (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
2147                                        (__v16sf)(__m512)(B), \
2148                                        (__v16sf)_mm512_setzero_ps(), \
2149                                        (__mmask16)(U), (int)(R)); })
2150
2151 static __inline__ __m128 __DEFAULT_FN_ATTRS
2152 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2153   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2154                 (__v4sf) __B,
2155                 (__v4sf) __W,
2156                 (__mmask8) __U,
2157                 _MM_FROUND_CUR_DIRECTION);
2158 }
2159
2160 static __inline__ __m128 __DEFAULT_FN_ATTRS
2161 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2162   return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2163                 (__v4sf) __B,
2164                 (__v4sf)  _mm_setzero_ps (),
2165                 (__mmask8) __U,
2166                 _MM_FROUND_CUR_DIRECTION);
2167 }
2168 #define _mm_sub_round_ss(A, B, R) __extension__ ({ \
2169   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2170                                           (__v4sf)(__m128)(B), \
2171                                           (__v4sf)_mm_setzero_ps(), \
2172                                           (__mmask8)-1, (int)(R)); })
2173
2174 #define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
2175   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2176                                           (__v4sf)(__m128)(B), \
2177                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
2178                                           (int)(R)); })
2179
2180 #define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
2181   (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2182                                           (__v4sf)(__m128)(B), \
2183                                           (__v4sf)_mm_setzero_ps(), \
2184                                           (__mmask8)(U), (int)(R)); })
2185
2186 static __inline__ __m128d __DEFAULT_FN_ATTRS
2187 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2188   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2189                 (__v2df) __B,
2190                 (__v2df) __W,
2191                 (__mmask8) __U,
2192                 _MM_FROUND_CUR_DIRECTION);
2193 }
2194
2195 static __inline__ __m128d __DEFAULT_FN_ATTRS
2196 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2197   return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2198                 (__v2df) __B,
2199                 (__v2df)  _mm_setzero_pd (),
2200                 (__mmask8) __U,
2201                 _MM_FROUND_CUR_DIRECTION);
2202 }
2203
2204 #define _mm_sub_round_sd(A, B, R) __extension__ ({ \
2205   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2206                                            (__v2df)(__m128d)(B), \
2207                                            (__v2df)_mm_setzero_pd(), \
2208                                            (__mmask8)-1, (int)(R)); })
2209
2210 #define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
2211   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2212                                            (__v2df)(__m128d)(B), \
2213                                            (__v2df)(__m128d)(W), \
2214                                            (__mmask8)(U), (int)(R)); })
2215
2216 #define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
2217   (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2218                                            (__v2df)(__m128d)(B), \
2219                                            (__v2df)_mm_setzero_pd(), \
2220                                            (__mmask8)(U), (int)(R)); })
2221
2222 static __inline__ __m512d __DEFAULT_FN_ATTRS
2223 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2224   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2225                                               (__v8df)_mm512_sub_pd(__A, __B),
2226                                               (__v8df)__W);
2227 }
2228
2229 static __inline__ __m512d __DEFAULT_FN_ATTRS
2230 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2231   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2232                                               (__v8df)_mm512_sub_pd(__A, __B),
2233                                               (__v8df)_mm512_setzero_pd());
2234 }
2235
2236 static __inline__ __m512 __DEFAULT_FN_ATTRS
2237 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2238   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2239                                              (__v16sf)_mm512_sub_ps(__A, __B),
2240                                              (__v16sf)__W);
2241 }
2242
2243 static __inline__ __m512 __DEFAULT_FN_ATTRS
2244 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2245   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2246                                              (__v16sf)_mm512_sub_ps(__A, __B),
2247                                              (__v16sf)_mm512_setzero_ps());
2248 }
2249
2250 #define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
2251   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2252                                         (__v8df)(__m512d)(B), \
2253                                         (__v8df)_mm512_setzero_pd(), \
2254                                         (__mmask8)-1, (int)(R)); })
2255
2256 #define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
2257   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2258                                         (__v8df)(__m512d)(B), \
2259                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
2260                                         (int)(R)); })
2261
2262 #define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
2263   (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
2264                                         (__v8df)(__m512d)(B), \
2265                                         (__v8df)_mm512_setzero_pd(), \
2266                                         (__mmask8)(U), (int)(R)); })
2267
2268 #define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
2269   (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
2270                                        (__v16sf)(__m512)(B), \
2271                                        (__v16sf)_mm512_setzero_ps(), \
2272                                        (__mmask16)-1, (int)(R)); })
2273
/* FIX: the definition previously ended in "});", i.e. a stray semicolon
   after the statement expression.  That made the macro expand to an extra
   empty statement, so any use in expression context — e.g. as a function
   argument or inside a larger expression — failed to compile.  The
   semicolon is removed; the expansion is now a single GNU statement
   expression yielding the masked subtract result. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2279
/* FIX: removed the stray trailing semicolon after "})" (same defect as
   _mm512_mask_sub_round_ps); it broke uses of this macro in expression
   context.  Zero-masking variant: inactive lanes are zeroed. */
#define _mm512_maskz_sub_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2285
2286 static __inline__ __m128 __DEFAULT_FN_ATTRS
2287 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2288   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2289                 (__v4sf) __B,
2290                 (__v4sf) __W,
2291                 (__mmask8) __U,
2292                 _MM_FROUND_CUR_DIRECTION);
2293 }
2294
2295 static __inline__ __m128 __DEFAULT_FN_ATTRS
2296 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2297   return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2298                 (__v4sf) __B,
2299                 (__v4sf)  _mm_setzero_ps (),
2300                 (__mmask8) __U,
2301                 _MM_FROUND_CUR_DIRECTION);
2302 }
/* Scalar single-precision multiply with explicit rounding mode R; unmasked
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2320
2321 static __inline__ __m128d __DEFAULT_FN_ATTRS
2322 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2323   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2324                 (__v2df) __B,
2325                 (__v2df) __W,
2326                 (__mmask8) __U,
2327                 _MM_FROUND_CUR_DIRECTION);
2328 }
2329
2330 static __inline__ __m128d __DEFAULT_FN_ATTRS
2331 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2332   return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2333                 (__v2df) __B,
2334                 (__v2df)  _mm_setzero_pd (),
2335                 (__mmask8) __U,
2336                 _MM_FROUND_CUR_DIRECTION);
2337 }
2338
/* Scalar double-precision multiply with explicit rounding mode R; unmasked
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2356
2357 static __inline__ __m512d __DEFAULT_FN_ATTRS
2358 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2359   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2360                                               (__v8df)_mm512_mul_pd(__A, __B),
2361                                               (__v8df)__W);
2362 }
2363
2364 static __inline__ __m512d __DEFAULT_FN_ATTRS
2365 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2366   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2367                                               (__v8df)_mm512_mul_pd(__A, __B),
2368                                               (__v8df)_mm512_setzero_pd());
2369 }
2370
2371 static __inline__ __m512 __DEFAULT_FN_ATTRS
2372 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2373   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2374                                              (__v16sf)_mm512_mul_ps(__A, __B),
2375                                              (__v16sf)__W);
2376 }
2377
2378 static __inline__ __m512 __DEFAULT_FN_ATTRS
2379 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2380   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2381                                              (__v16sf)_mm512_mul_ps(__A, __B),
2382                                              (__v16sf)_mm512_setzero_ps());
2383 }
2384
/* 512-bit packed multiply with explicit rounding mode R; unmasked double
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked single-precision counterpart. */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
2408
/* Merge-masked 512-bit single-precision multiply with explicit rounding
   mode R.  Fix: removed the stray semicolon after the statement expression,
   which broke uses of the macro inside larger expressions. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2414
/* Zero-masked 512-bit single-precision multiply with explicit rounding
   mode R.  Fix: removed the stray semicolon after the statement expression,
   which broke uses of the macro inside larger expressions. */
#define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2420
2421 static __inline__ __m128 __DEFAULT_FN_ATTRS
2422 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2423   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2424                 (__v4sf) __B,
2425                 (__v4sf) __W,
2426                 (__mmask8) __U,
2427                 _MM_FROUND_CUR_DIRECTION);
2428 }
2429
2430 static __inline__ __m128 __DEFAULT_FN_ATTRS
2431 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2432   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2433                 (__v4sf) __B,
2434                 (__v4sf)  _mm_setzero_ps (),
2435                 (__mmask8) __U,
2436                 _MM_FROUND_CUR_DIRECTION);
2437 }
2438
/* Scalar single-precision divide with explicit rounding mode R; unmasked
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
2456
2457 static __inline__ __m128d __DEFAULT_FN_ATTRS
2458 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2459   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2460                 (__v2df) __B,
2461                 (__v2df) __W,
2462                 (__mmask8) __U,
2463                 _MM_FROUND_CUR_DIRECTION);
2464 }
2465
2466 static __inline__ __m128d __DEFAULT_FN_ATTRS
2467 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2468   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2469                 (__v2df) __B,
2470                 (__v2df)  _mm_setzero_pd (),
2471                 (__mmask8) __U,
2472                 _MM_FROUND_CUR_DIRECTION);
2473 }
2474
/* Scalar double-precision divide with explicit rounding mode R; unmasked
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
2492
2493 static __inline __m512d __DEFAULT_FN_ATTRS
2494 _mm512_div_pd(__m512d __a, __m512d __b)
2495 {
2496   return (__m512d)((__v8df)__a/(__v8df)__b);
2497 }
2498
2499 static __inline__ __m512d __DEFAULT_FN_ATTRS
2500 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2501   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2502                                               (__v8df)_mm512_div_pd(__A, __B),
2503                                               (__v8df)__W);
2504 }
2505
2506 static __inline__ __m512d __DEFAULT_FN_ATTRS
2507 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2508   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2509                                               (__v8df)_mm512_div_pd(__A, __B),
2510                                               (__v8df)_mm512_setzero_pd());
2511 }
2512
2513 static __inline __m512 __DEFAULT_FN_ATTRS
2514 _mm512_div_ps(__m512 __a, __m512 __b)
2515 {
2516   return (__m512)((__v16sf)__a/(__v16sf)__b);
2517 }
2518
2519 static __inline__ __m512 __DEFAULT_FN_ATTRS
2520 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2521   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2522                                              (__v16sf)_mm512_div_ps(__A, __B),
2523                                              (__v16sf)__W);
2524 }
2525
2526 static __inline__ __m512 __DEFAULT_FN_ATTRS
2527 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2528   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2529                                              (__v16sf)_mm512_div_ps(__A, __B),
2530                                              (__v16sf)_mm512_setzero_ps());
2531 }
2532
/* 512-bit packed divide with explicit rounding mode R; unmasked double
   form (all-ones mask, zero vector as the unused merge operand). */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)); })

/* Merge-masked form: W supplies the merge operand, U the writemask. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })

/* Zero-masked form: zero vector replaces the merge operand. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })

/* Unmasked single-precision counterpart. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)); })
2556
/* Merge-masked 512-bit single-precision divide with explicit rounding
   mode R.  Fix: removed the stray semicolon after the statement expression,
   which broke uses of the macro inside larger expressions. */
#define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2562
/* Zero-masked 512-bit single-precision divide with explicit rounding
   mode R.  Fix: removed the stray semicolon after the statement expression,
   which broke uses of the macro inside larger expressions. */
#define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2568
/* Roundscale family: round each element under immediate control imm/B.
   Unmasked ps form; the source also serves as the merge operand. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)(__m512)(A), (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Merge-masked: source C, writemask B, merge operand A. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* Zero-masked: source B, writemask A, zero vector as merge operand. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION); })

/* _round variants pass the rounding/SAE control R explicitly. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R)); })

/* Unmasked _round form: merge operand is undefined (fully overwritten). */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R)); })

/* Double-precision counterparts of the roundscale family above. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)(__m512d)(A), (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)); })

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R)); })

#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R)); })
2630
/* 512-bit double-precision FMA family with explicit rounding mode R.
   All variants funnel through the vfmaddpd512 builtins; the fmsub/fnmadd/
   fnmsub forms are expressed by negating the C and/or A operand.  _mask
   merges with A, _mask3 with C, _maskz zeroes masked-off lanes. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fmsub: negate C. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fnmadd: negate A. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), (__mmask8)-1, \
                                           (int)(R)); })


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* fnmsub: negate both A and C. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })
2713
2714
2715 static __inline__ __m512d __DEFAULT_FN_ATTRS
2716 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2717 {
2718   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2719                                                     (__v8df) __B,
2720                                                     (__v8df) __C,
2721                                                     (__mmask8) -1,
2722                                                     _MM_FROUND_CUR_DIRECTION);
2723 }
2724
2725 static __inline__ __m512d __DEFAULT_FN_ATTRS
2726 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2727 {
2728   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2729                                                     (__v8df) __B,
2730                                                     (__v8df) __C,
2731                                                     (__mmask8) __U,
2732                                                     _MM_FROUND_CUR_DIRECTION);
2733 }
2734
2735 static __inline__ __m512d __DEFAULT_FN_ATTRS
2736 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2737 {
2738   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2739                                                      (__v8df) __B,
2740                                                      (__v8df) __C,
2741                                                      (__mmask8) __U,
2742                                                      _MM_FROUND_CUR_DIRECTION);
2743 }
2744
2745 static __inline__ __m512d __DEFAULT_FN_ATTRS
2746 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2747 {
2748   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2749                                                      (__v8df) __B,
2750                                                      (__v8df) __C,
2751                                                      (__mmask8) __U,
2752                                                      _MM_FROUND_CUR_DIRECTION);
2753 }
2754
2755 static __inline__ __m512d __DEFAULT_FN_ATTRS
2756 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2757 {
2758   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759                                                     (__v8df) __B,
2760                                                     -(__v8df) __C,
2761                                                     (__mmask8) -1,
2762                                                     _MM_FROUND_CUR_DIRECTION);
2763 }
2764
2765 static __inline__ __m512d __DEFAULT_FN_ATTRS
2766 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2767 {
2768   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2769                                                     (__v8df) __B,
2770                                                     -(__v8df) __C,
2771                                                     (__mmask8) __U,
2772                                                     _MM_FROUND_CUR_DIRECTION);
2773 }
2774
2775 static __inline__ __m512d __DEFAULT_FN_ATTRS
2776 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2777 {
2778   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2779                                                      (__v8df) __B,
2780                                                      -(__v8df) __C,
2781                                                      (__mmask8) __U,
2782                                                      _MM_FROUND_CUR_DIRECTION);
2783 }
2784
2785 static __inline__ __m512d __DEFAULT_FN_ATTRS
2786 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2787 {
2788   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2789                                                     (__v8df) __B,
2790                                                     (__v8df) __C,
2791                                                     (__mmask8) -1,
2792                                                     _MM_FROUND_CUR_DIRECTION);
2793 }
2794
2795 static __inline__ __m512d __DEFAULT_FN_ATTRS
2796 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2797 {
2798   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2799                                                      (__v8df) __B,
2800                                                      (__v8df) __C,
2801                                                      (__mmask8) __U,
2802                                                      _MM_FROUND_CUR_DIRECTION);
2803 }
2804
2805 static __inline__ __m512d __DEFAULT_FN_ATTRS
2806 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2807 {
2808   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2809                                                      (__v8df) __B,
2810                                                      (__v8df) __C,
2811                                                      (__mmask8) __U,
2812                                                      _MM_FROUND_CUR_DIRECTION);
2813 }
2814
2815 static __inline__ __m512d __DEFAULT_FN_ATTRS
2816 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2817 {
2818   return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2819                                                     (__v8df) __B,
2820                                                     -(__v8df) __C,
2821                                                     (__mmask8) -1,
2822                                                     _MM_FROUND_CUR_DIRECTION);
2823 }
2824
2825 static __inline__ __m512d __DEFAULT_FN_ATTRS
2826 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2827 {
2828   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2829                                                      (__v8df) __B,
2830                                                      -(__v8df) __C,
2831                                                      (__mmask8) __U,
2832                                                      _MM_FROUND_CUR_DIRECTION);
2833 }
2834
/* 512-bit single-precision FMA family with explicit rounding mode R.
   Structured exactly like the double-precision family above: fmsub/fnmadd/
   fnmsub negate the C and/or A operand of the same vfmaddps builtins. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* fmsub: negate C. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* fnmadd: negate A. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), (__mmask16)-1, \
                                          (int)(R)); })


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* fnmsub: negate both A and C. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R)); })


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })
2917
2918
/*
 * _mm512_fmadd_ps family: fused multiply-add A*B + C over 16 float lanes,
 * rounding with the current direction (_MM_FROUND_CUR_DIRECTION).  All four
 * variants lower to the vfmaddps512 builtin family; only the mask argument
 * and builtin suffix differ.  Masked-off lanes are handled inside the
 * builtin: per the AVX-512 naming convention, _mask presumably merges from
 * the first operand, _mask3 from the third, and _maskz zeroes -- the merge
 * source is not visible in this header.
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmadd; __U selects active lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmadd against the third operand (mask is the last parameter). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmadd; lanes cleared in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2958
/*
 * _mm512_fmsub_ps family: A*B - C, implemented as fmadd with the addend
 * negated at the source level so the compiler selects a VFMSUB encoding.
 * Current rounding direction; mask semantics as in the fmadd family above
 * (unmasked / merge-masked / zero-masked, resolved inside the builtin).
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmsub; __U selects active lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmsub. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2988
/*
 * _mm512_fnmadd_ps family: -(A*B) + C, implemented as fmadd with the first
 * multiplicand negated at the source level (VFNMADD selection).  Current
 * rounding direction; masking handled inside the builtin.
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fnmadd against the third operand. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fnmadd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3018
/*
 * _mm512_fnmsub_ps family: -(A*B) - C, implemented as fmadd with both the
 * first multiplicand and the addend negated (VFNMSUB selection).  Current
 * rounding direction.
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fnmsub. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3038
/*
 * 512-bit double-precision fmaddsub/fmsubadd with explicit rounding
 * immediate (R).  Both families expand to the vfmaddsubpd512 builtins;
 * fmsubadd is obtained by negating the C operand at the source level.
 * Mask suffix convention as elsewhere in this header (unmasked / _mask /
 * _mask3 / _maskz), with the merge behavior implemented by the builtin.
 */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* Merge-masked fmaddsub. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* Merge-masked fmaddsub via the *_mask3 builtin (U is the 4th argument). */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* Zero-masked fmaddsub. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* fmsubadd: fmaddsub with C negated, unmasked. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R)); })


/* Merge-masked fmsubadd. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R)); })


/* Zero-masked fmsubadd. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })
3086
3087
/*
 * _mm512_fmaddsub_pd family: alternating add/subtract fused multiply over
 * 8 double lanes, using the current rounding direction.  Masking resolved
 * inside the vfmaddsubpd512 builtins.
 */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmaddsub. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmaddsub against the third operand. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmaddsub. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
3127
/*
 * _mm512_fmsubadd_pd family: fmaddsub with the C operand negated at the
 * source level (VFMSUBADD selection), current rounding direction.
 */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmsubadd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmsubadd. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        -(__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
3157
/*
 * Single-precision counterparts of the fmaddsub/fmsubadd rounding macros
 * above: 16 float lanes, explicit rounding immediate (R), fmsubadd formed
 * by negating C at the source level.
 */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* Merge-masked fmaddsub. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* Merge-masked fmaddsub via the *_mask3 builtin (U is the 4th argument). */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* Zero-masked fmaddsub. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* fmsubadd: fmaddsub with C negated, unmasked. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R)); })


/* Merge-masked fmsubadd. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R)); })


/* Zero-masked fmsubadd. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })
3205
3206
/*
 * _mm512_fmaddsub_ps family: alternating add/subtract fused multiply over
 * 16 float lanes, current rounding direction.
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmaddsub. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmaddsub against the third operand. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmaddsub. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3246
/*
 * _mm512_fmsubadd_ps family: fmaddsub with the C operand negated at the
 * source level, current rounding direction.
 */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked fmsubadd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked fmsubadd. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       -(__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3276
/*
 * mask3 fmsub variants.  Unlike the other fmsub forms in this file, these
 * call dedicated vfmsub*512_mask3 builtins with an un-negated C --
 * presumably because _mask3 merge-masking must read the original (not
 * negated) third operand for inactive lanes; the negation is folded into
 * the builtin itself.
 */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* Double-precision mask3 fmsub, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision mask3 fmsub with explicit rounding immediate. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* Single-precision mask3 fmsub, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3310
/*
 * mask3 fmsubadd variants, again through dedicated vfmsubadd*512_mask3
 * builtins (un-negated C, for the same merge-masking reason as the mask3
 * fmsub forms above).
 */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)); })


/* Double-precision mask3 fmsubadd, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
                                                        (__v8df) __B,
                                                        (__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision mask3 fmsubadd with explicit rounding immediate. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)); })


/* Single-precision mask3 fmsubadd, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3344
/*
 * Merge-masked fnmadd variants, through dedicated vfnmadd*512_mask
 * builtins with an un-negated A -- presumably because _mask merge-masking
 * must preserve the original first operand in inactive lanes; the
 * negation is folded into the builtin.
 */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* Double-precision merge-masked fnmadd, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision merge-masked fnmadd with explicit rounding immediate. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* Single-precision merge-masked fnmadd, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3378
/*
 * Merge-masked fnmsub variants (_mask merges from the first operand,
 * _mask3 from the third, per suffix convention), through dedicated
 * vfnmsub*512 builtins with un-negated operands; the negations are folded
 * into the builtins so the merge source stays unmodified.
 */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)); })


/* Double-precision mask3 fnmsub with explicit rounding immediate. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)); })


/* Double-precision merge-masked fnmsub, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Double-precision mask3 fnmsub, current rounding direction. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision merge-masked fnmsub with explicit rounding immediate. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)); })


/* Single-precision mask3 fnmsub with explicit rounding immediate. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)); })


/* Single-precision merge-masked fnmsub, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision mask3 fnmsub, current rounding direction. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
                                                     (__v16sf) __B,
                                                     (__v16sf) __C,
                                                     (__mmask16) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
3446
3447
3448
3449 /* Vector permutations */
3450
/*
 * Two-source 32-bit element permute (VPERMT2D).  Note the argument order:
 * the index vector __I is the FIRST builtin argument even though it is the
 * second intrinsic parameter; __A and __B are the two data sources.
 * Masking (merge for _mask, zero for _maskz) is handled by the builtin.
 */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                       /* idx */ ,
                                                       (__v16si) __A,
                                                       (__v16si) __B,
                                                       (__mmask16) -1);
}

/* Merge-masked two-source 32-bit permute. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
                                __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16si) __A,
                                                        (__v16si) __B,
                                                        (__mmask16) __U);
}

/* Zero-masked two-source 32-bit permute. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
                                 __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16si) __A,
                                                        (__v16si) __B,
                                                        (__mmask16) __U);
}
3482
/*
 * Two-source 64-bit element permute (VPERMT2Q); same index-first builtin
 * argument order as the epi32 variants above.
 */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) -1);
}

/* Merge-masked two-source 64-bit permute. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
                                __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
                                                       /* idx */ ,
                                                       (__v8di) __A,
                                                       (__v8di) __B,
                                                       (__mmask8) __U);
}


/* Zero-masked two-source 64-bit permute. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
         __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
                                                        /* idx */ ,
                                                        (__v8di) __A,
                                                        (__v8di) __B,
                                                        (__mmask8) __U);
}
3515
/*
 * _mm512_alignr_epi64: concatenate A:B (A in the upper half) and shift
 * right by I 64-bit elements, implemented as a compile-time
 * __builtin_shufflevector over B then A with indices (I & 0x7) + k.
 * I must be a constant expression; only its low 3 bits are used.
 * The masked forms wrap the same shuffle in a selectq builtin that picks,
 * per lane of U, the shuffle result or the merge source (W, or zero).
 */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
                                   (__v8di)(__m512i)(A), \
                                   ((int)(I) & 0x7) + 0, \
                                   ((int)(I) & 0x7) + 1, \
                                   ((int)(I) & 0x7) + 2, \
                                   ((int)(I) & 0x7) + 3, \
                                   ((int)(I) & 0x7) + 4, \
                                   ((int)(I) & 0x7) + 5, \
                                   ((int)(I) & 0x7) + 6, \
                                   ((int)(I) & 0x7) + 7); })

/* Merge-masked alignr: inactive lanes take their value from W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W)); })

/* Zero-masked alignr: inactive lanes are zeroed. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512()); })
3537
3538 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
3539   (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
3540                                    (__v16si)(__m512i)(A), \
3541                                    ((int)(I) & 0xf) + 0, \
3542                                    ((int)(I) & 0xf) + 1, \
3543                                    ((int)(I) & 0xf) + 2, \
3544                                    ((int)(I) & 0xf) + 3, \
3545                                    ((int)(I) & 0xf) + 4, \
3546                                    ((int)(I) & 0xf) + 5, \
3547                                    ((int)(I) & 0xf) + 6, \
3548                                    ((int)(I) & 0xf) + 7, \
3549                                    ((int)(I) & 0xf) + 8, \
3550                                    ((int)(I) & 0xf) + 9, \
3551                                    ((int)(I) & 0xf) + 10, \
3552                                    ((int)(I) & 0xf) + 11, \
3553                                    ((int)(I) & 0xf) + 12, \
3554                                    ((int)(I) & 0xf) + 13, \
3555                                    ((int)(I) & 0xf) + 14, \
3556                                    ((int)(I) & 0xf) + 15); })
3557
3558 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\
3559   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3560                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3561                                 (__v16si)(__m512i)(W)); })
3562
3563 #define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\
3564   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3565                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3566                                 (__v16si)_mm512_setzero_si512()); })
/* Vector Extract */

/* Extract the lower (I == 0) or upper (I == 1) 256-bit half of A as
   four doubles; only bit 0 of I is significant. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
                                   (__v8df)_mm512_undefined_pd(), \
                                   ((I) & 1) ? 4 : 0,             \
                                   ((I) & 1) ? 5 : 1,             \
                                   ((I) & 1) ? 6 : 2,             \
                                   ((I) & 1) ? 7 : 3); })

/* Merge-masking wrapper: lanes with a clear bit in U come from W. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(W)); })

/* Zero-masking wrapper: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)_mm256_setzero_pd()); })

/* Extract 128-bit lane (I & 3) of A as four floats. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  0 + ((I) & 0x3) * 4,            \
                                  1 + ((I) & 0x3) * 4,            \
                                  2 + ((I) & 0x3) * 4,            \
                                  3 + ((I) & 0x3) * 4); })

/* Merge-masking wrapper: lanes with a clear bit in U come from W. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(W)); })

/* Zero-masking wrapper: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)_mm_setzero_ps()); })
3604
3605 /* Vector Blend */
3606
3607 static __inline __m512d __DEFAULT_FN_ATTRS
3608 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3609 {
3610   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3611                  (__v8df) __W,
3612                  (__v8df) __A);
3613 }
3614
3615 static __inline __m512 __DEFAULT_FN_ATTRS
3616 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3617 {
3618   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3619                 (__v16sf) __W,
3620                 (__v16sf) __A);
3621 }
3622
3623 static __inline __m512i __DEFAULT_FN_ATTRS
3624 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3625 {
3626   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3627                 (__v8di) __W,
3628                 (__v8di) __A);
3629 }
3630
3631 static __inline __m512i __DEFAULT_FN_ATTRS
3632 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3633 {
3634   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3635                 (__v16si) __W,
3636                 (__v16si) __A);
3637 }
3638
/* Compare */

/* Compare packed floats of A and B with predicate P (a _CMP_* value);
   R supplies the rounding/SAE control.  Returns one result bit per
   lane. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R)); })

/* Masked form: only lanes whose bit is set in U contribute; the other
   result bits are zero. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R)); })

/* Current-rounding-mode shorthands. */
#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands for the generic float compare above. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double-precision counterparts of the compares above; 8 lanes, so the
   result is an 8-bit mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands for the generic double compare above. */
#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3750
/* Conversion */

/* Truncating convert: packed float -> packed unsigned 32-bit int, with
   explicit rounding/SAE control R.  W/U variants follow the usual
   merge-/zero-masking pattern. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)); })


/* Same conversion at the current rounding mode. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu32(__m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
3796
/* Convert packed signed 32-bit ints to packed floats with explicit
   rounding control R; usual unmasked/merge/zero variants. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

/* Unsigned 32-bit int -> float versions of the macros above. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

/* Unsigned 32-bit int -> float at the current rounding mode. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepu32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_undefined_ps (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
3853
/* Widen eight signed 32-bit ints to eight doubles (always exact, so no
   rounding argument is needed). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)__W);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepi32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* Convert only the low 256 bits (8 dwords) of a 512-bit source. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
}

/* Masked low-half convert; pass-through lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
3887
/* Signed 32-bit int -> float at the current rounding mode. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtepi32_ps (__m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) __W,
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}
3914
/* Widen eight unsigned 32-bit ints to eight doubles (exact). */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* Convert only the low 256 bits (8 dwords) of a 512-bit source. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu32lo_pd(__m512i __A)
{
  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
}

/* Masked low-half convert; pass-through lanes come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
{
  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
}
3948
/* Narrow eight doubles to eight floats with explicit rounding control
   R; usual unmasked/merge/zero variants. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })

/* double -> float at the current rounding mode. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* As _mm512_cvtpd_ps, but widened back to 512 bits: result floats in
   the low 8 lanes, upper 8 lanes zeroed (indices 8-15 select from the
   zero vector). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* Masked pslo: the low half of __W provides the pass-through lanes;
   the upper 8 result lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
4008
/* Convert 16 packed floats to 16 half-precision values; I carries the
   VCVTPS2PH immediate (rounding mode / sae bits). */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

/* Merge form: W supplies the pass-through elements, U is the write
   mask.  (Macro parameters renamed from the historical (U, W, ...)
   spelling, which used U for the source vector and W for the mask,
   to match the W = source / U = mask convention used by every other
   masked macro in this file; the positional expansion — and thus
   behavior for callers — is unchanged.) */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

/* Zero form: elements with a clear bit in mask U are zeroed. */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })

/* Non-round-suffixed aliases; I is still the VCVTPS2PH immediate. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

/* Merge form; parameters renamed as for _mm512_mask_cvt_roundps_ph
   above (expansion unchanged). */
#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

/* Zero form: elements with a clear bit in mask U are zeroed. */
#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({\
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
4038
/* Convert 16 half-precision values to 16 floats; R is the rounding/SAE
   control (the widening itself is exact). */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

/* Merge form: lanes with a clear bit in U are taken from W. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

/* Zero form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })


/* half -> float at the current rounding mode. */
static  __inline __m512 __DEFAULT_FN_ATTRS
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf)
                _mm512_setzero_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                 (__v16sf) _mm512_setzero_ps (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4082
/* Truncating convert: packed double -> packed signed 32-bit int, with
   explicit rounding/SAE control R. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

/* Merge form: lanes with a clear bit in U are taken from W. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

/* Zero form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })

/* Same conversion at the current rounding mode. */
static __inline __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                                                   (__v8si)_mm256_setzero_si256(),
                                                   (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                  (__v8si) _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4124
/* Truncating convert: packed float -> packed signed 32-bit int, with
   explicit rounding/SAE control R. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

/* Merge form: lanes with a clear bit in U are taken from W. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

/* Zero form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })

/* Same conversion at the current rounding mode. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_setzero_si512 (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4166
/* Rounding convert (no truncation): packed float -> packed signed
   32-bit int, with explicit rounding control R. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R)); })

/* Merge form: lanes with a clear bit in U are taken from W. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R)); })

/* Zero form: lanes with a clear bit in U are zeroed. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R)); })

/* Same conversion at the current rounding mode. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge form: lanes with a clear bit in __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero form: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4209
/* Convert 8 packed doubles in A to signed 32-bit ints (256-bit result)
   using rounding mode R; all lanes enabled.  */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R)); })

/* Masked variant: lanes where U is clear take the element of W.  */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R)); })

/* Zero-masked variant: lanes where U is clear are zeroed.  */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R)); })
4224
/* Convert 8 packed doubles in __A to signed 32-bit ints using the current
   rounding direction; pass-through is undefined (mask is all-ones).  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_undefined_si256 (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: lanes where __U is clear take the element of __W.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: lanes where __U is clear are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4253
/* Convert packed floats in A to unsigned 32-bit ints using rounding mode R;
   all lanes enabled.  */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)); })

/* Masked variant: lanes where U is clear take the element of W.  */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)); })

/* Zero-masked variant: lanes where U is clear are zeroed.  */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)); })
4268
4269 static __inline__ __m512i __DEFAULT_FN_ATTRS
4270 _mm512_cvtps_epu32 ( __m512 __A)
4271 {
4272   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4273                   (__v16si)\
4274                   _mm512_undefined_epi32 (),\
4275                   (__mmask16) -1,\
4276                   _MM_FROUND_CUR_DIRECTION);\
4277 }
4278
/* Masked float-to-unsigned-int conversion: lanes where __U is clear take
   the corresponding element of __W; current rounding direction.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4287
4288 static __inline__ __m512i __DEFAULT_FN_ATTRS
4289 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4290 {
4291   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4292                   (__v16si) 
4293                   _mm512_setzero_si512 (),
4294                   (__mmask16) __U ,
4295                   _MM_FROUND_CUR_DIRECTION);
4296 }
4297
/* Convert 8 packed doubles in A to unsigned 32-bit ints (256-bit result)
   using rounding mode R; all lanes enabled.  */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })
4302
/* Masked double-to-unsigned-int conversion with rounding mode R: lanes
   where U is clear take the element of W.  The (__m256i) conversion cast
   on W was missing here — every sibling mask macro applies it (see
   _mm512_mask_cvt_roundpd_epi32); without it, passing W in a different
   vector type would be a bitcast straight to __v8si instead of going
   through the documented __m256i argument type.  */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
4307
/* Zero-masked double-to-unsigned-int conversion with rounding mode R:
   lanes where U is clear are zeroed.  */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4312
/* Convert 8 packed doubles in __A to unsigned 32-bit ints using the
   current rounding direction; pass-through is undefined (mask all-ones).  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Masked variant: lanes where __U is clear take the element of __W.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked variant: lanes where __U is clear are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4341
/* Extract the lowest double-precision element of __a as a scalar.  */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}

/* Extract the lowest single-precision element of __a as a scalar.  */
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}
4353
4354 /* Unpack and Interleave */
4355
/* Interleave the high double of each 128-bit lane of __a and __b.
   The N+k index forms select from __b (elements 8..15 of the combined
   16-element shuffle input).  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Masked unpackhi: lanes where __U is clear take the element of __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masked unpackhi: lanes where __U is clear are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}

/* Interleave the low double of each 128-bit lane of __a and __b.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Masked unpacklo: lanes where __U is clear take the element of __W.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)__W);
}

/* Zero-masked unpacklo: lanes where __U is clear are zeroed.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
                                           (__v8df)_mm512_setzero_pd());
}
4401
/* Interleave the two high floats of each 128-bit lane of __a and __b;
   indices >= 16 select from __b.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

/* Masked unpackhi: lanes where __U is clear take the element of __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masked unpackhi: lanes where __U is clear are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

/* Interleave the two low floats of each 128-bit lane of __a and __b.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

/* Masked unpacklo: lanes where __U is clear take the element of __W.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}

/* Zero-masked unpacklo: lanes where __U is clear are zeroed.  */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
4453
/* Interleave the two high dwords of each 128-bit lane of __A and __B;
   indices >= 16 select from __B.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Masked unpackhi: lanes where __U is clear take the element of __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masked unpackhi: lanes where __U is clear are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}

/* Interleave the two low dwords of each 128-bit lane of __A and __B.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Masked unpacklo: lanes where __U is clear take the element of __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zero-masked unpacklo: lanes where __U is clear are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4505
/* Interleave the high qword of each 128-bit lane of __A and __B;
   indices >= 8 select from __B.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Masked unpackhi: lanes where __U is clear take the element of __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masked unpackhi: lanes where __U is clear are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}

/* Interleave the low qword of each 128-bit lane of __A and __B.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Masked unpacklo: lanes where __U is clear take the element of __W.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zero-masked unpacklo: lanes where __U is clear are zeroed.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4551
4552 /* Bit Test */
4553
/* Per-dword bit test: mask bit i is set when (__A[i] & __B[i]) != 0.  */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_test_epi32_mask(__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
            (__v16si) __B,
            (__mmask16) -1);
}

/* Masked per-dword bit test: result ANDed with __U by the builtin.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
                 (__v16si) __B, __U);
}

/* Per-qword bit test: mask bit i is set when (__A[i] & __B[i]) != 0.  */
static __inline __mmask8 __DEFAULT_FN_ATTRS
_mm512_test_epi64_mask(__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
                 (__v8di) __B,
                 (__mmask8) -1);
}

/* Masked per-qword bit test.  */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
}
4582
4583
4584 /* SIMD load ops */
4585
/* Unaligned 512-bit integer load from __P (all lanes enabled).  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_loadu_si512 (void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) -1);
}

/* Unaligned masked dword load: lanes where __U is clear keep __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}


/* Unaligned zero-masked dword load: lanes where __U is clear are zeroed.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

/* Unaligned masked qword load: lanes where __U is clear keep __W.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Unaligned zero-masked qword load: lanes where __U is clear are zeroed.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}
4629
/* Unaligned masked float load: lanes where __U is clear keep __W.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Unaligned zero-masked float load: lanes where __U is clear are zeroed.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Unaligned masked double load: lanes where __U is clear keep __W.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Unaligned zero-masked double load: lanes where __U is clear are zeroed.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4663
/* Unaligned 512-bit double load.  The packed, may_alias wrapper struct
   tells the compiler the access has alignment 1 and may alias anything,
   yielding a correct unaligned load without a builtin.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

/* Unaligned 512-bit float load; same packed/may_alias technique.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512 __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}
4681
/* Aligned 512-bit float load (all lanes enabled).  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_load_ps(void const *__p)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) -1);
}

/* Aligned masked float load: lanes where __U is clear keep __W.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Aligned zero-masked float load: lanes where __U is clear are zeroed.  */
static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Aligned 512-bit double load (all lanes enabled).  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_load_pd(void const *__p)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) -1);
}

/* Aligned masked double load: lanes where __U is clear keep __W.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Aligned zero-masked double load: lanes where __U is clear are zeroed.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}
4733
/* Aligned 512-bit integer load: a plain dereference, so __P must be
   64-byte aligned.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Aligned dword-vector load; identical to _mm512_load_si512.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Aligned qword-vector load; identical to _mm512_load_si512.  */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}
4751
4752 /* SIMD store ops */
4753
/* Unaligned masked qword store: only lanes where __U is set are written.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store (all lanes).  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
            (__mmask16) -1);
}

/* Unaligned masked dword store: only lanes where __U is set are written.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

/* Unaligned masked double store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned 512-bit double store (all lanes).  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_pd(void *__P, __m512d __A)
{
  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
}

/* Unaligned masked float store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned 512-bit float store (all lanes).  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_storeu_ps(void *__P, __m512 __A)
{
  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
}
4799
/* Aligned masked double store: only lanes where __U is set are written.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned 512-bit double store: plain assignment, so __P must be
   64-byte aligned.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Aligned masked float store: only lanes where __U is set are written.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned 512-bit float store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer store.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned dword-vector store; identical to _mm512_store_si512.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Aligned qword-vector store; identical to _mm512_store_si512.  */
static __inline void __DEFAULT_FN_ATTRS
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4842
4843 /* Mask ops */
4844
/* Bitwise NOT of a 16-bit mask register value.  */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
4850
4851 /* Integer compare */
4852
/* Equality compares.  The signed forms use dedicated pcmpeq builtins;
   the unsigned forms pass immediate predicate 0 (equal) to the generic
   ucmp builtins.  Each mask_ variant ANDs the result with __u.  */

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
                                                __u);
}
4900
/* Greater-or-equal compares: immediate predicate 5 (not-less-than) with
   the signed (cmp) or unsigned (ucmp) generic compare builtins.  Each
   mask_ variant ANDs the result with __u.  */

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
                                                __u);
}
4948
/* Greater-than compares: signed forms use dedicated pcmpgt builtins;
   unsigned forms pass immediate predicate 6 (not-less-or-equal) to the
   generic ucmp builtins.  Each mask_ variant ANDs the result with __u.  */

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
                                                   __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
                                                  (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
                                                __u);
}
4996
/* Packed-integer less-or-equal compares producing a k-mask.  Both the
   signed (cmpd/cmpq) and unsigned (ucmpd/ucmpq) builtins take immediate
   predicate 2 (LE).  The final argument is the write-mask: -1 for the
   unmasked forms, the caller-supplied __u for the _mask_ forms.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
                                                __u);
}
5044
/* Packed-integer less-than compares producing a k-mask.  Both the signed
   (cmpd/cmpq) and unsigned (ucmpd/ucmpq) builtins take immediate
   predicate 1 (LT).  The final argument is the write-mask: -1 for the
   unmasked forms, the caller-supplied __u for the _mask_ forms.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
                                                __u);
}
5092
/* Packed-integer not-equal compares producing a k-mask.  Both the signed
   (cmpd/cmpq) and unsigned (ucmpd/ucmpq) builtins take immediate
   predicate 4 (NE).  The final argument is the write-mask: -1 for the
   unmasked forms, the caller-supplied __u for the _mask_ forms.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                __u);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 (__mmask16)-1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
                                                 __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                               __u);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                (__mmask8)-1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
                                                __u);
}
5140
/* Sign-extending element-widening conversions (cvtepiN_epiM).  The core
   forms use __builtin_convertvector on an explicitly signed source vector
   type so the extension is always arithmetic.  The _mask_ variants blend
   the converted result with __W under __U via a select builtin; the
   _maskz_ variants blend with zero instead.  The epi8->epi64 form first
   narrows to the low 8 bytes with __builtin_shufflevector, since only 8
   result elements fit in 512 bits.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi8_epi64(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
5254
/* Zero-extending element-widening conversions (cvtepuN_epiM).  The core
   forms use __builtin_convertvector on an explicitly unsigned source
   vector type (__v16qu, __v8su, ...) so the extension is always a zero
   extension.  The _mask_ variants blend the converted result with __W
   under __U via a select builtin; the _maskz_ variants blend with zero.
   The epu8->epi64 form first narrows to the low 8 bytes with
   __builtin_shufflevector, since only 8 result elements fit.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi32(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu8_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
5364
/* Variable rotate-right (each element of __A rotated by the count in the
   corresponding element of __B), via the prorv builtins.  The builtin's
   third argument is the pass-through source for masked-off lanes: a zero
   vector for the unmasked/-maskz forms, __W for the _mask_ forms; the
   fourth is the write-mask (-1 = all lanes).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
5422
5423
5424
/* General compare and immediate rotate-left intrinsics.  These must be
   macros (not inline functions): the predicate `p` and rotate count `b`
   are forwarded to builtins that require integer constant expressions.
   The _mm512_cmp_* family exposes the raw predicate encoding used by the
   named cmpeq/cmplt/... wrappers above; the _mask_ forms substitute the
   caller's mask `m` for the all-ones default.  The rol macros select
   between a zero vector (unmasked/maskz) and `W` (mask) as the
   pass-through source.  */
#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m)); })

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)); })

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm512_rol_epi32(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_rol_epi64(a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
/* Variable rotate-left (per-element counts taken from __B), via the prolv
   builtins.  Same argument convention as the rorv family above: third
   operand is the pass-through source (zero vector or __W), fourth is the
   write-mask (-1 = all lanes).  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si) __W,
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
              (__v16si) __B,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di) __W,
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
              (__v8di) __B,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
5550
/* Immediate rotate-right.  Macros because the count `B` must reach the
   pror builtins as an integer constant expression.  Pass-through source
   is a zero vector for the unmasked/maskz forms and `W` for the _mask_
   forms; final operand is the write-mask.  */
#define _mm512_ror_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)-1); })

#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)(__m512i)(W), \
                                        (__mmask16)(U)); })

#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \
                                        (__v16si)_mm512_setzero_si512(), \
                                        (__mmask16)(U)); })

#define _mm512_ror_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)-1); })

#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)(__m512i)(W), (__mmask8)(U)); })

#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \
                                        (__v8di)_mm512_setzero_si512(), \
                                        (__mmask8)(U)); })
5579
/* Immediate shift-left (slli) and logical shift-right (srli) by a scalar
   count __B.  The core forms call the psll/psrl builtins directly; the
   _mask_/_maskz_ variants reuse the core form and blend the result with
   __W or a zero vector under __U via a select builtin.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_slli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srli_epi64(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
5665
/* Masked aligned loads/stores and register-to-register moves of packed
   32-/64-bit integers.  The movdqa32/movdqa64 load builtins take the
   pass-through vector (__W, or zero for _maskz_) and the write-mask; the
   store builtins write only the lanes selected by __U.  NOTE(review):
   these use the movdqa (aligned) builtins — __P presumably must be
   64-byte aligned; confirm against the intrinsic reference.  The mov
   forms are pure mask-blends implemented with the select builtins.  */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si) __W,
              (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
          (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) __W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) _mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) __W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) _mm512_setzero_si512 ());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di) __W,
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
          (__mmask8) __U);
}
5745
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_movedup_pd (__m512d __A)
{
  /* Duplicate the even-indexed doubles of __A into each adjacent pair:
     result lanes are A[0],A[0],A[2],A[2],A[4],A[4],A[6],A[6]. */
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  /* Merge-masking: duplicated elements where the mask bit is 1, __W elsewhere. */
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  /* Zero-masking: duplicated elements where the mask bit is 1, 0.0 elsewhere. */
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
5768
/* VFIXUPIMMPD (512-bit double): fix up elements of A/B using the 64-bit
   control table in C and the immediate imm.  The unmasked forms pass an
   all-ones mask; _mask forms apply U, _maskz forms use the zeroing
   builtin.  *_round_* variants take an explicit rounding/SAE argument R;
   the rest use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R)); })

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5808
/* VFIXUPIMMPS (512-bit float): same scheme as the _pd forms above, with
   a 16-bit element mask and a 32-bit integer control table in C. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R)); })

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R)); })

#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             (int)(R)); })

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION); })
5848
/* VFIXUPIMMSD: scalar-double fixup of the low element; mask bit 0 of U
   (or an implicit all-ones mask) controls the low lane.  *_round_*
   variants take an explicit rounding/SAE argument R. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2di)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
5887
/* VFIXUPIMMSS: scalar-single fixup; same scheme as the _sd forms above. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, (int)(R)); })

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), (int)(R)); })

#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4si)(__m128i)(C), (int)(imm), \
                                         (__mmask8)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })
5926
/* VGETEXPSD: scalar-double getexp of the low element.  All forms feed a
   pass-through operand (W, or a zero vector) and a mask (U, or all-ones)
   to the same builtin; *_round_* forms take an explicit rounding/SAE
   argument R, the rest use the current direction. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R)); })


static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  /* Unmasked form: all-ones mask, zero pass-through. */
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  /* Merge-masking form: pass-through comes from __W. */
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  /* Zero-masking form: zero pass-through. */
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R)); })
5972
/* VGETEXPSS: scalar-single getexp; same scheme as the _sd forms above. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  /* Unmasked form: all-ones mask, zero pass-through. */
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  /* Merge-masking form: pass-through comes from __W. */
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(R)); })
6001
6002 static __inline__ __m128 __DEFAULT_FN_ATTRS
6003 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
6004 {
6005  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
6006           (__v4sf) __B,
6007           (__v4sf) _mm_setzero_pd (),
6008           (__mmask8) __U,
6009           _MM_FROUND_CUR_DIRECTION);
6010 }
6011
/* Zero-masking scalar-single getexp with explicit rounding/SAE R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R)); })
6017
/* VGETMANTSD: scalar-double getmant.  The two 2-bit controls C and D are
   packed into the builtin's immediate as ((D<<2) | C).  Pass-through and
   mask operands follow the same scheme as the getexp forms above. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)); })

#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })
6040
/* Merge-masking scalar-double getmant with explicit rounding/SAE R.
   Fix: add the missing __extension__ before the GNU statement expression
   so -pedantic builds don't warn; every sibling macro in this family
   already carries it. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
6047
/* Zero-masking scalar-double getmant: zero pass-through, masked by U. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION); })

/* As above, with explicit rounding/SAE argument R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)); })
6062
/* VGETMANTSS: scalar-single getmant; controls packed as ((D<<2) | C)
   like the _sd forms above. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
6085
/* Merge-masking scalar-single getmant with explicit rounding/SAE R.
   Fix: add the missing __extension__ before the GNU statement expression
   (avoids -pedantic warnings; matches every sibling macro). */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
6092
/* Zero-masking scalar-single getmant: zero pass-through, masked by U.
   Fix: the zero vector must be the single-precision _mm_setzero_ps(),
   not _mm_setzero_pd() bitcast through (__v4sf) — same bit pattern, but
   the double-vector zero was a type error in intent. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
6100
/* Zero-masking scalar-single getmant with explicit rounding/SAE R. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)); })
6107
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kmov (__mmask16 __A)
{
  /* Identity on the mask value; exists only to expose the kmov intrinsic
     name — the compiler is free to keep __A wherever it lives. */
  return  __A;
}
6113
/* Ordered compare of the low scalar elements of A and B with comparison
   predicate P and rounding/SAE control R; yields an int (vcomisd/vcomiss
   builtins). */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R)); })

#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R)); })

#ifdef __x86_64__
/* x86-64 only: convert the low double of A to a signed 64-bit integer
   using rounding control R. */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif
6126
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
         __mmask16 __U, __m512i __B)
{
  /* Two-source permute (vpermi2d): selects 32-bit elements from __A/__B
     according to the index vector __I, masked by __U.  NOTE(review): in
     the mask2 form the index operand presumably also serves as the
     pass-through for unselected lanes — confirm against the builtin's
     contract. */
  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16si) __B,
                   (__mmask16) __U);
}
6137
/* Left shifts.  _sll_* shift every element by the single count supplied
   in __B (vpslld/vpsllq); _sllv_* shift each element by the matching
   per-element count in __Y (vpsllvd/vpsllvq).  Mask variants blend with
   __W; maskz variants zero unselected elements. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6225
/* Arithmetic (sign-extending) right shifts.  _sra_* use the single count
   in __B (vpsrad/vpsraq); _srav_* use per-element counts from __Y
   (vpsravd/vpsravq).  Mask variants blend with __W; maskz variants zero
   unselected elements. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6313
/* Logical (zero-filling) right shifts.  _srl_* use the single count in
   __B (vpsrld/vpsrlq); _srlv_* use per-element counts from __Y
   (vpsrlvd/vpsrlvq).  Mask variants blend with __W; maskz variants zero
   unselected elements. */

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
6401
/* Bitwise ternary logic (VPTERNLOGD / VPTERNLOGQ): for every bit
   position, the corresponding bits of A, B and C form a 3-bit index
   into the 8-bit truth table 'imm', and the selected table bit becomes
   the result bit.  Per the instruction's semantics the _mask forms
   merge into A (elements with a clear mask bit keep A); the _maskz
   forms zero those elements.  */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1); })

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U)); })

/* 64-bit-element variants.  */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
6437
#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer with explicit
   rounding control R (VCVTSD2SI; 64-bit targets only).  */
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
#endif

/* Convert the low double of A to a signed 32-bit integer with explicit
   rounding control R; _i32 is a synonym for _si32.  */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Unsigned 32-bit variant (VCVTSD2USI).  */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
6451
6452 static __inline__ unsigned __DEFAULT_FN_ATTRS
6453 _mm_cvtsd_u32 (__m128d __A)
6454 {
6455   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6456              _MM_FROUND_CUR_DIRECTION);
6457 }
6458
#ifdef __x86_64__
/* Convert the low double of A to an unsigned 64-bit integer with
   explicit rounding control R (VCVTSD2USI; 64-bit targets only).  */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R)); })

/* Same conversion using the current rounding mode.  */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6472
/* Convert the low float of A to a signed 32-bit integer with explicit
   rounding control R (VCVTSS2SI); _i32 is a synonym for _si32.  */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })

#ifdef __x86_64__
/* Signed 64-bit variants (64-bit targets only).  */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#endif

/* Unsigned 32-bit variant (VCVTSS2USI).  */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
6489
6490 static __inline__ unsigned __DEFAULT_FN_ATTRS
6491 _mm_cvtss_u32 (__m128 __A)
6492 {
6493   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6494              _MM_FROUND_CUR_DIRECTION);
6495 }
6496
#ifdef __x86_64__
/* Convert the low float of A to an unsigned 64-bit integer with
   explicit rounding control R (VCVTSS2USI; 64-bit targets only).  */
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R)); })

/* Same conversion using the current rounding mode.  */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6510
/* Truncating (round-toward-zero) conversion of the low double of A to
   a signed 32-bit integer (VCVTTSD2SI); R supplies the SAE/exception
   control.  _i32 is a synonym for _si32.  */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })

/* Non-round-control form of the truncating conversion.  */
static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6523
#ifdef __x86_64__
/* Truncating conversion of the low double of A to a signed 64-bit
   integer (VCVTTSD2SI; 64-bit targets only).  */
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif
6538
/* Truncating conversion of the low double of A to an unsigned 32-bit
   integer (VCVTTSD2USI).  */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6548
#ifdef __x86_64__
/* Truncating conversion of the low double of A to an unsigned 64-bit
   integer (VCVTTSD2USI; 64-bit targets only).  */
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6562
/* Truncating conversion of the low float of A to a signed 32-bit
   integer (VCVTTSS2SI); _i32 is a synonym for _si32.  */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6575
#ifdef __x86_64__
/* Truncating conversion of the low float of A to a signed 64-bit
   integer (VCVTTSS2SI; 64-bit targets only).  */
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif
6590
/* Truncating conversion of the low float of A to an unsigned 32-bit
   integer (VCVTTSS2USI).  */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
6600
#ifdef __x86_64__
/* Truncating conversion of the low float of A to an unsigned 64-bit
   integer (VCVTTSS2USI; 64-bit targets only).  */
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)); })

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6614
/* Two-source permutes (VPERMI2PD/PS/Q): element i of the result is
   selected from the concatenation of __A and __B by index element i of
   __I.  In the mask2 forms, elements with a clear bit in __U keep the
   corresponding element of __I (the instruction's destination overlaps
   the index operand).  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
              (__v8di) __I
              /* idx */ ,
              (__v8df) __B,
              (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
                   (__v16si) __I
                   /* idx */ ,
                   (__v16sf) __B,
                   (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
         __mmask8 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
                   (__v8di) __I
                   /* idx */ ,
                   (__v8di) __B,
                   (__mmask8) __U);
}
6647
/* VPERMILPD with immediate control: within each 128-bit lane, bit i of
   C picks the low or high double for result element i, hence the
   base-offset (0/2/4/6) plus one selector bit per element.  */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x1), \
                                   0 + (((C) >> 1) & 0x1), \
                                   2 + (((C) >> 2) & 0x1), \
                                   2 + (((C) >> 3) & 0x1), \
                                   4 + (((C) >> 4) & 0x1), \
                                   4 + (((C) >> 5) & 0x1), \
                                   6 + (((C) >> 6) & 0x1), \
                                   6 + (((C) >> 7) & 0x1)); })

#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* VPERMILPS with immediate control: each 2-bit field of C selects one
   of the four floats within the containing 128-bit lane; the same
   8-bit pattern is applied to all four lanes.  */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                   0  + (((C) >> 0) & 0x3), \
                                   0  + (((C) >> 2) & 0x3), \
                                   0  + (((C) >> 4) & 0x3), \
                                   0  + (((C) >> 6) & 0x3), \
                                   4  + (((C) >> 0) & 0x3), \
                                   4  + (((C) >> 2) & 0x3), \
                                   4  + (((C) >> 4) & 0x3), \
                                   4  + (((C) >> 6) & 0x3), \
                                   8  + (((C) >> 0) & 0x3), \
                                   8  + (((C) >> 2) & 0x3), \
                                   8  + (((C) >> 4) & 0x3), \
                                   8  + (((C) >> 6) & 0x3), \
                                   12 + (((C) >> 0) & 0x3), \
                                   12 + (((C) >> 2) & 0x3), \
                                   12 + (((C) >> 4) & 0x3), \
                                   12 + (((C) >> 6) & 0x3)); })

#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps()); })
6699
/* VPERMILPD/PS with a vector control: each element of __A is permuted
   within its 128-bit lane according to the corresponding element of
   __C.  Mask/maskz forms merge into __W or zero masked-off lanes.  */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
6743
/* Two-source permutes, VPERMT2PD/PS form: indices in __I select from
   the concatenation of __A and __B.  In the _mask forms masked-off
   elements keep __A (the instruction's destination overlaps the first
   table operand); the _maskz forms zero them.  */
static __inline __m512d __DEFAULT_FN_ATTRS
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) -1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
                    /* idx */ ,
                    (__v8df) __A,
                    (__v8df) __B,
                    (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
            __m512d __B)
{
  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
                                                         /* idx */ ,
                                                         (__v8df) __A,
                                                         (__v8df) __B,
                                                         (__mmask8) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) -1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
                                                         /* idx */ ,
                                                         (__v16sf) __A,
                                                         (__v16sf) __B,
                                                         (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
            __m512 __B)
{
  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
                                                        /* idx */ ,
                                                        (__v16sf) __A,
                                                        (__v16sf) __B,
                                                        (__mmask16) __U);
}
6805
/* NAND tests (VPTESTNMD / VPTESTNMQ): result mask bit i is set when
   element i of (__A AND __B) is all zero.  The _mask forms additionally
   AND the result with the incoming mask __U.  */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
             (__v16si) __B,
             (__mmask16) -1);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
             (__v16si) __B, __U);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
            (__v8di) __B,
            (__mmask8) -1);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
            (__v8di) __B, __U);
}
6835
/* Truncating conversion of eight doubles to eight unsigned 32-bit
   integers (VCVTTPD2UDQ), 512-bit source to 256-bit result.  The
   _round forms take explicit SAE/exception control R; the plain forms
   use the current direction.  Mask forms merge into W / zero.  */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)); })

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
6879
/* Scalar roundscale (VRNDSCALESD / VRNDSCALESS): round the low element
   of B according to the imm8 control (rounding mode + number of
   fraction bits to keep), copy the upper element(s) from A.  _round
   forms take explicit rounding control R; the others use the current
   direction.  Mask forms merge into W / zero the low element.  */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)); })

#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })

#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)); })

/* Single-precision variants.  */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               (int)(R)); })

#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })

#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R)); })
6963
/* VSCALEFPD: per Intel's definition, element-wise A * 2^floor(B).
   _round forms take explicit rounding control R; the plain forms use
   the current direction.  Mask forms merge into W / zero.  */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_scalef_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
                (__v8df) __B,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
7013
/* VSCALEFPS: single-precision variant of scalef (A * 2^floor(B)).  */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_scalef_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
               (__v16sf) __B,
               (__v16sf)
               _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
7063
/* VSCALEFSD with explicit rounding mode R: scale the low double of A by a
   power of two derived from B; upper element copied from A.  Unmasked.  */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)); })
7069
7070 static __inline__ __m128d __DEFAULT_FN_ATTRS
7071 _mm_scalef_sd (__m128d __A, __m128d __B)
7072 {
7073   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
7074               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
7075               (__mmask8) -1,
7076               _MM_FROUND_CUR_DIRECTION);
7077 }
7078
7079 static __inline__ __m128d __DEFAULT_FN_ATTRS
7080 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7081 {
7082  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7083                  (__v2df) __B,
7084                 (__v2df) __W,
7085                 (__mmask8) __U,
7086                 _MM_FROUND_CUR_DIRECTION);
7087 }
7088
/* Merge-masked VSCALEFSD with explicit rounding mode R; the low result
   element is taken from W when bit 0 of U is clear.  */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })
7094
7095 static __inline__ __m128d __DEFAULT_FN_ATTRS
7096 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
7097 {
7098  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7099                  (__v2df) __B,
7100                 (__v2df) _mm_setzero_pd (),
7101                 (__mmask8) __U,
7102                 _MM_FROUND_CUR_DIRECTION);
7103 }
7104
/* Zero-masked VSCALEFSD with explicit rounding mode R.  */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })

/* Unmasked VSCALEFSS with explicit rounding mode R: scale the low float of
   A by a power of two derived from B; upper elements copied from A.  */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)); })
7116
7117 static __inline__ __m128 __DEFAULT_FN_ATTRS
7118 _mm_scalef_ss (__m128 __A, __m128 __B)
7119 {
7120   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7121              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
7122              (__mmask8) -1,
7123              _MM_FROUND_CUR_DIRECTION);
7124 }
7125
7126 static __inline__ __m128 __DEFAULT_FN_ATTRS
7127 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7128 {
7129  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7130                 (__v4sf) __B,
7131                 (__v4sf) __W,
7132                 (__mmask8) __U,
7133                 _MM_FROUND_CUR_DIRECTION);
7134 }
7135
/* Merge-masked VSCALEFSS with explicit rounding mode R; the low result
   element is taken from W when bit 0 of U is clear.  */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })
7141
7142 static __inline__ __m128 __DEFAULT_FN_ATTRS
7143 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7144 {
7145  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7146                  (__v4sf) __B,
7147                 (__v4sf) _mm_setzero_ps (),
7148                 (__mmask8) __U,
7149                 _MM_FROUND_CUR_DIRECTION);
7150 }
7151
/* Zero-masked VSCALEFSS with explicit rounding mode R.
   Fixed: the rounding-mode argument R was previously ignored and
   _MM_FROUND_CUR_DIRECTION passed unconditionally, unlike every other
   *_round_* macro in this file.  */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), \
                                             (int)(R)); })
7158
7159 static __inline__ __m512i __DEFAULT_FN_ATTRS
7160 _mm512_srai_epi32(__m512i __A, int __B)
7161 {
7162   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
7163 }
7164
7165 static __inline__ __m512i __DEFAULT_FN_ATTRS
7166 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
7167 {
7168   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7169                                          (__v16si)_mm512_srai_epi32(__A, __B), \
7170                                          (__v16si)__W);
7171 }
7172
7173 static __inline__ __m512i __DEFAULT_FN_ATTRS
7174 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
7175   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7176                                          (__v16si)_mm512_srai_epi32(__A, __B), \
7177                                          (__v16si)_mm512_setzero_si512());
7178 }
7179
7180 static __inline__ __m512i __DEFAULT_FN_ATTRS
7181 _mm512_srai_epi64(__m512i __A, int __B)
7182 {
7183   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
7184 }
7185
7186 static __inline__ __m512i __DEFAULT_FN_ATTRS
7187 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
7188 {
7189   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7190                                           (__v8di)_mm512_srai_epi64(__A, __B), \
7191                                           (__v8di)__W);
7192 }
7193
7194 static __inline__ __m512i __DEFAULT_FN_ATTRS
7195 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
7196 {
7197   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7198                                           (__v8di)_mm512_srai_epi64(__A, __B), \
7199                                           (__v8di)_mm512_setzero_si512());
7200 }
7201
/* VSHUFF32X4: shuffle 128-bit float lanes of A and B selected by imm;
   unmasked, merge-masked and zero-masked forms.  */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1); })

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)(__m512)(W), \
                                         (__mmask16)(U)); })

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U)); })

/* VSHUFF64X2: shuffle 128-bit double lanes of A and B selected by imm.  */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1); })

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)(__m512d)(W), \
                                          (__mmask8)(U)); })

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U)); })

/* VSHUFI32X4: integer counterpart of the f32x4 shuffles.  */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)-1); })

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)(__m512i)(W), \
                                          (__mmask16)(U)); })

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), (int)(imm), \
                                          (__v16si)_mm512_setzero_si512(), \
                                          (__mmask16)(U)); })

/* VSHUFI64X2: integer counterpart of the f64x2 shuffles.  */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)_mm512_setzero_si512(), \
                                          (__mmask8)-1); })

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)(__m512i)(W), \
                                          (__mmask8)(U)); })

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
                                          (__v8di)(__m512i)(B), (int)(imm), \
                                          (__v8di)_mm512_setzero_si512(), \
                                          (__mmask8)(U)); })
7273
/* VSHUFPD: per-128-bit-lane pairwise select of doubles from A (even result
   slots) and B (odd result slots), one selector bit of M per result slot.  */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), \
                                   0  + (((M) >> 0) & 0x1), \
                                   8  + (((M) >> 1) & 0x1), \
                                   2  + (((M) >> 2) & 0x1), \
                                   10 + (((M) >> 3) & 0x1), \
                                   4  + (((M) >> 4) & 0x1), \
                                   12 + (((M) >> 5) & 0x1), \
                                   6  + (((M) >> 6) & 0x1), \
                                   14 + (((M) >> 7) & 0x1)); })

/* Merge-masked form: lanes with a clear bit in U come from W.  */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked form: lanes with a clear bit in U are zeroed.  */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd()); })
7295
/* VSHUFPS: within each 128-bit quarter, pick two floats from A then two from
   B, positions selected by 2-bit fields of M.
   Fixed: the shuffled float vector was previously cast to __m512d instead of
   __m512, silently reinterpreting 16 floats as 8 doubles in the unmasked
   form.  */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7315
/* Merge-masked VSHUFPS: lanes with a clear bit in U come from W.  */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W)); })

/* Zero-masked VSHUFPS: lanes with a clear bit in U are zeroed.  */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps()); })
7325
/* VSQRTSD with explicit rounding mode R: square root of the low double of B;
   upper element copied from A.  Unmasked.  */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })
7331
7332 static __inline__ __m128d __DEFAULT_FN_ATTRS
7333 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7334 {
7335  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7336                  (__v2df) __B,
7337                 (__v2df) __W,
7338                 (__mmask8) __U,
7339                 _MM_FROUND_CUR_DIRECTION);
7340 }
7341
/* Merge-masked VSQRTSD with explicit rounding mode R.  */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)); })
7347
7348 static __inline__ __m128d __DEFAULT_FN_ATTRS
7349 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7350 {
7351  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7352                  (__v2df) __B,
7353                 (__v2df) _mm_setzero_pd (),
7354                 (__mmask8) __U,
7355                 _MM_FROUND_CUR_DIRECTION);
7356 }
7357
/* Zero-masked VSQRTSD with explicit rounding mode R.  */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

/* Unmasked VSQRTSS with explicit rounding mode R: square root of the low
   float of B; upper elements copied from A.  */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })
7369
7370 static __inline__ __m128 __DEFAULT_FN_ATTRS
7371 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7372 {
7373  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7374                  (__v4sf) __B,
7375                 (__v4sf) __W,
7376                 (__mmask8) __U,
7377                 _MM_FROUND_CUR_DIRECTION);
7378 }
7379
/* Merge-masked VSQRTSS with explicit rounding mode R.  */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)); })
7385
7386 static __inline__ __m128 __DEFAULT_FN_ATTRS
7387 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7388 {
7389  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7390                  (__v4sf) __B,
7391                 (__v4sf) _mm_setzero_ps (),
7392                 (__mmask8) __U,
7393                 _MM_FROUND_CUR_DIRECTION);
7394 }
7395
/* Zero-masked VSQRTSS with explicit rounding mode R.  */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
7401
7402 static __inline__ __m512 __DEFAULT_FN_ATTRS
7403 _mm512_broadcast_f32x4(__m128 __A)
7404 {
7405   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7406                                          0, 1, 2, 3, 0, 1, 2, 3,
7407                                          0, 1, 2, 3, 0, 1, 2, 3);
7408 }
7409
7410 static __inline__ __m512 __DEFAULT_FN_ATTRS
7411 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
7412 {
7413   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7414                                            (__v16sf)_mm512_broadcast_f32x4(__A),
7415                                            (__v16sf)__O);
7416 }
7417
7418 static __inline__ __m512 __DEFAULT_FN_ATTRS
7419 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
7420 {
7421   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7422                                            (__v16sf)_mm512_broadcast_f32x4(__A),
7423                                            (__v16sf)_mm512_setzero_ps());
7424 }
7425
7426 static __inline__ __m512d __DEFAULT_FN_ATTRS
7427 _mm512_broadcast_f64x4(__m256d __A)
7428 {
7429   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7430                                           0, 1, 2, 3, 0, 1, 2, 3);
7431 }
7432
7433 static __inline__ __m512d __DEFAULT_FN_ATTRS
7434 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
7435 {
7436   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7437                                             (__v8df)_mm512_broadcast_f64x4(__A),
7438                                             (__v8df)__O);
7439 }
7440
7441 static __inline__ __m512d __DEFAULT_FN_ATTRS
7442 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
7443 {
7444   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7445                                             (__v8df)_mm512_broadcast_f64x4(__A),
7446                                             (__v8df)_mm512_setzero_pd());
7447 }
7448
7449 static __inline__ __m512i __DEFAULT_FN_ATTRS
7450 _mm512_broadcast_i32x4(__m128i __A)
7451 {
7452   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7453                                           0, 1, 2, 3, 0, 1, 2, 3,
7454                                           0, 1, 2, 3, 0, 1, 2, 3);
7455 }
7456
7457 static __inline__ __m512i __DEFAULT_FN_ATTRS
7458 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
7459 {
7460   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7461                                            (__v16si)_mm512_broadcast_i32x4(__A),
7462                                            (__v16si)__O);
7463 }
7464
7465 static __inline__ __m512i __DEFAULT_FN_ATTRS
7466 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
7467 {
7468   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7469                                            (__v16si)_mm512_broadcast_i32x4(__A),
7470                                            (__v16si)_mm512_setzero_si512());
7471 }
7472
7473 static __inline__ __m512i __DEFAULT_FN_ATTRS
7474 _mm512_broadcast_i64x4(__m256i __A)
7475 {
7476   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7477                                           0, 1, 2, 3, 0, 1, 2, 3);
7478 }
7479
7480 static __inline__ __m512i __DEFAULT_FN_ATTRS
7481 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
7482 {
7483   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7484                                             (__v8di)_mm512_broadcast_i64x4(__A),
7485                                             (__v8di)__O);
7486 }
7487
7488 static __inline__ __m512i __DEFAULT_FN_ATTRS
7489 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
7490 {
7491   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7492                                             (__v8di)_mm512_broadcast_i64x4(__A),
7493                                             (__v8di)_mm512_setzero_si512());
7494 }
7495
7496 static __inline__ __m512d __DEFAULT_FN_ATTRS
7497 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7498 {
7499   return (__m512d)__builtin_ia32_selectpd_512(__M,
7500                                               (__v8df) _mm512_broadcastsd_pd(__A),
7501                                               (__v8df) __O);
7502 }
7503
7504 static __inline__ __m512d __DEFAULT_FN_ATTRS
7505 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7506 {
7507   return (__m512d)__builtin_ia32_selectpd_512(__M,
7508                                               (__v8df) _mm512_broadcastsd_pd(__A),
7509                                               (__v8df) _mm512_setzero_pd());
7510 }
7511
7512 static __inline__ __m512 __DEFAULT_FN_ATTRS
7513 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7514 {
7515   return (__m512)__builtin_ia32_selectps_512(__M,
7516                                              (__v16sf) _mm512_broadcastss_ps(__A),
7517                                              (__v16sf) __O);
7518 }
7519
7520 static __inline__ __m512 __DEFAULT_FN_ATTRS
7521 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7522 {
7523   return (__m512)__builtin_ia32_selectps_512(__M,
7524                                              (__v16sf) _mm512_broadcastss_ps(__A),
7525                                              (__v16sf) _mm512_setzero_ps());
7526 }
7527
/* VPMOVSDB: narrow 16 signed 32-bit elements of __A to 8 bits each with
   signed saturation.  Unmasked form (undefined passthrough, all-ones mask). */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

/* Merge-masked: result bytes with a clear bit in __M come from __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

/* Zero-masked: result bytes with a clear bit in __M are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the saturated bytes to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7556
/* VPMOVSDW: narrow 16 signed 32-bit elements of __A to 16 bits each with
   signed saturation.  Unmasked form.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

/* Merge-masked: result words with a clear bit in __M come from __O.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

/* Zero-masked: result words with a clear bit in __M are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

/* Masked store of the saturated words to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7585
/* VPMOVSQB: narrow 8 signed 64-bit elements of __A to 8 bits each with
   signed saturation; upper 8 bytes of the 128-bit result are zeroed by the
   instruction.  Unmasked form.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masked: result bytes with a clear bit in __M come from __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

/* Zero-masked: result bytes with a clear bit in __M are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the saturated bytes to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7614
/* VPMOVSQD: narrow 8 signed 64-bit elements of __A to 32 bits each with
   signed saturation.  Unmasked form.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

/* Merge-masked: result dwords with a clear bit in __M come from __O.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

/* Zero-masked: result dwords with a clear bit in __M are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

/* Masked store of the saturated dwords to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7643
/* VPMOVSQW: narrow 8 signed 64-bit elements of __A to 16 bits each with
   signed saturation.  Unmasked form.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

/* Merge-masked: result words with a clear bit in __M come from __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

/* Zero-masked: result words with a clear bit in __M are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

/* Masked store of the saturated words to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7672
/* VPMOVUSDB: narrow 16 unsigned 32-bit elements of __A to 8 bits each with
   unsigned saturation.  Unmasked form.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

/* Merge-masked: result bytes with a clear bit in __M come from __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked: result bytes with a clear bit in __M are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store of the saturated bytes to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7702
/* VPMOVUSDW: narrow 16 unsigned 32-bit elements of __A to 16 bits each with
   unsigned saturation.  Unmasked form.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

/* Merge-masked: result words with a clear bit in __M come from __O.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

/* Zero-masked: result words with a clear bit in __M are zeroed.  */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

/* Masked store of the saturated words to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7732
/* VPMOVUSQB: narrow 8 unsigned 64-bit elements of __A to 8 bits each with
   unsigned saturation.  Unmasked form.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked: result bytes with a clear bit in __M come from __O.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked: result bytes with a clear bit in __M are zeroed.  */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Masked store of the saturated bytes to unaligned memory at __P.  */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7762
/* VPMOVUSQD: down-convert 8 x 64-bit elements to 32-bit with unsigned
 * saturation.  Same variant pattern as the other pmovus* wrappers:
 * unmasked / merge-masked / zero-masked / masked store. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}
7791
/* VPMOVUSQW: down-convert 8 x 64-bit elements to 16-bit with unsigned
 * saturation.  Unmasked / merge-masked / zero-masked / masked-store
 * variants, following the standard AVX-512 wrapper pattern. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
7820
/* VPMOVDB: truncate 16 x 32-bit elements to 8-bit (plain truncation,
 * no saturation — contrast with the pmovus*/pmovs* forms).  Unmasked /
 * merge-masked / zero-masked / masked-store variants. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7849
/* VPMOVDW: truncate 16 x 32-bit elements to 16-bit (no saturation).
 * Unmasked / merge-masked / zero-masked / masked-store variants. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}
7878
/* VPMOVQB: truncate 8 x 64-bit elements to 8-bit (no saturation).
 * Unmasked / merge-masked / zero-masked / masked-store variants. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7907
/* VPMOVQD: truncate 8 x 64-bit elements to 32-bit (no saturation).
 * Unmasked / merge-masked / zero-masked / masked-store variants. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7936
/* VPMOVQW: truncate 8 x 64-bit elements to 16-bit (no saturation).
 * Unmasked / merge-masked / zero-masked / masked-store variants. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7965
/* Extract the 128-bit lane selected by imm[1:0]: a shufflevector picking
 * elements 4*(imm&3) .. 4*(imm&3)+3.  The second shuffle operand is an
 * undefined vector whose elements are never selected. */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 4,             \
                                   1 + ((imm) & 0x3) * 4,             \
                                   2 + ((imm) & 0x3) * 4,             \
                                   3 + ((imm) & 0x3) * 4); })

/* Merge-masked extract: result lanes cleared in U come from W. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(W)); })

/* Zero-masked extract: result lanes cleared in U are zero. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })

/* Extract the 256-bit half selected by imm[0] (upper half when set). */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ? 4 : 0,              \
                                   ((imm) & 1) ? 5 : 1,              \
                                   ((imm) & 1) ? 6 : 2,              \
                                   ((imm) & 1) ? 7 : 3); })

/* Merge-masked 256-bit extract. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(W)); })

/* Zero-masked 256-bit extract. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
8001
/* Insert the 256-bit vector B into the half of A selected by imm[0]:
 * B is widened to 512 bits (elements 8..11 of the shuffle), then each
 * result element picks from A or from B depending on the immediate.
 * When imm&1 is 0 the low half is replaced, otherwise the high half. */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

/* Merge-masked insert: result lanes cleared in U come from W. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)(W)); })

/* Zero-masked insert: result lanes cleared in U are zero. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()); })

/* Integer counterpart of _mm512_insertf64x4; identical index scheme. */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 ((imm) & 0x1) ?  0 :  8, \
                                 ((imm) & 0x1) ?  1 :  9, \
                                 ((imm) & 0x1) ?  2 : 10, \
                                 ((imm) & 0x1) ?  3 : 11, \
                                 ((imm) & 0x1) ?  8 :  4, \
                                 ((imm) & 0x1) ?  9 :  5, \
                                 ((imm) & 0x1) ? 10 :  6, \
                                 ((imm) & 0x1) ? 11 :  7); })

#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)(W)); })

#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()); })
8045
/* Insert the 128-bit vector B into the 128-bit lane of A selected by
 * imm[1:0].  B is widened to 512 bits (its elements appear as shuffle
 * indices 16..19); for each destination element the ternary picks B's
 * element when that element falls inside the selected lane, otherwise
 * the original element of A. */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) == 0) ? 16 :  0, \
                                  (((imm) & 0x3) == 0) ? 17 :  1, \
                                  (((imm) & 0x3) == 0) ? 18 :  2, \
                                  (((imm) & 0x3) == 0) ? 19 :  3, \
                                  (((imm) & 0x3) == 1) ? 16 :  4, \
                                  (((imm) & 0x3) == 1) ? 17 :  5, \
                                  (((imm) & 0x3) == 1) ? 18 :  6, \
                                  (((imm) & 0x3) == 1) ? 19 :  7, \
                                  (((imm) & 0x3) == 2) ? 16 :  8, \
                                  (((imm) & 0x3) == 2) ? 17 :  9, \
                                  (((imm) & 0x3) == 2) ? 18 : 10, \
                                  (((imm) & 0x3) == 2) ? 19 : 11, \
                                  (((imm) & 0x3) == 3) ? 16 : 12, \
                                  (((imm) & 0x3) == 3) ? 17 : 13, \
                                  (((imm) & 0x3) == 3) ? 18 : 14, \
                                  (((imm) & 0x3) == 3) ? 19 : 15); })

/* Merge-masked insert: result lanes cleared in U come from W. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(W)); })

/* Zero-masked insert: result lanes cleared in U are zero. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()); })

/* Integer counterpart of _mm512_insertf32x4; identical index scheme. */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
                                 (((imm) & 0x3) == 0) ? 16 :  0, \
                                 (((imm) & 0x3) == 0) ? 17 :  1, \
                                 (((imm) & 0x3) == 0) ? 18 :  2, \
                                 (((imm) & 0x3) == 0) ? 19 :  3, \
                                 (((imm) & 0x3) == 1) ? 16 :  4, \
                                 (((imm) & 0x3) == 1) ? 17 :  5, \
                                 (((imm) & 0x3) == 1) ? 18 :  6, \
                                 (((imm) & 0x3) == 1) ? 19 :  7, \
                                 (((imm) & 0x3) == 2) ? 16 :  8, \
                                 (((imm) & 0x3) == 2) ? 17 :  9, \
                                 (((imm) & 0x3) == 2) ? 18 : 10, \
                                 (((imm) & 0x3) == 2) ? 19 : 11, \
                                 (((imm) & 0x3) == 3) ? 16 : 12, \
                                 (((imm) & 0x3) == 3) ? 17 : 13, \
                                 (((imm) & 0x3) == 3) ? 18 : 14, \
                                 (((imm) & 0x3) == 3) ? 19 : 15); })

#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)(W)); })

#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()); })
8105
/* VGETMANT*: extract the normalized mantissa of each element.  B selects
 * the normalization interval (_MM_MANT_NORM_*), C the sign treatment
 * (_MM_MANT_SIGN_*); both are packed into the builtin's immediate as
 * (C<<2)|B.  R is the rounding/SAE control for the _round_ forms; the
 * non-round forms pass _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

/* NOTE(review): this unmasked pd form uses a zero passthrough where the
 * ps counterpart below uses an undefined vector; harmless under the
 * all-ones mask, but inconsistent between the two. */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION); })

/* Single-precision variants; same immediate packing as the pd forms. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)); })

#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })
8183
/* VGETEXPPD with explicit rounding/SAE control R: extract each element's
 * exponent as a double.  Unmasked / merge-masked / zero-masked forms. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
8198
/* VGETEXPPD with the current rounding direction.  Unmasked form:
 * all-ones mask, undefined don't-care passthrough. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked form: lanes cleared in __U keep their value from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lanes cleared in __U become zero. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
8225
/* VGETEXPPS with explicit rounding/SAE control R; single-precision
 * counterpart of the _round_pd macros above. */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
8240
/* VGETEXPPS with the current rounding direction.  Unmasked form. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked form: lanes cleared in __U keep their value from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lanes cleared in __U become zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
8267
/* Gather 8 floats from addr + index[i]*scale using 64-bit indices
 * (VGATHERQPS).  scale must be 1, 2, 4 or 8.  The unmasked form uses an
 * all-ones mask with an undefined don't-care passthrough. */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8279
/* Gather 8 x 32-bit integers from addr + index[i]*scale using 64-bit
 * indices (VPGATHERQD).  scale must be 1, 2, 4 or 8.
 * Fix: the unmasked form previously built its don't-care passthrough
 * with _mm256_undefined_ps() (a float vector) and bitcast it to __v8si;
 * use the integer-typed _mm256_undefined_si256() instead, matching the
 * masked form below and upstream clang.  Codegen is unchanged — all
 * lanes are gathered under the all-ones mask. */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })
8291
/* Gather 8 doubles from addr + index[i]*scale using 64-bit indices
 * (VGATHERQPD).  scale must be 1, 2, 4 or 8. */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8303
/* Gather 8 x 64-bit integers from addr + index[i]*scale using 64-bit
 * indices (VPGATHERQQ).  scale must be 1, 2, 4 or 8.
 * Fix: the unmasked form previously built its don't-care passthrough
 * with _mm512_undefined_pd() (a double vector) and bitcast it to __v8di;
 * use the integer-typed _mm512_undefined_epi32() instead, matching the
 * masked form below and upstream clang.  Codegen is unchanged — all
 * lanes are gathered under the all-ones mask. */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8315
/* Gather 16 floats from addr + index[i]*scale using 32-bit indices
 * (VGATHERDPS).  scale must be 1, 2, 4 or 8.
 * NOTE(review): the index operand is cast through (__v16sf)/(__m512)
 * even though indices are integers; this matches this clang revision's
 * prototype for __builtin_ia32_gathersiv16sf (later clang changed both
 * the builtin and this cast to integer vector types).  Do not "fix" the
 * cast without checking Builtins.def for this revision. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (float const *)(addr), \
                                       (__v16sf)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale)); })
8327
/* Gather 16 x 32-bit integers from addr + index[i]*scale using 32-bit
 * indices (VPGATHERDD).  scale must be 1, 2, 4 or 8. */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale)); })
8339
/* Gather 8 doubles from addr + index[i]*scale using 32-bit indices
 * (VGATHERDPD).  scale must be 1, 2, 4 or 8. */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (double const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8351
/* Gather 8 x 64-bit integers from addr + index[i]*scale using 32-bit
 * indices (VPGATHERDQ).  scale must be 1, 2, 4 or 8.  The passthrough
 * is an undefined integer vector bitcast from 32-bit to 64-bit lanes —
 * a harmless same-size vector cast. */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale)); })

/* Masked gather: only lanes set in mask load; cleared lanes keep v1_old. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (long long const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8363
/* Scatter 8 floats from v1 to addr + index[i]*scale using 64-bit indices
 * (VSCATTERQPS).  scale must be 1, 2, 4 or 8; the unmasked form writes
 * every lane via the all-ones mask. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Masked scatter: only lanes set in mask are stored. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale)); })

/* Scatter 8 x 32-bit integers using 64-bit indices (VPSCATTERQD). */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale)); })
8383
8384 #define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
8385   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
8386                                (__v8di)(__m512i)(index), \
8387                                (__v8df)(__m512d)(v1), (int)(scale)); })
8388
8389 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8390   __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
8391                                (__v8di)(__m512i)(index), \
8392                                (__v8df)(__m512d)(v1), (int)(scale)); })
8393
8394 #define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
8395   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
8396                                (__v8di)(__m512i)(index), \
8397                                (__v8di)(__m512i)(v1), (int)(scale)); })
8398
8399 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8400   __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
8401                                (__v8di)(__m512i)(index), \
8402                                (__v8di)(__m512i)(v1), (int)(scale)); })
8403
8404 #define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
8405   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
8406                                 (__v16si)(__m512i)(index), \
8407                                 (__v16sf)(__m512)(v1), (int)(scale)); })
8408
8409 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
8410   __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
8411                                 (__v16si)(__m512i)(index), \
8412                                 (__v16sf)(__m512)(v1), (int)(scale)); })
8413
8414 #define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
8415   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
8416                                 (__v16si)(__m512i)(index), \
8417                                 (__v16si)(__m512i)(v1), (int)(scale)); })
8418
8419 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
8420   __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
8421                                 (__v16si)(__m512i)(index), \
8422                                 (__v16si)(__m512i)(v1), (int)(scale)); })
8423
8424 #define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
8425   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
8426                                (__v8si)(__m256i)(index), \
8427                                (__v8df)(__m512d)(v1), (int)(scale)); })
8428
8429 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8430   __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
8431                                (__v8si)(__m256i)(index), \
8432                                (__v8df)(__m512d)(v1), (int)(scale)); })
8433
8434 #define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
8435   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
8436                                (__v8si)(__m256i)(index), \
8437                                (__v8di)(__m512i)(v1), (int)(scale)); })
8438
8439 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8440   __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
8441                                (__v8si)(__m256i)(index), \
8442                                (__v8di)(__m512i)(v1), (int)(scale)); })
8443
/* Scalar FMA family, single precision.  Only element 0 participates;
   elements 1-3 of the result come from the first source operand of the
   builtin.  _mask variants merge from __W when the mask bit is clear,
   _maskz variants zero, _mask3 variants merge from the addend. */

/* result[0] = mask ? W*A + B : W[0] */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })

/* result[0] = mask ? A*B + C : 0 */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8469
/* Zero-masked scalar single-precision FMA (A*B + C) with explicit rounding.
   Fix: the rounding argument R was previously ignored -- the macro
   hard-coded _MM_FROUND_CUR_DIRECTION, silently dropping caller-requested
   rounding modes such as _MM_FROUND_TO_ZERO.  Now forwards (int)(R),
   matching _mm_mask_fmadd_round_ss and _mm_mask3_fmadd_round_ss. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8475
/* result[0] = mask ? W*X + Y : Y[0]  (addend Y is the passthrough) */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? W*A - B : W[0]  (fmsub expressed as fmadd of -B) */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          (__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8501
/* Merge-masked scalar single-precision FMSUB (W*A - B) with explicit
   rounding.  Fix: B was not negated, so the macro computed (W*A)+B --
   an FMADD -- diverging from the non-round _mm_mask_fmsub_ss above and
   from the double-precision _mm_mask_fmsub_round_sd, which both negate
   the subtrahend. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8507
/* result[0] = mask ? A*B - C : 0 */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? W*X - Y : Y[0].  Uses the dedicated fmsub builtin
   because the passthrough operand Y must stay un-negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? -(W*A) + B : W[0]  (fnmadd via negated multiplicand) */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          (__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })

/* result[0] = mask ? -(A*B) + C : 0 */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? -(W*X) + Y : Y[0]  (negating W keeps Y as passthrough) */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? -(W*A) - B : W[0] */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
          -(__v4sf) __A,
          -(__v4sf) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })

/* result[0] = mask ? -(A*B) - C : 0 */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
          (__v4sf) __B,
          -(__v4sf) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8613
/* Zero-masked scalar single-precision FNMSUB (-(A*B) - C) with explicit
   rounding.  Fix: the rounding argument R was previously ignored -- the
   macro hard-coded _MM_FROUND_CUR_DIRECTION.  Now forwards (int)(R),
   matching the other *_round_ss macros in this family. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8619
/* result[0] = mask ? -(W*X) - Y : Y[0].  Uses the dedicated fnmsub
   builtin because the passthrough operand Y must stay un-negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
          (__v4sf) __X,
          (__v4sf) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\
  (__m128)__builtin_ia32_vfnmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R)); })

/* Scalar FMA family, double precision; same masking conventions as the
   single-precision variants above. */

/* result[0] = mask ? W*A + B : W[0] */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? A*B + C : 0 */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8661
/* Zero-masked scalar double-precision FMA (A*B + C) with explicit rounding.
   Fix: the rounding argument R was previously ignored -- the macro
   hard-coded _MM_FROUND_CUR_DIRECTION.  Now forwards (int)(R), matching
   _mm_mask_fmadd_round_sd and _mm_mask3_fmadd_round_sd. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8667
/* result[0] = mask ? W*X + Y : Y[0]  (addend Y is the passthrough) */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })

/* result[0] = mask ? W*A - B : W[0]  (fmsub via negated addend) */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          (__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? A*B - C : 0 */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })

/* result[0] = mask ? W*X - Y : Y[0].  Dedicated fmsub builtin keeps the
   passthrough operand Y un-negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })

/* result[0] = mask ? -(W*A) + B : W[0] */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          (__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? -(A*B) + C : 0 */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          (__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })

/* result[0] = mask ? -(W*X) + Y : Y[0]  (negating W keeps Y as passthrough) */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
          (__v2df) __X,
          (__v2df) __Y,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R)); })

/* result[0] = mask ? -(W*A) - B : W[0] */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
          -(__v2df) __A,
          -(__v2df) __B,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R)); })

/* result[0] = mask ? -(A*B) - C : 0 */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
          (__v2df) __B,
          -(__v2df) __C,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
8805
/* Zero-masked scalar double-precision FNMSUB (-(A*B) - C) with explicit
   rounding.  Fix: the rounding argument R was previously ignored -- the
   macro hard-coded _MM_FROUND_CUR_DIRECTION.  Now forwards (int)(R),
   matching the other *_round_sd macros in this family. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R)); })
8812
/* result[0] = mask ? -(W*X) - Y : Y[0].  Dedicated fnmsub builtin keeps
   the passthrough operand Y un-negated. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
          (__v2df) __X,
          (__v2df) (__Y),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}

/* Same as above with explicit rounding mode R. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R)); })
8828
/* Permute 64-bit lanes within each 256-bit half of X using the same
   2-bit selectors from immediate C for both halves (compile-time
   shuffle; C must be an integer constant expression). */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked: lanes with a clear mask bit take the element from W. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W)); })

/* Zero-masked: lanes with a clear mask bit are zeroed. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd()); })

/* Integer (i64) counterpart of _mm512_permutex_pd. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(X), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + (((C) >> 0) & 0x3), \
                                   0 + (((C) >> 2) & 0x3), \
                                   0 + (((C) >> 4) & 0x3), \
                                   0 + (((C) >> 6) & 0x3), \
                                   4 + (((C) >> 0) & 0x3), \
                                   4 + (((C) >> 2) & 0x3), \
                                   4 + (((C) >> 4) & 0x3), \
                                   4 + (((C) >> 6) & 0x3)); })

/* Merge-masked i64 permute. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W)); })

/* Zero-masked i64 permute. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512()); })
8872
/* Variable permute: result[i] = Y[X[i]] with per-element indices taken
   from __X.  Note the builtin takes (data, indices) in that order. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_undefined_pd (),
                 (__mmask8) -1);
}

/* Merge-masked variable permute of doubles; inactive lanes take __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) __W,
                 (__mmask8) __U);
}

/* Zero-masked variable permute of doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
                 (__v8di) __X,
                 (__v8df) _mm512_setzero_pd (),
                 (__mmask8) __U);
}

/* Zero-masked variable permute of i64 elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_setzero_si512 (),
                 __M);
}

/* Unmasked variable permute of i64 elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) _mm512_undefined_epi32 (),
                 (__mmask8) -1);
}

/* Merge-masked variable permute of i64 elements; inactive lanes take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
                 (__v8di) __X,
                 (__v8di) __W,
                 __M);
}

/* Unmasked variable permute of floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_undefined_ps (),
                (__mmask16) -1);
}

/* Merge-masked variable permute of floats; inactive lanes take __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) __W,
                (__mmask16) __U);
}

/* Zero-masked variable permute of floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
                (__v16si) __X,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U);
}

/* Zero-masked variable permute of i32 elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_setzero_si512 (),
                 __M);
}

/* Unmasked variable permute of i32 elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

/* Merge-masked variable permute of i32 elements; inactive lanes take __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
                 (__v16si) __X,
                 (__v16si) __W,
                 __M);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8986
/* 16-bit opmask (k-register) logical operations. */

/* Bitwise AND of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* (~A) & B. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise OR of two 16-bit masks. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* Returns nonzero iff A | B is all ones (KORTEST carry flag). */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* Returns nonzero iff A | B is zero (KORTEST zero flag). */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

/* Concatenate the low bytes of B and A into one 16-bit mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

/* ~(A ^ B). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* A ^ B. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}
9034
/* Non-temporal (cache-bypassing) 64-byte stores/loads.  __P should be
   64-byte aligned per the corresponding Intel intrinsics. */

/* Non-temporal store of a 512-bit integer vector. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_si512 (__m512i * __P, __m512i __A)
{
  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
}

/* Non-temporal load of a 512-bit integer vector. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_stream_load_si512 (void *__P)
{
  return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
}

/* Non-temporal store of 8 doubles. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_pd (double *__P, __m512d __A)
{
  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
}

/* Non-temporal store of 16 floats. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_stream_ps (float *__P, __m512 __A)
{
  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
}
9058
/* Compress intrinsics: pack the active (mask-bit-set) elements of __A
   contiguously into the low elements of the result; remaining elements
   come from __W (_mask) or are zeroed (_maskz). */

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
9126
/* Scalar compare-to-mask macros.  All forward to __builtin_ia32_cmpss_mask /
   __builtin_ia32_cmpsd_mask with predicate P.  The unmasked forms pass an
   all-ones mask ((__mmask8)-1); the masked forms pass M.  The *_round_*
   forms take an explicit rounding/SAE argument R, the others pass
   _MM_FROUND_CUR_DIRECTION. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R)); })

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R)); })

#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION); })
9170
/* movehdup duplicates the odd-indexed float elements (1,1,3,3,...),
   moveldup the even-indexed ones (0,0,2,2,...), via __builtin_shufflevector.
   The mask/maskz variants blend the shuffled result against __W or zero
   with __builtin_ia32_selectps_512. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
9216
9217 static __inline__ __m128 __DEFAULT_FN_ATTRS
9218 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9219 {
9220   __m128 res = __A; 
9221   res[0] = (__U & 1) ? __B[0] : __W[0];
9222   return res; 
9223 }
9224
9225 static __inline__ __m128 __DEFAULT_FN_ATTRS
9226 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9227 {
9228   __m128 res = __A; 
9229   res[0] = (__U & 1) ? __B[0] : 0; 
9230   return res; 
9231 }
9232
9233 static __inline__ __m128d __DEFAULT_FN_ATTRS
9234 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9235 {
9236   __m128d res = __A; 
9237   res[0] = (__U & 1) ? __B[0] : __W[0];
9238   return res; 
9239 }
9240
9241 static __inline__ __m128d __DEFAULT_FN_ATTRS
9242 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9243 {
9244   __m128d res = __A; 
9245   res[0] = (__U & 1) ? __B[0] : 0; 
9246   return res; 
9247 }
9248
/* Masked scalar stores: widen __A to a 512-bit vector and issue the masked
   store builtin with the mask restricted to bit 0 (only element 0 can be
   written).  The destination pointer is cast to the builtin's 512-bit
   vector pointer type. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v16sf *)__W,
                (__v16sf) _mm512_castps128_ps512(__A),
                (__mmask16) __U & (__mmask16)1);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v8df *)__W,
                (__v8df) _mm512_castpd128_pd512(__A),
                (__mmask8) __U & 1);
}
9264
/* Masked scalar loads.  The _mask forms first build a pass-through vector
   whose element 0 comes from __W and whose upper elements are zero, widen
   it to 512 bits, perform the masked load builtin with the mask restricted
   to bit 0, and return the low 128 bits of the result.  The _maskz forms
   use an all-zero pass-through instead. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
                                                0, 4, 4, 4);

  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_castps128_ps512(src),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128) __builtin_shufflevector(
                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
                                      (__v16sf) _mm512_setzero_ps(),
                                      (__mmask16) __U & 1),
                           _mm512_undefined_ps(), 0, 1, 2, 3);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df) {0.0, 0.0}, 0, 2);

  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_castpd128_pd512(src),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_shufflevector(
                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
                                      (__v8df) _mm512_setzero_pd(),
                                      (__mmask8) __U & 1),
                            _mm512_undefined_pd(), 0, 1);
}
9311
/* Shuffle the four 32-bit elements within each 128-bit lane of A.  Bits
   [1:0], [3:2], [5:4], [7:6] of the immediate I select the source element
   for positions 0..3 of every lane (lane bases 0, 4, 8, 12).  The mask and
   maskz variants blend against W or zero with __builtin_ia32_selectd_512. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })

#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })

#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
9341
/* Masked expand family: forwards __A, a pass-through operand (__W for the
   _mask forms, a zero vector for _maskz), and the write-mask __U to the
   corresponding __builtin_ia32_expand*512_mask builtin. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}
9365
9366 static __inline__ __m512i __DEFAULT_FN_ATTRS
9367 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9368 {
9369   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9370                 (__v8di) _mm512_setzero_pd (),
9371                 (__mmask8) __U);
9372 }
9373
/* Masked expand-load family: loads from unaligned memory __P through the
   __builtin_ia32_expandload*512_mask builtins, with __W (or zero for the
   _maskz forms) as the pass-through operand and __U as the write-mask. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}
9397
9398 static __inline__ __m512i __DEFAULT_FN_ATTRS
9399 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9400 {
9401   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9402               (__v8di) _mm512_setzero_pd(),
9403               (__mmask8) __U);
9404 }
9405
/* Masked expand-load, float / 32-bit integer forms: same pattern as the
   pd/epi64 group above (__W or zero as pass-through, __U as write-mask). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}
9429
9430 static __inline__ __m512i __DEFAULT_FN_ATTRS
9431 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9432 {
9433   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9434               (__v16si) _mm512_setzero_ps(),
9435               (__mmask16) __U);
9436 }
9437
/* Masked expand, float / 32-bit integer forms: same pattern as the
   pd/epi64 expand group (__W or zero as pass-through, __U as write-mask). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}
9461
9462 static __inline__ __m512i __DEFAULT_FN_ATTRS
9463 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9464 {
9465   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9466                 (__v16si) _mm512_setzero_ps(),
9467                 (__mmask16) __U);
9468 }
9469
/* float -> double conversion with explicit rounding argument R; the three
   forms differ only in the pass-through operand (undefined / W / zero)
   and the mask (-1 / U / U). */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
9484
/* Non-round forms of the ps -> pd conversion: same builtin as the macros
   above, with _MM_FROUND_CUR_DIRECTION as the rounding argument. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
                (__v8df)
                _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
9513
9514 static __inline__ __m512 __DEFAULT_FN_ATTRS
9515 _mm512_cvtpslo_pd (__m512 __A)
9516 {
9517   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9518 }
9519
9520 static __inline__ __m512 __DEFAULT_FN_ATTRS
9521 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9522 {
9523   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9524 }
9525
/* Masked register moves: per-element select between __A (mask bit set)
   and __W or zero (mask bit clear), via the selectpd/selectps builtins. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
9557
/* Masked compress-store family: forwards the (unaligned) destination
   pointer __P, source __A and write-mask __U to the
   __builtin_ia32_compressstore*512_mask builtins. */
static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
9585
/* Scalar double -> float conversion with explicit rounding argument R;
   the three forms differ only in pass-through (undefined / W / zero) and
   mask (-1 / U / U). */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
9603
/* Non-round forms of the masked sd -> ss conversion: same builtin as the
   macros above with _MM_FROUND_CUR_DIRECTION. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)(__W),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
                                             (__v2df)(__B),
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9621
/* The _i32/_i64 scalar conversion names are plain aliases for the
   corresponding _si32/_si64 intrinsics; the 64-bit forms exist only on
   x86-64. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9632
/* Signed integer -> scalar float/double conversion with explicit rounding
   argument.  The _roundi* and _roundsi* names are identical expansions of
   the same builtin; 64-bit forms are x86-64 only. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
#endif
9658
/* Scalar float -> double conversion with explicit rounding/SAE argument;
   forms differ only in pass-through (undefined / W / zero) and mask. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
9676
/* Non-round forms of the masked ss -> sd conversion: same builtin as the
   macros above with _MM_FROUND_CUR_DIRECTION. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)(__W),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
                                              (__v4sf)(__B),
                                              (__v2df)_mm_setzero_pd(),
                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
}
9694
/* Unsigned 32-bit integer -> scalar double; this builtin takes no rounding
   argument (unlike the 64-bit variant below). */
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu32_sd (__m128d __A, unsigned __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}
9700
/* x86-64 only: unsigned 64-bit integer -> scalar double, with explicit
   rounding (macro) or the current rounding mode (function). */
#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
9713
/* Unsigned 32-bit integer -> scalar float, with explicit rounding (macro)
   or the current rounding mode (function). */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
9724
/* x86-64 only: unsigned 64-bit integer -> scalar float, with explicit
   rounding (macro) or the current rounding mode (function). */
#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                _MM_FROUND_CUR_DIRECTION);
}
#endif
9737
/* Masked broadcast of a general-purpose-register scalar __A, with __O as
   the pass-through operand and __M as the write-mask.  The 64-bit form is
   x86-64 only. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
                 __M);
}

#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
                 __M);
}
#endif
9753
/* Build a 512-bit integer vector from 64 bytes.  Arguments are listed
   high element (__e63) to low element (__e0); the initializer stores them
   low-to-high, so the reversal here is intentional. */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
9777
/* Build a 512-bit integer vector from 32 shorts; arguments listed
   high-to-low, stored low-to-high (see _mm512_set_epi8). */
static  __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
9792
/* Build a 512-bit integer vector from 16 ints, arguments high-to-low; the
   setr macro takes them low-to-high and forwards reversed. */
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9808
/* Build a 512-bit integer vector from 8 long longs, arguments high-to-low;
   setr takes them low-to-high. */
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9820
/* Build a 512-bit double vector from 8 doubles, arguments high-to-low;
   setr takes them low-to-high. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9831
/* Build a 512-bit float vector from 16 floats, arguments high-to-low;
   setr takes them low-to-high. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9846
/* Absolute value of 16 packed floats: clear each element's sign bit by
   ANDing with 0x7FFFFFFF (bitwise, so NaN payloads are preserved). */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_abs_ps(__m512 __A)
{
  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9852
/* Masked absolute value of 16 packed floats: sign bit cleared where the
   corresponding bit of __K is set; elements from __W pass through elsewhere. */
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
{
  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
}
9858
/* Absolute value of 8 packed doubles: clear each element's sign bit by
   ANDing with 0x7FFFFFFFFFFFFFFF (bitwise, so NaN payloads are preserved). */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_abs_pd(__m512d __A)
{
  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
}
9864
/* Masked absolute value of 8 packed doubles: sign bit cleared where the
   corresponding bit of __K is set; elements from __W pass through elsewhere. */
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
{
  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
}
9870
9871 // Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
9872 // outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the result is
// independent of the order of the input elements of V.
9875
9876 // Used bisection method. At each step, we partition the vector with previous
9877 // step in half, and the operation is performed on its two halves.
9878 // This takes log2(n) steps where n is the number of elements in the vector.
9879
9880 // Vec512 - Vector with size 512.
9881 // Operator - Can be one of following: +,*,&,|
9882 // T2  - Can get 'i' for int and 'f' for float.
9883 // T1 - Can get 'i' for int and 'd' for double.
9884
// Reduce the eight 64-bit lanes of Vec512 to one scalar by repeated halving:
// combine the two 256-bit halves, then the two 128-bit halves, then the final
// two lanes.  This is a GNU statement expression that is expanded inside a
// function body; its trailing `return` returns from the *enclosing* intrinsic.
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
9912
// Horizontal reductions over all 64-bit lanes.  Each function's result is
// produced by the `return` inside _mm512_reduce_operator_64bit.

// Sum of the eight 64-bit integer elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}

// Product of the eight 64-bit integer elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}

// Bitwise AND of the eight 64-bit integer elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}

// Bitwise OR of the eight 64-bit integer elements of __W.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}

// Sum of the eight double elements of __W (tree order, not left-to-right).
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}

// Product of the eight double elements of __W (tree order, not left-to-right).
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
9936
9937 // Vec512 - Vector with size 512.
9938 // Vec512Neutral - All vector elements set to the identity element. 
9939 // Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9940 // Operator - Can be one of following: +,*,&,|
9941 // Mask - Intrinsic Mask
9942 // T2  - Can get 'i' for int and 'f' for float.
9943 // T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be 'pd' for packed double or 'q' for quad-word (64-bit integer).
9945
// Masked 64-bit reduction: first replace every lane whose Mask bit is clear
// with the operator's identity element (Vec512Neutral) via the select
// builtin, then perform the full unmasked reduction.
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)Mask,                                               \
                 (__v8d##T2)Vec512,                                            \
                 (__v8d##T2)Vec512Neutral);                                    \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9955
// Masked reductions: lanes not selected by __M are replaced with the
// operator's identity element (0 for +/|, 1 for *, all-ones for &)
// before reducing.

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}
9989
9990 // Vec512 - Vector with size 512.
9991 // Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
// T1 - Can get 'i' for int and ' ' (blank) for float. [__m256{i|}]
9994
// Reduce the sixteen 32-bit lanes of Vec512 to one scalar by repeated
// halving (512 -> 256 -> 128 -> 64 -> 32 bits of live data; -1 shuffle
// indices mark don't-care lanes).  Statement expression expanded inside a
// function body; its trailing `return` returns from the enclosing intrinsic.
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
10036
// Horizontal reductions over all 32-bit lanes.  Each function's result is
// produced by the `return` inside _mm512_reduce_operator_32bit.

// Sum of the sixteen 32-bit integer elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}

// Product of the sixteen 32-bit integer elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS 
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}

// Bitwise AND of the sixteen 32-bit integer elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS 
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}

// Bitwise OR of the sixteen 32-bit integer elements of __W.
static __inline__ int __DEFAULT_FN_ATTRS 
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}

// Sum of the sixteen float elements of __W (tree order, not left-to-right).
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}

// Product of the sixteen float elements of __W (tree order).
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}
10066
10067 // Vec512 - Vector with size 512.
10068 // Vec512Neutral - All vector elements set to the identity element. 
10069 // Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
10070 // Operator - Can be one of following: +,*,&,|
10071 // Mask - Intrinsic Mask
10072 // T2  - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' (blank) for float. [__m512{i|}]
// T3 - Can be 'ps' for packed single or 'd' for double-word (32-bit integer).
10075
// Masked 32-bit reduction: first replace every lane whose Mask bit is clear
// with the operator's identity element (Vec512Neutral) via the select
// builtin, then perform the full unmasked reduction.
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
10085
// Masked reductions: lanes not selected by __M are replaced with the
// operator's identity element (0 for +/|, 1 for *, all-ones for &)
// before reducing.

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
                                    i, i, d);
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
}

static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
}

static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
}
10116
10117 // Used bisection method. At each step, we partition the vector with previous
10118 // step in half, and the operation is performed on its two halves.
10119 // This takes log2(n) steps where n is the number of elements in the vector.
10120 // This macro uses only intrinsics from the AVX512F feature.
10121
10122 // Vec512 - Vector with size of 512.
10123 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
10124 //              __mm512_max_epi64
10125 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
10126 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10127
// Reduce the eight 64-bit lanes of Vec512 with a min/max intrinsic by
// repeated halving; only lane 0 holds the final result (-1 shuffle indices
// mark don't-care lanes).  Statement expression expanded inside a function
// body; its trailing `return` returns from the enclosing intrinsic.
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,     \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                1, -1, -1, -1, -1, -1, -1, -1))\
                                                ;                              \
    return Vec512[0];                                                          \
  })
10160
// Horizontal max/min over the eight 64-bit lanes.  Each function's result is
// produced by the `return` inside _mm512_reduce_maxMin_64bit.

static __inline__ long long __DEFAULT_FN_ATTRS 
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}

static __inline__ double __DEFAULT_FN_ATTRS 
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}

static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}

static __inline__ double __DEFAULT_FN_ATTRS 
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
10190
10191 // Vec512 - Vector with size 512.
10192 // Vec512Neutral - A 512 length vector with elements set to the identity element
10193 // Identity element: {max_epi,0x8000000000000000}
10194 //                   {max_epu,0x0000000000000000}
10195 //                   {max_pd, 0xFFF0000000000000}
10196 //                   {min_epi,0x7FFFFFFFFFFFFFFF}
10197 //                   {min_epu,0xFFFFFFFFFFFFFFFF}
10198 //                   {min_pd, 0x7FF0000000000000}
10199 //
10200 // IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
10201 //              __mm512_max_epi64
10202 // T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
10203 // T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10204 // T3 - Can get 'q' q word and 'pd' for packed double.
10205 //      [__builtin_ia32_select{q|pd}_512]
10206 // Mask - Intrinsic Mask
10207
// Masked 64-bit max/min reduction: first replace every lane whose Mask bit
// is clear with the identity element for the comparison (Vec512Neutral),
// then perform the full unmasked reduction.
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10217
// Masked max/min reductions: lanes not selected by __M are replaced with the
// comparison's identity (INT64_MIN / 0 / -inf for max; INT64_MAX /
// UINT64_MAX / +inf for min) before reducing.

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
                                  max_epi64, i, i, q, __M);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
                                  max_epu64, i, i, q, __M);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  /* Neutral element is -infinity (negated +inf). */
  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
                                  max_pd, d, f, pd, __M);
}

static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
                                  min_epi64, i, i, q, __M);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                  min_epu64, i, i, q, __M);
}

static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
                                  min_pd, d, f, pd, __M);
}
10253
10254 // Vec512 - Vector with size 512.
10255 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10256 //              __mm512_max_epi32
10257 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10258 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
10259
// Reduce the sixteen 32-bit lanes of Vec512 with a min/max intrinsic by
// repeated halving; only lane 0 holds the final result (-1 shuffle indices
// mark don't-care lanes).  Statement expression expanded inside a function
// body; its trailing `return` returns from the enclosing intrinsic.
#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0,  -1, -1, -1, -1, -1, -1, -1,              \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
10307
// Horizontal max/min over the sixteen 32-bit lanes.  Each function's result
// is produced by the `return` inside _mm512_reduce_maxMin_32bit.

static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
}

static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
}

static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu32(__m512i a) {
  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
}

static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
}
10333
10334 // Vec512 - Vector with size 512.
10335 // Vec512Neutral - A 512 length vector with elements set to the identity element
10336 // Identity element: {max_epi,0x80000000}
10337 //                   {max_epu,0x00000000}
10338 //                   {max_ps, 0xFF800000}
10339 //                   {min_epi,0x7FFFFFFF}
10340 //                   {min_epu,0xFFFFFFFF}
10341 //                   {min_ps, 0x7F800000}
10342 //
10343 // IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
10344 //              __mm512_max_epi32
10345 // T1 - Can get 'i' for int and ' ' .[__m512{i|}]
10346 // T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' for double-word and 'ps' for packed single.
//      [__builtin_ia32_select{d|ps}_512]
10349 // Mask - Intrinsic Mask
10350
// Masked 32-bit max/min reduction: first replace every lane whose Mask bit
// is clear with the identity element for the comparison (Vec512Neutral),
// then perform the full unmasked reduction.
#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)Mask,                       \
                                        (__v16s##T2)Vec512,                    \
                                        (__v16s##T2)Vec512Neutral);            \
   _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   })
10360
// Masked max/min reductions: lanes not selected by __M are replaced with the
// comparison's identity (INT32_MIN / 0 / -inf for max; INT32_MAX /
// UINT32_MAX / +inf for min) before reducing.

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
                                  i, i, d, __M);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
                                  i, i, d, __M);
}

static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  /* Neutral element is -infinity (negated +inf). */
  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
                                  ps, __M);
}

static __inline__ int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
                                  i, i, d, __M);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
                                  i, i, d, __M);
}

static __inline__ float __DEFAULT_FN_ATTRS
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
                                  ps, __M);
}
10396
10397 #undef __DEFAULT_FN_ATTRS
10398
10399 #endif // __AVX512FINTRIN_H