1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 *===-----------------------------------------------------------------------===
27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
32 #include <xmmintrin.h>
35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
36 #include <emmintrin.h>
39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
40 #include <pmmintrin.h>
43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
44 #include <tmmintrin.h>
47 #if !defined(_MSC_VER) || __has_feature(modules) || \
48 (defined(__SSE4_2__) || defined(__SSE4_1__))
49 #include <smmintrin.h>
52 #if !defined(_MSC_VER) || __has_feature(modules) || \
53 (defined(__AES__) || defined(__PCLMUL__))
54 #include <wmmintrin.h>
57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
58 #include <clflushoptintrin.h>
61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
62 #include <clwbintrin.h>
65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
66 #include <avxintrin.h>
69 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
70 #include <avx2intrin.h>
72 /* The 256-bit versions of functions in f16cintrin.h.
73 Intel documents these as being in immintrin.h, and
74 they depend on typedefs from avxintrin.h. */
/// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
/* Written as a plain parenthesised expression (not a statement expression)
   so it can be used anywhere an expression is allowed. It stays a macro
   because imm is passed straight through to the builtin. */
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
102 /// \brief Converts a 128-bit vector containing 16-bit half-precision float
103 /// values into a 256-bit vector of [8 x float].
105 /// \headerfile <x86intrin.h>
107 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
110 /// A 128-bit vector containing 16-bit half-precision float values to be
111 /// converted to 32-bit single-precision float values.
112 /// \returns A vector of [8 x float] containing the converted 32-bit
113 /// single-precision float values.
114 static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
115 _mm256_cvtph_ps(__m128i __a)
117 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
119 #endif /* __AVX2__ */
121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
122 #include <bmiintrin.h>
125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
126 #include <bmi2intrin.h>
129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
130 #include <lzcntintrin.h>
133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
134 #include <fmaintrin.h>
137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
138 #include <avx512fintrin.h>
141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
142 #include <avx512vlintrin.h>
145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
146 #include <avx512bwintrin.h>
149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
150 #include <avx512cdintrin.h>
153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
154 #include <avx512vpopcntdqintrin.h>
157 #if !defined(_MSC_VER) || __has_feature(modules) || \
158 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
159 #include <avx512vpopcntdqvlintrin.h>
162 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
163 #include <avx512dqintrin.h>
166 #if !defined(_MSC_VER) || __has_feature(modules) || \
167 (defined(__AVX512VL__) && defined(__AVX512BW__))
168 #include <avx512vlbwintrin.h>
171 #if !defined(_MSC_VER) || __has_feature(modules) || \
172 (defined(__AVX512VL__) && defined(__AVX512CD__))
173 #include <avx512vlcdintrin.h>
176 #if !defined(_MSC_VER) || __has_feature(modules) || \
177 (defined(__AVX512VL__) && defined(__AVX512DQ__))
178 #include <avx512vldqintrin.h>
181 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
182 #include <avx512erintrin.h>
185 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
186 #include <avx512ifmaintrin.h>
189 #if !defined(_MSC_VER) || __has_feature(modules) || \
190 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
191 #include <avx512ifmavlintrin.h>
194 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
195 #include <avx512vbmiintrin.h>
198 #if !defined(_MSC_VER) || __has_feature(modules) || \
199 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
200 #include <avx512vbmivlintrin.h>
203 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
204 #include <avx512pfintrin.h>
207 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
208 #include <pkuintrin.h>
211 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/* Requests a 16-bit hardware random value through
 * __builtin_ia32_rdrand16_step, which writes it to *__p.
 * The builtin's status is returned unchanged; per the Intel RDRAND intrinsic
 * contract this is 1 when a value was delivered and 0 otherwise, so callers
 * should check it before using *__p. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}
/* Requests a 32-bit hardware random value through
 * __builtin_ia32_rdrand32_step, which writes it to *__p.
 * The builtin's status is returned unchanged; per the Intel RDRAND intrinsic
 * contract this is 1 when a value was delivered and 0 otherwise, so callers
 * should check it before using *__p. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
/* The 64-bit RDRAND builtin exists only when targeting x86-64, so the
 * definition is compiled out elsewhere instead of breaking 32-bit builds. */
#ifdef __x86_64__
/* Requests a 64-bit hardware random value through
 * __builtin_ia32_rdrand64_step, which writes it to *__p.
 * The builtin's status is returned unchanged; per the Intel RDRAND intrinsic
 * contract this is 1 when a value was delivered and 0 otherwise, so callers
 * should check it before using *__p. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif /* __x86_64__ */
231 #endif /* __RDRND__ */
/* _bit_scan_forward
 *
 * Returns the zero-based index of the least-significant set bit of __A,
 * computed as the count of trailing zero bits.
 * The result is undefined when __A is 0, because __builtin_ctz is undefined
 * for a zero argument. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
  /* __builtin_ctz takes an unsigned int; make the conversion explicit. */
  return __builtin_ctz((unsigned int)__A);
}
/* _bit_scan_reverse
 *
 * Returns the zero-based index of the most-significant set bit of __A,
 * computed as 31 minus the count of leading zero bits.
 * The result is undefined when __A is 0, because __builtin_clz is undefined
 * for a zero argument. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
  /* __builtin_clz takes an unsigned int; make the conversion explicit. */
  return 31 - __builtin_clz((unsigned int)__A);
}
245 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
/* FS/GS base accessors.
 *
 * Each function is a thin wrapper over the matching
 * __builtin_ia32_{rd,wr}{fs,gs}base{32,64} builtin. These builtins are only
 * usable when targeting x86-64, so the whole family is compiled out
 * elsewhere. */
#ifdef __x86_64__

/* Returns the FS base as read by the 32-bit RDFSBASE builtin. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

/* Returns the FS base as read by the 64-bit RDFSBASE builtin. */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

/* Returns the GS base as read by the 32-bit RDGSBASE builtin. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

/* Returns the GS base as read by the 64-bit RDGSBASE builtin. */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

/* Passes __V to the 32-bit WRFSBASE builtin. Nothing is returned: a void
 * function must not use `return` with an expression. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

/* Passes __V to the 64-bit WRFSBASE builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

/* Passes __V to the 32-bit WRGSBASE builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

/* Passes __V to the 64-bit WRGSBASE builtin. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}

#endif /* __x86_64__ */
296 #endif /* __FSGSBASE__ */
298 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
299 #include <rtmintrin.h>
300 #include <xtestintrin.h>
303 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
304 #include <shaintrin.h>
307 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
308 #include <fxsrintrin.h>
311 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
312 #include <xsaveintrin.h>
315 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
316 #include <xsaveoptintrin.h>
319 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
320 #include <xsavecintrin.h>
323 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
324 #include <xsavesintrin.h>
327 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
328 #include <cetintrin.h>
331 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
332 * whereas others are also available at all times. */
333 #include <adxintrin.h>
335 #endif /* __IMMINTRIN_H */