1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
13 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
18 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
20 #include <xmmintrin.h>
23 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
25 #include <emmintrin.h>
28 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
30 #include <pmmintrin.h>
33 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
35 #include <tmmintrin.h>
38 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
39 (defined(__SSE4_2__) || defined(__SSE4_1__))
40 #include <smmintrin.h>
43 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
44 (defined(__AES__) || defined(__PCLMUL__))
45 #include <wmmintrin.h>
48 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
49 defined(__CLFLUSHOPT__)
50 #include <clflushoptintrin.h>
53 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
55 #include <clwbintrin.h>
58 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
60 #include <avxintrin.h>
63 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
65 #include <avx2intrin.h>
68 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
70 #include <f16cintrin.h>
73 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
74 defined(__VPCLMULQDQ__)
75 #include <vpclmulqdqintrin.h>
78 /* No feature check desired due to internal checks */
79 #include <bmiintrin.h>
81 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
83 #include <bmi2intrin.h>
86 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
88 #include <lzcntintrin.h>
91 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
93 #include <popcntintrin.h>
96 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
98 #include <fmaintrin.h>
101 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
103 #include <avx512fintrin.h>
106 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
107 defined(__AVX512VL__)
108 #include <avx512vlintrin.h>
111 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
112 defined(__AVX512BW__)
113 #include <avx512bwintrin.h>
116 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
117 defined(__AVX512BITALG__)
118 #include <avx512bitalgintrin.h>
121 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
122 defined(__AVX512CD__)
123 #include <avx512cdintrin.h>
126 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
127 defined(__AVX512VPOPCNTDQ__)
128 #include <avx512vpopcntdqintrin.h>
131 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
132 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
133 #include <avx512vpopcntdqvlintrin.h>
136 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
137 defined(__AVX512VNNI__)
138 #include <avx512vnniintrin.h>
141 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
142 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
143 #include <avx512vlvnniintrin.h>
146 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
147 defined(__AVX512DQ__)
148 #include <avx512dqintrin.h>
151 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
152 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
153 #include <avx512vlbitalgintrin.h>
156 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
157 (defined(__AVX512VL__) && defined(__AVX512BW__))
158 #include <avx512vlbwintrin.h>
161 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
162 (defined(__AVX512VL__) && defined(__AVX512CD__))
163 #include <avx512vlcdintrin.h>
166 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
167 (defined(__AVX512VL__) && defined(__AVX512DQ__))
168 #include <avx512vldqintrin.h>
171 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
172 defined(__AVX512ER__)
173 #include <avx512erintrin.h>
176 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
177 defined(__AVX512IFMA__)
178 #include <avx512ifmaintrin.h>
181 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
182 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
183 #include <avx512ifmavlintrin.h>
186 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
187 defined(__AVX512VBMI__)
188 #include <avx512vbmiintrin.h>
191 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
192 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
193 #include <avx512vbmivlintrin.h>
196 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
197 defined(__AVX512VBMI2__)
198 #include <avx512vbmi2intrin.h>
201 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
202 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
203 #include <avx512vlvbmi2intrin.h>
206 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
207 defined(__AVX512PF__)
208 #include <avx512pfintrin.h>
211 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
212 defined(__AVX512BF16__)
213 #include <avx512bf16intrin.h>
216 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
217 (defined(__AVX512VL__) && defined(__AVX512BF16__))
218 #include <avx512vlbf16intrin.h>
221 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
223 #include <pkuintrin.h>
226 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
228 #include <vaesintrin.h>
231 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
233 #include <gfniintrin.h>
236 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
238 /// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
240 /// \headerfile <immintrin.h>
242 /// This intrinsic corresponds to the <c> RDPID </c> instruction.
/// The visible body simply returns the result of __builtin_ia32_rdpid() as an
/// unsigned int. The function is declared always-inline, nodebug, and with
/// the "rdpid" target attribute.
243 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
245 return __builtin_ia32_rdpid();
249 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
/* Passes __p straight to __builtin_ia32_rdrand16_step() and returns that
 * builtin's int result unchanged.
 * NOTE(review): presumably the result is a success flag and *__p receives the
 * 16-bit random value -- confirm against the RDRAND documentation. */
251 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
252 _rdrand16_step(unsigned short *__p)
254 return __builtin_ia32_rdrand16_step(__p);
/* Passes __p straight to __builtin_ia32_rdrand32_step() and returns that
 * builtin's int result unchanged.
 * NOTE(review): presumably the result is a success flag and *__p receives the
 * 32-bit random value -- confirm against the RDRAND documentation. */
257 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
258 _rdrand32_step(unsigned int *__p)
260 return __builtin_ia32_rdrand32_step(__p);
/* Passes __p straight to __builtin_ia32_rdrand64_step() and returns that
 * builtin's int result unchanged.
 * NOTE(review): presumably the result is a success flag and *__p receives the
 * 64-bit random value -- confirm against the RDRAND documentation. */
264 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
265 _rdrand64_step(unsigned long long *__p)
267 return __builtin_ia32_rdrand64_step(__p);
270 #endif /* __RDRND__ */
272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
273 defined(__FSGSBASE__)
/* Takes no arguments and returns the result of __builtin_ia32_rdfsbase32()
 * as an unsigned int. Declared with the "fsgsbase" target attribute. */
275 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
276 _readfsbase_u32(void)
278 return __builtin_ia32_rdfsbase32();
/* Takes no arguments and returns the result of __builtin_ia32_rdfsbase64()
 * as an unsigned long long. Declared with the "fsgsbase" target attribute. */
281 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
282 _readfsbase_u64(void)
284 return __builtin_ia32_rdfsbase64();
/* Takes no arguments and returns the result of __builtin_ia32_rdgsbase32()
 * as an unsigned int. Declared with the "fsgsbase" target attribute. */
287 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
288 _readgsbase_u32(void)
290 return __builtin_ia32_rdgsbase32();
/* Takes no arguments and returns the result of __builtin_ia32_rdgsbase64()
 * as an unsigned long long. Declared with the "fsgsbase" target attribute. */
293 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
294 _readgsbase_u64(void)
296 return __builtin_ia32_rdgsbase64();
/* Forwards the 32-bit value __V to __builtin_ia32_wrfsbase32(); returns
 * nothing. Declared with the "fsgsbase" target attribute. */
299 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
300 _writefsbase_u32(unsigned int __V)
302 __builtin_ia32_wrfsbase32(__V);
/* Forwards the 64-bit value __V to __builtin_ia32_wrfsbase64(); returns
 * nothing. Declared with the "fsgsbase" target attribute. */
305 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
306 _writefsbase_u64(unsigned long long __V)
308 __builtin_ia32_wrfsbase64(__V);
/* Forwards the 32-bit value __V to __builtin_ia32_wrgsbase32(); returns
 * nothing. Declared with the "fsgsbase" target attribute. */
311 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
312 _writegsbase_u32(unsigned int __V)
314 __builtin_ia32_wrgsbase32(__V);
/* Forwards the 64-bit value __V to __builtin_ia32_wrgsbase64(); returns
 * nothing. Declared with the "fsgsbase" target attribute. */
317 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
318 _writegsbase_u64(unsigned long long __V)
320 __builtin_ia32_wrgsbase64(__V);
324 #endif /* __FSGSBASE__ */
326 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
329 /* The structs used below force the load/store to be treated as unaligned.
330 * This is accomplished with the __packed__ attribute. The __may_alias__
331 * attribute prevents TBAA (type-based alias analysis) metadata from being
332 * generated based on the struct and the type of the field inside of it.
/* Reads the __v member through __P reinterpreted as a pointer to a local
 * packed, may_alias struct (struct __loadu_i16) and returns that value with
 * its bytes swapped by __builtin_bswap16(). The pointee is treated as const.
 * Declared with the "movbe" target attribute. */
335 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
336 _loadbe_i16(void const * __P) {
339 } __attribute__((__packed__, __may_alias__));
340 return __builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
/* Byte-swaps __D with __builtin_bswap16() and stores the result into the __v
 * member of __P, reinterpreted as a pointer to the local packed, may_alias
 * struct __storeu_i16. Returns nothing. Declared with the "movbe" target
 * attribute. */
343 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
344 _storebe_i16(void * __P, short __D) {
345 struct __storeu_i16 {
347 } __attribute__((__packed__, __may_alias__));
348 ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
/* Reads the __v member through __P reinterpreted as a pointer to a local
 * packed, may_alias struct (struct __loadu_i32) and returns that value with
 * its bytes swapped by __builtin_bswap32(). The pointee is treated as const.
 * Declared with the "movbe" target attribute. */
351 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
352 _loadbe_i32(void const * __P) {
355 } __attribute__((__packed__, __may_alias__));
356 return __builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
/* Byte-swaps __D with __builtin_bswap32() and stores the result into the __v
 * member of __P, reinterpreted as a pointer to the local packed, may_alias
 * struct __storeu_i32. Returns nothing. Declared with the "movbe" target
 * attribute. */
359 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
360 _storebe_i32(void * __P, int __D) {
361 struct __storeu_i32 {
363 } __attribute__((__packed__, __may_alias__));
364 ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
/* Reads the __v member through __P reinterpreted as a pointer to a local
 * packed, may_alias struct (struct __loadu_i64) and returns that value with
 * its bytes swapped by __builtin_bswap64(). The pointee is treated as const.
 * Declared with the "movbe" target attribute. */
368 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
369 _loadbe_i64(void const * __P) {
372 } __attribute__((__packed__, __may_alias__));
373 return __builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
/* Byte-swaps __D with __builtin_bswap64() and stores the result into the __v
 * member of __P, reinterpreted as a pointer to the local packed, may_alias
 * struct __storeu_i64. Returns nothing. Declared with the "movbe" target
 * attribute. */
376 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
377 _storebe_i64(void * __P, long long __D) {
378 struct __storeu_i64 {
380 } __attribute__((__packed__, __may_alias__));
381 ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
386 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
388 #include <rtmintrin.h>
389 #include <xtestintrin.h>
392 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
394 #include <shaintrin.h>
397 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
399 #include <fxsrintrin.h>
402 /* No feature check desired due to internal MSC_VER checks */
403 #include <xsaveintrin.h>
405 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
406 defined(__XSAVEOPT__)
407 #include <xsaveoptintrin.h>
410 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
412 #include <xsavecintrin.h>
415 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
417 #include <xsavesintrin.h>
420 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
422 #include <cetintrin.h>
425 /* Some intrinsics inside adxintrin.h are available only on processors with ADX,
426 * whereas others are available at all times, so it is included unconditionally. */
427 #include <adxintrin.h>
429 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
431 #include <rdseedintrin.h>
434 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
435 defined(__WBNOINVD__)
436 #include <wbnoinvdintrin.h>
439 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
440 defined(__CLDEMOTE__)
441 #include <cldemoteintrin.h>
444 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
446 #include <waitpkgintrin.h>
449 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
450 defined(__MOVDIRI__) || defined(__MOVDIR64B__)
451 #include <movdirintrin.h>
454 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
456 #include <pconfigintrin.h>
459 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
461 #include <sgxintrin.h>
464 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
466 #include <ptwriteintrin.h>
469 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
471 #include <invpcidintrin.h>
474 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
475 defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__)
476 #include <amxintrin.h>
479 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
480 defined(__AVX512VP2INTERSECT__)
481 #include <avx512vp2intersectintrin.h>
484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
485 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
486 #include <avx512vlvp2intersectintrin.h>
489 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
491 #include <enqcmdintrin.h>
494 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
495 defined(__SERIALIZE__)
496 #include <serializeintrin.h>
499 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
500 defined(__TSXLDTRK__)
501 #include <tsxldtrkintrin.h>
504 #if defined(_MSC_VER) && __has_extension(gnu_asm)
505 /* Define the default attributes for these intrinsics */
506 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
510 /*----------------------------------------------------------------------------*\
511 |* Interlocked Exchange HLE
512 \*----------------------------------------------------------------------------*/
513 #if defined(__i386__) || defined(__x86_64__)
/* Exchanges _Value with *_Target using a `lock xchg` instruction preceded by
 * the raw prefix byte 0xf2. _Value is a read-write register operand ("+r")
 * and *_Target a read-write memory operand ("+m"); the asm is volatile and
 * declares a "memory" clobber. The return statement is not visible in this
 * excerpt. */
514 static __inline__ long __DEFAULT_FN_ATTRS
515 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
516 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
517 : "+r" (_Value), "+m" (*_Target) :: "memory");
/* Exchanges _Value with *_Target using a `lock xchg` instruction preceded by
 * the raw prefix byte 0xf3. _Value is a read-write register operand ("+r")
 * and *_Target a read-write memory operand ("+m"); the asm is volatile and
 * declares a "memory" clobber. The return statement is not visible in this
 * excerpt. */
520 static __inline__ long __DEFAULT_FN_ATTRS
521 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
522 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
523 : "+r" (_Value), "+m" (*_Target) :: "memory");
527 #if defined(__x86_64__)
/* 64-bit (__int64) variant: exchanges _Value with *_Target using a
 * `lock xchg` instruction preceded by the raw prefix byte 0xf2. _Value is a
 * read-write register operand ("+r") and *_Target a read-write memory operand
 * ("+m"); the asm is volatile and declares a "memory" clobber. The return
 * statement is not visible in this excerpt. */
528 static __inline__ __int64 __DEFAULT_FN_ATTRS
529 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
530 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
531 : "+r" (_Value), "+m" (*_Target) :: "memory");
/* 64-bit (__int64) variant: exchanges _Value with *_Target using a
 * `lock xchg` instruction preceded by the raw prefix byte 0xf3. _Value is a
 * read-write register operand ("+r") and *_Target a read-write memory operand
 * ("+m"); the asm is volatile and declares a "memory" clobber. The return
 * statement is not visible in this excerpt. */
534 static __inline__ __int64 __DEFAULT_FN_ATTRS
535 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
536 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
537 : "+r" (_Value), "+m" (*_Target) :: "memory");
541 /*----------------------------------------------------------------------------*\
542 |* Interlocked Compare Exchange HLE
543 \*----------------------------------------------------------------------------*/
544 #if defined(__i386__) || defined(__x86_64__)
/* Runs a `lock cmpxchg` instruction, preceded by the raw prefix byte 0xf2,
 * against *_Destination. _Comparand is bound read-write to the "a" register
 * constraint, *_Destination is a read-write memory operand, and _Exchange is
 * a register input. The asm is volatile and declares a "memory" clobber. The
 * return statement is not visible in this excerpt. */
545 static __inline__ long __DEFAULT_FN_ATTRS
546 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
547 long _Exchange, long _Comparand) {
548 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
549 : "+a" (_Comparand), "+m" (*_Destination)
550 : "r" (_Exchange) : "memory");
/* Runs a `lock cmpxchg` instruction, preceded by the raw prefix byte 0xf3,
 * against *_Destination. _Comparand is bound read-write to the "a" register
 * constraint, *_Destination is a read-write memory operand, and _Exchange is
 * a register input. The asm is volatile and declares a "memory" clobber. The
 * return statement is not visible in this excerpt. */
553 static __inline__ long __DEFAULT_FN_ATTRS
554 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
555 long _Exchange, long _Comparand) {
556 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
557 : "+a" (_Comparand), "+m" (*_Destination)
558 : "r" (_Exchange) : "memory");
562 #if defined(__x86_64__)
/* 64-bit (__int64) variant: runs a `lock cmpxchg` instruction, preceded by
 * the raw prefix byte 0xf2, against *_Destination. _Comparand is bound
 * read-write to the "a" register constraint, *_Destination is a read-write
 * memory operand, and _Exchange is a register input. The asm is volatile and
 * declares a "memory" clobber. The return statement is not visible in this
 * excerpt. */
563 static __inline__ __int64 __DEFAULT_FN_ATTRS
564 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
565 __int64 _Exchange, __int64 _Comparand) {
566 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
567 : "+a" (_Comparand), "+m" (*_Destination)
568 : "r" (_Exchange) : "memory");
/* 64-bit (__int64) variant: runs a `lock cmpxchg` instruction, preceded by
 * the raw prefix byte 0xf3, against *_Destination. _Comparand is bound
 * read-write to the "a" register constraint, *_Destination is a read-write
 * memory operand, and _Exchange is a register input. The asm is volatile and
 * declares a "memory" clobber. The return statement is not visible in this
 * excerpt. */
571 static __inline__ __int64 __DEFAULT_FN_ATTRS
572 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
573 __int64 _Exchange, __int64 _Comparand) {
574 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
575 : "+a" (_Comparand), "+m" (*_Destination)
576 : "r" (_Exchange) : "memory");
584 #undef __DEFAULT_FN_ATTRS
586 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
588 #endif /* __IMMINTRIN_H */