contrib/llvm/tools/clang/lib/Headers/immintrin.h

   1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining a copy
   4  * of this software and associated documentation files (the "Software"), to deal
   5  * in the Software without restriction, including without limitation the rights
   6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   7  * copies of the Software, and to permit persons to whom the Software is
   8  * furnished to do so, subject to the following conditions:
   9  *
  10  * The above copyright notice and this permission notice shall be included in
  11  * all copies or substantial portions of the Software.
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19  * THE SOFTWARE.
  20  *
  21  *===-----------------------------------------------------------------------===
  22  */
  23
  24 #ifndef __IMMINTRIN_H
  25 #define __IMMINTRIN_H
  26
  27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
  28 #include <mmintrin.h>
  29 #endif
  30
  31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
  32 #include <xmmintrin.h>
  33 #endif
  34
  35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
  36 #include <emmintrin.h>
  37 #endif
  38
  39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
  40 #include <pmmintrin.h>
  41 #endif
  42
  43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
  44 #include <tmmintrin.h>
  45 #endif
  46
  47 #if !defined(_MSC_VER) || __has_feature(modules) || \
  48     (defined(__SSE4_2__) || defined(__SSE4_1__))
  49 #include <smmintrin.h>
  50 #endif
  51
  52 #if !defined(_MSC_VER) || __has_feature(modules) || \
  53     (defined(__AES__) || defined(__PCLMUL__))
  54 #include <wmmintrin.h>
  55 #endif
  56
  57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
  58 #include <clflushoptintrin.h>
  59 #endif
  60
  61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
  62 #include <clwbintrin.h>
  63 #endif
  64
  65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
  66 #include <avxintrin.h>
  67 #endif
  68
  69 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
  70 #include <avx2intrin.h>
  71
  72 /* The 256-bit versions of functions in f16cintrin.h.
  73    Intel documents these as being in immintrin.h, and
  74    they depend on typedefs from avxintrin.h. */
  75
  76 /// \brief Converts a 256-bit vector of [8 x float] into a 128-bit vector
  77 ///    containing 16-bit half-precision float values.
  78 ///
  79 /// \headerfile <x86intrin.h>
  80 ///
  81 /// \code
  82 /// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
  83 /// \endcode
  84 ///
  85 /// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
  86 ///
  87 /// \param a
  88 ///    A 256-bit vector containing 32-bit single-precision float values to be
  89 ///    converted to 16-bit half-precision float values.
  90 /// \param imm
  91 ///    An immediate value controlling rounding using bits [2:0]: \n
  92 ///    000: Nearest \n
  93 ///    001: Down \n
  94 ///    010: Up \n
  95 ///    011: Truncate \n
  96 ///    1XX: Use MXCSR.RC for rounding
  97 /// \returns A 128-bit vector containing the converted 16-bit half-precision
  98 ///    float values.
  99 #define _mm256_cvtps_ph(a, imm) __extension__ ({ \
 100  (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
 101
 102 /// \brief Converts a 128-bit vector containing 16-bit half-precision float
 103 ///    values into a 256-bit vector of [8 x float].
 104 ///
 105 /// \headerfile <x86intrin.h>
 106 ///
 107 /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
 108 ///
 109 /// \param __a
 110 ///    A 128-bit vector containing 16-bit half-precision float values to be
 111 ///    converted to 32-bit single-precision float values.
 112 /// \returns A vector of [8 x float] containing the converted 32-bit
 113 ///    single-precision float values.
 114 static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
 115 _mm256_cvtph_ps(__m128i __a)
 116 {
 117   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
 118 }
 119 #endif /* __AVX2__ */
 120
 121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
 122 #include <bmiintrin.h>
 123 #endif
 124
 125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
 126 #include <bmi2intrin.h>
 127 #endif
 128
 129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
 130 #include <lzcntintrin.h>
 131 #endif
 132
 133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
 134 #include <fmaintrin.h>
 135 #endif
 136
 137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
 138 #include <avx512fintrin.h>
 139 #endif
 140
 141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
 142 #include <avx512vlintrin.h>
 143 #endif
 144
 145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
 146 #include <avx512bwintrin.h>
 147 #endif
 148
 149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
 150 #include <avx512cdintrin.h>
 151 #endif
 152
 153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
 154 #include <avx512vpopcntdqintrin.h>
 155 #endif
 156
 157 #if !defined(_MSC_VER) || __has_feature(modules) || \
 158     (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
 159 #include <avx512vpopcntdqvlintrin.h>
 160 #endif
 161
 162 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
 163 #include <avx512dqintrin.h>
 164 #endif
 165
 166 #if !defined(_MSC_VER) || __has_feature(modules) || \
 167     (defined(__AVX512VL__) && defined(__AVX512BW__))
 168 #include <avx512vlbwintrin.h>
 169 #endif
 170
 171 #if !defined(_MSC_VER) || __has_feature(modules) || \
 172     (defined(__AVX512VL__) && defined(__AVX512CD__))
 173 #include <avx512vlcdintrin.h>
 174 #endif
 175
 176 #if !defined(_MSC_VER) || __has_feature(modules) || \
 177     (defined(__AVX512VL__) && defined(__AVX512DQ__))
 178 #include <avx512vldqintrin.h>
 179 #endif
 180
 181 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
 182 #include <avx512erintrin.h>
 183 #endif
 184
 185 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
 186 #include <avx512ifmaintrin.h>
 187 #endif
 188
 189 #if !defined(_MSC_VER) || __has_feature(modules) || \
 190     (defined(__AVX512IFMA__) && defined(__AVX512VL__))
 191 #include <avx512ifmavlintrin.h>
 192 #endif
 193
 194 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
 195 #include <avx512vbmiintrin.h>
 196 #endif
 197
 198 #if !defined(_MSC_VER) || __has_feature(modules) || \
 199     (defined(__AVX512VBMI__) && defined(__AVX512VL__))
 200 #include <avx512vbmivlintrin.h>
 201 #endif
 202
 203 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
 204 #include <avx512pfintrin.h>
 205 #endif
 206
 207 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
 208 #include <pkuintrin.h>
 209 #endif
 210
 211 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
 212 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
 213 _rdrand16_step(unsigned short *__p)
 214 {
 215   return __builtin_ia32_rdrand16_step(__p);
 216 }
 217
 218 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
 219 _rdrand32_step(unsigned int *__p)
 220 {
 221   return __builtin_ia32_rdrand32_step(__p);
 222 }
 223
 224 #ifdef __x86_64__
 225 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
 226 _rdrand64_step(unsigned long long *__p)
 227 {
 228   return __builtin_ia32_rdrand64_step(__p);
 229 }
 230 #endif
 231 #endif /* __RDRND__ */
 232
 233 /* __bit_scan_forward */
 234 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 235 _bit_scan_forward(int __A) {
 236   return __builtin_ctz(__A);
 237 }
 238
 239 /* __bit_scan_reverse */
 240 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 241 _bit_scan_reverse(int __A) {
 242   return 31 - __builtin_clz(__A);
 243 }
 244
 245 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
 246 #ifdef __x86_64__
 247 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 248 _readfsbase_u32(void)
 249 {
 250   return __builtin_ia32_rdfsbase32();
 251 }
 252
 253 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 254 _readfsbase_u64(void)
 255 {
 256   return __builtin_ia32_rdfsbase64();
 257 }
 258
 259 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 260 _readgsbase_u32(void)
 261 {
 262   return __builtin_ia32_rdgsbase32();
 263 }
 264
 265 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 266 _readgsbase_u64(void)
 267 {
 268   return __builtin_ia32_rdgsbase64();
 269 }
 270
 271 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 272 _writefsbase_u32(unsigned int __V)
 273 {
 274   return __builtin_ia32_wrfsbase32(__V);
 275 }
 276
 277 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 278 _writefsbase_u64(unsigned long long __V)
 279 {
 280   return __builtin_ia32_wrfsbase64(__V);
 281 }
 282
 283 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 284 _writegsbase_u32(unsigned int __V)
 285 {
 286   return __builtin_ia32_wrgsbase32(__V);
 287 }
 288
 289 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
 290 _writegsbase_u64(unsigned long long __V)
 291 {
 292   return __builtin_ia32_wrgsbase64(__V);
 293 }
 294
 295 #endif
 296 #endif /* __FSGSBASE__ */
 297
 298 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
 299 #include <rtmintrin.h>
 300 #include <xtestintrin.h>
 301 #endif
 302
 303 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
 304 #include <shaintrin.h>
 305 #endif
 306
 307 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
 308 #include <fxsrintrin.h>
 309 #endif
 310
 311 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
 312 #include <xsaveintrin.h>
 313 #endif
 314
 315 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
 316 #include <xsaveoptintrin.h>
 317 #endif
 318
 319 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
 320 #include <xsavecintrin.h>
 321 #endif
 322
 323 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
 324 #include <xsavesintrin.h>
 325 #endif
 326
 327 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
 328 #include <cetintrin.h>
 329 #endif
 330
/* Some intrinsics in adxintrin.h require a processor with ADX, whereas
 * others are available at all times, so the header is included
 * unconditionally. */
 333 #include <adxintrin.h>
 334
 335 #endif /* __IMMINTRIN_H */