contrib/llvm/lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This is a target description file for the Intel i386 architecture, referred
  11 // to here as the "X86" architecture.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 // Get the target-independent interfaces which we are implementing...
  16 //
  17 include "llvm/Target/Target.td"
  18
  19 //===----------------------------------------------------------------------===//
  20 // X86 Subtarget state
  21 // Operating-mode selectors: each record sets the correspondingly named
  22 // In*BitMode subtarget field to "true".
  23 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
  24                                   "64-bit mode (x86_64)">;
  25 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
  26                                   "32-bit mode (80386)">;
  27 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
  28                                   "16-bit mode (i8086)">;
  29
  30 //===----------------------------------------------------------------------===//
  31 // X86 Subtarget features
  32 //===----------------------------------------------------------------------===//
  33
  34 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
  35                                       "Enable X87 float instructions">;
  36
  37 def FeatureNOPL    : SubtargetFeature<"nopl", "HasNOPL", "true",
  38                                       "Enable NOPL instruction">;
  39
  40 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
  41                                       "Enable conditional move instructions">;
  42
  43 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
  44                                        "Support POPCNT instruction">;
  45
  46 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
  47                                       "Support fxsave/fxrestore instructions">;
  48
  49 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
  50                                        "Support xsave instructions">;
  51
  52 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
  53                                        "Support xsaveopt instructions">;
  54
  55 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
  56                                        "Support xsavec instructions">;
  57
  58 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
  59                                        "Support xsaves instructions">;
  60
  61 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
  62                                       "Enable SSE instructions">;
  63 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
  64                                       "Enable SSE2 instructions",
  65                                       [FeatureSSE1]>;
  66 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
  67                                       "Enable SSE3 instructions",
  68                                       [FeatureSSE2]>;
  69 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
  70                                       "Enable SSSE3 instructions",
  71                                       [FeatureSSE3]>;
  72 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
  73                                       "Enable SSE 4.1 instructions",
  74                                       [FeatureSSSE3]>;
  75 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
  76                                       "Enable SSE 4.2 instructions",
  77                                       [FeatureSSE41]>;
  78 // The MMX subtarget feature is separate from the rest of the SSE features
  79 // because it's important (for odd compatibility reasons) to be able to
  80 // turn it off explicitly while allowing SSE+ to be on.
  81 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
  82                                       "Enable MMX instructions">;
  83 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
  84                                       "Enable 3DNow! instructions",
  85                                       [FeatureMMX]>;
  86 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
  87                                       "Enable 3DNow! Athlon instructions",
  88                                       [Feature3DNow]>;
  89 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
  90 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
  91 // without disabling 64-bit mode. Nothing should imply this feature bit. It
  92 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
  93 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
  94                                       "Support 64-bit instructions">;
  95 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
  96                                       "64-bit with cmpxchg16b">;
  97 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
  98                                        "SHLD instruction is slow">;
  99 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
 100                                         "PMULLD instruction is slow">;
 101 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
 102                                           "true",
 103                                           "PMADDWD is slower than PMULLD">;
 104 // FIXME: This should not apply to CPUs that do not have SSE.
 105 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
 106                                 "IsUAMem16Slow", "true",
 107                                 "Slow unaligned 16-byte memory access">;
 108 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
 109                                 "IsUAMem32Slow", "true",
 110                                 "Slow unaligned 32-byte memory access">;
 111 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
 112                                       "Support SSE 4a instructions",
 113                                       [FeatureSSE3]>;
 114
 115 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 116                                       "Enable AVX instructions",
 117                                       [FeatureSSE42]>;
 118 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
 119                                       "Enable AVX2 instructions",
 120                                       [FeatureAVX]>;
 121 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 122                                       "Enable three-operand fused multiply-add",
 123                                       [FeatureAVX]>; // turning on fma also turns on AVX
 124 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
 125                        "Support 16-bit floating point conversion instructions",
 126                        [FeatureAVX]>;
 127 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
 128                                       "Enable AVX-512 instructions",
 129                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
 130 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
 131                       "Enable AVX-512 Exponential and Reciprocal Instructions",
 132                                       [FeatureAVX512]>;
 133 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 134                       "Enable AVX-512 Conflict Detection Instructions",
 135                                       [FeatureAVX512]>;
 136 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
 137                        "true", "Enable AVX-512 Population Count Instructions",
 138                                       [FeatureAVX512]>;
 139 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
 140                       "Enable AVX-512 PreFetch Instructions",
 141                                       [FeatureAVX512]>;
 142 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
 143                                    "true",
 144                                    "Prefetch with Intent to Write and T1 Hint">;
 145 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
 146                       "Enable AVX-512 Doubleword and Quadword Instructions",
 147                                       [FeatureAVX512]>;
 148 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
 149                       "Enable AVX-512 Byte and Word Instructions",
 150                                       [FeatureAVX512]>;
 151 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
 152                       "Enable AVX-512 Vector Length eXtensions",
 153                                       [FeatureAVX512]>;
 154 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
 155                       "Enable AVX-512 Vector Byte Manipulation Instructions",
 156                                       [FeatureBWI]>;
 157 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
 158                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
 159                                       [FeatureBWI]>;
 160 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 161                       "Enable AVX-512 Integer Fused Multiply-Add",
 162                                       [FeatureAVX512]>; // also turns on avx512f
 163 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 164                       "Enable protection keys">;
 165 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
 166                           "Enable AVX-512 Vector Neural Network Instructions",
 167                                       [FeatureAVX512]>;
 168 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
 169                        "Enable AVX-512 Bit Algorithms",
 170                         [FeatureBWI]>;
 171 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
 172                          "Enable packed carry-less multiplication instructions",
 173                                [FeatureSSE2]>;
 174 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
 175                          "Enable Galois Field Arithmetic Instructions",
 176                                [FeatureSSE2]>;
 177 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
 178                                          "Enable vpclmulqdq instructions",
 179                                          [FeatureAVX, FeaturePCLMUL]>;
 180 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 181                                       "Enable four-operand fused multiply-add",
 182                                       [FeatureAVX, FeatureSSE4A]>; // also turns on AVX, SSE4A
 183 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
 184                                       "Enable XOP instructions",
 185                                       [FeatureFMA4]>;
 186 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
 187                                           "HasSSEUnalignedMem", "true",
 188                       "Allow unaligned memory operands with SSE instructions">;
 189 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
 190                                       "Enable AES instructions",
 191                                       [FeatureSSE2]>;
 192 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
 193                        "Promote selected AES instructions to AVX512/AVX registers",
 194                         [FeatureAVX, FeatureAES]>;
 195 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
 196                                       "Enable TBM instructions">;
 197 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
 198                                       "Enable LWP instructions">;
 199 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
 200                                       "Support MOVBE instruction">;
 201 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
 202                                       "Support RDRAND instruction">;
 203 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
 204                                        "Support FS/GS Base instructions">;
 205 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
 206                                       "Support LZCNT instruction">;
 207 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
 208                                       "Support BMI instructions">;
 209 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
 210                                       "Support BMI2 instructions">;
 211 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
 212                                       "Support RTM instructions">;
 213 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
 214                                       "Support ADX instructions">;
 215 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
 216                                       "Enable SHA instructions",
 217                                       [FeatureSSE2]>;
 218 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
 219                        "Support CET Shadow-Stack instructions">;
 220 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
 221                                       "Support PRFCHW instructions">;
 222 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
 223                                       "Support RDSEED instruction">;
 224 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
 225                                        "Support LAHF and SAHF instructions">;
 226 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
 227                                       "Enable MONITORX/MWAITX timer functionality">;
 228 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
 229                                       "Enable Cache Line Zero">;
 230 def FeatureCLDEMOTE  : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
 231                                       "Enable Cache Demote">;
 232 def FeaturePTWRITE  : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
 233                                       "Support ptwrite instruction">;
 234 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
 235                                       "Support MPX instructions">;
 236 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 237                                      "Use LEA for adjusting the stack pointer">;
 238 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 239                                      "HasSlowDivide32", "true",
 240                                      "Use 8-bit divide for positive values less than 256">;
 241 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 242                                      "HasSlowDivide64", "true",
 243                                      "Use 32-bit divide for positive values less than 2^32">;
 244 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
 245                                      "PadShortFunctions", "true",
 246                                      "Pad short functions">;
 247 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
 248                                       "Invalidate Process-Context Identifier">;
 249 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
 250                                       "Enable Software Guard Extensions">;
 251 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
 252                                       "Flush A Cache Line Optimized">;
 253 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
 254                                       "Cache Line Write Back">;
 255 def FeatureWBNOINVD    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
 256                                       "Write Back No Invalidate">;
 257 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
 258                                     "Support RDPID instructions">;
 259 def FeatureWAITPKG  : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
 260                                       "Wait and pause enhancements">;
 261 // On some processors, instructions that implicitly take two memory operands are
 262 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
 263 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
 264 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
 265                                      "SlowTwoMemOps", "true",
 266                                      "Two memory operand instructions are slow">;
 267 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
 268                                    "LEA instruction needs inputs at AG stage">;
 269 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 270                                    "LEA instruction with certain arguments is slow">;
 271 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
 272                                    "LEA instruction with 3 ops or certain registers is slow">;
 273 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 274                                    "INC and DEC instructions are slower than ADD and SUB">;
 275 def FeatureSoftFloat
 276     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 277                        "Use software floating point features.">;
 278 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
 279                                      "HasPOPCNTFalseDeps", "true",
 280                                      "POPCNT has a false dependency on dest register">;
 281 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
 282                                      "HasLZCNTFalseDeps", "true",
 283                                      "LZCNT/TZCNT have a false dependency on dest register">;
 284 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
 285                                       "platform configuration instruction">;
 286 // On recent X86 (port bound) processors, it's preferable to combine into a
 287 // single shuffle using a variable mask rather than multiple fixed shuffles.
 288 def FeatureFastVariableShuffle
 289     : SubtargetFeature<"fast-variable-shuffle",
 290                        "HasFastVariableShuffle",
 291                        "true", "Shuffles with variable masks are fast">;
 292 // On some X86 processors, there is no performance hazard to writing only the
 293 // lower parts of a YMM or ZMM register without clearing the upper part.
 294 def FeatureFastPartialYMMorZMMWrite
 295     : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
 296                        "HasFastPartialYMMorZMMWrite",
 297                        "true", "Partial writes to YMM/ZMM registers are fast">;
 298 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 299 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
 300 // vector FSQRT has higher throughput than the corresponding NR code.
 301 // The idea is that throughput bound code is likely to be vectorized, so for
 302 // vectorized code we should care about the throughput of SQRT operations.
 303 // But if the code is scalar that probably means that the code has some kind of
 304 // dependency and we should care more about reducing the latency.
 305 def FeatureFastScalarFSQRT
 306     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 307                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 308 def FeatureFastVectorFSQRT
 309     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 310                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 311 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 312 // be used to replace test/set sequences.
 313 def FeatureFastLZCNT
 314     : SubtargetFeature<
 315           "fast-lzcnt", "HasFastLZCNT", "true",
 316           "LZCNT instructions are as fast as most simple integer ops">;
 317 // Set if the target can efficiently decode NOPs up to 11 bytes in length.
 318 def FeatureFast11ByteNOP
 319     : SubtargetFeature<
 320           "fast-11bytenop", "HasFast11ByteNOP", "true",
 321           "Target can quickly decode up to 11 byte NOPs">;
 322 // Set if the target can efficiently decode NOPs up to 15 bytes in length.
 323 def FeatureFast15ByteNOP
 324     : SubtargetFeature<
 325           "fast-15bytenop", "HasFast15ByteNOP", "true",
 326           "Target can quickly decode up to 15 byte NOPs">;
 327 // Sandy Bridge and newer processors can use SHLD with the same source on both
 328 // inputs to implement rotate to avoid the partial flag update of the normal
 329 // rotate instructions.
 330 def FeatureFastSHLDRotate
 331     : SubtargetFeature<
 332           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 333           "SHLD can be used as a faster rotate">;
 334
 335 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 336 // "string operations"). See "REP String Enhancement" in the Intel Software
 337 // Development Manual. This feature essentially means that REP MOVSB will copy
 338 // using the largest available size instead of copying bytes one by one, making
 339 // it at least as fast as REP MOVS{W,D,Q}.
 340 def FeatureERMSB
 341     : SubtargetFeature<
 342           "ermsb", "HasERMSB", "true",
 343           "REP MOVS/STOS are fast">;
 344
 345 // Sandy Bridge and newer processors have many instructions that can be
 346 // fused with conditional branches and pass through the CPU as a single
 347 // operation.
 348 def FeatureMacroFusion
 349     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
 350                  "Various instructions can be fused with conditional branches">;
 351
 352 // Gather is available since Haswell (AVX2 set). So technically, we can
 353 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
 354 // Skylake Client processor has faster Gathers than HSW and performance is
 355 // similar to Skylake Server (AVX-512).
 356 def FeatureHasFastGather
 357     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
 358                        "Indicates if gather is reasonably fast.">;
 359
 360 def FeaturePrefer256Bit
 361     : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
 362                        "Prefer 256-bit AVX instructions">;
 363
 364 // Lower indirect calls using a special construct called a `retpoline` to
 365 // mitigate potential Spectre v2 attacks against them.
 366 def FeatureRetpolineIndirectCalls
 367     : SubtargetFeature<
 368           "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
 369           "Remove speculation of indirect calls from the generated code.">;
 370
 371 // Lower indirect branches and switches either using conditional branch trees
 372 // or using a special construct called a `retpoline` to mitigate potential
 373 // Spectre v2 attacks against them.
 374 def FeatureRetpolineIndirectBranches
 375     : SubtargetFeature<
 376           "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
 377           "Remove speculation of indirect branches from the generated code.">;
 378
 379 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
 380 // `retpoline-indirect-branches` above.
 381 def FeatureRetpoline
 382     : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
 383                        "Remove speculation of indirect branches from the "
 384                        "generated code, either by avoiding them entirely or "
 385                        "lowering them with a speculation blocking construct.",
 386                        [FeatureRetpolineIndirectCalls,
 387                         FeatureRetpolineIndirectBranches]>;
 388
 389 // Rely on external thunks for the emitted retpoline calls. This allows users
 390 // to provide their own custom thunk definitions in highly specialized
 391 // environments such as a kernel that does boot-time hot patching.
 392 def FeatureRetpolineExternalThunk
 393     : SubtargetFeature<
 394           "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
 395           "When lowering an indirect call or branch using a `retpoline`, rely "
 396           "on the specified user provided thunk rather than emitting one "
 397           "ourselves. Only has effect when combined with some other retpoline "
 398           "feature.", [FeatureRetpolineIndirectCalls]>;
 399
 400 // Direct Move instructions.
 401 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
 402                                        "Support movdiri instruction">;
 403 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
 404                                         "Support movdir64b instruction">;
 405
 406 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
 407           "Indicates that the BEXTR instruction is implemented as a single uop "
 408           "with good throughput.">;
 409
 410 // Combine vector math operations with shuffles into horizontal math
 411 // instructions if a CPU implements horizontal operations (introduced with
 412 // SSE3) with better latency/throughput than the alternative sequence.
 413 def FeatureFastHorizontalOps
 414     : SubtargetFeature<
 415         "fast-hops", "HasFastHorizontalOps", "true",
 416         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
 417         "normal vector instructions with shuffles", [FeatureSSE3]>;
 418
 419 // Merge branches using three-way conditional code.
 420 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
 421                                         "ThreewayBranchProfitable", "true",
 422                                         "Merge branches to a three-way "
 423                                         "conditional branch">;
 424
 425 //===----------------------------------------------------------------------===//
 426 // Register File Description
 427 //===----------------------------------------------------------------------===//
 428
 429 include "X86RegisterInfo.td"
 430 include "X86RegisterBanks.td"
 431
 432 //===----------------------------------------------------------------------===//
 433 // Instruction Descriptions
 434 //===----------------------------------------------------------------------===//
 435
 436 include "X86Schedule.td"
 437 include "X86InstrInfo.td"
 438 include "X86SchedPredicates.td"
 439
 440 def X86InstrInfo : InstrInfo;
 441
 442 //===----------------------------------------------------------------------===//
 443 // X86 processors supported.
 444 //===----------------------------------------------------------------------===//
 445
 446 include "X86ScheduleAtom.td"
 447 include "X86SchedSandyBridge.td"
 448 include "X86SchedHaswell.td"
 449 include "X86SchedBroadwell.td"
 450 include "X86ScheduleSLM.td"
 451 include "X86ScheduleZnver1.td"
 452 include "X86ScheduleBdVer2.td"
 453 include "X86ScheduleBtVer2.td"
 454 include "X86SchedSkylakeClient.td"
 455 include "X86SchedSkylakeServer.td"
 456
 457 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
 458                     "Intel Atom processors">;
 459 def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
 460                     "Intel Silvermont processors">;
 461 def ProcIntelGLM  : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
 462                     "Intel Goldmont processors">;
 463 def ProcIntelGLP  : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
 464                     "Intel Goldmont Plus processors">;
 465 def ProcIntelTRM  : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
 466                     "Intel Tremont processors">;
 467
 468 class Proc<string Name, list<SubtargetFeature> Features>
 469  : ProcessorModel<Name, GenericModel, Features>; // always uses GenericModel
 470
 471 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
 472 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
 473 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
 474 def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
 475 def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
 476 def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 477
 478 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
 479 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
 480                           FeatureNOPL]>;
 481
 482 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 483                                FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
 484
 485 foreach CPU = ["pentium3", "pentium3m"] in { // both names share one feature list
 486   def : Proc<CPU, [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 487                    FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 488 }
 489
 490 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
 491 // The intent is to enable it for pentium4 which is the current default
 492 // processor in a vanilla 32-bit clang compilation when no specific
 493 // architecture is specified.  This generally gives a nice performance
 494 // increase on silvermont, with largely neutral behavior on other
 495 // contemporary large core processors.
 496 // pentium-m, pentium4m, prescott and nocona are included as a preventative
 497 // measure to avoid performance surprises, in case clang's default cpu
 498 // changes slightly.
 499
 500 def : ProcessorModel<"pentium-m", GenericPostRAModel,
 501                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 502                       FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 503
 504 foreach CPU = ["pentium4", "pentium4m"] in { // both names share one definition
 505   def : ProcessorModel<CPU, GenericPostRAModel,
 506                        [FeatureX87, FeatureSlowUAMem16,
 507                         FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 508 }
 509
 510 // Intel Quark. Enables no subtarget features at all, not even FeatureX87.
 511 def : Proc<"lakemont",        []>;
 512
 513 // Intel Core Duo. Scheduled with SandyBridgeModel.
 514 def : ProcessorModel<"yonah", SandyBridgeModel,
 515                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
 516                       FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 517
 518 // NetBurst.
 519 def : ProcessorModel<"prescott", GenericPostRAModel, // Same feature set as yonah, but with GenericPostRAModel.
 520                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
 521                       FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
 522 def : ProcessorModel<"nocona", GenericPostRAModel, [ // prescott's feature set plus the two entries marked below.
 523   FeatureX87,
 524   FeatureSlowUAMem16,
 525   FeatureCMOV,
 526   FeatureMMX,
 527   FeatureSSE3,
 528   FeatureFXSR,
 529   FeatureNOPL,
 530   Feature64Bit, // Not listed for prescott.
 531   FeatureCMPXCHG16B // Not listed for prescott.
 532 ]>;
 533
 534 // Intel Core 2 Solo/Duo.
 535 def : ProcessorModel<"core2", SandyBridgeModel, [ // SandyBridgeModel is passed as the scheduling model.
 536   FeatureX87,
 537   FeatureSlowUAMem16,
 538   FeatureCMOV,
 539   FeatureMMX,
 540   FeatureSSSE3,
 541   FeatureFXSR,
 542   FeatureNOPL,
 543   Feature64Bit,
 544   FeatureCMPXCHG16B,
 545   FeatureLAHFSAHF,
 546   FeatureMacroFusion
 547 ]>;
 548 def : ProcessorModel<"penryn", SandyBridgeModel, [ // Same list as core2 except for the SSE level entry.
 549   FeatureX87,
 550   FeatureSlowUAMem16,
 551   FeatureCMOV,
 552   FeatureMMX,
 553   FeatureSSE41, // core2 lists FeatureSSSE3 in this position.
 554   FeatureFXSR,
 555   FeatureNOPL,
 556   Feature64Bit,
 557   FeatureCMPXCHG16B,
 558   FeatureLAHFSAHF,
 559   FeatureMacroFusion
 560 ]>;
 561
 562 // Atom CPUs.
 563 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [ // Only the name varies; model and features are fixed here.
 564   ProcIntelAtom,
 565   FeatureX87,
 566   FeatureSlowUAMem16,
 567   FeatureCMOV,
 568   FeatureMMX,
 569   FeatureSSSE3,
 570   FeatureFXSR,
 571   FeatureNOPL,
 572   Feature64Bit,
 573   FeatureCMPXCHG16B,
 574   FeatureMOVBE,
 575   FeatureLEAForSP,
 576   FeatureSlowDivide32,
 577   FeatureSlowDivide64,
 578   FeatureSlowTwoMemOps,
 579   FeatureLEAUsesAG,
 580   FeaturePadShortFunctions,
 581   FeatureLAHFSAHF
 582 ]>;
 583 def : BonnellProc<"bonnell">; // Instantiates the class above under the name "bonnell".
 584 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
 585
 586 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ // Only the name varies; model and features are fixed here.
 587   ProcIntelSLM,
 588   FeatureX87,
 589   FeatureCMOV,
 590   FeatureMMX,
 591   FeatureSSE42,
 592   FeatureFXSR,
 593   FeatureNOPL,
 594   Feature64Bit,
 595   FeatureCMPXCHG16B,
 596   FeatureMOVBE,
 597   FeaturePOPCNT,
 598   FeaturePCLMUL,
 599   FeatureSlowDivide64,
 600   FeatureSlowTwoMemOps,
 601   FeaturePRFCHW,
 602   FeatureSlowLEA,
 603   FeatureSlowIncDec,
 604   FeatureSlowPMULLD,
 605   FeatureRDRAND,
 606   FeatureLAHFSAHF,
 607   FeaturePOPCNTFalseDeps
 608 ]>;
 609 def : SilvermontProc<"silvermont">; // Both names below share the identical model and feature list.
 610 def : SilvermontProc<"slm">; // Legacy alias.
 611
 612 class ProcessorFeatures<list<SubtargetFeature> Inherited, // Features carried over from a base list (may be empty).
 613                         list<SubtargetFeature> NewFeatures> { // Features appended after the inherited ones.
 614   list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures); // Combined list, read elsewhere as <Name>.Value.
 615 }
 616
 617 class ProcModel<string Name, SchedMachineModel Model, // Name and scheduling model are forwarded unchanged.
 618                 list<SubtargetFeature> ProcFeatures, // First part of the feature list.
 619                 list<SubtargetFeature> OtherFeatures> : // Second part, appended after ProcFeatures.
 620   ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>; // The two lists are concatenated into one.
 621
 622 def GLMFeatures : ProcessorFeatures<[], [ // Empty Inherited list: every feature is spelled out here.
 623   FeatureX87,
 624   FeatureCMOV,
 625   FeatureMMX,
 626   FeatureSSE42,
 627   FeatureFXSR,
 628   FeatureNOPL,
 629   Feature64Bit,
 630   FeatureCMPXCHG16B,
 631   FeatureMOVBE,
 632   FeaturePOPCNT,
 633   FeaturePCLMUL,
 634   FeatureAES,
 635   FeaturePRFCHW,
 636   FeatureSlowTwoMemOps,
 637   FeatureSlowLEA,
 638   FeatureSlowIncDec,
 639   FeatureLAHFSAHF,
 640   FeatureMPX,
 641   FeatureSHA,
 642   FeatureRDRAND,
 643   FeatureRDSEED,
 644   FeatureXSAVE,
 645   FeatureXSAVEOPT,
 646   FeatureXSAVEC,
 647   FeatureXSAVES,
 648   FeatureCLFLUSHOPT,
 649   FeatureFSGSBase
 650 ]>;
 651
 652 class GoldmontProc<string Name> : ProcModel<Name, SLMModel, // SLMModel is passed as the scheduling model.
 653       GLMFeatures.Value, [ // GLMFeatures plus the two entries below.
 654   ProcIntelGLM,
 655   FeaturePOPCNTFalseDeps
 656 ]>;
 657 def : GoldmontProc<"goldmont">;
 658
 659 def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [ // Everything in GLMFeatures plus the three entries below.
 660   FeaturePTWRITE,
 661   FeatureRDPID,
 662   FeatureSGX
 663 ]>;
 664
 665 class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel, // SLMModel is passed as the scheduling model.
 666       GLPFeatures.Value, [
 667   ProcIntelGLP // Only addition on top of GLPFeatures; FeaturePOPCNTFalseDeps is not added here, unlike GoldmontProc.
 668 ]>;
 669 def : GoldmontPlusProc<"goldmont-plus">;
 670
 671 class TremontProc<string Name> : ProcModel<Name, SLMModel, // SLMModel is passed as the scheduling model.
 672       GLPFeatures.Value, [ // Built directly on GLPFeatures; the additions are listed inline below.
 673   ProcIntelTRM,
 674   FeatureCLDEMOTE,
 675   FeatureGFNI,
 676   FeatureMOVDIRI,
 677   FeatureMOVDIR64B,
 678   FeatureWAITPKG
 679 ]>;
 680 def : TremontProc<"tremont">;
 681
 682 // "Arrandale" along with corei3 and corei5
 683 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ // SandyBridgeModel is passed as the scheduling model.
 684   FeatureX87,
 685   FeatureCMOV,
 686   FeatureMMX,
 687   FeatureSSE42,
 688   FeatureFXSR,
 689   FeatureNOPL,
 690   Feature64Bit,
 691   FeatureCMPXCHG16B,
 692   FeaturePOPCNT,
 693   FeatureLAHFSAHF,
 694   FeatureMacroFusion
 695 ]>;
 696 def : NehalemProc<"nehalem">; // Both names below share the identical model and feature list.
 697 def : NehalemProc<"corei7">;
 698
 699 // Westmere is a similar machine to nehalem with some additional features.
 700 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
 701 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ // Same model as NehalemProc.
 702   FeatureX87,
 703   FeatureCMOV,
 704   FeatureMMX,
 705   FeatureSSE42,
 706   FeatureFXSR,
 707   FeatureNOPL,
 708   Feature64Bit,
 709   FeatureCMPXCHG16B,
 710   FeaturePOPCNT,
 711   FeaturePCLMUL, // The only entry that is not in NehalemProc's list.
 712   FeatureLAHFSAHF,
 713   FeatureMacroFusion
 714 ]>;
 715 def : WestmereProc<"westmere">;
 716
 717 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 718 // rather than a superset.
 719 def SNBFeatures : ProcessorFeatures<[], [ // Empty Inherited list: base of the IVB/HSW/BDW/SKL feature chain below.
 720   FeatureX87,
 721   FeatureCMOV,
 722   FeatureMMX,
 723   FeatureAVX,
 724   FeatureFXSR,
 725   FeatureNOPL,
 726   Feature64Bit,
 727   FeatureCMPXCHG16B,
 728   FeaturePOPCNT,
 729   FeatureSlowDivide64,
 730   FeaturePCLMUL,
 731   FeatureXSAVE,
 732   FeatureXSAVEOPT,
 733   FeatureLAHFSAHF,
 734   FeatureSlow3OpsLEA,
 735   FeatureFastScalarFSQRT,
 736   FeatureFastSHLDRotate,
 737   FeatureSlowIncDec,
 738   FeatureMergeToThreeWayBranch,
 739   FeatureMacroFusion
 740 ]>;
 741
 742 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel, // SandyBridgeModel is passed as the scheduling model.
 743                                                SNBFeatures.Value, [ // These two extras are not part of SNBFeatures, so later lists do not inherit them.
 744   FeatureSlowUAMem32,
 745   FeaturePOPCNTFalseDeps
 746 ]>;
 747 def : SandyBridgeProc<"sandybridge">;
 748 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
 749
 750 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [ // Everything in SNBFeatures plus the three entries below.
 751   FeatureRDRAND,
 752   FeatureF16C,
 753   FeatureFSGSBase
 754 ]>;
 755
 756 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel, // Reuses SandyBridgeModel as the scheduling model.
 757                                              IVBFeatures.Value, [ // Same two extras as SandyBridgeProc.
 758   FeatureSlowUAMem32,
 759   FeaturePOPCNTFalseDeps
 760 ]>;
 761 def : IvyBridgeProc<"ivybridge">;
 762 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
 763
 764 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [ // Everything in IVBFeatures plus the entries below.
 765   FeatureAVX2,
 766   FeatureBMI,
 767   FeatureBMI2,
 768   FeatureERMSB,
 769   FeatureFMA,
 770   FeatureINVPCID,
 771   FeatureLZCNT,
 772   FeatureMOVBE,
 773   FeatureFastVariableShuffle
 774 ]>;
 775
 776 class HaswellProc<string Name> : ProcModel<Name, HaswellModel, // HaswellModel is passed as the scheduling model.
 777                                            HSWFeatures.Value, [ // FeatureSlowUAMem32 is not added here, unlike SandyBridgeProc/IvyBridgeProc.
 778   FeaturePOPCNTFalseDeps,
 779   FeatureLZCNTFalseDeps
 780 ]>;
 781 def : HaswellProc<"haswell">;
 782 def : HaswellProc<"core-avx2">; // Legacy alias.
 783
 784 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [ // Everything in HSWFeatures plus the three entries below.
 785   FeatureADX,
 786   FeatureRDSEED,
 787   FeaturePRFCHW
 788 ]>;
 789 class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel, // BroadwellModel is passed as the scheduling model.
 790                                              BDWFeatures.Value, [ // Same two extras as HaswellProc.
 791   FeaturePOPCNTFalseDeps,
 792   FeatureLZCNTFalseDeps
 793 ]>;
 794 def : BroadwellProc<"broadwell">;
 795
 796 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [ // Everything in BDWFeatures plus the entries below; also the base of SKXFeatures and CNLFeatures.
 797   FeatureAES,
 798   FeatureMPX,
 799   FeatureXSAVEC,
 800   FeatureXSAVES,
 801   FeatureCLFLUSHOPT,
 802   FeatureFastVectorFSQRT
 803 ]>;
 804
 805 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel, // SkylakeClientModel is passed as the scheduling model.
 806                                                  SKLFeatures.Value, [
 807   FeatureHasFastGather,
 808   FeaturePOPCNTFalseDeps,
 809   FeatureSGX // Added here rather than in SKLFeatures, so lists derived from SKLFeatures do not inherit it.
 810 ]>;
 811 def : SkylakeClientProc<"skylake">;
 812
 813 def KNLFeatures : ProcessorFeatures<[], [ // Empty Inherited list: not derived from the SNB..SKL chain.
 814   FeatureX87,
 815   FeatureCMOV,
 816   FeatureMMX,
 817   FeatureFXSR,
 818   FeatureNOPL,
 819   Feature64Bit,
 820   FeatureCMPXCHG16B,
 821   FeaturePOPCNT,
 822   FeatureSlowDivide64,
 823   FeaturePCLMUL,
 824   FeatureXSAVE,
 825   FeatureXSAVEOPT,
 826   FeatureLAHFSAHF,
 827   FeatureSlow3OpsLEA,
 828   FeatureSlowIncDec,
 829   FeatureAES,
 830   FeatureRDRAND,
 831   FeatureF16C,
 832   FeatureFSGSBase,
 833   FeatureAVX512,
 834   FeatureERI,
 835   FeatureCDI,
 836   FeaturePFI,
 837   FeaturePREFETCHWT1,
 838   FeatureADX,
 839   FeatureRDSEED,
 840   FeatureMOVBE,
 841   FeatureLZCNT,
 842   FeatureBMI,
 843   FeatureBMI2,
 844   FeatureFMA,
 845   FeaturePRFCHW
 846 ]>;
 847
 848 // FIXME: define KNL model
 849 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel, // HaswellModel is used for scheduling (see the FIXME above).
 850                                                   KNLFeatures.Value, [ // KNLFeatures plus the four entries below.
 851   FeatureSlowTwoMemOps,
 852   FeatureFastPartialYMMorZMMWrite,
 853   FeatureHasFastGather,
 854   FeatureSlowPMADDWD
 855 ]>;
 856 def : KnightsLandingProc<"knl">;
 857
 858 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel, // Same model and base list as KnightsLandingProc.
 859                                                KNLFeatures.Value, [
 860   FeatureSlowTwoMemOps,
 861   FeatureFastPartialYMMorZMMWrite,
 862   FeatureHasFastGather,
 863   FeatureSlowPMADDWD,
 864   FeatureVPOPCNTDQ // The only entry here that KnightsLandingProc does not add.
 865 ]>;
 866 def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
 867
 868 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [ // Everything in SKLFeatures plus the entries below.
 869   FeatureAVX512,
 870   FeatureCDI,
 871   FeatureDQI,
 872   FeatureBWI,
 873   FeatureVLX,
 874   FeaturePKU,
 875   FeatureCLWB
 876 ]>;
 877
 878 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel, // SkylakeServerModel is passed as the scheduling model.
 879                                                  SKXFeatures.Value, [ // FeatureSGX is not added here, unlike SkylakeClientProc.
 880   FeatureHasFastGather,
 881   FeaturePOPCNTFalseDeps
 882 ]>;
 883 def : SkylakeServerProc<"skylake-avx512">;
 884 def : SkylakeServerProc<"skx">; // Legacy alias.
 885
 886 def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [ // Everything in SKXFeatures plus FeatureVNNI.
 887   FeatureVNNI
 888 ]>;
 889
 890 class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, // Same model as SkylakeServerProc.
 891                                               CLXFeatures.Value, [ // Same two extras as SkylakeServerProc.
 892   FeatureHasFastGather,
 893   FeaturePOPCNTFalseDeps
 894 ]>;
 895 def : CascadelakeProc<"cascadelake">;
 896
 897 def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [ // Derived from SKLFeatures (not SKXFeatures), so the AVX-512 entries are repeated here.
 898   FeatureAVX512,
 899   FeatureCDI,
 900   FeatureDQI,
 901   FeatureBWI,
 902   FeatureVLX,
 903   FeaturePKU,
 904   FeatureVBMI,
 905   FeatureIFMA,
 906   FeatureSHA,
 907   FeatureSGX // FeatureCLWB, present in SKXFeatures, is not listed in this list.
 908 ]>;
 909
 910 class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel, // SkylakeServerModel is passed as the scheduling model.
 911                                               CNLFeatures.Value, [
 912   FeatureHasFastGather // Only extra; FeaturePOPCNTFalseDeps is not added here, unlike SkylakeServerProc.
 913 ]>;
 914 def : CannonlakeProc<"cannonlake">;
 915
 916 def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [ // Everything in CNLFeatures plus the entries below; shared by both Icelake classes.
 917   FeatureBITALG,
 918   FeatureVAES,
 919   FeatureVBMI2,
 920   FeatureVNNI,
 921   FeatureVPCLMULQDQ,
 922   FeatureVPOPCNTDQ,
 923   FeatureGFNI,
 924   FeatureCLWB,
 925   FeatureRDPID
 926 ]>;
 927
 928 class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel, // SkylakeServerModel is passed as the scheduling model.
 929                                                  ICLFeatures.Value, [
 930   FeatureHasFastGather
 931 ]>;
 932 def : IcelakeClientProc<"icelake-client">;
 933
 934 class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel, // Same model and base list as IcelakeClientProc.
 935                                                  ICLFeatures.Value, [
 936   FeaturePCONFIG, // Not added by IcelakeClientProc.
 937   FeatureWBNOINVD, // Not added by IcelakeClientProc.
 938   FeatureHasFastGather
 939 ]>;
 940 def : IcelakeServerProc<"icelake-server">;
 941
 942 // AMD CPUs.
 943
 944 def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; // Lists FeatureMMX where the next two entries list Feature3DNow.
 945 def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; // Same feature list as k6-3.
 946 def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 947
 948 foreach P = ["athlon", "athlon-tbird"] in { // One anonymous Proc record per listed name.
 949   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
 950                  FeatureNOPL, FeatureSlowSHLD]>;
 951 }
 952
 953 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { // Same list as the group above plus FeatureSSE1 and FeatureFXSR.
 954   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
 955                  Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
 956 }
 957
 958 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { // One anonymous Proc record per listed name.
 959   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
 960                  FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
 961                  FeatureCMOV]>;
 962 }
 963
 964 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { // Differs from the group above: FeatureSSE3 replaces FeatureSSE2 and FeatureCMPXCHG16B is added.
 965   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
 966                  FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
 967                  FeatureCMOV, Feature64Bit]>;
 968 }
 969
 970 foreach P = ["amdfam10", "barcelona"] in { // One anonymous Proc record per listed name.
 971   def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR, // FeatureSlowUAMem16 is not listed here, unlike the k8 groups.
 972                  FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
 973                  FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
 974 }
 975
 976 // Bobcat
 977 def : Proc<"btver1", [ // Declared via Proc, i.e. without an explicit scheduling-model argument.
 978   FeatureX87,
 979   FeatureCMOV,
 980   FeatureMMX,
 981   FeatureSSSE3,
 982   FeatureSSE4A,
 983   FeatureFXSR,
 984   FeatureNOPL,
 985   Feature64Bit,
 986   FeatureCMPXCHG16B,
 987   FeaturePRFCHW,
 988   FeatureLZCNT,
 989   FeaturePOPCNT,
 990   FeatureSlowSHLD,
 991   FeatureLAHFSAHF,
 992   FeatureFast15ByteNOP
 993 ]>;
 994
 995 // Jaguar
 996 def : ProcessorModel<"btver2", BtVer2Model, [ // BtVer2Model is passed as the scheduling model.
 997   FeatureX87,
 998   FeatureCMOV,
 999   FeatureMMX,
1000   FeatureAVX,
1001   FeatureFXSR,
1002   FeatureNOPL,
1003   FeatureSSE4A,
1004   Feature64Bit,
1005   FeatureCMPXCHG16B,
1006   FeaturePRFCHW,
1007   FeatureAES,
1008   FeaturePCLMUL,
1009   FeatureBMI,
1010   FeatureF16C,
1011   FeatureMOVBE,
1012   FeatureLZCNT,
1013   FeatureFastLZCNT,
1014   FeaturePOPCNT,
1015   FeatureXSAVE,
1016   FeatureXSAVEOPT,
1017   FeatureSlowSHLD,
1018   FeatureLAHFSAHF,
1019   FeatureFast15ByteNOP,
1020   FeatureFastBEXTR,
1021   FeatureFastPartialYMMorZMMWrite,
1022   FeatureFastHorizontalOps
1023 ]>;
1024
1025 // Bulldozer
1026 def : ProcessorModel<"bdver1", BdVer2Model, [ // Uses BdVer2Model, the same scheduling model as the bdver2 entry.
1027   FeatureX87,
1028   FeatureCMOV,
1029   FeatureXOP,
1030   FeatureFMA4,
1031   Feature64Bit,
1032   FeatureCMPXCHG16B,
1033   FeatureAES,
1034   FeaturePRFCHW,
1035   FeaturePCLMUL,
1036   FeatureMMX,
1037   FeatureAVX,
1038   FeatureFXSR,
1039   FeatureNOPL,
1040   FeatureSSE4A,
1041   FeatureLZCNT,
1042   FeaturePOPCNT,
1043   FeatureXSAVE,
1044   FeatureLWP,
1045   FeatureSlowSHLD,
1046   FeatureLAHFSAHF,
1047   FeatureFast11ByteNOP,
1048   FeatureMacroFusion
1049 ]>;
1050 // Piledriver
1051 def : ProcessorModel<"bdver2", BdVer2Model, [ // bdver1's list plus the entries marked below.
1052   FeatureX87,
1053   FeatureCMOV,
1054   FeatureXOP,
1055   FeatureFMA4,
1056   Feature64Bit,
1057   FeatureCMPXCHG16B,
1058   FeatureAES,
1059   FeaturePRFCHW,
1060   FeaturePCLMUL,
1061   FeatureMMX,
1062   FeatureAVX,
1063   FeatureFXSR,
1064   FeatureNOPL,
1065   FeatureSSE4A,
1066   FeatureF16C, // Not listed for bdver1.
1067   FeatureLZCNT,
1068   FeaturePOPCNT,
1069   FeatureXSAVE,
1070   FeatureBMI, // Not listed for bdver1.
1071   FeatureTBM, // Not listed for bdver1.
1072   FeatureLWP,
1073   FeatureFMA, // Not listed for bdver1.
1074   FeatureSlowSHLD,
1075   FeatureLAHFSAHF,
1076   FeatureFast11ByteNOP,
1077   FeatureFastBEXTR, // Not listed for bdver1.
1078   FeatureMacroFusion
1079 ]>;
1080
1081 // Steamroller
1082 def : Proc<"bdver3", [ // Declared via Proc, not ProcessorModel with BdVer2Model as bdver1/bdver2 are.
1083   FeatureX87,
1084   FeatureCMOV,
1085   FeatureXOP,
1086   FeatureFMA4,
1087   Feature64Bit,
1088   FeatureCMPXCHG16B,
1089   FeatureAES,
1090   FeaturePRFCHW,
1091   FeaturePCLMUL,
1092   FeatureMMX,
1093   FeatureAVX,
1094   FeatureFXSR,
1095   FeatureNOPL,
1096   FeatureSSE4A,
1097   FeatureF16C,
1098   FeatureLZCNT,
1099   FeaturePOPCNT,
1100   FeatureXSAVE,
1101   FeatureBMI,
1102   FeatureTBM,
1103   FeatureLWP,
1104   FeatureFMA,
1105   FeatureXSAVEOPT, // Not listed for bdver2.
1106   FeatureSlowSHLD,
1107   FeatureFSGSBase, // Not listed for bdver2.
1108   FeatureLAHFSAHF,
1109   FeatureFast11ByteNOP,
1110   FeatureFastBEXTR,
1111   FeatureMacroFusion
1112 ]>;
1113
1114 // Excavator
1115 def : Proc<"bdver4", [ // Declared via Proc, like bdver3; FeatureSSE4A is not listed explicitly here, unlike bdver1-3.
1116   FeatureX87,
1117   FeatureCMOV,
1118   FeatureMMX,
1119   FeatureAVX2, // bdver3 lists FeatureAVX instead.
1120   FeatureFXSR,
1121   FeatureNOPL,
1122   FeatureXOP,
1123   FeatureFMA4,
1124   Feature64Bit,
1125   FeatureCMPXCHG16B,
1126   FeatureAES,
1127   FeaturePRFCHW,
1128   FeaturePCLMUL,
1129   FeatureF16C,
1130   FeatureLZCNT,
1131   FeaturePOPCNT,
1132   FeatureXSAVE,
1133   FeatureBMI,
1134   FeatureBMI2, // Not listed for bdver3.
1135   FeatureTBM,
1136   FeatureLWP,
1137   FeatureFMA,
1138   FeatureXSAVEOPT,
1139   FeatureSlowSHLD,
1140   FeatureFSGSBase,
1141   FeatureLAHFSAHF,
1142   FeatureFastBEXTR,
1143   FeatureFast11ByteNOP,
1144   FeatureMWAITX, // Not listed for bdver3.
1145   FeatureMacroFusion
1146 ]>;
1147
1148 // Znver1
1149 def: ProcessorModel<"znver1", Znver1Model, [ // Znver1Model is passed as the scheduling model.
1150   FeatureADX,
1151   FeatureAES,
1152   FeatureAVX2,
1153   FeatureBMI,
1154   FeatureBMI2,
1155   FeatureCLFLUSHOPT,
1156   FeatureCLZERO,
1157   FeatureCMOV,
1158   Feature64Bit,
1159   FeatureCMPXCHG16B,
1160   FeatureF16C,
1161   FeatureFMA,
1162   FeatureFSGSBase,
1163   FeatureFXSR,
1164   FeatureNOPL,
1165   FeatureFastLZCNT,
1166   FeatureLAHFSAHF,
1167   FeatureLZCNT,
1168   FeatureFastBEXTR,
1169   FeatureFast15ByteNOP,
1170   FeatureMacroFusion,
1171   FeatureMMX,
1172   FeatureMOVBE,
1173   FeatureMWAITX,
1174   FeaturePCLMUL,
1175   FeaturePOPCNT,
1176   FeaturePRFCHW,
1177   FeatureRDRAND,
1178   FeatureRDSEED,
1179   FeatureSHA,
1180   FeatureSSE4A,
1181   FeatureSlowSHLD,
1182   FeatureX87,
1183   FeatureXSAVE,
1184   FeatureXSAVEC,
1185   FeatureXSAVEOPT,
1186   FeatureXSAVES]>; // List closes on the last entry's line, unlike the other multi-line lists in this file section.
1187
1188 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>; // Only entry in this group listing Feature3DNowA.
1189
1190 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
1191 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; // Same feature list as c3.
1192 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1193 def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX, // Only entry in this group with SSE1, FXSR and CMOV.
1194                                FeatureSSE1, FeatureFXSR, FeatureCMOV]>; // FeatureNOPL is not listed for this entry.
1195
1196 // We also provide a generic 64-bit specific x86 processor model which tries to
1197 // be good for modern chips without enabling instruction set encodings past the
1198 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1199 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1200 //
1201 // We currently use the Sandy Bridge model as the default scheduling model as
1202 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1203 // covers a huge swath of x86 processors. If there are specific scheduling
1204 // knobs which need to be tuned differently for AMD chips, we might consider
1205 // forming a common base for them.
1206 def : ProcessorModel<"x86-64", SandyBridgeModel, [ // Generic 64-bit entry; SandyBridgeModel is the scheduling model.
1207   FeatureX87,
1208   FeatureCMOV,
1209   FeatureMMX,
1210   FeatureSSE2, // Highest SSE level listed for this generic entry.
1211   FeatureFXSR,
1212   FeatureNOPL,
1213   Feature64Bit,
1214   FeatureSlow3OpsLEA,
1215   FeatureSlowIncDec,
1216   FeatureMacroFusion
1217 ]>;
1218
1219 //===----------------------------------------------------------------------===//
1220 // Calling Conventions
1221 //===----------------------------------------------------------------------===//
1222
1223 include "X86CallingConv.td"
1224
1225
1226 //===----------------------------------------------------------------------===//
1227 // Assembly Parser
1228 //===----------------------------------------------------------------------===//
1229
1230 def ATTAsmParserVariant : AsmParserVariant { // Parser variant named "att".
1231   int Variant = 0; // Same Variant value as ATTAsmWriter.
1232
1233   // Variant name.
1234   string Name = "att";
1235
1236   // Discard comments in assembly strings.
1237   string CommentDelimiter = "#";
1238
1239   // Recognize hard coded registers.
1240   string RegisterPrefix = "%";
1241 }
1242
1243 def IntelAsmParserVariant : AsmParserVariant { // Parser variant named "intel".
1244   int Variant = 1; // Same Variant value as IntelAsmWriter.
1245
1246   // Variant name.
1247   string Name = "intel";
1248
1249   // Discard comments in assembly strings.
1250   string CommentDelimiter = ";";
1251
1252   // Recognize hard coded registers.
1253   string RegisterPrefix = ""; // Empty string: no register prefix is set for this variant.
1254 }
1255
1256 //===----------------------------------------------------------------------===//
1257 // Assembly Printers
1258 //===----------------------------------------------------------------------===//
1259
1260 // The X86 target supports two different syntaxes for emitting machine code.
1261 // This is controlled by the -x86-asm-syntax={att|intel} option.
1262 def ATTAsmWriter : AsmWriter { // Writer record for the AT&T syntax.
1263   string AsmWriterClassName  = "ATTInstPrinter"; // Class name string for this writer.
1264   int Variant = 0; // Same Variant value as ATTAsmParserVariant.
1265 }
1266 def IntelAsmWriter : AsmWriter { // Writer record for the Intel syntax.
1267   string AsmWriterClassName  = "IntelInstPrinter"; // Class name string for this writer.
1268   int Variant = 1; // Same Variant value as IntelAsmParserVariant.
1269 }
1270
1271 def X86 : Target { // Top-level target record tying together the pieces defined above.
1272   // Information about the instructions...
1273   let InstructionSet = X86InstrInfo;
1274   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; // Both parser variants defined above.
1275   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; // Both writers defined above.
1276   let AllowRegisterRenaming = 1;
1277 }
1278
1279 //===----------------------------------------------------------------------===//
1280 // Pfm Counters
1281 //===----------------------------------------------------------------------===//
1282
1283 include "X86PfmCounters.td"