lib/Target/X86/X86.td

   1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This is a target description file for the Intel i386 architecture, referred
  11 // to here as the "X86" architecture.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 // Get the target-independent interfaces which we are implementing...
  16 //
  17 include "llvm/Target/Target.td"
  18
  19 //===----------------------------------------------------------------------===//
  20 // X86 Subtarget state
  21 //
     // Operating-mode selectors: each record sets its own In64BitMode,
     // In32BitMode or In16BitMode attribute to "true". Nothing in these three
     // records makes the modes mutually exclusive.
  22
  23 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
  24                                   "64-bit mode (x86_64)">;
  25 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
  26                                   "32-bit mode (80386)">;
  27 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
  28                                   "16-bit mode (i8086)">;
  29
  30 //===----------------------------------------------------------------------===//
  31 // X86 Subtarget features
  32 //===----------------------------------------------------------------------===//
  33
     // Each record below gives, in order: the feature string, the subtarget
     // attribute it controls, the value given to that attribute, and a
     // human-readable description. Some records end with a list of other
     // features that are implied when this one is enabled.
  34 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
  35                                       "Enable X87 float instructions">;
  36
  37 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
  38                                       "Enable conditional move instructions">;
  39
  40 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
  41                                        "Support POPCNT instruction">;
  42
     // The state-restoring counterpart of FXSAVE has the mnemonic FXRSTOR.
  43 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
  44                                       "Support fxsave/fxrstor instructions">;
  45
     // XSAVE and its three variants are independent switches: none of these
     // records lists another feature as implied.
  46 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
  47                                        "Support xsave instructions">;
  48
  49 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
  50                                        "Support xsaveopt instructions">;
  51
  52 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
  53                                        "Support xsavec instructions">;
  54
  55 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
  56                                        "Support xsaves instructions">;
  57
     // SSE levels form a chain on the shared X86SSELevel attribute: every level
     // lists the previous one as implied, so enabling a later level also enables
     // all earlier ones and, through SSE1, CMOV.
  58 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
  59                                       "Enable SSE instructions",
  60                                       // SSE codegen depends on cmovs, and all
  61                                       // SSE1+ processors support them.
  62                                       [FeatureCMOV]>;
  63 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
  64                                       "Enable SSE2 instructions",
  65                                       [FeatureSSE1]>;
  66 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
  67                                       "Enable SSE3 instructions",
  68                                       [FeatureSSE2]>;
  69 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
  70                                       "Enable SSSE3 instructions",
  71                                       [FeatureSSE3]>;
  72 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
  73                                       "Enable SSE 4.1 instructions",
  74                                       [FeatureSSSE3]>;
  75 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
  76                                       "Enable SSE 4.2 instructions",
  77                                       [FeatureSSE41]>;
  78 // The MMX subtarget feature is separate from the rest of the SSE features
  79 // because it's important (for odd compatibility reasons) to be able to
  80 // turn it off explicitly while allowing SSE+ to be on.
     // MMX, 3DNow! and 3DNow! Athlon share the X863DNowLevel attribute, and each
     // later level lists the one before it as implied.
  81 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
  82                                       "Enable MMX instructions">;
  83 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
  84                                       "Enable 3DNow! instructions",
  85                                       [FeatureMMX]>;
  86 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
  87                                       "Enable 3DNow! Athlon instructions",
  88                                       [Feature3DNow]>;
  89 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
  90 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
  91 // without disabling 64-bit mode.
  92 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
  93                                       "Support 64-bit instructions",
  94                                       [FeatureCMOV]>;
     // cx16 lists Feature64Bit as implied, so selecting it also turns on "64bit"
     // (and, through that, CMOV).
  95 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
  96                                       "64-bit with cmpxchg16b",
  97                                       [Feature64Bit]>;
     // Tuning flags: each of the next five marks an operation that its
     // description calls slow. None of them implies another feature.
  98 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
  99                                        "Bit testing of memory is slow">;
 100 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
 101                                        "SHLD instruction is slow">;
 102 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
 103                                         "PMULLD instruction is slow">;
 104 // FIXME: This should not apply to CPUs that do not have SSE.
 105 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
 106                                 "IsUAMem16Slow", "true",
 107                                 "Slow unaligned 16-byte memory access">;
 108 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
 109                                 "IsUAMem32Slow", "true",
 110                                 "Slow unaligned 32-byte memory access">;
     // SSE4A has its own boolean attribute instead of an X86SSELevel value; it
     // lists SSE3 as implied.
 111 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
 112                                       "Support SSE 4a instructions",
 113                                       [FeatureSSE3]>;
 114
     // The AVX levels continue the X86SSELevel chain: AVX implies SSE4.2, AVX2
     // implies AVX, and AVX-512F implies AVX2.
 115 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
 116                                       "Enable AVX instructions",
 117                                       [FeatureSSE42]>;
 118 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
 119                                       "Enable AVX2 instructions",
 120                                       [FeatureAVX]>;
 121 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
 122                                       "Enable AVX-512 instructions",
 123                                       [FeatureAVX2]>;
     // AVX-512 extensions. Each has its own boolean attribute and implies
     // FeatureAVX512, except VBMI (implies BWI) and PREFETCHWT1 (implies nothing).
 124 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
 125                       "Enable AVX-512 Exponential and Reciprocal Instructions",
 126                                       [FeatureAVX512]>;
 127 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 128                       "Enable AVX-512 Conflict Detection Instructions",
 129                                       [FeatureAVX512]>;
 130 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
 131                       "Enable AVX-512 PreFetch Instructions",
 132                                       [FeatureAVX512]>;
 133 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
 134                                    "true",
 135                                    "Prefetch with Intent to Write and T1 Hint">;
 136 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
 137                       "Enable AVX-512 Doubleword and Quadword Instructions",
 138                                       [FeatureAVX512]>;
 139 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
 140                       "Enable AVX-512 Byte and Word Instructions",
 141                                       [FeatureAVX512]>;
 142 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
 143                       "Enable AVX-512 Vector Length eXtensions",
 144                                       [FeatureAVX512]>;
 145 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
 146                       "Enable AVX-512 Vector Byte Manipulation Instructions",
 147                                       [FeatureBWI]>;
     // IFMA stands for Integer Fused Multiply-Add.
 148 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
 149                       "Enable AVX-512 Integer Fused Multiply-Add",
 150                                       [FeatureAVX512]>;
 151 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
 152                       "Enable protection keys">;
 153 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
 154                          "Enable packed carry-less multiplication instructions",
 155                                [FeatureSSE2]>;
     // Fused multiply-add families: FMA implies AVX, FMA4 implies AVX and SSE4A,
     // and XOP implies FMA4.
 156 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
 157                                       "Enable three-operand fused multiply-add",
 158                                       [FeatureAVX]>;
 159 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
 160                                       "Enable four-operand fused multiply-add",
 161                                       [FeatureAVX, FeatureSSE4A]>;
 162 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
 163                                       "Enable XOP instructions",
 164                                       [FeatureFMA4]>;
 165 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
 166                                           "HasSSEUnalignedMem", "true",
 167                       "Allow unaligned memory operands with SSE instructions">;
     // The records from FeatureAES through FeatureMPX each set one Has* boolean.
     // Only AES and SHA (SSE2) and F16C (AVX) list an implied feature.
 168 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
 169                                       "Enable AES instructions",
 170                                       [FeatureSSE2]>;
 171 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
 172                                       "Enable TBM instructions">;
 173 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
 174                                       "Enable LWP instructions">;
 175 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
 176                                       "Support MOVBE instruction">;
     // Note the feature string is "rdrnd", unlike the record and attribute names.
 177 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
 178                                       "Support RDRAND instruction">;
 179 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
 180                        "Support 16-bit floating point conversion instructions",
 181                        [FeatureAVX]>;
 182 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
 183                                        "Support FS/GS Base instructions">;
 184 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
 185                                       "Support LZCNT instruction">;
 186 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
 187                                       "Support BMI instructions">;
 188 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
 189                                       "Support BMI2 instructions">;
 190 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
 191                                       "Support RTM instructions">;
 192 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
 193                                       "Support ADX instructions">;
 194 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
 195                                       "Enable SHA instructions",
 196                                       [FeatureSSE2]>;
 197 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
 198                                       "Support PRFCHW instructions">;
 199 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
 200                                       "Support RDSEED instruction">;
     // The feature string is just "sahf", although the record covers both LAHF
     // and SAHF.
 201 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
 202                                        "Support LAHF and SAHF instructions">;
 203 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
 204                                       "Enable MONITORX/MWAITX timer functionality">;
 205 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
 206                                       "Enable Cache Line Zero">;
 207 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
 208                                       "Support MPX instructions">;
 209 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
 210                                      "Use LEA for adjusting the stack pointer">;
     // The two divide flags are named, in their feature strings, for the rewrite
     // they enable; the attributes they set are HasSlowDivide32/HasSlowDivide64.
 211 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
 212                                      "HasSlowDivide32", "true",
 213                                      "Use 8-bit divide for positive values less than 256">;
 214 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
 215                                      "HasSlowDivide64", "true",
 216                                      "Use 32-bit divide for positive values less than 2^32">;
 217 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
 218                                      "PadShortFunctions", "true",
 219                                      "Pad short functions">;
     // The next three are Has* capability flags with no implied features.
 220 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
 221                                       "Enable Software Guard Extensions">;
 222 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
 223                                       "Flush A Cache Line Optimized">;
 224 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
 225                                       "Cache Line Write Back">;
 226 // TODO: This feature ought to be renamed.
 227 // What it really refers to are CPUs for which certain instructions
 228 // (which ones besides the example below?) are microcoded.
 229 // The best examples of this are the memory forms of CALL and PUSH
 230 // instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
 231 def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
 232                                      "CallRegIndirect", "true",
 233                                      "Call register indirect">;
     // Tuning flags describing how LEA, INC and DEC perform on a given CPU.
 234 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
 235                                    "LEA instruction needs inputs at AG stage">;
 236 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
 237                                    "LEA instruction with certain arguments is slow">;
 238 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 239                                    "INC and DEC instructions are slower than ADD and SUB">;
     // Unlike the flags above, soft-float requests software floating point rather
     // than describing how the CPU behaves.
 240 def FeatureSoftFloat
 241     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
 242                        "Use software floating point features.">;
 243 // On some X86 processors, there is no performance hazard to writing only the
 244 // lower parts of a YMM or ZMM register without clearing the upper part.
 245 def FeatureFastPartialYMMorZMMWrite
 246     : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
 247                        "HasFastPartialYMMorZMMWrite",
 248                        "true", "Partial writes to YMM/ZMM registers are fast">;
 249 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
 250 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
 251 // vector FSQRT has higher throughput than the corresponding NR code.
 252 // The idea is that throughput bound code is likely to be vectorized, so for
 253 // vectorized code we should care about the throughput of SQRT operations.
 254 // But if the code is scalar that probably means that the code has some kind of
 255 // dependency and we should care more about reducing the latency.
     // "NR" is the Newton-Raphson sequence that the two descriptions below say
     // these flags disable.
 256 def FeatureFastScalarFSQRT
 257     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
 258                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
 259 def FeatureFastVectorFSQRT
 260     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
 261                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
 262 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
 263 // be used to replace test/set sequences.
     // This flag is separate from FeatureLZCNT and does not list it as implied.
 264 def FeatureFastLZCNT
 265     : SubtargetFeature<
 266           "fast-lzcnt", "HasFastLZCNT", "true",
 267           "LZCNT instructions are as fast as most simple integer ops">;
 268
 269
 270 // Sandy Bridge and newer processors can use SHLD with the same source on both
 271 // inputs to implement rotate to avoid the partial flag update of the normal
 272 // rotate instructions.
 273 def FeatureFastSHLDRotate
 274     : SubtargetFeature<
 275           "fast-shld-rotate", "HasFastSHLDRotate", "true",
 276           "SHLD can be used as a faster rotate">;
 277
 278 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 279 // "string operations"). See "REP String Enhancement" in the Intel Software
 280 // Development Manual. This feature essentially means that REP MOVSB will copy
 281 // using the largest available size instead of copying bytes one by one, making
 282 // it at least as fast as REP MOVS{W,D,Q}.
 283 def FeatureERMSB
 284     : SubtargetFeature<
 285           "ermsb", "HasERMSB", "true",
 286           "REP MOVS/STOS are fast">;
 287
 288 //===----------------------------------------------------------------------===//
 289 // X86 processors supported.
 290 //===----------------------------------------------------------------------===//
 291
 292 include "X86Schedule.td"
 293
     // Processor-family markers. Unlike the Has*/Is* flags above, these set the
     // X86ProcFamily attribute; they are listed first in the Bonnell and
     // Silvermont feature lists further down.
 294 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
 295                     "Intel Atom processors">;
 296 def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
 297                     "Intel Silvermont processors">;
 298
     // Shorthand for a processor that uses GenericModel as its scheduling model:
     // callers supply only the processor name and its feature list.
 299 class Proc<string Name, list<SubtargetFeature> Features>
 300  : ProcessorModel<Name, GenericModel, Features>;
 301
     // Generic and early Intel processors, all on GenericModel via Proc.
     // "generic", i386, i486, i586, pentium and i686 share one feature list;
     // CMOV first appears on pentiumpro. pentium3 and pentium3m do not repeat
     // CMOV because FeatureSSE1 lists it as implied.
 302 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
 303 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
 304 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
 305 def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
 306 def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
 307 def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 308 def : Proc<"i686",            [FeatureX87, FeatureSlowUAMem16]>;
 309 def : Proc<"pentiumpro",      [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
 310 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 311                                FeatureCMOV, FeatureFXSR]>;
 312 def : Proc<"pentium3",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 313                                FeatureSSE1, FeatureFXSR]>;
 314 def : Proc<"pentium3m",       [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 315                                FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
 316
 317 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
 318 // The intent is to enable it for pentium4 which is the current default
 319 // processor in a vanilla 32-bit clang compilation when no specific
 320 // architecture is specified.  This generally gives a nice performance
 321 // increase on silvermont, with largely neutral behavior on other
 322 // contemporary large core processors.
 323 // pentium-m, pentium4m, prescott and nocona are included as a preventative
 324 // measure to avoid performance surprises, in case clang's default cpu
 325 // changes slightly.
     // The three entries below use GenericPostRAModel and differ only in
     // FeatureSlowBTMem, which pentium4 omits.
 326
 327 def : ProcessorModel<"pentium-m", GenericPostRAModel,
 328                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 329                       FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
 330
 331 def : ProcessorModel<"pentium4", GenericPostRAModel,
 332                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 333                       FeatureSSE2, FeatureFXSR]>;
 334
 335 def : ProcessorModel<"pentium4m", GenericPostRAModel,
 336                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 337                       FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
 338
 339 // Intel Quark.
     // The feature list is empty: not even FeatureX87 is enabled for this entry.
 340 def : Proc<"lakemont",        []>;
 341
 342 // Intel Core Duo.
     // Scheduled with SandyBridgeModel.
 343 def : ProcessorModel<"yonah", SandyBridgeModel,
 344                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
 345                       FeatureFXSR, FeatureSlowBTMem]>;
 346
 347 // NetBurst.
     // Both entries use GenericPostRAModel. nocona has prescott's feature list
     // plus FeatureCMPXCHG16B, which lists Feature64Bit as implied.
 348 def : ProcessorModel<"prescott", GenericPostRAModel,
 349                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
 350                       FeatureFXSR, FeatureSlowBTMem]>;
 351 def : ProcessorModel<"nocona", GenericPostRAModel, [
 352   FeatureX87,
 353   FeatureSlowUAMem16,
 354   FeatureMMX,
 355   FeatureSSE3,
 356   FeatureFXSR,
 357   FeatureCMPXCHG16B,
 358   FeatureSlowBTMem
 359 ]>;
 360
 361 // Intel Core 2 Solo/Duo.
     // Both entries are scheduled with SandyBridgeModel. penryn differs from
     // core2 only in its SSE level (FeatureSSE41 instead of FeatureSSSE3).
 362 def : ProcessorModel<"core2", SandyBridgeModel, [
 363   FeatureX87,
 364   FeatureSlowUAMem16,
 365   FeatureMMX,
 366   FeatureSSSE3,
 367   FeatureFXSR,
 368   FeatureCMPXCHG16B,
 369   FeatureSlowBTMem,
 370   FeatureLAHFSAHF
 371 ]>;
 372 def : ProcessorModel<"penryn", SandyBridgeModel, [
 373   FeatureX87,
 374   FeatureSlowUAMem16,
 375   FeatureMMX,
 376   FeatureSSE41,
 377   FeatureFXSR,
 378   FeatureCMPXCHG16B,
 379   FeatureSlowBTMem,
 380   FeatureLAHFSAHF
 381 ]>;
 382
 383 // Atom CPUs.
     // One class so that "bonnell" and the generic "atom" name get the same
     // scheduling model (AtomModel) and the same feature list. ProcIntelAtom
     // sets the processor family in addition to the individual features.
 384 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
 385   ProcIntelAtom,
 386   FeatureX87,
 387   FeatureSlowUAMem16,
 388   FeatureMMX,
 389   FeatureSSSE3,
 390   FeatureFXSR,
 391   FeatureCMPXCHG16B,
 392   FeatureMOVBE,
 393   FeatureSlowBTMem,
 394   FeatureLEAForSP,
 395   FeatureSlowDivide32,
 396   FeatureSlowDivide64,
 397   FeatureCallRegIndirect,
 398   FeatureLEAUsesAG,
 399   FeaturePadShortFunctions,
 400   FeatureLAHFSAHF
 401 ]>;
 402 def : BonnellProc<"bonnell">;
 403 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
 404
     // Silvermont, scheduled with SLMModel. ProcIntelSLM sets the processor
     // family; "slm" is kept as an alias with the identical definition.
 405 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
 406   ProcIntelSLM,
 407   FeatureX87,
 408   FeatureMMX,
 409   FeatureSSE42,
 410   FeatureFXSR,
 411   FeatureCMPXCHG16B,
 412   FeatureMOVBE,
 413   FeaturePOPCNT,
 414   FeaturePCLMUL,
 415   FeatureAES,
 416   FeatureSlowDivide64,
 417   FeatureCallRegIndirect,
 418   FeaturePRFCHW,
 419   FeatureSlowLEA,
 420   FeatureSlowIncDec,
 421   FeatureSlowBTMem,
 422   FeatureSlowPMULLD,
 423   FeatureLAHFSAHF
 424 ]>;
 425 def : SilvermontProc<"silvermont">;
 426 def : SilvermontProc<"slm">; // Legacy alias.
 427
 428 // "Arrandale" along with corei3 and corei5
     // NOTE(review): the only processor names defined from this class are
     // "nehalem" and "corei7"; confirm the line above is still accurate.
     // Scheduled with SandyBridgeModel.
 429 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
 430   FeatureX87,
 431   FeatureMMX,
 432   FeatureSSE42,
 433   FeatureFXSR,
 434   FeatureCMPXCHG16B,
 435   FeatureSlowBTMem,
 436   FeaturePOPCNT,
 437   FeatureLAHFSAHF
 438 ]>;
 439 def : NehalemProc<"nehalem">;
 440 def : NehalemProc<"corei7">;
 441
 442 // Westmere is a similar machine to nehalem with some additional features.
 443 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
     // The additional features are FeatureAES and FeaturePCLMUL; the rest of the
     // list and the scheduling model match NehalemProc.
 444 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
 445   FeatureX87,
 446   FeatureMMX,
 447   FeatureSSE42,
 448   FeatureFXSR,
 449   FeatureCMPXCHG16B,
 450   FeatureSlowBTMem,
 451   FeaturePOPCNT,
 452   FeatureAES,
 453   FeaturePCLMUL,
 454   FeatureLAHFSAHF
 455 ]>;
 456 def : WestmereProc<"westmere">;
 457
     // Holds a cumulative feature list: Value is Inherited followed by
     // NewFeatures. Later generations pass an earlier record's Value as
     // Inherited to build on it.
 458 class ProcessorFeatures<list<SubtargetFeature> Inherited,
 459                         list<SubtargetFeature> NewFeatures> {
 460   list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
 461 }
 462
     // A ProcessorModel whose feature list is ProcFeatures followed by
     // OtherFeatures. In the uses below, ProcFeatures is a generation's shared
     // list (a ProcessorFeatures Value) and OtherFeatures holds per-class extras.
 463 class ProcModel<string Name, SchedMachineModel Model,
 464                 list<SubtargetFeature> ProcFeatures,
 465                 list<SubtargetFeature> OtherFeatures> :
 466   ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
 467
 468 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 469 // rather than a superset.
 470 def SNBFeatures : ProcessorFeatures<[], [
 471   FeatureX87,
 472   FeatureMMX,
 473   FeatureAVX,
 474   FeatureFXSR,
 475   FeatureCMPXCHG16B,
 476   FeaturePOPCNT,
 477   FeatureAES,
 478   FeatureSlowDivide64,
 479   FeaturePCLMUL,
 480   FeatureXSAVE,
 481   FeatureXSAVEOPT,
 482   FeatureLAHFSAHF,
 483   FeatureFastScalarFSQRT,
 484   FeatureFastSHLDRotate
 485 ]>;
 486
     // FeatureSlowBTMem and FeatureSlowUAMem32 are passed as per-class extras
     // rather than placed in SNBFeatures, so records that build on
     // SNBFeatures.Value do not pick them up automatically.
 487 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
 488                                                SNBFeatures.Value, [
 489   FeatureSlowBTMem,
 490   FeatureSlowUAMem32
 491 ]>;
 492 def : SandyBridgeProc<"sandybridge">;
 493 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
 494
     // Ivy Bridge: the Sandy Bridge list plus RDRAND, F16C and FSGSBASE. It keeps
     // SandyBridgeModel and repeats the two slow-* extras explicitly.
 495 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
 496   FeatureRDRAND,
 497   FeatureF16C,
 498   FeatureFSGSBase
 499 ]>;
 500
 501 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
 502                                              IVBFeatures.Value, [
 503   FeatureSlowBTMem,
 504   FeatureSlowUAMem32
 505 ]>;
 506 def : IvyBridgeProc<"ivybridge">;
 507 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
 508
     // Haswell: extends the Ivy Bridge list and switches to HaswellModel. No
     // per-class extras are passed, so FeatureSlowBTMem and FeatureSlowUAMem32
     // are not set from here on.
 509 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
 510   FeatureAVX2,
 511   FeatureBMI,
 512   FeatureBMI2,
 513   FeatureERMSB,
 514   FeatureFMA,
 515   FeatureLZCNT,
 516   FeatureMOVBE,
 517   FeatureSlowIncDec
 518 ]>;
 519
 520 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
 521                                            HSWFeatures.Value, []>;
 522 def : HaswellProc<"haswell">;
 523 def : HaswellProc<"core-avx2">; // Legacy alias.
 524
     // Broadwell: the Haswell list plus ADX and RDSEED, still on HaswellModel.
 525 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
 526   FeatureADX,
 527   FeatureRDSEED
 528 ]>;
 529 class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
 530                                              BDWFeatures.Value, []>;
 531 def : BroadwellProc<"broadwell">;
 532
     // Skylake (client): the Broadwell list plus the features below.
 533 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
 534   FeatureMPX,
 535   FeatureRTM,
 536   FeatureXSAVEC,
 537   FeatureXSAVES,
 538   FeatureSGX,
 539   FeatureCLFLUSHOPT,
 540   FeatureFastVectorFSQRT
 541 ]>;
 542
 543 // FIXME: define SKL model
     // Until then, HaswellModel is used for scheduling.
 544 class SkylakeClientProc<string Name> : ProcModel<Name, HaswellModel,
 545                                                  SKLFeatures.Value, []>;
 546 def : SkylakeClientProc<"skylake">;
 547
 548 // FIXME: define KNL model
     // Until then, HaswellModel is used for scheduling. The feature list starts
     // from IVBFeatures, not the Haswell or Broadwell lists, so features added
     // after Ivy Bridge (FMA, BMI, BMI2, LZCNT, MOVBE, ADX, RDSEED) are named
     // explicitly; AVX2 comes in through FeatureAVX512.
 549 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
 550                                                   IVBFeatures.Value, [
 551   FeatureAVX512,
 552   FeatureERI,
 553   FeatureCDI,
 554   FeaturePFI,
 555   FeaturePREFETCHWT1,
 556   FeatureADX,
 557   FeatureRDSEED,
 558   FeatureMOVBE,
 559   FeatureLZCNT,
 560   FeatureBMI,
 561   FeatureBMI2,
 562   FeatureFMA,
 563   FeatureFastPartialYMMorZMMWrite
 564 ]>;
 565 def : KnightsLandingProc<"knl">;
 566
     // Skylake (server): the client Skylake list plus AVX-512 (F, CD, DQ, BW, VL),
     // protection keys and CLWB.
 567 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
 568   FeatureAVX512,
 569   FeatureCDI,
 570   FeatureDQI,
 571   FeatureBWI,
 572   FeatureVLX,
 573   FeaturePKU,
 574   FeatureCLWB
 575 ]>;
 576
 577 // FIXME: define SKX model
     // Until then, HaswellModel is used for scheduling.
 578 class SkylakeServerProc<string Name> : ProcModel<Name, HaswellModel,
 579                                                  SKXFeatures.Value, []>;
 580 def : SkylakeServerProc<"skylake-avx512">;
 581 def : SkylakeServerProc<"skx">; // Legacy alias.
 582
     // Cannonlake: the Skylake-server list plus VBMI, IFMA and SHA; scheduled
     // with HaswellModel like the other classes built on these lists.
 583 def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
 584   FeatureVBMI,
 585   FeatureIFMA,
 586   FeatureSHA
 587 ]>;
 588
 589 class CannonlakeProc<string Name> : ProcModel<Name, HaswellModel,
 590                                               CNLFeatures.Value, []>;
 591 def : CannonlakeProc<"cannonlake">;
 592
 593 // AMD CPUs.
     // Every entry in this group goes through Proc and so uses GenericModel.
     // Several names share one feature list: k6-2/k6-3, athlon/athlon-tbird,
     // athlon-4/-xp/-mp, k8/opteron/athlon64/athlon-fx, the three *-sse3 names,
     // and amdfam10/barcelona.
 594
 595 def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 596 def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 597 def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 598 def : Proc<"athlon",          [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
 599                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 600 def : Proc<"athlon-tbird",    [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
 601                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 602 def : Proc<"athlon-4",        [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
 603                                Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
 604                                FeatureSlowSHLD]>;
 605 def : Proc<"athlon-xp",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
 606                                Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
 607                                FeatureSlowSHLD]>;
 608 def : Proc<"athlon-mp",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
 609                                Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
 610                                FeatureSlowSHLD]>;
 611 def : Proc<"k8",              [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
 612                                Feature3DNowA, FeatureFXSR, Feature64Bit,
 613                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 614 def : Proc<"opteron",         [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
 615                                Feature3DNowA, FeatureFXSR, Feature64Bit,
 616                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 617 def : Proc<"athlon64",        [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
 618                                Feature3DNowA, FeatureFXSR, Feature64Bit,
 619                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 620 def : Proc<"athlon-fx",       [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
 621                                Feature3DNowA, FeatureFXSR, Feature64Bit,
 622                                FeatureSlowBTMem, FeatureSlowSHLD]>;
     // The *-sse3 entries list FeatureCMPXCHG16B, which implies Feature64Bit,
     // where the entries above list Feature64Bit directly.
 623 def : Proc<"k8-sse3",         [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
 624                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
 625                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 626 def : Proc<"opteron-sse3",    [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
 627                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
 628                                FeatureSlowBTMem, FeatureSlowSHLD]>;
 629 def : Proc<"athlon64-sse3",   [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
 630                                Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
 631                                FeatureSlowBTMem, FeatureSlowSHLD]>;
     // amdfam10 and barcelona are the first entries here without
     // FeatureSlowUAMem16.
 632 def : Proc<"amdfam10",        [FeatureX87, FeatureSSE4A, Feature3DNowA,
 633                                FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
 634                                FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
 635                                FeatureLAHFSAHF]>;
 636 def : Proc<"barcelona",       [FeatureX87, FeatureSSE4A, Feature3DNowA,
 637                                FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
 638                                FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
 639                                FeatureLAHFSAHF]>;
 640
 641 // Bobcat
 // Declared with Proc, so no scheduling model is named for btver1 at this
 // site (contrast btver2, which uses ProcessorModel with BtVer2Model).
 642 def : Proc<"btver1", [
 643   FeatureX87,
 644   FeatureMMX,
 645   FeatureSSSE3,
 646   FeatureSSE4A,
 647   FeatureFXSR,
 648   FeatureCMPXCHG16B,
 649   FeaturePRFCHW,
 650   FeatureLZCNT,
 651   FeaturePOPCNT,
 652   FeatureSlowSHLD,
 653   FeatureLAHFSAHF
 654 ]>;
 655
 656 // Jaguar
 // Declared with ProcessorModel so that BtVer2Model is attached as the
 // scheduling model. Relative to btver1 the list has FeatureAVX in place of
 // FeatureSSSE3 and additionally lists AES, PCLMUL, BMI, F16C, MOVBE,
 // FastLZCNT, XSAVE, XSAVEOPT and FastPartialYMMorZMMWrite.
 657 def : ProcessorModel<"btver2", BtVer2Model, [
 658   FeatureX87,
 659   FeatureMMX,
 660   FeatureAVX,
 661   FeatureFXSR,
 662   FeatureSSE4A,
 663   FeatureCMPXCHG16B,
 664   FeaturePRFCHW,
 665   FeatureAES,
 666   FeaturePCLMUL,
 667   FeatureBMI,
 668   FeatureF16C,
 669   FeatureMOVBE,
 670   FeatureLZCNT,
 671   FeatureFastLZCNT,
 672   FeaturePOPCNT,
 673   FeatureXSAVE,
 674   FeatureXSAVEOPT,
 675   FeatureSlowSHLD,
 676   FeatureLAHFSAHF,
 677   FeatureFastPartialYMMorZMMWrite
 678 ]>;
 679
 680 // Bulldozer
 // First entry of the bdver series, declared with Proc (no scheduling model
 // named here). The bdver2 and bdver3 entries repeat this list and add to it.
 681 def : Proc<"bdver1", [
 682   FeatureX87,
 683   FeatureXOP,
 684   FeatureFMA4,
 685   FeatureCMPXCHG16B,
 686   FeatureAES,
 687   FeaturePRFCHW,
 688   FeaturePCLMUL,
 689   FeatureMMX,
 690   FeatureAVX,
 691   FeatureFXSR,
 692   FeatureSSE4A,
 693   FeatureLZCNT,
 694   FeaturePOPCNT,
 695   FeatureXSAVE,
 696   FeatureLWP,
 697   FeatureSlowSHLD,
 698   FeatureLAHFSAHF
 699 ]>;
 700 // Piledriver
 // Same entries as bdver1 plus FeatureF16C, FeatureBMI, FeatureTBM and
 // FeatureFMA.
 701 def : Proc<"bdver2", [
 702   FeatureX87,
 703   FeatureXOP,
 704   FeatureFMA4,
 705   FeatureCMPXCHG16B,
 706   FeatureAES,
 707   FeaturePRFCHW,
 708   FeaturePCLMUL,
 709   FeatureMMX,
 710   FeatureAVX,
 711   FeatureFXSR,
 712   FeatureSSE4A,
 713   FeatureF16C,
 714   FeatureLZCNT,
 715   FeaturePOPCNT,
 716   FeatureXSAVE,
 717   FeatureBMI,
 718   FeatureTBM,
 719   FeatureLWP,
 720   FeatureFMA,
 721   FeatureSlowSHLD,
 722   FeatureLAHFSAHF
 723 ]>;
 724
 725 // Steamroller
 // Same entries as bdver2 plus FeatureXSAVEOPT and FeatureFSGSBase.
 726 def : Proc<"bdver3", [
 727   FeatureX87,
 728   FeatureXOP,
 729   FeatureFMA4,
 730   FeatureCMPXCHG16B,
 731   FeatureAES,
 732   FeaturePRFCHW,
 733   FeaturePCLMUL,
 734   FeatureMMX,
 735   FeatureAVX,
 736   FeatureFXSR,
 737   FeatureSSE4A,
 738   FeatureF16C,
 739   FeatureLZCNT,
 740   FeaturePOPCNT,
 741   FeatureXSAVE,
 742   FeatureBMI,
 743   FeatureTBM,
 744   FeatureLWP,
 745   FeatureFMA,
 746   FeatureXSAVEOPT,
 747   FeatureSlowSHLD,
 748   FeatureFSGSBase,
 749   FeatureLAHFSAHF
 750 ]>;
 751
 752 // Excavator
 // Differs from bdver3 by listing FeatureAVX2 in place of FeatureAVX and by
 // adding FeatureBMI2 and FeatureMWAITX. FeatureSSE4A is not listed directly.
 // NOTE(review): SSE4A is presumably still implied by another listed feature
 // (e.g. XOP/FMA4) -- confirm against the feature definitions.
 753 def : Proc<"bdver4", [
 754   FeatureX87,
 755   FeatureMMX,
 756   FeatureAVX2,
 757   FeatureFXSR,
 758   FeatureXOP,
 759   FeatureFMA4,
 760   FeatureCMPXCHG16B,
 761   FeatureAES,
 762   FeaturePRFCHW,
 763   FeaturePCLMUL,
 764   FeatureF16C,
 765   FeatureLZCNT,
 766   FeaturePOPCNT,
 767   FeatureXSAVE,
 768   FeatureBMI,
 769   FeatureBMI2,
 770   FeatureTBM,
 771   FeatureLWP,
 772   FeatureFMA,
 773   FeatureXSAVEOPT,
 774   FeatureSlowSHLD,
 775   FeatureFSGSBase,
 776   FeatureLAHFSAHF,
 777   FeatureMWAITX
 778 ]>;
 779
 780 // TODO: znver1 currently falls back to the btver2 scheduling model
 781 // (BtVer2Model). A dedicated znver1 model still has to be put in place.
 782 // Zen
 // The feature list below is kept in sorted (ASCII) order; keep it sorted
 // when adding entries.
 783 def : ProcessorModel<"znver1", BtVer2Model, [
 784   FeatureADX,
 785   FeatureAES,
 786   FeatureAVX2,
 787   FeatureBMI,
 788   FeatureBMI2,
 789   FeatureCLFLUSHOPT,
 790   FeatureCLZERO,
 791   FeatureCMPXCHG16B,
 792   FeatureF16C,
 793   FeatureFMA,
 794   FeatureFSGSBase,
 795   FeatureFXSR,
 796   FeatureFastLZCNT,
 797   FeatureLAHFSAHF,
 798   FeatureLZCNT,
 799   FeatureMMX,
 800   FeatureMOVBE,
 801   FeatureMWAITX,
 802   FeaturePCLMUL,
 803   FeaturePOPCNT,
 804   FeaturePRFCHW,
 805   FeatureRDRAND,
 806   FeatureRDSEED,
 807   FeatureSHA,
 808   FeatureSSE4A,
 809   FeatureSlowSHLD,
 810   FeatureX87,
 811   FeatureXSAVE,
 812   FeatureXSAVEC,
 813   FeatureXSAVEOPT,
 814   FeatureXSAVES]>;
 815
 // geode, winchip-c6, winchip2, c3 and c3-2: plain Proc entries, none of
 // which lists Feature64Bit. winchip2 and c3 have identical feature lists.
 816 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 817
 818 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
 819 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 820 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
 821 def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
 822                                FeatureSSE1, FeatureFXSR]>;
 823
 824 // We also provide a generic 64-bit specific x86 processor model which tries to
 825 // be good for modern chips without enabling instruction set encodings past the
 826 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
 827 // modern 64-bit x86 chip, and enables features that are generally beneficial.
 828 //
 829 // We currently use the Sandy Bridge model as the default scheduling model as
 830 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
 831 // covers a huge swath of x86 processors. If there are specific scheduling
 832 // knobs which need to be tuned differently for AMD chips, we might consider
 833 // forming a common base for them.
 //
 // Of the Slow* flags, only FeatureSlowBTMem is listed here; FeatureSlowUAMem16
 // and FeatureSlowSHLD, which the k8 entries earlier in this file carry, are
 // not.
 834 def : ProcessorModel<"x86-64", SandyBridgeModel,
 835                      [FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR,
 836                       Feature64Bit, FeatureSlowBTMem ]>;
 837
 838 //===----------------------------------------------------------------------===//
 839 // Register File Description
 840 //===----------------------------------------------------------------------===//
 841
 842 include "X86RegisterInfo.td"
 843 include "X86RegisterBanks.td"
 844
 845 //===----------------------------------------------------------------------===//
 846 // Instruction Descriptions
 847 //===----------------------------------------------------------------------===//
 848
 849 include "X86InstrInfo.td"
 850
 // Instantiates the InstrInfo class under the name X86InstrInfo. The X86
 // Target record at the end of this file uses it as its InstructionSet.
 851 def X86InstrInfo : InstrInfo;
 852
 853 //===----------------------------------------------------------------------===//
 854 // Calling Conventions
 855 //===----------------------------------------------------------------------===//
 856
 857 include "X86CallingConv.td"
 858
 859
 860 //===----------------------------------------------------------------------===//
 861 // Assembly Parser
 862 //===----------------------------------------------------------------------===//
 863
 // Parser variant named "att". It uses Variant = 0, the same number that
 // ATTAsmWriter uses, with "#" as comment delimiter and "%" as register
 // prefix.
 864 def ATTAsmParserVariant : AsmParserVariant {
 865   int Variant = 0;
 866
 867   // Variant name.
 868   string Name = "att";
 869
 870   // Discard comments in assembly strings.
 871   string CommentDelimiter = "#";
 872
 873   // Recognize hard coded registers.
 874   string RegisterPrefix = "%";
 875 }
 876
 // Parser variant named "intel". It uses Variant = 1, the same number that
 // IntelAsmWriter uses, with ";" as comment delimiter and an empty register
 // prefix.
 877 def IntelAsmParserVariant : AsmParserVariant {
 878   int Variant = 1;
 879
 880   // Variant name.
 881   string Name = "intel";
 882
 883   // Discard comments in assembly strings.
 884   string CommentDelimiter = ";";
 885
 886   // Recognize hard coded registers.
 887   string RegisterPrefix = "";
 888 }
 889
 890 //===----------------------------------------------------------------------===//
 891 // Assembly Printers
 892 //===----------------------------------------------------------------------===//
 893
 894 // The X86 target supports two different syntaxes for emitting machine code.
 895 // This is controlled by the -x86-asm-syntax={att|intel} option.
 // Writer for variant 0 (the number ATTAsmParserVariant also uses); names
 // "ATTInstPrinter" as its AsmWriterClassName.
 896 def ATTAsmWriter : AsmWriter {
 897   string AsmWriterClassName  = "ATTInstPrinter";
 898   int Variant = 0;
 899 }
 // Writer for variant 1 (the number IntelAsmParserVariant also uses); names
 // "IntelInstPrinter" as its AsmWriterClassName.
 900 def IntelAsmWriter : AsmWriter {
 901   string AsmWriterClassName  = "IntelInstPrinter";
 902   int Variant = 1;
 903 }
 904
 // Top-level Target record: ties together the instruction set (X86InstrInfo)
 // and the two assembly parser variants and writers defined above. Both
 // lists are given in att, intel order, matching Variant numbers 0 and 1.
 905 def X86 : Target {
 906   // Information about the instructions...
 907   let InstructionSet = X86InstrInfo;
 908   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
 909   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
 910 }