]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/Target/X86/X86.td
Vendor import of llvm trunk r321414:
[FreeBSD/FreeBSD.git] / lib / Target / X86 / X86.td
1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is a target description file for the Intel i386 architecture, referred
11 // to here as the "X86" architecture.
12 //
13 //===----------------------------------------------------------------------===//
14
15 // Get the target-independent interfaces which we are implementing...
16 //
17 include "llvm/Target/Target.td"
18
19 //===----------------------------------------------------------------------===//
20 // X86 Subtarget state
21 //
22
23 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
24                                   "64-bit mode (x86_64)">;
25 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
26                                   "32-bit mode (80386)">;
27 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
28                                   "16-bit mode (i8086)">;
29
30 //===----------------------------------------------------------------------===//
31 // X86 Subtarget features
32 //===----------------------------------------------------------------------===//
33
34 def FeatureX87     : SubtargetFeature<"x87","HasX87", "true",
35                                       "Enable X87 float instructions">;
36
37 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
38                                       "Enable conditional move instructions">;
39
40 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
41                                        "Support POPCNT instruction">;
42
43 def FeatureFXSR    : SubtargetFeature<"fxsr", "HasFXSR", "true",
44                                       "Support fxsave/fxrestore instructions">;
45
46 def FeatureXSAVE   : SubtargetFeature<"xsave", "HasXSAVE", "true",
47                                        "Support xsave instructions">;
48
49 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
50                                        "Support xsaveopt instructions">;
51
52 def FeatureXSAVEC  : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
53                                        "Support xsavec instructions">;
54
55 def FeatureXSAVES  : SubtargetFeature<"xsaves", "HasXSAVES", "true",
56                                        "Support xsaves instructions">;
57
58 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
59                                       "Enable SSE instructions",
60                                       // SSE codegen depends on cmovs, and all
61                                       // SSE1+ processors support them.
62                                       [FeatureCMOV]>;
63 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
64                                       "Enable SSE2 instructions",
65                                       [FeatureSSE1]>;
66 def FeatureSSE3    : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
67                                       "Enable SSE3 instructions",
68                                       [FeatureSSE2]>;
69 def FeatureSSSE3   : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
70                                       "Enable SSSE3 instructions",
71                                       [FeatureSSE3]>;
72 def FeatureSSE41   : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
73                                       "Enable SSE 4.1 instructions",
74                                       [FeatureSSSE3]>;
75 def FeatureSSE42   : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
76                                       "Enable SSE 4.2 instructions",
77                                       [FeatureSSE41]>;
78 // The MMX subtarget feature is separate from the rest of the SSE features
79 // because it's important (for odd compatibility reasons) to be able to
80 // turn it off explicitly while allowing SSE+ to be on.
81 def FeatureMMX     : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
82                                       "Enable MMX instructions">;
83 def Feature3DNow   : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
84                                       "Enable 3DNow! instructions",
85                                       [FeatureMMX]>;
86 def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
87                                       "Enable 3DNow! Athlon instructions",
88                                       [Feature3DNow]>;
89 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
90 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
91 // without disabling 64-bit mode.
92 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
93                                       "Support 64-bit instructions",
94                                       [FeatureCMOV]>;
95 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
96                                       "64-bit with cmpxchg16b",
97                                       [Feature64Bit]>;
98 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
99                                        "SHLD instruction is slow">;
100 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
101                                         "PMULLD instruction is slow">;
102 // FIXME: This should not apply to CPUs that do not have SSE.
103 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
104                                 "IsUAMem16Slow", "true",
105                                 "Slow unaligned 16-byte memory access">;
106 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
107                                 "IsUAMem32Slow", "true",
108                                 "Slow unaligned 32-byte memory access">;
109 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
110                                       "Support SSE 4a instructions",
111                                       [FeatureSSE3]>;
112
113 def FeatureAVX     : SubtargetFeature<"avx", "X86SSELevel", "AVX",
114                                       "Enable AVX instructions",
115                                       [FeatureSSE42]>;
116 def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
117                                       "Enable AVX2 instructions",
118                                       [FeatureAVX]>;
// Three-operand FMA. Listing FeatureAVX makes AVX an implied feature.
// The description is the user-visible help text; it names the operation
// "fused multiply-add" (previously misspelled "multiple-add").
119 def FeatureFMA     : SubtargetFeature<"fma", "HasFMA", "true",
120                                       "Enable three-operand fused multiply-add",
121                                       [FeatureAVX]>;
122 def FeatureF16C    : SubtargetFeature<"f16c", "HasF16C", "true",
123                        "Support 16-bit floating point conversion instructions",
124                        [FeatureAVX]>;
125 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
126                                       "Enable AVX-512 instructions",
127                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
128 def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
129                       "Enable AVX-512 Exponential and Reciprocal Instructions",
130                                       [FeatureAVX512]>;
131 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
132                       "Enable AVX-512 Conflict Detection Instructions",
133                                       [FeatureAVX512]>;
134 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
135                        "true", "Enable AVX-512 Population Count Instructions",
136                                       [FeatureAVX512]>;
137 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
138                       "Enable AVX-512 PreFetch Instructions",
139                                       [FeatureAVX512]>;
140 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
141                                    "true",
142                                    "Prefetch with Intent to Write and T1 Hint">;
143 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
144                       "Enable AVX-512 Doubleword and Quadword Instructions",
145                                       [FeatureAVX512]>;
146 def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
147                       "Enable AVX-512 Byte and Word Instructions",
148                                       [FeatureAVX512]>;
149 def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
150                       "Enable AVX-512 Vector Length eXtensions",
151                                       [FeatureAVX512]>;
152 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
153                       "Enable AVX-512 Vector Byte Manipulation Instructions",
154                                       [FeatureBWI]>;
155 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
156                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
157                                       [FeatureBWI]>;
// AVX-512 IFMA. Listing FeatureAVX512 makes AVX-512F an implied feature.
// The description is the user-visible help text; it names the operation
// "Fused Multiply-Add" (previously misspelled "Multiple-Add").
158 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
159                       "Enable AVX-512 Integer Fused Multiply-Add",
160                                       [FeatureAVX512]>;
161 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
162                       "Enable protection keys">;
163 def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
164                           "Enable AVX-512 Vector Neural Network Instructions",
165                                       [FeatureAVX512]>;
166 def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
167                        "Enable AVX-512 Bit Algorithms",
168                         [FeatureBWI]>;
169 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
170                          "Enable packed carry-less multiplication instructions",
171                                [FeatureSSE2]>;
172 def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true",
173                          "Enable Galois Field Arithmetic Instructions",
174                                [FeatureSSE2]>;
175 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
176                                          "Enable vpclmulqdq instructions",
177                                          [FeatureAVX, FeaturePCLMUL]>;
// Four-operand FMA. Listing FeatureAVX and FeatureSSE4A makes both implied.
// The description is the user-visible help text; it names the operation
// "fused multiply-add" (previously misspelled "multiple-add").
178 def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
179                                       "Enable four-operand fused multiply-add",
180                                       [FeatureAVX, FeatureSSE4A]>;
181 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
182                                       "Enable XOP instructions",
183                                       [FeatureFMA4]>;
184 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
185                                           "HasSSEUnalignedMem", "true",
186                       "Allow unaligned memory operands with SSE instructions">;
187 def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
188                                       "Enable AES instructions",
189                                       [FeatureSSE2]>;
190 def FeatureVAES    : SubtargetFeature<"vaes", "HasVAES", "true",
191                        "Promote selected AES instructions to AVX512/AVX registers",
192                         [FeatureAVX, FeatureAES]>;
193 def FeatureTBM     : SubtargetFeature<"tbm", "HasTBM", "true",
194                                       "Enable TBM instructions">;
195 def FeatureLWP     : SubtargetFeature<"lwp", "HasLWP", "true",
196                                       "Enable LWP instructions">;
197 def FeatureMOVBE   : SubtargetFeature<"movbe", "HasMOVBE", "true",
198                                       "Support MOVBE instruction">;
199 def FeatureRDRAND  : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
200                                       "Support RDRAND instruction">;
201 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
202                                        "Support FS/GS Base instructions">;
203 def FeatureLZCNT   : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
204                                       "Support LZCNT instruction">;
205 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
206                                       "Support BMI instructions">;
207 def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
208                                       "Support BMI2 instructions">;
209 def FeatureRTM     : SubtargetFeature<"rtm", "HasRTM", "true",
210                                       "Support RTM instructions">;
211 def FeatureADX     : SubtargetFeature<"adx", "HasADX", "true",
212                                       "Support ADX instructions">;
213 def FeatureSHA     : SubtargetFeature<"sha", "HasSHA", "true",
214                                       "Enable SHA instructions",
215                                       [FeatureSSE2]>;
216 def FeatureSHSTK   : SubtargetFeature<"shstk", "HasSHSTK", "true",
217                        "Support CET Shadow-Stack instructions">;
218 def FeatureIBT     : SubtargetFeature<"ibt", "HasIBT", "true",
219                        "Support CET Indirect-Branch-Tracking instructions">;
220 def FeaturePRFCHW  : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
221                                       "Support PRFCHW instructions">;
222 def FeatureRDSEED  : SubtargetFeature<"rdseed", "HasRDSEED", "true",
223                                       "Support RDSEED instruction">;
224 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
225                                        "Support LAHF and SAHF instructions">;
226 def FeatureMWAITX  : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
227                                       "Enable MONITORX/MWAITX timer functionality">;
228 def FeatureCLZERO  : SubtargetFeature<"clzero", "HasCLZERO", "true",
229                                       "Enable Cache Line Zero">;
230 def FeatureMPX     : SubtargetFeature<"mpx", "HasMPX", "true",
231                                       "Support MPX instructions">;
232 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
233                                      "Use LEA for adjusting the stack pointer">;
234 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
235                                      "HasSlowDivide32", "true",
236                                      "Use 8-bit divide for positive values less than 256">;
237 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
238                                      "HasSlowDivide64", "true",
239                                      "Use 32-bit divide for positive values less than 2^32">;
240 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
241                                      "PadShortFunctions", "true",
242                                      "Pad short functions">;
243 def FeatureSGX     : SubtargetFeature<"sgx", "HasSGX", "true",
244                                       "Enable Software Guard Extensions">;
245 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
246                                       "Flush A Cache Line Optimized">;
247 def FeatureCLWB    : SubtargetFeature<"clwb", "HasCLWB", "true",
248                                       "Cache Line Write Back">;
249 // On some processors, instructions that implicitly take two memory operands are
250 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
251 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
252 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
253                                      "SlowTwoMemOps", "true",
254                                      "Two memory operand instructions are slow">;
255 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
256                                    "LEA instruction needs inputs at AG stage">;
257 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
258                                    "LEA instruction with certain arguments is slow">;
259 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
260                                    "LEA instruction with 3 ops or certain registers is slow">;
261 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
262                                    "INC and DEC instructions are slower than ADD and SUB">;
263 def FeatureSoftFloat
264     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
265                        "Use software floating point features.">;
266 // On recent X86 (port bound) processors, it's preferable to combine into a single
267 // shuffle using a variable mask rather than using multiple fixed shuffles.
268 def FeatureFastVariableShuffle
269     : SubtargetFeature<"fast-variable-shuffle",
270                        "HasFastVariableShuffle",
271                        "true", "Shuffles with variable masks are fast">;
272 // On some X86 processors, there is no performance hazard to writing only the
273 // lower parts of a YMM or ZMM register without clearing the upper part.
274 def FeatureFastPartialYMMorZMMWrite
275     : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
276                        "HasFastPartialYMMorZMMWrite",
277                        "true", "Partial writes to YMM/ZMM registers are fast">;
278 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
279 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
280 // vector FSQRT has higher throughput than the corresponding NR code.
281 // The idea is that throughput bound code is likely to be vectorized, so for
282 // vectorized code we should care about the throughput of SQRT operations.
283 // But if the code is scalar that probably means that the code has some kind of
284 // dependency and we should care more about reducing the latency.
285 def FeatureFastScalarFSQRT
286     : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
287                        "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
288 def FeatureFastVectorFSQRT
289     : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
290                        "true", "Vector SQRT is fast (disable Newton-Raphson)">;
291 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
292 // be used to replace test/set sequences.
293 def FeatureFastLZCNT
294     : SubtargetFeature<
295           "fast-lzcnt", "HasFastLZCNT", "true",
296           "LZCNT instructions are as fast as most simple integer ops">;
297
298
299 // Sandy Bridge and newer processors can use SHLD with the same source on both
300 // inputs to implement rotate to avoid the partial flag update of the normal
301 // rotate instructions.
302 def FeatureFastSHLDRotate
303     : SubtargetFeature<
304           "fast-shld-rotate", "HasFastSHLDRotate", "true",
305           "SHLD can be used as a faster rotate">;
306
307 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
308 // "string operations"). See "REP String Enhancement" in the Intel Software
309 // Development Manual. This feature essentially means that REP MOVSB will copy
310 // using the largest available size instead of copying bytes one by one, making
311 // it at least as fast as REP MOVS{W,D,Q}.
312 def FeatureERMSB
313     : SubtargetFeature<
314           "ermsb", "HasERMSB", "true",
315           "REP MOVS/STOS are fast">;
316
317 // Sandy Bridge and newer processors have many instructions that can be
318 // fused with conditional branches and pass through the CPU as a single
319 // operation.
320 def FeatureMacroFusion
321     : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
322                  "Various instructions can be fused with conditional branches">;
323
324 // Gather is available since Haswell (AVX2 set). So technically, we can
325 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
326 // Skylake Client processor has faster Gathers than HSW and performance is
327 // similar to Skylake Server (AVX-512).
328 def FeatureHasFastGather
329     : SubtargetFeature<"fast-gather", "HasFastGather", "true",
330                        "Indicates if gather is reasonably fast.">;
331
332 //===----------------------------------------------------------------------===//
333 // Register File Description
334 //===----------------------------------------------------------------------===//
335
336 include "X86RegisterInfo.td"
337 include "X86RegisterBanks.td"
338
339 //===----------------------------------------------------------------------===//
340 // Instruction Descriptions
341 //===----------------------------------------------------------------------===//
342
343 include "X86Schedule.td"
344 include "X86InstrInfo.td"
345
346 def X86InstrInfo : InstrInfo;
347
348 //===----------------------------------------------------------------------===//
349 // X86 processors supported.
350 //===----------------------------------------------------------------------===//
351
352 include "X86ScheduleAtom.td"
353 include "X86SchedSandyBridge.td"
354 include "X86SchedHaswell.td"
355 include "X86SchedBroadwell.td"
356 include "X86ScheduleSLM.td"
357 include "X86ScheduleZnver1.td"
358 include "X86ScheduleBtVer2.td"
359 include "X86SchedSkylakeClient.td"
360 include "X86SchedSkylakeServer.td"
361
362 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
363                     "Intel Atom processors">;
364 def ProcIntelSLM  : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
365                     "Intel Silvermont processors">;
366 def ProcIntelGLM  : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
367                     "Intel Goldmont processors">;
368 def ProcIntelHSW  : SubtargetFeature<"haswell", "X86ProcFamily",
369                     "IntelHaswell", "Intel Haswell processors">;
370 def ProcIntelBDW  : SubtargetFeature<"broadwell", "X86ProcFamily",
371                     "IntelBroadwell", "Intel Broadwell processors">;
372 def ProcIntelSKL  : SubtargetFeature<"skylake", "X86ProcFamily",
373                     "IntelSkylake", "Intel Skylake processors">;
374 def ProcIntelKNL  : SubtargetFeature<"knl", "X86ProcFamily",
375                     "IntelKNL", "Intel Knights Landing processors">;
376 def ProcIntelSKX  : SubtargetFeature<"skx", "X86ProcFamily",
377                     "IntelSKX", "Intel Skylake Server processors">;
378 def ProcIntelCNL  : SubtargetFeature<"cannonlake", "X86ProcFamily",
379                     "IntelCannonlake", "Intel Cannonlake processors">;
380 def ProcIntelICL  : SubtargetFeature<"icelake", "X86ProcFamily",
381                     "IntelIcelake", "Intel Icelake processors">;
382
// Shorthand for a processor that uses the generic scheduling model:
// Proc<Name, Features> is ProcessorModel<Name, GenericModel, Features>.
383 class Proc<string Name, list<SubtargetFeature> Features>
384  : ProcessorModel<Name, GenericModel, Features>;
385
386 def : Proc<"generic",         [FeatureX87, FeatureSlowUAMem16]>;
387 def : Proc<"i386",            [FeatureX87, FeatureSlowUAMem16]>;
388 def : Proc<"i486",            [FeatureX87, FeatureSlowUAMem16]>;
389 def : Proc<"i586",            [FeatureX87, FeatureSlowUAMem16]>;
390 def : Proc<"pentium",         [FeatureX87, FeatureSlowUAMem16]>;
391 def : Proc<"pentium-mmx",     [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
392
393 foreach P = ["i686", "pentiumpro"] in {
394   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
395 }
396
397 def : Proc<"pentium2",        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
398                                FeatureCMOV, FeatureFXSR]>;
399
400 foreach P = ["pentium3", "pentium3m"] in {
401   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
402                  FeatureFXSR]>;
403 }
404
405 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
406 // The intent is to enable it for pentium4 which is the current default
407 // processor in a vanilla 32-bit clang compilation when no specific
408 // architecture is specified.  This generally gives a nice performance
409 // increase on silvermont, with largely neutral behavior on other
410 // contemporary large core processors.
411 // pentium-m, pentium4m, prescott and nocona are included as a preventative
412 // measure to avoid performance surprises, in case clang's default cpu
413 // changes slightly.
414
415 def : ProcessorModel<"pentium-m", GenericPostRAModel,
416                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
417                       FeatureSSE2, FeatureFXSR]>;
418
419 foreach P = ["pentium4", "pentium4m"] in {
420   def : ProcessorModel<P, GenericPostRAModel,
421                        [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
422                         FeatureSSE2, FeatureFXSR]>;
423 }
424
425 // Intel Quark.
426 def : Proc<"lakemont",        []>;
427
428 // Intel Core Duo.
429 def : ProcessorModel<"yonah", SandyBridgeModel,
430                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
431                       FeatureFXSR]>;
432
433 // NetBurst.
434 def : ProcessorModel<"prescott", GenericPostRAModel,
435                      [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
436                       FeatureFXSR]>;
437 def : ProcessorModel<"nocona", GenericPostRAModel, [
438   FeatureX87,
439   FeatureSlowUAMem16,
440   FeatureMMX,
441   FeatureSSE3,
442   FeatureFXSR,
443   FeatureCMPXCHG16B
444 ]>;
445
446 // Intel Core 2 Solo/Duo.
447 def : ProcessorModel<"core2", SandyBridgeModel, [
448   FeatureX87,
449   FeatureSlowUAMem16,
450   FeatureMMX,
451   FeatureSSSE3,
452   FeatureFXSR,
453   FeatureCMPXCHG16B,
454   FeatureLAHFSAHF,
455   FeatureMacroFusion
456 ]>;
457 def : ProcessorModel<"penryn", SandyBridgeModel, [
458   FeatureX87,
459   FeatureSlowUAMem16,
460   FeatureMMX,
461   FeatureSSE41,
462   FeatureFXSR,
463   FeatureCMPXCHG16B,
464   FeatureLAHFSAHF,
465   FeatureMacroFusion
466 ]>;
467
468 // Atom CPUs.
469 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
470   ProcIntelAtom,
471   FeatureX87,
472   FeatureSlowUAMem16,
473   FeatureMMX,
474   FeatureSSSE3,
475   FeatureFXSR,
476   FeatureCMPXCHG16B,
477   FeatureMOVBE,
478   FeatureLEAForSP,
479   FeatureSlowDivide32,
480   FeatureSlowDivide64,
481   FeatureSlowTwoMemOps,
482   FeatureLEAUsesAG,
483   FeaturePadShortFunctions,
484   FeatureLAHFSAHF
485 ]>;
486 def : BonnellProc<"bonnell">;
487 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
488
// Shared definition for Silvermont processors: scheduled with SLMModel and
// tagged with the ProcIntelSLM family feature. It is instantiated once per
// accepted CPU name below, so every name gets the identical feature list.
489 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
490   ProcIntelSLM,
491   FeatureX87,
492   FeatureMMX,
493   FeatureSSE42,
494   FeatureFXSR,
495   FeatureCMPXCHG16B,
496   FeatureMOVBE,
497   FeaturePOPCNT,
498   FeaturePCLMUL,
499   FeatureAES,
500   FeatureSlowDivide64,
501   FeatureSlowTwoMemOps,
502   FeaturePRFCHW,
503   FeatureSlowLEA,
504   FeatureSlowIncDec,
505   FeatureSlowPMULLD,
506   FeatureLAHFSAHF
507 ]>;
508 def : SilvermontProc<"silvermont">;
509 def : SilvermontProc<"slm">; // Legacy alias.
510
// Goldmont processors, tagged with the ProcIntelGLM family feature.
// NOTE: this is scheduled with SLMModel, the same model SilvermontProc uses;
// no Goldmont-specific scheduling model is referenced here.
511 class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [
512   ProcIntelGLM,
513   FeatureX87,
514   FeatureMMX,
515   FeatureSSE42,
516   FeatureFXSR,
517   FeatureCMPXCHG16B,
518   FeatureMOVBE,
519   FeaturePOPCNT,
520   FeaturePCLMUL,
521   FeatureAES,
522   FeaturePRFCHW,
523   FeatureSlowTwoMemOps,
524   FeatureSlowLEA,
525   FeatureSlowIncDec,
526   FeatureLAHFSAHF,
527   FeatureMPX,
528   FeatureSHA,
529   FeatureRDRAND,
530   FeatureRDSEED,
531   FeatureXSAVE,
532   FeatureXSAVEOPT,
533   FeatureXSAVEC,
534   FeatureXSAVES,
535   FeatureCLFLUSHOPT,
536   FeatureFSGSBase
537 ]>;
538 def : GoldmontProc<"goldmont">;
539
540 // "Arrandale" along with corei3 and corei5
541 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
542   FeatureX87,
543   FeatureMMX,
544   FeatureSSE42,
545   FeatureFXSR,
546   FeatureCMPXCHG16B,
547   FeaturePOPCNT,
548   FeatureLAHFSAHF,
549   FeatureMacroFusion
550 ]>;
551 def : NehalemProc<"nehalem">;
552 def : NehalemProc<"corei7">;
553
554 // Westmere is a similar machine to nehalem with some additional features.
555 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
556 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
557   FeatureX87,
558   FeatureMMX,
559   FeatureSSE42,
560   FeatureFXSR,
561   FeatureCMPXCHG16B,
562   FeaturePOPCNT,
563   FeatureAES,
564   FeaturePCLMUL,
565   FeatureLAHFSAHF,
566   FeatureMacroFusion
567 ]>;
568 def : WestmereProc<"westmere">;
569
// Holds a cumulative feature list. Value is Inherited followed by NewFeatures,
// so a record can extend an earlier record's list by passing that record's
// Value as Inherited.
570 class ProcessorFeatures<list<SubtargetFeature> Inherited,
571                         list<SubtargetFeature> NewFeatures> {
572   list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
573 }
574
// A ProcessorModel whose feature list is ProcFeatures followed by
// OtherFeatures. This lets a shared feature set (ProcFeatures) be combined
// with additions that apply only to one processor definition (OtherFeatures).
575 class ProcModel<string Name, SchedMachineModel Model,
576                 list<SubtargetFeature> ProcFeatures,
577                 list<SubtargetFeature> OtherFeatures> :
578   ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
579
580 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
581 // rather than a superset.
582 def SNBFeatures : ProcessorFeatures<[], [
583   FeatureX87,
584   FeatureMMX,
585   FeatureAVX,
586   FeatureFXSR,
587   FeatureCMPXCHG16B,
588   FeaturePOPCNT,
589   FeatureAES,
590   FeatureSlowDivide64,
591   FeaturePCLMUL,
592   FeatureXSAVE,
593   FeatureXSAVEOPT,
594   FeatureLAHFSAHF,
595   FeatureSlow3OpsLEA,
596   FeatureFastScalarFSQRT,
597   FeatureFastSHLDRotate,
598   FeatureSlowIncDec,
599   FeatureMacroFusion
600 ]>;
601
602 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
603                                                SNBFeatures.Value, [
604   FeatureSlowUAMem32
605 ]>;
606 def : SandyBridgeProc<"sandybridge">;
607 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
608
// Ivy Bridge feature set: everything in SNBFeatures plus the features below.
609 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
610   FeatureRDRAND,
611   FeatureF16C,
612   FeatureFSGSBase
613 ]>;
614
615 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
616                                              IVBFeatures.Value, [
617   FeatureSlowUAMem32
618 ]>;
619 def : IvyBridgeProc<"ivybridge">;
620 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
621
// Haswell feature set: everything in IVBFeatures plus the features below.
622 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
623   FeatureAVX2,
624   FeatureBMI,
625   FeatureBMI2,
626   FeatureERMSB,
627   FeatureFMA,
628   FeatureLZCNT,
629   FeatureMOVBE,
630   FeatureFastVariableShuffle
631 ]>;
632
633 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
634                                            HSWFeatures.Value, [
635   ProcIntelHSW
636 ]>;
637 def : HaswellProc<"haswell">;
638 def : HaswellProc<"core-avx2">; // Legacy alias.
639
// Broadwell feature set: everything in HSWFeatures plus the features below.
640 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
641   FeatureADX,
642   FeatureRDSEED,
643   FeaturePRFCHW
644 ]>;
645 class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
646                                              BDWFeatures.Value, [
647   ProcIntelBDW
648 ]>;
649 def : BroadwellProc<"broadwell">;
650
// Skylake feature set: everything in BDWFeatures plus the features below.
// Used as the base list by both SkylakeClientProc and SKXFeatures.
651 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
652   FeatureMPX,
653   FeatureRTM,
654   FeatureXSAVEC,
655   FeatureXSAVES,
656   FeatureSGX,
657   FeatureCLFLUSHOPT,
658   FeatureFastVectorFSQRT
659 ]>;
660
// Template for Skylake client CPU names: SkylakeClientModel with
// SKLFeatures.Value and two extra entries, ProcIntelSKL (declared outside this
// chunk) and FeatureHasFastGather.
661 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
662                                                  SKLFeatures.Value, [
663   ProcIntelSKL,
664   FeatureHasFastGather
665 ]>;
// Only one name is registered for this template.
666 def : SkylakeClientProc<"skylake">;
667
// Knights Landing (KNL) feature list. It branches off IVBFeatures.Value, not
// the Haswell/Broadwell lists, and names one by one several entries that
// HSWFeatures and BDWFeatures also carry (MOVBE, LZCNT, BMI, BMI2, FMA, ADX,
// RDSEED, PRFCHW). HSWFeatures' FeatureAVX2, FeatureERMSB and
// FeatureFastVariableShuffle are not listed explicitly here.
// NOTE(review): whether any of those are implied by FeatureAVX512 cannot be
// seen from this chunk -- confirm against the feature definitions.
668 def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [
669   FeatureAVX512,
670   FeatureERI,
671   FeatureCDI,
672   FeaturePFI,
673   FeaturePREFETCHWT1,
674   FeatureADX,
675   FeatureRDSEED,
676   FeatureMOVBE,
677   FeatureLZCNT,
678   FeatureBMI,
679   FeatureBMI2,
680   FeatureFMA,
681   FeaturePRFCHW
682 ]>;
683
// Template for Knights Landing CPU names: KNLFeatures.Value plus the four
// extra entries below (ProcIntelKNL is declared outside this chunk).
// HaswellModel is passed as the scheduling model because no KNL-specific
// model exists yet, per the FIXME.
684 // FIXME: define KNL model
685 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
686                                                   KNLFeatures.Value, [
687   ProcIntelKNL,
688   FeatureSlowTwoMemOps,
689   FeatureFastPartialYMMorZMMWrite,
690   FeatureHasFastGather
691 ]>;
692 def : KnightsLandingProc<"knl">;
693
// Template for Knights Mill CPU names. It passes the same model
// (HaswellModel), the same base list (KNLFeatures.Value) and the same extra
// entries as KnightsLandingProc, including ProcIntelKNL, and adds
// FeatureVPOPCNTDQ as the only difference.
694 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
695                                                KNLFeatures.Value, [
696   ProcIntelKNL,
697   FeatureSlowTwoMemOps,
698   FeatureFastPartialYMMorZMMWrite,
699   FeatureHasFastGather,
700   FeatureVPOPCNTDQ
701 ]>;
// The AVX5124FMAPS / AVX5124VNNIW features named in the TODO are not part of
// the list above.
702 def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
703
// Skylake server (SKX) feature list: built from SKLFeatures.Value plus the
// entries below. CNLFeatures builds on SKXFeatures.Value.
704 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
705   FeatureAVX512,
706   FeatureCDI,
707   FeatureDQI,
708   FeatureBWI,
709   FeatureVLX,
710   FeaturePKU,
711   FeatureCLWB
712 ]>;
713
// Template for Skylake server CPU names: SkylakeServerModel with
// SKXFeatures.Value and two extra entries, ProcIntelSKX (declared outside this
// chunk) and FeatureHasFastGather.
714 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
715                                                  SKXFeatures.Value, [
716   ProcIntelSKX,
717   FeatureHasFastGather
718 ]>;
// Both names instantiate the same template, so they get identical settings.
719 def : SkylakeServerProc<"skylake-avx512">;
720 def : SkylakeServerProc<"skx">; // Legacy alias.
721
// Cannon Lake (CNL) feature list: built from SKXFeatures.Value plus the three
// entries below. ICLFeatures builds on CNLFeatures.Value.
722 def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
723   FeatureVBMI,
724   FeatureIFMA,
725   FeatureSHA
726 ]>;
727
// Template for Cannon Lake CPU names. It reuses SkylakeServerModel (no Cannon
// Lake-specific model is referenced) with CNLFeatures.Value and two extra
// entries, ProcIntelCNL (declared outside this chunk) and
// FeatureHasFastGather.
728 class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
729                                               CNLFeatures.Value, [
730   ProcIntelCNL,
731   FeatureHasFastGather
732 ]>;
733 def : CannonlakeProc<"cannonlake">;
734
// Ice Lake (ICL) feature list: built from CNLFeatures.Value plus the entries
// below. No other list in this section builds on ICLFeatures.Value.
735 def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
736   FeatureBITALG,
737   FeatureVAES,
738   FeatureVBMI2,
739   FeatureVNNI,
740   FeatureVPCLMULQDQ,
741   FeatureVPOPCNTDQ,
742   FeatureGFNI
743 ]>;
744
// Template for Ice Lake CPU names. It reuses SkylakeServerModel (no Ice
// Lake-specific model is referenced) with ICLFeatures.Value and two extra
// entries, ProcIntelICL (declared outside this chunk) and
// FeatureHasFastGather.
745 class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
746                                            ICLFeatures.Value, [
747   ProcIntelICL,
748   FeatureHasFastGather
749 ]>;
750 def : IcelakeProc<"icelake">;
751
752 // AMD CPUs.
753
// K6 family. All three share FeatureX87 and FeatureSlowUAMem16; "k6" adds
// FeatureMMX, while "k6-2" and "k6-3" add Feature3DNow instead and are
// otherwise identical to each other.
// NOTE(review): Proc is declared outside this chunk and takes no scheduling
// model argument here -- which model it selects should be confirmed there.
754 def : Proc<"k6",              [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
755 def : Proc<"k6-2",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
756 def : Proc<"k6-3",            [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
757
// Original Athlon names. The foreach emits one anonymous processor def per
// name in the list, each with the same feature list.
758 foreach P = ["athlon", "athlon-tbird"] in {
759   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, Feature3DNowA, FeatureSlowSHLD]>;
760 }
761
// Later Athlon names: one anonymous processor def per name. Compared with the
// "athlon"/"athlon-tbird" list this one additionally carries FeatureSSE1 and
// FeatureFXSR.
762 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
763   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
764                  Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
765 }
766
// K8 names: one anonymous processor def per name. This is the first AMD list
// in this section that names Feature64Bit explicitly; it uses FeatureSSE2
// where the "athlon-4" group uses FeatureSSE1.
767 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
768   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
769                  FeatureFXSR, Feature64Bit, FeatureSlowSHLD]>;
770 }
771
// K8 names with SSE3: one anonymous processor def per name. Relative to the
// plain "k8" list, FeatureSSE2 becomes FeatureSSE3 and FeatureCMPXCHG16B is
// added.
// NOTE(review): Feature64Bit is not listed explicitly here, unlike the plain
// "k8" group; presumably another listed feature implies it -- confirm against
// the feature definitions.
772 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
773   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
774                  FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
775 }
776
// Family 10h names: one anonymous processor def per name. Unlike the K8
// groups, this list carries no FeatureSlowUAMem16; it adds FeatureSSE4A,
// FeatureLZCNT, FeaturePOPCNT and FeatureLAHFSAHF.
// NOTE(review): Feature64Bit is not listed explicitly here; presumably another
// listed feature implies it -- confirm against the feature definitions.
777 foreach P = ["amdfam10", "barcelona"] in {
778   def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
779                  FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
780                  FeatureSlowSHLD, FeatureLAHFSAHF]>;
781 }
782
783 // Bobcat
// Registered through Proc, so no scheduling model is named here; compare
// "btver2", which goes through ProcessorModel with an explicit model.
784 def : Proc<"btver1", [
785   FeatureX87,
786   FeatureMMX,
787   FeatureSSSE3,
788   FeatureSSE4A,
789   FeatureFXSR,
790   FeatureCMPXCHG16B,
791   FeaturePRFCHW,
792   FeatureLZCNT,
793   FeaturePOPCNT,
794   FeatureSlowSHLD,
795   FeatureLAHFSAHF
796 ]>;
797
798 // Jaguar
// Uses ProcessorModel directly so that a dedicated model, BtVer2Model, can be
// attached. The feature list is spelled out in full rather than derived from
// the "btver1" list.
799 def : ProcessorModel<"btver2", BtVer2Model, [
800   FeatureX87,
801   FeatureMMX,
802   FeatureAVX,
803   FeatureFXSR,
804   FeatureSSE4A,
805   FeatureCMPXCHG16B,
806   FeaturePRFCHW,
807   FeatureAES,
808   FeaturePCLMUL,
809   FeatureBMI,
810   FeatureF16C,
811   FeatureMOVBE,
812   FeatureLZCNT,
813   FeatureFastLZCNT,
814   FeaturePOPCNT,
815   FeatureXSAVE,
816   FeatureXSAVEOPT,
817   FeatureSlowSHLD,
818   FeatureLAHFSAHF,
819   FeatureFastPartialYMMorZMMWrite
820 ]>;
821
822 // Bulldozer
// First of the four bdver* entries. Each bdver* list is written out in full;
// none of them is derived from another through ProcessorFeatures.
823 def : Proc<"bdver1", [
824   FeatureX87,
825   FeatureXOP,
826   FeatureFMA4,
827   FeatureCMPXCHG16B,
828   FeatureAES,
829   FeaturePRFCHW,
830   FeaturePCLMUL,
831   FeatureMMX,
832   FeatureAVX,
833   FeatureFXSR,
834   FeatureSSE4A,
835   FeatureLZCNT,
836   FeaturePOPCNT,
837   FeatureXSAVE,
838   FeatureLWP,
839   FeatureSlowSHLD,
840   FeatureLAHFSAHF,
841   FeatureMacroFusion
842 ]>;
843 // Piledriver
// Same entries as the "bdver1" list plus FeatureF16C, FeatureBMI, FeatureTBM
// and FeatureFMA. The list is written out in full, not derived from "bdver1".
844 def : Proc<"bdver2", [
845   FeatureX87,
846   FeatureXOP,
847   FeatureFMA4,
848   FeatureCMPXCHG16B,
849   FeatureAES,
850   FeaturePRFCHW,
851   FeaturePCLMUL,
852   FeatureMMX,
853   FeatureAVX,
854   FeatureFXSR,
855   FeatureSSE4A,
856   FeatureF16C,
857   FeatureLZCNT,
858   FeaturePOPCNT,
859   FeatureXSAVE,
860   FeatureBMI,
861   FeatureTBM,
862   FeatureLWP,
863   FeatureFMA,
864   FeatureSlowSHLD,
865   FeatureLAHFSAHF,
866   FeatureMacroFusion
867 ]>;
868
869 // Steamroller
// Same entries as the "bdver2" list plus FeatureXSAVEOPT and FeatureFSGSBase.
// The list is written out in full, not derived from "bdver2".
870 def : Proc<"bdver3", [
871   FeatureX87,
872   FeatureXOP,
873   FeatureFMA4,
874   FeatureCMPXCHG16B,
875   FeatureAES,
876   FeaturePRFCHW,
877   FeaturePCLMUL,
878   FeatureMMX,
879   FeatureAVX,
880   FeatureFXSR,
881   FeatureSSE4A,
882   FeatureF16C,
883   FeatureLZCNT,
884   FeaturePOPCNT,
885   FeatureXSAVE,
886   FeatureBMI,
887   FeatureTBM,
888   FeatureLWP,
889   FeatureFMA,
890   FeatureXSAVEOPT,
891   FeatureSlowSHLD,
892   FeatureFSGSBase,
893   FeatureLAHFSAHF,
894   FeatureMacroFusion
895 ]>;
896
897 // Excavator
// Relative to the "bdver3" list: FeatureAVX is replaced by FeatureAVX2, and
// FeatureBMI2 and FeatureMWAITX are added.
// NOTE(review): FeatureSSE4A appears in bdver1-3 but is not listed explicitly
// here; presumably another listed feature implies it -- confirm against the
// feature definitions.
898 def : Proc<"bdver4", [
899   FeatureX87,
900   FeatureMMX,
901   FeatureAVX2,
902   FeatureFXSR,
903   FeatureXOP,
904   FeatureFMA4,
905   FeatureCMPXCHG16B,
906   FeatureAES,
907   FeaturePRFCHW,
908   FeaturePCLMUL,
909   FeatureF16C,
910   FeatureLZCNT,
911   FeaturePOPCNT,
912   FeatureXSAVE,
913   FeatureBMI,
914   FeatureBMI2,
915   FeatureTBM,
916   FeatureLWP,
917   FeatureFMA,
918   FeatureXSAVEOPT,
919   FeatureSlowSHLD,
920   FeatureFSGSBase,
921   FeatureLAHFSAHF,
922   FeatureMWAITX,
923   FeatureMacroFusion
924 ]>;
925
926 // Znver1
// Uses ProcessorModel directly so that a dedicated model, Znver1Model, can be
// attached. Unlike the bdver* lists, this one carries no FeatureXOP,
// FeatureFMA4, FeatureTBM or FeatureLWP entries.
927 def: ProcessorModel<"znver1", Znver1Model, [
928   FeatureADX,
929   FeatureAES,
930   FeatureAVX2,
931   FeatureBMI,
932   FeatureBMI2,
933   FeatureCLFLUSHOPT,
934   FeatureCLZERO,
935   FeatureCMPXCHG16B,
936   FeatureF16C,
937   FeatureFMA,
938   FeatureFSGSBase,
939   FeatureFXSR,
940   FeatureFastLZCNT,
941   FeatureLAHFSAHF,
942   FeatureLZCNT,
943   FeatureMacroFusion,
944   FeatureMMX,
945   FeatureMOVBE,
946   FeatureMWAITX,
947   FeaturePCLMUL,
948   FeaturePOPCNT,
949   FeaturePRFCHW,
950   FeatureRDRAND,
951   FeatureRDSEED,
952   FeatureSHA,
953   FeatureSSE4A,
954   FeatureSlowSHLD,
955   FeatureX87,
956   FeatureXSAVE,
957   FeatureXSAVEC,
958   FeatureXSAVEOPT,
959   FeatureXSAVES]>;
960
// "geode": FeatureX87 and FeatureSlowUAMem16 plus Feature3DNowA.
961 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
962
// WinChip and C3 names. All four share FeatureX87 and FeatureSlowUAMem16.
// "winchip2" and "c3" have identical lists (Feature3DNow); "winchip-c6" has
// FeatureMMX instead; "c3-2" has FeatureMMX plus FeatureSSE1 and FeatureFXSR.
963 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
964 def : Proc<"winchip2",        [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
965 def : Proc<"c3",              [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
966 def : Proc<"c3-2",            [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
967                                FeatureSSE1, FeatureFXSR]>;
968
969 // We also provide a generic 64-bit specific x86 processor model which tries to
970 // be good for modern chips without enabling instruction set encodings past the
971 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
972 // modern 64-bit x86 chip, and enables features that are generally beneficial.
973 //
974 // We currently use the Sandy Bridge model as the default scheduling model as
975 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
976 // covers a huge swath of x86 processors. If there are specific scheduling
977 // knobs which need to be tuned differently for AMD chips, we might consider
978 // forming a common base for them.
// Generic "x86-64" entry: SandyBridgeModel is attached through ProcessorModel,
// and the feature list is spelled out in full, not taken from SNBFeatures.
// FeatureSSE2 and Feature64Bit are the only instruction-set-level entries
// beyond X87/MMX/FXSR; the remaining three are tuning entries that SNBFeatures
// also carries.
979 def : ProcessorModel<"x86-64", SandyBridgeModel, [
980   FeatureX87,
981   FeatureMMX,
982   FeatureSSE2,
983   FeatureFXSR,
984   Feature64Bit,
985   FeatureSlow3OpsLEA,
986   FeatureSlowIncDec,
987   FeatureMacroFusion
988 ]>;
989
990 //===----------------------------------------------------------------------===//
991 // Calling Conventions
992 //===----------------------------------------------------------------------===//
993
994 include "X86CallingConv.td"
995
996
997 //===----------------------------------------------------------------------===//
998 // Assembly Parser
999 //===----------------------------------------------------------------------===//
1000
// Assembly parser variant named "att". Its Variant index (0) is the same
// index ATTAsmWriter uses, and it is the first entry in the X86 target's
// AssemblyParserVariants list.
1001 def ATTAsmParserVariant : AsmParserVariant {
1002   int Variant = 0;
1003
1004   // Variant name.
1005   string Name = "att";
1006
1007   // Discard comments in assembly strings.
1008   string CommentDelimiter = "#";
1009
1010   // Recognize hard coded registers.
1011   string RegisterPrefix = "%";
1012 }
1013
// Assembly parser variant named "intel". Its Variant index (1) is the same
// index IntelAsmWriter uses, and it is the second entry in the X86 target's
// AssemblyParserVariants list. It differs from the "att" variant in the
// comment delimiter (";") and in having an empty register prefix.
1014 def IntelAsmParserVariant : AsmParserVariant {
1015   int Variant = 1;
1016
1017   // Variant name.
1018   string Name = "intel";
1019
1020   // Discard comments in assembly strings.
1021   string CommentDelimiter = ";";
1022
1023   // Recognize hard coded registers.
1024   string RegisterPrefix = "";
1025 }
1026
1027 //===----------------------------------------------------------------------===//
1028 // Assembly Printers
1029 //===----------------------------------------------------------------------===//
1030
1031 // The X86 target supports two different syntaxes for emitting machine code.
1032 // This is controlled by the -x86-asm-syntax={att|intel} command-line option.
// Assembly writer whose printer class name is "ATTInstPrinter". Variant 0 is
// the same index ATTAsmParserVariant uses.
1033 def ATTAsmWriter : AsmWriter {
1034   string AsmWriterClassName  = "ATTInstPrinter";
1035   int Variant = 0;
1036 }
// Assembly writer whose printer class name is "IntelInstPrinter". Variant 1
// is the same index IntelAsmParserVariant uses.
1037 def IntelAsmWriter : AsmWriter {
1038   string AsmWriterClassName  = "IntelInstPrinter";
1039   int Variant = 1;
1040 }
1041
// Top-level target record. It ties together the instruction set record
// (X86InstrInfo, declared outside this chunk) with the two parser variants and
// the two writers. Both lists put the ATT entry (Variant 0) first and the
// Intel entry (Variant 1) second.
1042 def X86 : Target {
1043   // Information about the instructions...
1044   let InstructionSet = X86InstrInfo;
1045   let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1046   let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1047 }