1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This is a target description file for the Intel i386 architecture, referred
11 // to here as the "X86" architecture.
13 //===----------------------------------------------------------------------===//
15 // Get the target-independent interfaces which we are implementing...
17 include "llvm/Target/Target.td"
19 //===----------------------------------------------------------------------===//
20 // X86 Subtarget state
// Execution-mode features, one per operating mode (64-, 32- and 16-bit).
// Each def hands SubtargetFeature a feature string ("64bit-mode", ...), a
// name ("In64BitMode", ...), the value "true" and a description string.
// NOTE(review): the second and third arguments are presumably the subtarget
// member and the value stored in it when the feature is enabled -- confirm
// against the SubtargetFeature class in llvm/Target/Target.td.
23 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
24 "64-bit mode (x86_64)">;
25 def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
26 "32-bit mode (80386)">;
27 def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
28 "16-bit mode (i8086)">;
30 //===----------------------------------------------------------------------===//
31 // X86 Subtarget features
32 //===----------------------------------------------------------------------===//
// Basic ISA features. Every def in this run except FeatureSSE1 pairs a
// "Has..." name with the value "true"; none of them passes a fifth argument.
34 def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
35 "Enable X87 float instructions">;
37 def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
38 "Enable NOPL instruction">;
40 def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
41 "Enable conditional move instructions">;
43 def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
44 "Support POPCNT instruction">;
// State save/restore instruction families: fxsave/fxrestore and the xsave
// variants, each controlled by its own feature string.
46 def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
47 "Support fxsave/fxrestore instructions">;
49 def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
50 "Support xsave instructions">;
52 def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
53 "Support xsaveopt instructions">;
55 def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
56 "Support xsavec instructions">;
58 def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
59 "Support xsaves instructions">;
// Unlike the boolean features above, this def names "X86SSELevel" and gives
// it the value "SSE1".
// NOTE(review): presumably X86SSELevel is an ordered level shared by the
// later SSE/AVX features -- confirm against the subtarget implementation.
61 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
62 "Enable SSE instructions">;
63 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
64 "Enable SSE2 instructions",
66 def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
67 "Enable SSE3 instructions",
69 def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
70 "Enable SSSE3 instructions",
72 def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
73 "Enable SSE 4.1 instructions",
75 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
76 "Enable SSE 4.2 instructions",
78 // The MMX subtarget feature is separate from the rest of the SSE features
79 // because it's important (for odd compatibility reasons) to be able to
80 // turn it off explicitly while allowing SSE+ to be on.
// Accordingly this def names "X863DNowLevel" (the same name the 3DNow!
// features in this file use) rather than "X86SSELevel".
81 def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
82 "Enable MMX instructions">;
83 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
84 "Enable 3DNow! instructions",
86 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
87 "Enable 3DNow! Athlon instructions",
89 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
90 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
91 // without disabling 64-bit mode. Nothing should imply this feature bit. It
92 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
93 def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
94 "Support 64-bit instructions">;
// cmpxchg16b availability is tracked by its own feature string ("cx16"),
// independent of Feature64Bit.
95 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
96 "64-bit with cmpxchg16b">;
// Tuning flags: per their descriptions they mark an instruction as slow on a
// given CPU rather than describing whether the instruction exists.
// NOTE(review): how code generation reacts to them is not visible here.
97 def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
98 "SHLD instruction is slow">;
99 def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
100 "PMULLD instruction is slow">;
101 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
103 "PMADDWD is slower than PMULLD">;
// Tuning flags marking unaligned 16-byte and 32-byte memory accesses as slow
// (per the description strings). FeatureSlowUAMem16 is listed by most of the
// older processor definitions later in this file, including "generic".
104 // FIXME: This should not apply to CPUs that do not have SSE.
105 def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
106 "IsUAMem16Slow", "true",
107 "Slow unaligned 16-byte memory access">;
108 def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
109 "IsUAMem32Slow", "true",
110 "Slow unaligned 32-byte memory access">;
111 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
112 "Support SSE 4a instructions",
115 def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
116 "Enable AVX instructions",
118 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
119 "Enable AVX2 instructions",
121 def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
122 "Enable three-operand fused multiple-add",
124 def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
125 "Support 16-bit floating point conversion instructions",
127 def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
128 "Enable AVX-512 instructions",
129 [FeatureAVX2, FeatureFMA, FeatureF16C]>;
130 def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
131 "Enable AVX-512 Exponential and Reciprocal Instructions",
133 def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
134 "Enable AVX-512 Conflict Detection Instructions",
136 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
137 "true", "Enable AVX-512 Population Count Instructions",
139 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
140 "Enable AVX-512 PreFetch Instructions",
142 def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
144 "Prefetch with Intent to Write and T1 Hint">;
145 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
146 "Enable AVX-512 Doubleword and Quadword Instructions",
148 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
149 "Enable AVX-512 Byte and Word Instructions",
151 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
152 "Enable AVX-512 Vector Length eXtensions",
154 def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
155 "Enable AVX-512 Vector Byte Manipulation Instructions",
157 def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
158 "Enable AVX-512 further Vector Byte Manipulation Instructions",
160 def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
161 "Enable AVX-512 Integer Fused Multiple-Add",
163 def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
164 "Enable protection keys">;
165 def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
166 "Enable AVX-512 Vector Neural Network Instructions",
168 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
169 "Enable AVX-512 Bit Algorithms",
171 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
172 "Enable packed carry-less multiplication instructions",
174 def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
175 "Enable Galois Field Arithmetic Instructions",
177 def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
178 "Enable vpclmulqdq instructions",
179 [FeatureAVX, FeaturePCLMUL]>;
180 def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
181 "Enable four-operand fused multiple-add",
182 [FeatureAVX, FeatureSSE4A]>;
183 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
184 "Enable XOP instructions",
186 def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
187 "HasSSEUnalignedMem", "true",
188 "Allow unaligned memory operands with SSE instructions">;
189 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
190 "Enable AES instructions",
192 def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
193 "Promote selected AES instructions to AVX512/AVX registers",
194 [FeatureAVX, FeatureAES]>;
// Stand-alone boolean ISA-extension features. Each names a "Has..." member
// with the value "true" and passes no fifth argument.
195 def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
196 "Enable TBM instructions">;
197 def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
198 "Enable LWP instructions">;
199 def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
200 "Support MOVBE instruction">;
// Note: the feature string is spelled "rdrnd", not "rdrand".
201 def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
202 "Support RDRAND instruction">;
203 def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
204 "Support FS/GS Base instructions">;
205 def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
206 "Support LZCNT instruction">;
207 def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
208 "Support BMI instructions">;
209 def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
210 "Support BMI2 instructions">;
211 def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
212 "Support RTM instructions">;
213 def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
214 "Support ADX instructions">;
215 def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
216 "Enable SHA instructions",
// More stand-alone boolean ISA-extension features; none passes a fifth
// argument.
218 def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
219 "Support CET Shadow-Stack instructions">;
220 def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
221 "Support PRFCHW instructions">;
222 def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
223 "Support RDSEED instruction">;
// Note: the feature string is just "sahf" although the def covers both LAHF
// and SAHF.
224 def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
225 "Support LAHF and SAHF instructions">;
226 def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
227 "Enable MONITORX/MWAITX timer functionality">;
228 def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
229 "Enable Cache Line Zero">;
230 def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
231 "Enable Cache Demote">;
232 def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
233 "Support ptwrite instruction">;
234 def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
235 "Support MPX instructions">;
// Code-generation tuning preferences. The names here ("UseLeaForSP",
// "HasSlowDivide32", "PadShortFunctions", ...) describe a strategy rather
// than an instruction-set extension.
236 def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
237 "Use LEA for adjusting the stack pointer">;
// The feature strings below name the substitution ("idivl-to-divb",
// "idivq-to-divl") while the def names say SlowDivide32/64; the descriptions
// state when the narrower divide is used.
238 def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
239 "HasSlowDivide32", "true",
240 "Use 8-bit divide for positive values less than 256">;
241 def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
242 "HasSlowDivide64", "true",
243 "Use 32-bit divide for positive values less than 2^32">;
244 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
245 "PadShortFunctions", "true",
246 "Pad short functions">;
// Boolean ISA-extension features (system, cache-control and wait
// instructions); none passes a fifth argument.
247 def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
248 "Invalidate Process-Context Identifier">;
249 def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
250 "Enable Software Guard Extensions">;
251 def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
252 "Flush A Cache Line Optimized">;
253 def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
254 "Cache Line Write Back">;
255 def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
256 "Write Back No Invalidate">;
257 def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
258 "Support RDPID instructions">;
259 def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
260 "Wait and pause enhancements">;
261 // On some processors, instructions that implicitly take two memory operands are
262 // slow. In practice, this means that CALL, PUSH, and POP with memory operands
263 // should be avoided in favor of a MOV + register CALL/PUSH/POP.
264 def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
265 "SlowTwoMemOps", "true",
266 "Two memory operand instructions are slow">;
// LEA tuning flags. The three defs are independent features: one says LEA
// needs its inputs at the AG stage, the other two mark LEA forms as slow.
// NOTE(review): "AG" presumably means address generation -- confirm.
267 def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
268 "LEA instruction needs inputs at AG stage">;
269 def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
270 "LEA instruction with certain arguments is slow">;
271 def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
272 "LEA instruction with 3 ops or certain registers is slow">;
// Marks INC/DEC as slower than ADD/SUB (per the description string).
273 def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
274 "INC and DEC instructions are slower than ADD and SUB">;
276 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
277 "Use software floating point features.">;
// Tuning flags recording that POPCNT (and LZCNT/TZCNT) have a false
// dependency on their destination register. Several Intel processor classes
// later in this file list them (FeaturePOPCNTFalseDeps from SilvermontProc
// onward, FeatureLZCNTFalseDeps for Haswell and Broadwell).
278 def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
279 "HasPOPCNTFalseDeps", "true",
280 "POPCNT has a false dependency on dest register">;
281 def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
282 "HasLZCNTFalseDeps", "true",
283 "LZCNT/TZCNT have a false dependency on dest register">;
// ISA feature for the platform configuration instruction (PCONFIG).
284 def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
285 "platform configuration instruction">;
286 // On recent X86 (port bound) processors, it's preferable to combine to a single shuffle
287 // using a variable mask over multiple fixed shuffles.
// Listed in the HSWFeatures list later in this file.
288 def FeatureFastVariableShuffle
289 : SubtargetFeature<"fast-variable-shuffle",
290 "HasFastVariableShuffle",
291 "true", "Shuffles with variable masks are fast">;
292 // On some X86 processors, there is no performance hazard to writing only the
293 // lower parts of a YMM or ZMM register without clearing the upper part.
// Listed by the Knights Landing / Knights Mill classes and by "btver2" later
// in this file.
294 def FeatureFastPartialYMMorZMMWrite
295 : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
296 "HasFastPartialYMMorZMMWrite",
297 "true", "Partial writes to YMM/ZMM registers are fast">;
298 // FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
299 // than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
300 // vector FSQRT has higher throughput than the corresponding NR code.
301 // The idea is that throughput bound code is likely to be vectorized, so for
302 // vectorized code we should care about the throughput of SQRT operations.
303 // But if the code is scalar that probably means that the code has some kind of
304 // dependency and we should care more about reducing the latency.
// "NR" above is the Newton-Raphson expansion named in the description
// strings; per those strings, enabling either feature disables it for the
// corresponding (scalar or vector) case.
305 def FeatureFastScalarFSQRT
306 : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
307 "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
308 def FeatureFastVectorFSQRT
309 : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
310 "true", "Vector SQRT is fast (disable Newton-Raphson)">;
311 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
312 // be used to replace test/set sequences.
315 "fast-lzcnt", "HasFastLZCNT", "true",
316 "LZCNT instructions are as fast as most simple integer ops">;
317 // If the target can efficiently decode NOPs up to 11 bytes in length.
318 def FeatureFast11ByteNOP
320 "fast-11bytenop", "HasFast11ByteNOP", "true",
321 "Target can quickly decode up to 11 byte NOPs">;
322 // If the target can efficiently decode NOPs up to 15 bytes in length.
323 def FeatureFast15ByteNOP
325 "fast-15bytenop", "HasFast15ByteNOP", "true",
326 "Target can quickly decode up to 15 byte NOPs">;
327 // Sandy Bridge and newer processors can use SHLD with the same source on both
328 // inputs to implement rotate to avoid the partial flag update of the normal
329 // rotate instructions.
330 def FeatureFastSHLDRotate
332 "fast-shld-rotate", "HasFastSHLDRotate", "true",
333 "SHLD can be used as a faster rotate">;
335 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
336 // "string operations"). See "REP String Enhancement" in the Intel Software
337 // Development Manual. This feature essentially means that REP MOVSB will copy
338 // using the largest available size instead of copying bytes one by one, making
339 // it at least as fast as REP MOVS{W,D,Q}.
342 "ermsb", "HasERMSB", "true",
343 "REP MOVS/STOS are fast">;
345 // Sandy Bridge and newer processors have many instructions that can be
346 // fused with conditional branches and pass through the CPU as a single
// operation.
348 def FeatureMacroFusion
349 : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
350 "Various instructions can be fused with conditional branches">;
352 // Gather is available since Haswell (AVX2 set). So technically, we can
353 // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
354 // Skylake Client processor has faster Gathers than HSW and performance is
355 // similar to Skylake Server (AVX-512).
// Listed by the Skylake client/server, Cascade Lake and Knights Landing/Mill
// processor classes later in this file.
356 def FeatureHasFastGather
357 : SubtargetFeature<"fast-gather", "HasFastGather", "true",
358 "Indicates if gather is reasonably fast.">;
// Tuning preference; per its description it asks for 256-bit AVX
// instructions to be preferred.
// NOTE(review): presumably this matters only where wider vectors are also
// available -- confirm against the users of Prefer256Bit.
360 def FeaturePrefer256Bit
361 : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
362 "Prefer 256-bit AVX instructions">;
364 // Lower indirect calls using a special construct called a `retpoline` to
365 // mitigate potential Spectre v2 attacks against them.
366 def FeatureRetpolineIndirectCalls
368 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
369 "Remove speculation of indirect calls from the generated code.">;
371 // Lower indirect branches and switches either using conditional branch trees
372 // or using a special construct called a `retpoline` to mitigate potential
373 // Spectre v2 attacks against them.
374 def FeatureRetpolineIndirectBranches
376 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
377 "Remove speculation of indirect branches from the generated code.">;
379 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
380 // `retpoline-indirect-branches` above.
382 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
383 "Remove speculation of indirect branches from the "
384 "generated code, either by avoiding them entirely or "
385 "lowering them with a speculation blocking construct.",
386 [FeatureRetpolineIndirectCalls,
387 FeatureRetpolineIndirectBranches]>;
389 // Rely on external thunks for the emitted retpoline calls. This allows users
390 // to provide their own custom thunk definitions in highly specialized
391 // environments such as a kernel that does boot-time hot patching.
392 def FeatureRetpolineExternalThunk
394 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
395 "When lowering an indirect call or branch using a `retpoline`, rely "
396 "on the specified user provided thunk rather than emitting one "
397 "ourselves. Only has effect when combined with some other retpoline "
398 "feature.", [FeatureRetpolineIndirectCalls]>;
400 // Direct Move instructions.
// Two separate ISA features, one for each direct-move instruction (movdiri
// and movdir64b).
401 def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
402 "Support movdiri instruction">;
403 def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
404 "Support movdir64b instruction">;
// Tuning flag (not an availability bit): per its description it says BEXTR
// is a single uop with good throughput on the target.
406 def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
407 "Indicates that the BEXTR instruction is implemented as a single uop "
408 "with good throughput.">;
410 // Combine vector math operations with shuffles into horizontal math
411 // instructions if a CPU implements horizontal operations (introduced with
412 // SSE3) with better latency/throughput than the alternative sequence.
413 def FeatureFastHorizontalOps
415 "fast-hops", "HasFastHorizontalOps", "true",
416 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
417 "normal vector instructions with shuffles", [FeatureSSE3]>;
419 // Merge branches using three-way conditional code.
// Tuning flag "merge-to-threeway-branch"; it names "ThreewayBranchProfitable"
// with the value "true". The description is split across two adjacent
// string literals. Listed in SNBFeatures later in this file.
420 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
421 "ThreewayBranchProfitable", "true",
422 "Merge branches to a three-way "
423 "conditional branch">;
425 //===----------------------------------------------------------------------===//
426 // Register File Description
427 //===----------------------------------------------------------------------===//
429 include "X86RegisterInfo.td"
430 include "X86RegisterBanks.td"
432 //===----------------------------------------------------------------------===//
433 // Instruction Descriptions
434 //===----------------------------------------------------------------------===//
436 include "X86Schedule.td"
437 include "X86InstrInfo.td"
438 include "X86SchedPredicates.td"
// Instruction-info record for the target, defined with no field overrides.
// It is referenced near the end of this file by
// `let InstructionSet = X86InstrInfo;`.
440 def X86InstrInfo : InstrInfo;
442 //===----------------------------------------------------------------------===//
443 // X86 processors supported.
444 //===----------------------------------------------------------------------===//
446 include "X86ScheduleAtom.td"
447 include "X86SchedSandyBridge.td"
448 include "X86SchedHaswell.td"
449 include "X86SchedBroadwell.td"
450 include "X86ScheduleSLM.td"
451 include "X86ScheduleZnver1.td"
452 include "X86ScheduleBdVer2.td"
453 include "X86ScheduleBtVer2.td"
454 include "X86SchedSkylakeClient.td"
455 include "X86SchedSkylakeServer.td"
// Processor-family markers for the Intel Atom line. Rather than a boolean,
// each def names "X86ProcFamily" and gives it a family value (IntelAtom,
// IntelSLM, IntelGLM, IntelGLP, IntelTRM).
// NOTE(review): presumably these are enumerators of a family enum in the
// subtarget class -- confirm.
457 def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
458 "Intel Atom processors">;
459 def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
460 "Intel Silvermont processors">;
461 def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM",
462 "Intel Goldmont processors">;
463 def ProcIntelGLP : SubtargetFeature<"glp", "X86ProcFamily", "IntelGLP",
464 "Intel Goldmont Plus processors">;
465 def ProcIntelTRM : SubtargetFeature<"tremont", "X86ProcFamily", "IntelTRM",
466 "Intel Tremont processors">;
// Convenience wrapper: a processor named `Name` with feature list `Features`
// that always uses GenericModel as its scheduling model. Processors needing
// a different model instantiate ProcessorModel directly instead.
468 class Proc<string Name, list<SubtargetFeature> Features>
469 : ProcessorModel<Name, GenericModel, Features>;
// Oldest CPU names. "generic" through "pentium" carry an identical feature
// list (x87 plus slow unaligned 16-byte access); "pentium-mmx" adds MMX.
471 def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16]>;
472 def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16]>;
473 def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16]>;
474 def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16]>;
475 def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16]>;
476 def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
// "i686" is the first entry here to list FeatureCMOV.
478 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV]>;
479 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMOV,
482 def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
483 FeatureCMOV, FeatureFXSR, FeatureNOPL]>;
485 foreach P = ["pentium3", "pentium3m"] in {
486 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
487 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
490 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
491 // The intent is to enable it for pentium4 which is the current default
492 // processor in a vanilla 32-bit clang compilation when no specific
493 // architecture is specified. This generally gives a nice performance
494 // increase on silvermont, with largely neutral behavior on other
495 // contemporary large core processors.
496 // pentium-m, pentium4m, prescott and nocona are included as a preventative
497 // measure to avoid performance surprises, in case clang's default cpu
// changes.
// "pentium-m" instantiates ProcessorModel directly so that it can use
// GenericPostRAModel instead of the GenericModel that the Proc wrapper
// hard-codes.
500 def : ProcessorModel<"pentium-m", GenericPostRAModel,
501 [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
502 FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
504 foreach P = ["pentium4", "pentium4m"] in {
505 def : ProcessorModel<P, GenericPostRAModel,
506 [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
507 FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
// "lakemont" is defined with an empty feature list -- not even FeatureX87.
511 def : Proc<"lakemont", []>;
// "yonah" and "prescott" list the same features but differ in scheduling
// model: SandyBridgeModel versus GenericPostRAModel.
514 def : ProcessorModel<"yonah", SandyBridgeModel,
515 [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
516 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
519 def : ProcessorModel<"prescott", GenericPostRAModel,
520 [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
521 FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
522 def : ProcessorModel<"nocona", GenericPostRAModel, [
534 // Intel Core 2 Solo/Duo.
535 def : ProcessorModel<"core2", SandyBridgeModel, [
548 def : ProcessorModel<"penryn", SandyBridgeModel, [
563 class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
578 FeatureSlowTwoMemOps,
580 FeaturePadShortFunctions,
583 def : BonnellProc<"bonnell">;
584 def : BonnellProc<"atom">; // Pin the generic name to the baseline.
586 class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
600 FeatureSlowTwoMemOps,
607 FeaturePOPCNTFalseDeps
609 def : SilvermontProc<"silvermont">;
610 def : SilvermontProc<"slm">; // Legacy alias.
612 class ProcessorFeatures<list<SubtargetFeature> Inherited,
613 list<SubtargetFeature> NewFeatures> {
614 list<SubtargetFeature> Value = !listconcat(Inherited, NewFeatures);
// Processor definition helper taking two feature lists: `ProcFeatures`
// and `OtherFeatures`. The two are joined with !listconcat (ProcFeatures
// first) and handed to ProcessorModel together with the name and scheduling
// model. The per-CPU *Proc classes later in this file derive from it.
617 class ProcModel<string Name, SchedMachineModel Model,
618 list<SubtargetFeature> ProcFeatures,
619 list<SubtargetFeature> OtherFeatures> :
620 ProcessorModel<Name, Model, !listconcat(ProcFeatures, OtherFeatures)>;
622 def GLMFeatures : ProcessorFeatures<[], [
636 FeatureSlowTwoMemOps,
652 class GoldmontProc<string Name> : ProcModel<Name, SLMModel,
655 FeaturePOPCNTFalseDeps
657 def : GoldmontProc<"goldmont">;
659 def GLPFeatures : ProcessorFeatures<GLMFeatures.Value, [
665 class GoldmontPlusProc<string Name> : ProcModel<Name, SLMModel,
669 def : GoldmontPlusProc<"goldmont-plus">;
671 class TremontProc<string Name> : ProcModel<Name, SLMModel,
680 def : TremontProc<"tremont">;
682 // "Arrandale" along with corei3 and corei5
683 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
696 def : NehalemProc<"nehalem">;
697 def : NehalemProc<"corei7">;
699 // Westmere is a similar machine to nehalem with some additional features.
700 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge.
701 class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
715 def : WestmereProc<"westmere">;
717 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
718 // rather than a superset.
719 def SNBFeatures : ProcessorFeatures<[], [
735 FeatureFastScalarFSQRT,
736 FeatureFastSHLDRotate,
738 FeatureMergeToThreeWayBranch,
742 class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
745 FeaturePOPCNTFalseDeps
747 def : SandyBridgeProc<"sandybridge">;
748 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
750 def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [
756 class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
759 FeaturePOPCNTFalseDeps
761 def : IvyBridgeProc<"ivybridge">;
762 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
764 def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
773 FeatureFastVariableShuffle
776 class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
778 FeaturePOPCNTFalseDeps,
779 FeatureLZCNTFalseDeps
781 def : HaswellProc<"haswell">;
782 def : HaswellProc<"core-avx2">; // Legacy alias.
784 def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
789 class BroadwellProc<string Name> : ProcModel<Name, BroadwellModel,
791 FeaturePOPCNTFalseDeps,
792 FeatureLZCNTFalseDeps
794 def : BroadwellProc<"broadwell">;
796 def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
802 FeatureFastVectorFSQRT
805 class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
807 FeatureHasFastGather,
808 FeaturePOPCNTFalseDeps,
811 def : SkylakeClientProc<"skylake">;
813 def KNLFeatures : ProcessorFeatures<[], [
848 // FIXME: define KNL model
849 class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
851 FeatureSlowTwoMemOps,
852 FeatureFastPartialYMMorZMMWrite,
853 FeatureHasFastGather,
856 def : KnightsLandingProc<"knl">;
858 class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
860 FeatureSlowTwoMemOps,
861 FeatureFastPartialYMMorZMMWrite,
862 FeatureHasFastGather,
866 def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features
868 def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
878 class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
880 FeatureHasFastGather,
881 FeaturePOPCNTFalseDeps
883 def : SkylakeServerProc<"skylake-avx512">;
884 def : SkylakeServerProc<"skx">; // Legacy alias.
886 def CLXFeatures : ProcessorFeatures<SKXFeatures.Value, [
890 class CascadelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
892 FeatureHasFastGather,
893 FeaturePOPCNTFalseDeps
895 def : CascadelakeProc<"cascadelake">;
897 def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
910 class CannonlakeProc<string Name> : ProcModel<Name, SkylakeServerModel,
914 def : CannonlakeProc<"cannonlake">;
916 def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
928 class IcelakeClientProc<string Name> : ProcModel<Name, SkylakeServerModel,
932 def : IcelakeClientProc<"icelake-client">;
934 class IcelakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
940 def : IcelakeServerProc<"icelake-server">;
// AMD K6 family, all on the generic scheduling model via Proc.
// "k6-2" and "k6-3" list Feature3DNow but not FeatureMMX.
// NOTE(review): presumably Feature3DNow implies MMX; its implied-feature
// list is not visible in this view -- confirm.
944 def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
945 def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
946 def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
948 foreach P = ["athlon", "athlon-tbird"] in {
949 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, Feature3DNowA,
950 FeatureNOPL, FeatureSlowSHLD]>;
953 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
954 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
955 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
958 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
959 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
960 FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
964 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
965 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
966 FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
967 FeatureCMOV, Feature64Bit]>;
970 foreach P = ["amdfam10", "barcelona"] in {
971 def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
972 FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
973 FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV, Feature64Bit]>;
977 def : Proc<"btver1", [
996 def : ProcessorModel<"btver2", BtVer2Model, [
1019 FeatureFast15ByteNOP,
1021 FeatureFastPartialYMMorZMMWrite,
1022 FeatureFastHorizontalOps
1026 def : ProcessorModel<"bdver1", BdVer2Model, [
1047 FeatureFast11ByteNOP,
1051 def : ProcessorModel<"bdver2", BdVer2Model, [
1076 FeatureFast11ByteNOP,
1082 def : Proc<"bdver3", [
1109 FeatureFast11ByteNOP,
1115 def : Proc<"bdver4", [
1143 FeatureFast11ByteNOP,
1149 def: ProcessorModel<"znver1", Znver1Model, [
1169 FeatureFast15ByteNOP,
// Remaining 32-bit-era CPU names (geode, winchip-c6, winchip2, c3, c3-2),
// all on the generic scheduling model via Proc. Of these only "c3-2" lists
// SSE, FXSR and CMOV.
1188 def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
1190 def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
1191 def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1192 def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
1193 def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
1194 FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
1196 // We also provide a generic 64-bit specific x86 processor model which tries to
1197 // be good for modern chips without enabling instruction set encodings past the
1198 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1199 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1201 // We currently use the Sandy Bridge model as the default scheduling model as
1202 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1203 // covers a huge swath of x86 processors. If there are specific scheduling
1204 // knobs which need to be tuned differently for AMD chips, we might consider
1205 // forming a common base for them.
1206 def : ProcessorModel<"x86-64", SandyBridgeModel, [
1219 //===----------------------------------------------------------------------===//
1220 // Calling Conventions
1221 //===----------------------------------------------------------------------===//
1223 include "X86CallingConv.td"
1226 //===----------------------------------------------------------------------===//
1228 //===----------------------------------------------------------------------===//
1230 def ATTAsmParserVariant : AsmParserVariant {
1234 string Name = "att";
1236 // Discard comments in assembly strings.
1237 string CommentDelimiter = "#";
1239 // Recognize hard coded registers.
1240 string RegisterPrefix = "%";
1243 def IntelAsmParserVariant : AsmParserVariant {
1247 string Name = "intel";
1249 // Discard comments in assembly strings.
1250 string CommentDelimiter = ";";
1252 // Recognize hard coded registers.
1253 string RegisterPrefix = "";
1256 //===----------------------------------------------------------------------===//
1257 // Assembly Printers
1258 //===----------------------------------------------------------------------===//
1260 // The X86 target supports two different syntaxes for emitting machine code.
1261 // This is controlled by the -x86-asm-syntax={att|intel} option.
1262 def ATTAsmWriter : AsmWriter {
1263 string AsmWriterClassName = "ATTInstPrinter";
1266 def IntelAsmWriter : AsmWriter {
1267 string AsmWriterClassName = "IntelInstPrinter";
1272 // Information about the instructions...
1273 let InstructionSet = X86InstrInfo;
1274 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1275 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1276 let AllowRegisterRenaming = 1;
1279 //===----------------------------------------------------------------------===//
1281 //===----------------------------------------------------------------------===//
1283 include "X86PfmCounters.td"