1 //===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a target description file for the Intel i386 architecture, referred
10 // to here as the "X86" architecture.
12 //===----------------------------------------------------------------------===//
14 // Get the target-independent interfaces which we are implementing...
16 include "llvm/Target/Target.td"
18 //===----------------------------------------------------------------------===//
19 // X86 Subtarget state
// Operating-mode selectors. Each record ties one mode feature string to its
// own In<N>BitMode field (arguments: name, field, value, description).
def Mode64Bit
    : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                       "64-bit mode (x86_64)">;
def Mode32Bit
    : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
                       "32-bit mode (80386)">;
def Mode16Bit
    : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
                       "16-bit mode (i8086)">;
29 //===----------------------------------------------------------------------===//
30 // X86 Subtarget features
31 //===----------------------------------------------------------------------===//
// Baseline instruction-availability flags: x87 floating point, NOPL,
// conditional moves and CMPXCHG8B. Each sets its own Has* field to "true".
def FeatureX87
    : SubtargetFeature<"x87", "HasX87", "true",
                       "Enable X87 float instructions">;

def FeatureNOPL
    : SubtargetFeature<"nopl", "HasNOPL", "true",
                       "Enable NOPL instruction">;

def FeatureCMOV
    : SubtargetFeature<"cmov", "HasCMov", "true",
                       "Enable conditional move instructions">;

def FeatureCMPXCHG8B
    : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
                       "Support CMPXCHG8B instructions">;
// POPCNT plus the state save/restore instruction families
// (fxsave/fxrestore and the xsave variants).
def FeaturePOPCNT
    : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                       "Support POPCNT instruction">;

def FeatureFXSR
    : SubtargetFeature<"fxsr", "HasFXSR", "true",
                       "Support fxsave/fxrestore instructions">;

def FeatureXSAVE
    : SubtargetFeature<"xsave", "HasXSAVE", "true",
                       "Support xsave instructions">;

def FeatureXSAVEOPT
    : SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                       "Support xsaveopt instructions">;

def FeatureXSAVEC
    : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                       "Support xsavec instructions">;

def FeatureXSAVES
    : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                       "Support xsaves instructions">;
// First level of the X86SSELevel field; it lists no implied features.
def FeatureSSE1
    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                       "Enable SSE instructions">;
65 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
66 "Enable SSE2 instructions",
68 def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
69 "Enable SSE3 instructions",
71 def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
72 "Enable SSSE3 instructions",
74 def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
75 "Enable SSE 4.1 instructions",
77 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
78 "Enable SSE 4.2 instructions",
80 // The MMX subtarget feature is separate from the rest of the SSE features
81 // because it's important (for odd compatibility reasons) to be able to
82 // turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX
    : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
                       "Enable MMX instructions">;
85 def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
86 "Enable 3DNow! instructions",
88 def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
89 "Enable 3DNow! Athlon instructions",
91 // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
92 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
93 // without disabling 64-bit mode. Nothing should imply this feature bit. It
94 // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode.
def Feature64Bit
    : SubtargetFeature<"64bit", "HasX86_64", "true",
                       "Support 64-bit instructions">;
97 def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
98 "64-bit with cmpxchg16b",
// Tuning flags marking individual instructions as slow on a given CPU.
def FeatureSlowSHLD
    : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                       "SHLD instruction is slow">;
def FeatureSlowPMULLD
    : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                       "PMULLD instruction is slow">;
104 def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
106 "PMADDWD is slower than PMULLD">;
107 // FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16
    : SubtargetFeature<"slow-unaligned-mem-16", "IsUAMem16Slow", "true",
                       "Slow unaligned 16-byte memory access">;
// 32-byte counterpart of the unaligned-access penalty flag above.
def FeatureSlowUAMem32
    : SubtargetFeature<"slow-unaligned-mem-32", "IsUAMem32Slow", "true",
                       "Slow unaligned 32-byte memory access">;
114 def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
115 "Support SSE 4a instructions",
118 def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
119 "Enable AVX instructions",
121 def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
122 "Enable AVX2 instructions",
124 def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
125 "Enable three-operand fused multiple-add",
127 def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
128 "Support 16-bit floating point conversion instructions",
// AVX-512 foundation level of X86SSELevel; implies AVX2, FMA and F16C.
def FeatureAVX512
    : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                       "Enable AVX-512 instructions",
                       [FeatureAVX2,
                        FeatureFMA,
                        FeatureF16C]>;
133 def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
134 "Enable AVX-512 Exponential and Reciprocal Instructions",
136 def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
137 "Enable AVX-512 Conflict Detection Instructions",
139 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
140 "true", "Enable AVX-512 Population Count Instructions",
142 def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
143 "Enable AVX-512 PreFetch Instructions",
145 def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
147 "Prefetch with Intent to Write and T1 Hint">;
148 def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
149 "Enable AVX-512 Doubleword and Quadword Instructions",
151 def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
152 "Enable AVX-512 Byte and Word Instructions",
154 def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
155 "Enable AVX-512 Vector Length eXtensions",
157 def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
158 "Enable AVX-512 Vector Byte Manipulation Instructions",
160 def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
161 "Enable AVX-512 further Vector Byte Manipulation Instructions",
163 def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
164 "Enable AVX-512 Integer Fused Multiple-Add",
def FeaturePKU
    : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
168 def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
169 "Enable AVX-512 Vector Neural Network Instructions",
171 def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
172 "Support bfloat16 floating point",
174 def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
175 "Enable AVX-512 Bit Algorithms",
177 def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
178 "HasVP2INTERSECT", "true",
179 "Enable AVX-512 vp2intersect",
181 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
182 "Enable packed carry-less multiplication instructions",
184 def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
185 "Enable Galois Field Arithmetic Instructions",
// Implies both AVX and the scalar PCLMUL feature.
def FeatureVPCLMULQDQ
    : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                       "Enable vpclmulqdq instructions",
                       [FeatureAVX,
                        FeaturePCLMUL]>;
// Four-operand FMA. Implies AVX and SSE4A.
// The description previously read "fused multiple-add"; the operation is a
// fused multiply-add, so the user-visible help text is corrected here.
def FeatureFMA4
    : SubtargetFeature<"fma4", "HasFMA4", "true",
                       "Enable four-operand fused multiply-add",
                       [FeatureAVX, FeatureSSE4A]>;
193 def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
194 "Enable XOP instructions",
def FeatureSSEUnalignedMem
    : SubtargetFeature<"sse-unaligned-mem", "HasSSEUnalignedMem", "true",
                       "Allow unaligned memory operands with SSE instructions">;
199 def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
200 "Enable AES instructions",
// Implies both AVX and the base AES feature.
def FeatureVAES
    : SubtargetFeature<"vaes", "HasVAES", "true",
                       "Promote selected AES instructions to AVX512/AVX registers",
                       [FeatureAVX,
                        FeatureAES]>;
// Stand-alone instruction-set flags; none of these list implied features.
def FeatureTBM
    : SubtargetFeature<"tbm", "HasTBM", "true",
                       "Enable TBM instructions">;
def FeatureLWP
    : SubtargetFeature<"lwp", "HasLWP", "true",
                       "Enable LWP instructions">;
def FeatureMOVBE
    : SubtargetFeature<"movbe", "HasMOVBE", "true",
                       "Support MOVBE instruction">;
def FeatureRDRAND
    : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                       "Support RDRAND instruction">;
def FeatureFSGSBase
    : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                       "Support FS/GS Base instructions">;
// More stand-alone instruction-set flags (LZCNT, BMI/BMI2, RTM, ADX).
def FeatureLZCNT
    : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                       "Support LZCNT instruction">;
def FeatureBMI
    : SubtargetFeature<"bmi", "HasBMI", "true",
                       "Support BMI instructions">;
def FeatureBMI2
    : SubtargetFeature<"bmi2", "HasBMI2", "true",
                       "Support BMI2 instructions">;
def FeatureRTM
    : SubtargetFeature<"rtm", "HasRTM", "true",
                       "Support RTM instructions">;
def FeatureADX
    : SubtargetFeature<"adx", "HasADX", "true",
                       "Support ADX instructions">;
225 def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
226 "Enable SHA instructions",
// Note: the LAHF/SAHF feature is exposed under the feature string "sahf".
def FeatureSHSTK
    : SubtargetFeature<"shstk", "HasSHSTK", "true",
                       "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW
    : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                       "Support PRFCHW instructions">;
def FeatureRDSEED
    : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                       "Support RDSEED instruction">;
def FeatureLAHFSAHF
    : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                       "Support LAHF and SAHF instructions">;
// MONITORX/MWAITX, cache-line zero/demote and ptwrite availability flags.
def FeatureMWAITX
    : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                       "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO
    : SubtargetFeature<"clzero", "HasCLZERO", "true",
                       "Enable Cache Line Zero">;
def FeatureCLDEMOTE
    : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                       "Enable Cache Demote">;
def FeaturePTWRITE
    : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                       "Support ptwrite instruction">;
// FIXME: Deprecated as of 10.0 and must not be used for anything. It is kept
// only because IR files may still name it in a target-feature attribute, and
// removing the record would break them. Note the value argument is "false".
def FeatureDeprecatedMPX
    : SubtargetFeature<"mpx", "DeprecatedHasMPX", "false",
                       "Deprecated. Support MPX instructions">;
// Code-generation tuning knobs: stack adjustment via LEA, narrower divides
// for small positive operands, and padding of short functions.
def FeatureLEAForSP
    : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                       "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32
    : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true",
                       "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64
    : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true",
                       "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions
    : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true",
                       "Pad short functions">;
// INVPCID, SGX and the cache-line flush / write-back instruction flags.
def FeatureINVPCID
    : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                       "Invalidate Process-Context Identifier">;
def FeatureSGX
    : SubtargetFeature<"sgx", "HasSGX", "true",
                       "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT
    : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                       "Flush A Cache Line Optimized">;
def FeatureCLWB
    : SubtargetFeature<"clwb", "HasCLWB", "true",
                       "Cache Line Write Back">;
// WBNOINVD, RDPID, WAITPKG and ENQCMD availability flags.
def FeatureWBNOINVD
    : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                       "Write Back No Invalidate">;
def FeatureRDPID
    : SubtargetFeature<"rdpid", "HasRDPID", "true",
                       "Support RDPID instructions">;
def FeatureWAITPKG
    : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                       "Wait and pause enhancements">;
def FeatureENQCMD
    : SubtargetFeature<"enqcmd", "HasENQCMD", "true",
                       "Has ENQCMD instructions">;
// Some processors are slow on instructions that implicitly take two memory
// operands. In practice that means CALL, PUSH and POP with a memory operand
// should be avoided in favor of a MOV followed by a register CALL/PUSH/POP.
def FeatureSlowTwoMemOps
    : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true",
                       "Two memory operand instructions are slow">;
// LEA- and INC/DEC-related tuning flags.
def FeatureLEAUsesAG
    : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                       "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA
    : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                       "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA
    : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                       "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec
    : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                       "INC and DEC instructions are slower than ADD and SUB">;
291 : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
292 "Use software floating point features">;
// False-dependency tuning flags for POPCNT and LZCNT/TZCNT, followed by the
// PCONFIG instruction flag.
def FeaturePOPCNTFalseDeps
    : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true",
                       "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps
    : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true",
                       "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG
    : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                       "platform configuration instruction">;
// On recent X86 (port-bound) processors, it's preferable to combine to a single
// shuffle using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle : SubtargetFeature<
    "fast-variable-shuffle", "HasFastVariableShuffle", "true",
    "Shuffles with variable masks are fast">;
307 // On some X86 processors, a vzeroupper instruction should be inserted after
308 // using ymm/zmm registers before executing code that may use SSE instructions.
309 def FeatureInsertVZEROUPPER
310 : SubtargetFeature<"vzeroupper",
312 "true", "Should insert vzeroupper instructions">;
// Enable FeatureFastScalarFSQRT when scalar FSQRT has shorter latency than the
// equivalent Newton-Raphson (NR) sequence, and FeatureFastVectorFSQRT when
// vector FSQRT has higher throughput than the equivalent NR sequence.
// Rationale: throughput-bound code is likely to be vectorized, so vector code
// should optimize for SQRT throughput; code that stays scalar probably has
// some kind of dependency, so reducing latency matters more there.
def FeatureFastScalarFSQRT : SubtargetFeature<
    "fast-scalar-fsqrt", "HasFastScalarFSQRT", "true",
    "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT : SubtargetFeature<
    "fast-vector-fsqrt", "HasFastVectorFSQRT", "true",
    "Vector SQRT is fast (disable Newton-Raphson)">;
326 // If lzcnt has equivalent latency/throughput to most simple integer ops, it can
327 // be used to replace test/set sequences.
330 "fast-lzcnt", "HasFastLZCNT", "true",
331 "LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
333 def FeatureFast11ByteNOP
335 "fast-11bytenop", "HasFast11ByteNOP", "true",
336 "Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
338 def FeatureFast15ByteNOP
340 "fast-15bytenop", "HasFast15ByteNOP", "true",
341 "Target can quickly decode up to 15 byte NOPs">;
342 // Sandy Bridge and newer processors can use SHLD with the same source on both
343 // inputs to implement rotate to avoid the partial flag update of the normal
344 // rotate instructions.
345 def FeatureFastSHLDRotate
347 "fast-shld-rotate", "HasFastSHLDRotate", "true",
348 "SHLD can be used as a faster rotate">;
350 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
351 // "string operations"). See "REP String Enhancement" in the Intel Software
352 // Development Manual. This feature essentially means that REP MOVSB will copy
353 // using the largest available size instead of copying bytes one by one, making
354 // it at least as fast as REPMOVS{W,D,Q}.
357 "ermsb", "HasERMSB", "true",
358 "REP MOVS/STOS are fast">;
// Bulldozer and newer processors can merge CMP/TEST, but no other
// instructions, with conditional branches.
def FeatureBranchFusion : SubtargetFeature<
    "branchfusion", "HasBranchFusion", "true",
    "CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors have many instructions that can be fused
// with conditional branches and pass through the CPU as a single operation.
def FeatureMacroFusion : SubtargetFeature<
    "macrofusion", "HasMacroFusion", "true",
    "Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 set), so gathers could
// technically be generated on every AVX2 processor. The overhead on HSW is
// high, though. Skylake Client has faster gathers than HSW, with performance
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather : SubtargetFeature<
    "fast-gather", "HasFastGather", "true",
    "Indicates if gather is reasonably fast">;
// Vector-width and mask-register preference flags.
def FeaturePrefer128Bit : SubtargetFeature<
    "prefer-128-bit", "Prefer128Bit", "true",
    "Prefer 128-bit AVX instructions">;

def FeaturePrefer256Bit : SubtargetFeature<
    "prefer-256-bit", "Prefer256Bit", "true",
    "Prefer 256-bit AVX instructions">;

def FeaturePreferMaskRegisters : SubtargetFeature<
    "prefer-mask-registers", "PreferMaskRegisters", "true",
    "Prefer AVX512 mask registers over PTEST/MOVMSK">;
393 // Lower indirect calls using a special construct called a `retpoline` to
394 // mitigate potential Spectre v2 attacks against them.
395 def FeatureRetpolineIndirectCalls
397 "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
398 "Remove speculation of indirect calls from the generated code">;
400 // Lower indirect branches and switches either using conditional branch trees
401 // or using a special construct called a `retpoline` to mitigate potential
402 // Spectre v2 attacks against them.
403 def FeatureRetpolineIndirectBranches
405 "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
406 "Remove speculation of indirect branches from the generated code">;
408 // Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
409 // `retpoline-indirect-branches` above.
411 : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
412 "Remove speculation of indirect branches from the "
413 "generated code, either by avoiding them entirely or "
414 "lowering them with a speculation blocking construct",
415 [FeatureRetpolineIndirectCalls,
416 FeatureRetpolineIndirectBranches]>;
418 // Rely on external thunks for the emitted retpoline calls. This allows users
419 // to provide their own custom thunk definitions in highly specialized
420 // environments such as a kernel that does boot-time hot patching.
421 def FeatureRetpolineExternalThunk
423 "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
424 "When lowering an indirect call or branch using a `retpoline`, rely "
425 "on the specified user provided thunk rather than emitting one "
426 "ourselves. Only has effect when combined with some other retpoline "
427 "feature", [FeatureRetpolineIndirectCalls]>;
429 // Mitigate LVI attacks against indirect calls/branches and call returns
430 def FeatureLVIControlFlowIntegrity
432 "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
433 "Prevent indirect calls/branches from using a memory operand, and "
434 "precede all indirect calls/branches from a register with an "
435 "LFENCE instruction to serialize control flow. Also decompose RET "
436 "instructions into a POP+LFENCE+JMP sequence.">;
438 // Mitigate LVI attacks against data loads
439 def FeatureLVILoadHardening
441 "lvi-load-hardening", "UseLVILoadHardening", "true",
442 "Insert LFENCE instructions to prevent data speculatively injected "
443 "into loads from being used maliciously.">;
// Direct-move instructions (movdiri and movdir64b).
def FeatureMOVDIRI
    : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                       "Support movdiri instruction">;
def FeatureMOVDIR64B
    : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                       "Support movdir64b instruction">;
// The description is built from two adjacent string literals.
def FeatureFastBEXTR
    : SubtargetFeature<
          "fast-bextr", "HasFastBEXTR", "true",
          "Indicates that the BEXTR instruction is implemented as a single uop "
          "with good throughput">;
455 // Combine vector math operations with shuffles into horizontal math
456 // instructions if a CPU implements horizontal operations (introduced with
457 // SSE3) with better latency/throughput than the alternative sequence.
458 def FeatureFastHorizontalOps
460 "fast-hops", "HasFastHorizontalOps", "true",
461 "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
462 "normal vector instructions with shuffles">;
464 def FeatureFastScalarShiftMasks
466 "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
467 "Prefer a left/right scalar logical shift pair over a shift+and pair">;
469 def FeatureFastVectorShiftMasks
471 "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
472 "Prefer a left/right vector logical shift pair over a shift+and pair">;
def FeatureUseGLMDivSqrtCosts : SubtargetFeature<
    "use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
    "Use Goldmont specific floating point div/sqrt costs">;
// Merge branches into three-way conditional code.
def FeatureMergeToThreeWayBranch
    : SubtargetFeature<"merge-to-threeway-branch",
                       "ThreewayBranchProfitable", "true",
                       "Merge branches to a three-way "
                       "conditional branch">;
// Turn on alias analysis during code generation.
def FeatureUseAA
    : SubtargetFeature<"use-aa", "UseAA", "true",
                       "Use alias analysis during codegen">;
// Processor-family markers: both have an empty feature name and an empty
// description, and only set the X86ProcFamily field.
def ProcIntelAtom
    : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
def ProcIntelSLM
    : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
493 //===----------------------------------------------------------------------===//
494 // Register File Description
495 //===----------------------------------------------------------------------===//
497 include "X86RegisterInfo.td"
498 include "X86RegisterBanks.td"
500 //===----------------------------------------------------------------------===//
501 // Instruction Descriptions
502 //===----------------------------------------------------------------------===//
504 include "X86Schedule.td"
505 include "X86InstrInfo.td"
506 include "X86SchedPredicates.td"
// Instruction-info record for the X86 target; adds nothing to the InstrInfo
// defaults.
def X86InstrInfo
    : InstrInfo;
510 //===----------------------------------------------------------------------===//
511 // X86 Scheduler Models
512 //===----------------------------------------------------------------------===//
514 include "X86ScheduleAtom.td"
515 include "X86SchedSandyBridge.td"
516 include "X86SchedHaswell.td"
517 include "X86SchedBroadwell.td"
518 include "X86ScheduleSLM.td"
519 include "X86ScheduleZnver1.td"
520 include "X86ScheduleZnver2.td"
521 include "X86ScheduleBdVer2.td"
522 include "X86ScheduleBtVer2.td"
523 include "X86SchedSkylakeClient.td"
524 include "X86SchedSkylakeServer.td"
526 //===----------------------------------------------------------------------===//
527 // X86 Processor Feature Lists
528 //===----------------------------------------------------------------------===//
530 def ProcessorFeatures {
532 list<SubtargetFeature> NHMInheritableFeatures = [FeatureX87,
544 FeatureInsertVZEROUPPER];
545 list<SubtargetFeature> NHMSpecificFeatures = [];
546 list<SubtargetFeature> NHMFeatures =
547 !listconcat(NHMInheritableFeatures, NHMSpecificFeatures);
550 list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
551 list<SubtargetFeature> WSMSpecificFeatures = [];
552 list<SubtargetFeature> WSMInheritableFeatures =
553 !listconcat(NHMInheritableFeatures, WSMAdditionalFeatures);
554 list<SubtargetFeature> WSMFeatures =
555 !listconcat(WSMInheritableFeatures, WSMSpecificFeatures);
558 list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
563 FeatureFastScalarFSQRT,
564 FeatureFastSHLDRotate,
565 FeatureMergeToThreeWayBranch];
566 list<SubtargetFeature> SNBSpecificFeatures = [FeatureSlowUAMem32,
567 FeaturePOPCNTFalseDeps];
568 list<SubtargetFeature> SNBInheritableFeatures =
569 !listconcat(WSMInheritableFeatures, SNBAdditionalFeatures);
570 list<SubtargetFeature> SNBFeatures =
571 !listconcat(SNBInheritableFeatures, SNBSpecificFeatures);
574 list<SubtargetFeature> IVBAdditionalFeatures = [FeatureRDRAND,
577 list<SubtargetFeature> IVBSpecificFeatures = [FeatureSlowUAMem32,
578 FeaturePOPCNTFalseDeps];
579 list<SubtargetFeature> IVBInheritableFeatures =
580 !listconcat(SNBInheritableFeatures, IVBAdditionalFeatures);
581 list<SubtargetFeature> IVBFeatures =
582 !listconcat(IVBInheritableFeatures, IVBSpecificFeatures);
585 list<SubtargetFeature> HSWAdditionalFeatures = [FeatureAVX2,
593 FeatureFastVariableShuffle];
594 list<SubtargetFeature> HSWSpecificFeatures = [FeaturePOPCNTFalseDeps,
595 FeatureLZCNTFalseDeps];
596 list<SubtargetFeature> HSWInheritableFeatures =
597 !listconcat(IVBInheritableFeatures, HSWAdditionalFeatures);
598 list<SubtargetFeature> HSWFeatures =
599 !listconcat(HSWInheritableFeatures, HSWSpecificFeatures);
602 list<SubtargetFeature> BDWAdditionalFeatures = [FeatureADX,
605 list<SubtargetFeature> BDWSpecificFeatures = [FeaturePOPCNTFalseDeps,
606 FeatureLZCNTFalseDeps];
607 list<SubtargetFeature> BDWInheritableFeatures =
608 !listconcat(HSWInheritableFeatures, BDWAdditionalFeatures);
609 list<SubtargetFeature> BDWFeatures =
610 !listconcat(BDWInheritableFeatures, BDWSpecificFeatures);
613 list<SubtargetFeature> SKLAdditionalFeatures = [FeatureAES,
617 FeatureFastVectorFSQRT];
618 list<SubtargetFeature> SKLSpecificFeatures = [FeatureHasFastGather,
619 FeaturePOPCNTFalseDeps,
621 list<SubtargetFeature> SKLInheritableFeatures =
622 !listconcat(BDWInheritableFeatures, SKLAdditionalFeatures);
623 list<SubtargetFeature> SKLFeatures =
624 !listconcat(SKLInheritableFeatures, SKLSpecificFeatures);
627 list<SubtargetFeature> SKXAdditionalFeatures = [FeatureAVX512,
635 list<SubtargetFeature> SKXSpecificFeatures = [FeatureHasFastGather,
636 FeaturePOPCNTFalseDeps];
637 list<SubtargetFeature> SKXInheritableFeatures =
638 !listconcat(SKLInheritableFeatures, SKXAdditionalFeatures);
639 list<SubtargetFeature> SKXFeatures =
640 !listconcat(SKXInheritableFeatures, SKXSpecificFeatures);
643 list<SubtargetFeature> CLXAdditionalFeatures = [FeatureVNNI];
644 list<SubtargetFeature> CLXSpecificFeatures = [FeatureHasFastGather,
645 FeaturePOPCNTFalseDeps];
646 list<SubtargetFeature> CLXInheritableFeatures =
647 !listconcat(SKXInheritableFeatures, CLXAdditionalFeatures);
648 list<SubtargetFeature> CLXFeatures =
649 !listconcat(CLXInheritableFeatures, CLXSpecificFeatures);
652 list<SubtargetFeature> CPXAdditionalFeatures = [FeatureBF16];
653 list<SubtargetFeature> CPXSpecificFeatures = [FeatureHasFastGather,
654 FeaturePOPCNTFalseDeps];
655 list<SubtargetFeature> CPXInheritableFeatures =
656 !listconcat(CLXInheritableFeatures, CPXAdditionalFeatures);
657 list<SubtargetFeature> CPXFeatures =
658 !listconcat(CPXInheritableFeatures, CPXSpecificFeatures);
661 list<SubtargetFeature> CNLAdditionalFeatures = [FeatureAVX512,
672 list<SubtargetFeature> CNLSpecificFeatures = [FeatureHasFastGather];
673 list<SubtargetFeature> CNLInheritableFeatures =
674 !listconcat(SKLInheritableFeatures, CNLAdditionalFeatures);
675 list<SubtargetFeature> CNLFeatures =
676 !listconcat(CNLInheritableFeatures, CNLSpecificFeatures);
679 list<SubtargetFeature> ICLAdditionalFeatures = [FeatureBITALG,
688 list<SubtargetFeature> ICLSpecificFeatures = [FeatureHasFastGather];
689 list<SubtargetFeature> ICLInheritableFeatures =
690 !listconcat(CNLInheritableFeatures, ICLAdditionalFeatures);
691 list<SubtargetFeature> ICLFeatures =
692 !listconcat(ICLInheritableFeatures, ICLSpecificFeatures);
695 list<SubtargetFeature> ICXSpecificFeatures = [FeaturePCONFIG,
697 FeatureHasFastGather];
698 list<SubtargetFeature> ICXFeatures =
699 !listconcat(ICLInheritableFeatures, ICXSpecificFeatures);
702 list<SubtargetFeature> TGLAdditionalFeatures = [FeatureVP2INTERSECT,
706 list<SubtargetFeature> TGLSpecificFeatures = [FeatureHasFastGather];
707 list<SubtargetFeature> TGLInheritableFeatures =
708 !listconcat(TGLAdditionalFeatures ,TGLSpecificFeatures);
709 list<SubtargetFeature> TGLFeatures =
710 !listconcat(ICLFeatures, TGLInheritableFeatures );
713 list<SubtargetFeature> AtomInheritableFeatures = [FeatureX87,
723 FeatureSlowTwoMemOps,
725 FeatureInsertVZEROUPPER];
726 list<SubtargetFeature> AtomSpecificFeatures = [ProcIntelAtom,
732 FeaturePadShortFunctions];
733 list<SubtargetFeature> AtomFeatures =
734 !listconcat(AtomInheritableFeatures, AtomSpecificFeatures);
737 list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
744 list<SubtargetFeature> SLMSpecificFeatures = [ProcIntelSLM,
747 FeaturePOPCNTFalseDeps];
748 list<SubtargetFeature> SLMInheritableFeatures =
749 !listconcat(AtomInheritableFeatures, SLMAdditionalFeatures);
750 list<SubtargetFeature> SLMFeatures =
751 !listconcat(SLMInheritableFeatures, SLMSpecificFeatures);
754 list<SubtargetFeature> GLMAdditionalFeatures = [FeatureAES,
763 list<SubtargetFeature> GLMSpecificFeatures = [FeatureUseGLMDivSqrtCosts,
764 FeaturePOPCNTFalseDeps];
765 list<SubtargetFeature> GLMInheritableFeatures =
766 !listconcat(SLMInheritableFeatures, GLMAdditionalFeatures);
767 list<SubtargetFeature> GLMFeatures =
768 !listconcat(GLMInheritableFeatures, GLMSpecificFeatures);
771 list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
774 list<SubtargetFeature> GLPSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
775 list<SubtargetFeature> GLPInheritableFeatures =
776 !listconcat(GLMInheritableFeatures, GLPAdditionalFeatures);
777 list<SubtargetFeature> GLPFeatures =
778 !listconcat(GLPInheritableFeatures, GLPSpecificFeatures);
781 list<SubtargetFeature> TRMAdditionalFeatures = [FeatureCLDEMOTE,
786 list<SubtargetFeature> TRMSpecificFeatures = [FeatureUseGLMDivSqrtCosts];
787 list<SubtargetFeature> TRMFeatures =
788 !listconcat(GLPInheritableFeatures, TRMAdditionalFeatures,
789 TRMSpecificFeatures);
792 list<SubtargetFeature> KNLFeatures = [FeatureX87,
825 FeaturePreferMaskRegisters,
826 FeatureSlowTwoMemOps,
827 FeatureHasFastGather,
829 // TODO Add AVX5124FMAPS/AVX5124VNNIW features
830 list<SubtargetFeature> KNMFeatures =
831 !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
834 list<SubtargetFeature> BarcelonaInheritableFeatures = [FeatureX87,
847 FeatureFastScalarShiftMasks,
848 FeatureInsertVZEROUPPER];
849 list<SubtargetFeature> BarcelonaFeatures = BarcelonaInheritableFeatures;
852 list<SubtargetFeature> BtVer1InheritableFeatures = [FeatureX87,
867 FeatureFast15ByteNOP,
868 FeatureFastScalarShiftMasks,
869 FeatureFastVectorShiftMasks];
870 list<SubtargetFeature> BtVer1SpecificFeatures = [FeatureInsertVZEROUPPER];
871 list<SubtargetFeature> BtVer1Features =
872 !listconcat(BtVer1InheritableFeatures, BtVer1SpecificFeatures);
875 list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
883 list<SubtargetFeature> BtVer2SpecificFeatures = [FeatureFastLZCNT,
885 FeatureFastHorizontalOps];
886 list<SubtargetFeature> BtVer2InheritableFeatures =
887 !listconcat(BtVer1InheritableFeatures, BtVer2AdditionalFeatures);
888 list<SubtargetFeature> BtVer2Features =
889 !listconcat(BtVer2InheritableFeatures, BtVer2SpecificFeatures);
892 list<SubtargetFeature> BdVer1InheritableFeatures = [FeatureX87,
910 FeatureFast11ByteNOP,
911 FeatureFastScalarShiftMasks,
913 FeatureInsertVZEROUPPER];
914 list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
917 list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
922 list<SubtargetFeature> BdVer2InheritableFeatures =
923 !listconcat(BdVer1InheritableFeatures, BdVer2AdditionalFeatures);
924 list<SubtargetFeature> BdVer2Features = BdVer2InheritableFeatures;
927 list<SubtargetFeature> BdVer3AdditionalFeatures = [FeatureXSAVEOPT,
929 list<SubtargetFeature> BdVer3InheritableFeatures =
930 !listconcat(BdVer2InheritableFeatures, BdVer3AdditionalFeatures);
931 list<SubtargetFeature> BdVer3Features = BdVer3InheritableFeatures;
934 list<SubtargetFeature> BdVer4AdditionalFeatures = [FeatureAVX2,
937 list<SubtargetFeature> BdVer4InheritableFeatures =
938 !listconcat(BdVer3InheritableFeatures, BdVer4AdditionalFeatures);
939 list<SubtargetFeature> BdVer4Features = BdVer4InheritableFeatures;
942 // AMD Zen Processors common ISAs
943 list<SubtargetFeature> ZNFeatures = [FeatureADX,
962 FeatureFast15ByteNOP,
964 FeatureFastScalarShiftMasks,
976 FeatureInsertVZEROUPPER,
982 list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
985 list<SubtargetFeature> ZN2Features =
986 !listconcat(ZNFeatures, ZN2AdditionalFeatures);
989 //===----------------------------------------------------------------------===//
990 // X86 processors supported.
991 //===----------------------------------------------------------------------===//
// Shorthand for a ProcessorModel that always uses GenericModel as its
// scheduling model; only the processor name and feature list are supplied.
993 class Proc<string Name, list<SubtargetFeature> Features>
994 : ProcessorModel<Name, GenericModel, Features>;
996 // NOTE: CMPXCHG8B is here for legacy compatibility so that it is only disabled
997 // if i386/i486 is specifically requested.
998 def : Proc<"generic", [FeatureX87, FeatureSlowUAMem16,
999 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
1000 def : Proc<"i386", [FeatureX87, FeatureSlowUAMem16,
1001 FeatureInsertVZEROUPPER]>;
1002 def : Proc<"i486", [FeatureX87, FeatureSlowUAMem16,
1003 FeatureInsertVZEROUPPER]>;
1004 def : Proc<"i586", [FeatureX87, FeatureSlowUAMem16,
1005 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
1006 def : Proc<"pentium", [FeatureX87, FeatureSlowUAMem16,
1007 FeatureCMPXCHG8B, FeatureInsertVZEROUPPER]>;
1008 def : Proc<"pentium-mmx", [FeatureX87, FeatureSlowUAMem16,
1009 FeatureCMPXCHG8B, FeatureMMX,
1010 FeatureInsertVZEROUPPER]>;
1012 def : Proc<"i686", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1013 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1014 def : Proc<"pentiumpro", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1015 FeatureCMOV, FeatureNOPL, FeatureInsertVZEROUPPER]>;
1017 def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1018 FeatureMMX, FeatureCMOV, FeatureFXSR,
1019 FeatureNOPL, FeatureInsertVZEROUPPER]>;
1021 foreach P = ["pentium3", "pentium3m"] in {
1022 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,FeatureMMX,
1023 FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV,
1024 FeatureInsertVZEROUPPER]>;
1027 // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
1028 // The intent is to enable it for pentium4 which is the current default
1029 // processor in a vanilla 32-bit clang compilation when no specific
1030 // architecture is specified. This generally gives a nice performance
1031 // increase on silvermont, with largely neutral behavior on other
1032 // contemporary large core processors.
1033 // pentium-m, pentium4m, prescott and nocona are included as a preventative
1034 // measure to avoid performance surprises, in case clang's default cpu
1035 // changes slightly.
1037 def : ProcessorModel<"pentium-m", GenericPostRAModel,
1038 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1039 FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
1040 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1042 foreach P = ["pentium4", "pentium4m"] in {
1043 def : ProcessorModel<P, GenericPostRAModel,
1044 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1045 FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
1046 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1050 def : Proc<"lakemont", [FeatureInsertVZEROUPPER]>;
1053 def : ProcessorModel<"yonah", SandyBridgeModel,
1054 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1055 FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
1056 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1059 def : ProcessorModel<"prescott", GenericPostRAModel,
1060 [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1061 FeatureMMX, FeatureSSE3, FeatureFXSR, FeatureNOPL,
1062 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1063 def : ProcessorModel<"nocona", GenericPostRAModel, [
1074 FeatureInsertVZEROUPPER
1077 // Intel Core 2 Solo/Duo.
1078 def : ProcessorModel<"core2", SandyBridgeModel, [
1091 FeatureInsertVZEROUPPER
1093 def : ProcessorModel<"penryn", SandyBridgeModel, [
1106 FeatureInsertVZEROUPPER
1110 foreach P = ["bonnell", "atom"] in {
1111 def : ProcessorModel<P, AtomModel, ProcessorFeatures.AtomFeatures>;
1114 foreach P = ["silvermont", "slm"] in {
1115 def : ProcessorModel<P, SLMModel, ProcessorFeatures.SLMFeatures>;
1118 def : ProcessorModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures>;
1119 def : ProcessorModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures>;
1120 def : ProcessorModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures>;
1122 // "Arrandale" along with corei3 and corei5
1123 foreach P = ["nehalem", "corei7"] in {
1124 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures>;
1127 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
1128 def : ProcessorModel<"westmere", SandyBridgeModel,
1129 ProcessorFeatures.WSMFeatures>;
1131 foreach P = ["sandybridge", "corei7-avx"] in {
1132 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures>;
1135 foreach P = ["ivybridge", "core-avx-i"] in {
1136 def : ProcessorModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures>;
1139 foreach P = ["haswell", "core-avx2"] in {
1140 def : ProcessorModel<P, HaswellModel, ProcessorFeatures.HSWFeatures>;
1143 def : ProcessorModel<"broadwell", BroadwellModel,
1144 ProcessorFeatures.BDWFeatures>;
1146 def : ProcessorModel<"skylake", SkylakeClientModel,
1147 ProcessorFeatures.SKLFeatures>;
1149 // FIXME: define a KNL scheduler model; knl and knm currently reuse HaswellModel.
1150 def : ProcessorModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures>;
1151 def : ProcessorModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures>;
1153 foreach P = ["skylake-avx512", "skx"] in {
1154 def : ProcessorModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures>;
1157 def : ProcessorModel<"cascadelake", SkylakeServerModel,
1158 ProcessorFeatures.CLXFeatures>;
1159 def : ProcessorModel<"cooperlake", SkylakeServerModel,
1160 ProcessorFeatures.CPXFeatures>;
1161 def : ProcessorModel<"cannonlake", SkylakeServerModel,
1162 ProcessorFeatures.CNLFeatures>;
1163 def : ProcessorModel<"icelake-client", SkylakeServerModel,
1164 ProcessorFeatures.ICLFeatures>;
1165 def : ProcessorModel<"icelake-server", SkylakeServerModel,
1166 ProcessorFeatures.ICXFeatures>;
1167 def : ProcessorModel<"tigerlake", SkylakeServerModel,
1168 ProcessorFeatures.TGLFeatures>;
1172 def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1173 FeatureMMX, FeatureInsertVZEROUPPER]>;
1174 def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1175 Feature3DNow, FeatureInsertVZEROUPPER]>;
1176 def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1177 Feature3DNow, FeatureInsertVZEROUPPER]>;
1179 foreach P = ["athlon", "athlon-tbird"] in {
1180 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
1181 Feature3DNowA, FeatureNOPL, FeatureSlowSHLD,
1182 FeatureInsertVZEROUPPER]>;
1185 foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
1186 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureCMOV,
1187 FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL,
1188 FeatureSlowSHLD, FeatureInsertVZEROUPPER]>;
1191 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
1192 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1193 FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
1194 Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
1195 FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
1198 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
1199 def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
1200 Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
1201 FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
1202 FeatureFastScalarShiftMasks, FeatureInsertVZEROUPPER]>;
1205 foreach P = ["amdfam10", "barcelona"] in {
1206 def : Proc<P, ProcessorFeatures.BarcelonaFeatures>;
1210 def : Proc<"btver1", ProcessorFeatures.BtVer1Features>;
1212 def : ProcessorModel<"btver2", BtVer2Model, ProcessorFeatures.BtVer2Features>;
1215 def : ProcessorModel<"bdver1", BdVer2Model, ProcessorFeatures.BdVer1Features>;
1217 def : ProcessorModel<"bdver2", BdVer2Model, ProcessorFeatures.BdVer2Features>;
1219 def : Proc<"bdver3", ProcessorFeatures.BdVer3Features>;
1221 def : Proc<"bdver4", ProcessorFeatures.BdVer4Features>;
1223 def : ProcessorModel<"znver1", Znver1Model, ProcessorFeatures.ZNFeatures>;
1224 def : ProcessorModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features>;
1226 def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1227 Feature3DNowA, FeatureInsertVZEROUPPER]>;
1229 def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
1230 FeatureInsertVZEROUPPER]>;
1231 def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
1232 FeatureInsertVZEROUPPER]>;
1233 def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow,
1234 FeatureInsertVZEROUPPER]>;
1235 def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
1236 FeatureMMX, FeatureSSE1, FeatureFXSR,
1237 FeatureCMOV, FeatureInsertVZEROUPPER]>;
1239 // We also provide a generic 64-bit specific x86 processor model which tries to
1240 // be good for modern chips without enabling instruction set encodings past the
1241 // basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
1242 // modern 64-bit x86 chip, and enables features that are generally beneficial.
1244 // We currently use the Sandy Bridge model as the default scheduling model as
1245 // we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
1246 // covers a huge swath of x86 processors. If there are specific scheduling
1247 // knobs which need to be tuned differently for AMD chips, we might consider
1248 // forming a common base for them.
1249 def : ProcessorModel<"x86-64", SandyBridgeModel, [
1261 FeatureInsertVZEROUPPER
1264 //===----------------------------------------------------------------------===//
1265 // Calling Conventions
1266 //===----------------------------------------------------------------------===//
1268 include "X86CallingConv.td"
1271 //===----------------------------------------------------------------------===//
1273 //===----------------------------------------------------------------------===//
1275 def ATTAsmParserVariant : AsmParserVariant {
1279 string Name = "att";
1281 // Discard comments in assembly strings.
1282 string CommentDelimiter = "#";
1284 // Recognize hard coded registers.
1285 string RegisterPrefix = "%";
1288 def IntelAsmParserVariant : AsmParserVariant {
1292 string Name = "intel";
1294 // Discard comments in assembly strings.
1295 string CommentDelimiter = ";";
1297 // Recognize hard coded registers.
1298 string RegisterPrefix = "";
1301 //===----------------------------------------------------------------------===//
1302 // Assembly Printers
1303 //===----------------------------------------------------------------------===//
1305 // The X86 target supports two different syntaxes for emitting machine code.
1306 // This is controlled by the -x86-asm-syntax={att|intel} option.
1307 def ATTAsmWriter : AsmWriter {
1308 string AsmWriterClassName = "ATTInstPrinter";
1311 def IntelAsmWriter : AsmWriter {
1312 string AsmWriterClassName = "IntelInstPrinter";
1317 // Information about the instructions...
1318 let InstructionSet = X86InstrInfo;
1319 let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
1320 let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
1321 let AllowRegisterRenaming = 1;
1324 //===----------------------------------------------------------------------===//
1326 //===----------------------------------------------------------------------===//
1328 include "X86PfmCounters.td"