1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===------------------------------------------------------------===//
10 include "llvm/Target/Target.td"
12 //===------------------------------------------------------------===//
13 // Subtarget Features (device properties)
14 //===------------------------------------------------------------===//
16 def FeatureFP64 : SubtargetFeature<"fp64",
19 "Enable double precision operations"
22 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
25 "Assuming f32 fma is at least as fast as mul + add"
28 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
31 "Most fp64 instructions are half rate instead of quarter"
34 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
37 "Older version of ALU instructions encoding"
40 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
43 "Specify use of dedicated vertex cache"
46 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
52 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
58 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
61 "Support flat address space"
64 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
65 "UnalignedBufferAccess",
67 "Support unaligned global loads and stores"
70 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
73 "Trap handler support"
76 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
77 "UnalignedScratchAccess",
79 "Support unaligned scratch loads and stores"
82 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
85 "Has Memory Aperture Base and Size Registers"
88 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
89 // XNACK. The current default kernel driver setting is:
90 // - graphics ring: XNACK disabled
91 // - compute ring: XNACK enabled
93 // If XNACK is enabled, the VMEM latency can be worse.
94 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
95 def FeatureXNACK : SubtargetFeature<"xnack",
98 "Enable XNACK support"
101 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
104 "VI SGPR initilization bug requiring a fixed SGPR allocation size"
107 class SubtargetFeatureFetchLimit <string Value> :
108 SubtargetFeature <"fetch"#Value,
111 "Limit the maximum number of fetches in a clause to "#Value
// Concrete fetch-limit features. Each instantiates SubtargetFeatureFetchLimit
// with a string value ("8" or "16"); the class appends that value to the
// "fetch" feature name and to the end of its description text.
114 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
115 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
117 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
118 "wavefrontsize"#Value,
120 !cast<string>(Value),
121 "The number of threads per wavefront"
// Wavefront-size features for 16, 32 and 64. The class appends the integer to
// the "wavefrontsize" feature name and also passes it, cast to a string, as
// one of the SubtargetFeature arguments.
124 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
125 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
126 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
128 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
129 "ldsbankcount"#Value,
131 !cast<string>(Value),
132 "The number of LDS banks per compute unit."
// LDS bank-count features for 16 and 32 banks. The class appends the integer
// to the "ldsbankcount" feature name and also passes it, cast to a string, as
// one of the SubtargetFeature arguments.
135 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
136 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
138 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
139 "localmemorysize"#Value,
141 !cast<string>(Value),
142 "The size of local memory in bytes"
145 def FeatureGCN : SubtargetFeature<"gcn",
151 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
154 "Encoding format for SI and CI"
157 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
160 "Encoding format for VI"
163 def FeatureCIInsts : SubtargetFeature<"ci-insts",
166 "Additional intstructions for CI+"
169 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
172 "Additional intstructions for GFX9+"
175 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
178 "Has s_memrealtime instruction"
181 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
182 "HasInv2PiInlineImm",
184 "Has 1 / (2 * pi) as inline immediate"
187 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
190 "Has i16/f16 instructions"
193 def FeatureVOP3P : SubtargetFeature<"vop3p",
196 "Has VOP3P packed instructions"
199 def FeatureMovrel : SubtargetFeature<"movrel",
202 "Has v_movrel*_b32 instructions"
205 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
208 "Has VGPR mode register indexing"
211 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
214 "Has store scalar memory instructions"
217 def FeatureSDWA : SubtargetFeature<"sdwa",
220 "Support SDWA (Sub-DWORD Addressing) extension"
223 def FeatureDPP : SubtargetFeature<"dpp",
226 "Support DPP (Data Parallel Primitives) extension"
229 //===------------------------------------------------------------===//
230 // Subtarget Features (options and debugging)
231 //===------------------------------------------------------------===//
233 // Some instructions do not support denormals despite this flag. Using
234 // fp32 denormals also causes instructions to run at the double
235 // precision rate for the device.
236 def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
239 "Enable single precision denormal handling"
242 // Denormal handling for fp64 and fp16 is controlled by the same
243 // config register when fp16 is supported.
244 // TODO: Do we need a separate f16 setting when not legal?
245 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
248 "Enable double and half precision denormal handling",
252 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
255 "Enable double and half precision denormal handling",
256 [FeatureFP64, FeatureFP64FP16Denormals]
259 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
262 "Enable half precision denormal handling",
263 [FeatureFP64FP16Denormals]
266 def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
269 "clamp modifier clamps NaNs to 0.0"
272 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
275 "Enable floating point exceptions"
278 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
279 "max-private-element-size-"#size,
280 "MaxPrivateElementSize",
282 "Maximum private access size may be "#size
// Maximum-private-element-size features for sizes 4, 8 and 16. All three share
// the "MaxPrivateElementSize" attribute string from the class and differ only
// in the size appended to the "max-private-element-size-" feature name and to
// the description.
285 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
286 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
287 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
289 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
290 "EnableVGPRSpilling",
292 "Enable spilling of VGPRs to scratch memory"
295 def FeatureDumpCode : SubtargetFeature <"DumpCode",
298 "Dump MachineInstrs in the CodeEmitter"
301 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
304 "Dump MachineInstrs in the CodeEmitter"
307 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
308 "EnablePromoteAlloca",
310 "Enable promote alloca pass"
313 // XXX - This should probably be removed once enabled by default
314 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
315 "EnableLoadStoreOpt",
317 "Enable SI load/store optimizer pass"
320 // Performance debugging feature. Allow using DS instruction immediate
321 // offsets even if the base pointer can't be proven to be base. On SI,
322 // base pointer values that won't give the same result as a 16-bit add
323 // are not safe to fold, but this will override the conservative test
324 // for the base pointer.
325 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
326 "unsafe-ds-offset-folding",
327 "EnableUnsafeDSOffsetFolding",
329 "Force using DS instruction immediate offsets on SI"
332 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
335 "Enable SI Machine Scheduler"
338 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
339 // all OS-es on VI and newer hardware to avoid assertion failures due
340 // to missing ADDR64 variants of MUBUF instructions.
341 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
344 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
347 "Force to generate flat instruction for global"
350 // Dummy feature used to disable assembler instructions.
351 def FeatureDisable : SubtargetFeature<"",
352 "FeatureDisable","true",
353 "Dummy feature to disable assembler instructions"
// Helper for declaring one subtarget feature per GPU generation.
//   Value   - generation name. It is used verbatim as the feature name,
//             appended to "AMDGPUSubtarget::" to form the third argument, and
//             prefixed to " GPU generation" to form the description.
//   Implies - list of features forwarded as the final SubtargetFeature
//             argument.
// The second SubtargetFeature argument is always the string "Gen".
356 class SubtargetFeatureGeneration <string Value,
357 list<SubtargetFeature> Implies> :
358 SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
359 Value#" GPU generation", Implies>;
// Local-memory-size features for 0, 32768 and 65536. The class appends the
// integer to the "localmemorysize" feature name; its description states the
// value is a size in bytes. These records are referenced from the Implies
// lists of the generation features that follow.
361 def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
362 def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
363 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
365 def FeatureR600 : SubtargetFeatureGeneration<"R600",
366 [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
369 def FeatureR700 : SubtargetFeatureGeneration<"R700",
370 [FeatureFetchLimit16, FeatureLocalMemorySize0]
373 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
374 [FeatureFetchLimit16, FeatureLocalMemorySize32768]
377 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
378 [FeatureFetchLimit16, FeatureWavefrontSize64,
379 FeatureLocalMemorySize32768]
382 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
383 [FeatureFP64, FeatureLocalMemorySize32768,
384 FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
385 FeatureLDSBankCount32, FeatureMovrel]
388 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
389 [FeatureFP64, FeatureLocalMemorySize65536,
390 FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
391 FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
394 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
395 [FeatureFP64, FeatureLocalMemorySize65536,
396 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
397 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
398 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
399 FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
404 def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
405 [FeatureFP64, FeatureLocalMemorySize65536,
406 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
407 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
408 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
409 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode
413 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
414 list<SubtargetFeature> Implies>
416 "isaver"#Major#"."#Minor#"."#Stepping,
418 "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
419 "Instruction set version number",
423 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
425 FeatureLDSBankCount32]>;
427 def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
430 FeatureLDSBankCount32,
433 def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
435 FeatureLDSBankCount16]>;
// ISA version 8.0.0: implies the VolcanicIslands generation feature, 32 LDS
// banks, and the SGPR init bug workaround feature.
437 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
438 [FeatureVolcanicIslands,
439 FeatureLDSBankCount32,
440 FeatureSGPRInitBug]>;
442 def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
443 [FeatureVolcanicIslands,
444 FeatureLDSBankCount32,
// ISA version 8.0.2: same implied feature list as 8.0.0 (VolcanicIslands,
// 32 LDS banks, SGPR init bug).
447 def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
448 [FeatureVolcanicIslands,
449 FeatureLDSBankCount32,
450 FeatureSGPRInitBug]>;
// ISA version 8.0.3: implies VolcanicIslands and 32 LDS banks; unlike 8.0.0
// and 8.0.2 it does not list FeatureSGPRInitBug.
452 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
453 [FeatureVolcanicIslands,
454 FeatureLDSBankCount32]>;
// ISA version 8.0.4: same implied feature list as 8.0.3 (VolcanicIslands and
// 32 LDS banks, no FeatureSGPRInitBug).
456 def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
457 [FeatureVolcanicIslands,
458 FeatureLDSBankCount32]>;
460 def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
461 [FeatureVolcanicIslands,
462 FeatureLDSBankCount16,
// ISA versions 9.0.0 and 9.0.1. Both pass an empty Implies list, so these
// records by themselves pull in no generation or LDS-bank-count feature.
465 def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
466 def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
468 //===----------------------------------------------------------------------===//
469 // Debugger related subtarget features.
470 //===----------------------------------------------------------------------===//
472 def FeatureDebuggerInsertNops : SubtargetFeature<
473 "amdgpu-debugger-insert-nops",
474 "DebuggerInsertNops",
476 "Insert one nop instruction for each high level source statement"
479 def FeatureDebuggerReserveRegs : SubtargetFeature<
480 "amdgpu-debugger-reserve-regs",
481 "DebuggerReserveRegs",
483 "Reserve registers for debugger usage"
486 def FeatureDebuggerEmitPrologue : SubtargetFeature<
487 "amdgpu-debugger-emit-prologue",
488 "DebuggerEmitPrologue",
490 "Emit debugger prologue"
493 //===----------------------------------------------------------------------===//
495 def AMDGPUInstrInfo : InstrInfo {
496 let guessInstructionProperties = 1;
497 let noNamedPositionallyEncodedOperands = 1;
500 def AMDGPUAsmParser : AsmParser {
501 // Some of the R600 registers have the same name, so this crashes.
502 // For example T0_XYZW and T0_XY both have the asm name T0.
503 let ShouldEmitMatchRegisterName = 0;
506 def AMDGPUAsmWriter : AsmWriter {
507 int PassSubtarget = 1;
510 def AMDGPUAsmVariants {
511 string Default = "Default";
513 string VOP3 = "VOP3";
515 string SDWA = "SDWA";
519 string Disable = "Disable";
523 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
524 let Variant = AMDGPUAsmVariants.Default_ID;
525 let Name = AMDGPUAsmVariants.Default;
528 def VOP3AsmParserVariant : AsmParserVariant {
529 let Variant = AMDGPUAsmVariants.VOP3_ID;
530 let Name = AMDGPUAsmVariants.VOP3;
533 def SDWAAsmParserVariant : AsmParserVariant {
534 let Variant = AMDGPUAsmVariants.SDWA_ID;
535 let Name = AMDGPUAsmVariants.SDWA;
538 def DPPAsmParserVariant : AsmParserVariant {
539 let Variant = AMDGPUAsmVariants.DPP_ID;
540 let Name = AMDGPUAsmVariants.DPP;
543 def AMDGPU : Target {
544 // Pull in Instruction Info:
545 let InstructionSet = AMDGPUInstrInfo;
546 let AssemblyParsers = [AMDGPUAsmParser];
547 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
548 VOP3AsmParserVariant,
549 SDWAAsmParserVariant,
550 DPPAsmParserVariant];
551 let AssemblyWriters = [AMDGPUAsmWriter];
554 // Dummy Instruction itineraries for pseudo instructions
// ALU_NULL is declared as a FuncUnit and NullALU as an InstrItinClass; neither
// record sets any fields here.
555 def ALU_NULL : FuncUnit;
556 def NullALU : InstrItinClass;
558 //===----------------------------------------------------------------------===//
559 // Predicate helper class
560 //===----------------------------------------------------------------------===//
// Predicate whose condition string is the literal "true". PredicateControl
// uses it as the default value of its AssemblerPredicate field.
562 def TruePredicate : Predicate<"true">;
// Matches the SOUTHERN_ISLANDS and SEA_ISLANDS generations exactly (equality
// tests, so later generations do not match). The assembler-side predicate is
// keyed on FeatureGCN1Encoding.
564 def isSICI : Predicate<
565 "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
566 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
567 >, AssemblerPredicate<"FeatureGCN1Encoding">;
// Matches VOLCANIC_ISLANDS and any later generation (the comparison is >=, so
// despite the name this is not VI-only). The assembler-side predicate is keyed
// on FeatureGCN3Encoding.
569 def isVI : Predicate <
570 "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
571 AssemblerPredicate<"FeatureGCN3Encoding">;
// Matches GFX9 and any later generation (>= comparison). The assembler-side
// predicate is keyed on FeatureGFX9Insts.
573 def isGFX9 : Predicate <
574 "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
575 AssemblerPredicate<"FeatureGFX9Insts">;
577 // TODO: Either this name should be changed, or we should simply use IsCI!
// Matches SEA_ISLANDS and any later generation (>= comparison, so it is not
// limited to CI and VI). The assembler-side predicate is keyed on
// FeatureCIInsts.
578 def isCIVI : Predicate <
579 "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
580 AssemblerPredicate<"FeatureCIInsts">;
// True when Subtarget->hasFlatAddressSpace() holds. Only a Predicate is given
// here; unlike the other Has* predicates below, no AssemblerPredicate is
// attached.
// NOTE(review): confirm whether omitting the AssemblerPredicate is intentional.
582 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
// True when Subtarget->has16BitInsts() holds; the assembler-side predicate is
// keyed on Feature16BitInsts.
584 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
585 AssemblerPredicate<"Feature16BitInsts">;
// True when Subtarget->hasVOP3PInsts() holds; the assembler-side predicate is
// keyed on FeatureVOP3P.
586 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
587 AssemblerPredicate<"FeatureVOP3P">;
// True when Subtarget->hasSDWA() holds; the assembler-side predicate is keyed
// on FeatureSDWA.
589 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
590 AssemblerPredicate<"FeatureSDWA">;
// True when Subtarget->hasDPP() holds; the assembler-side predicate is keyed
// on FeatureDPP.
592 def HasDPP : Predicate<"Subtarget->hasDPP()">,
593 AssemblerPredicate<"FeatureDPP">;
595 class PredicateControl {
596 Predicate SubtargetPredicate;
597 Predicate SIAssemblerPredicate = isSICI;
598 Predicate VIAssemblerPredicate = isVI;
599 list<Predicate> AssemblerPredicates = [];
600 Predicate AssemblerPredicate = TruePredicate;
601 list<Predicate> OtherPredicates = [];
602 list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
607 // Include AMDGPU TD files
608 include "R600Schedule.td"
609 include "SISchedule.td"
610 include "Processors.td"
611 include "AMDGPUInstrInfo.td"
612 include "AMDGPUIntrinsics.td"
613 include "AMDGPURegisterInfo.td"
614 include "AMDGPURegisterBanks.td"
615 include "AMDGPUInstructions.td"
616 include "AMDGPUCallingConv.td"