1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===------------------------------------------------------------===//
10 include "llvm/Target/Target.td"
12 //===------------------------------------------------------------===//
13 // Subtarget Features (device properties)
14 //===------------------------------------------------------------===//
// Subtarget feature selected by "fp64"; per its description it enables
// double precision operations.
16 def FeatureFP64 : SubtargetFeature<"fp64",
19 "Enable double precision operations"
// Subtarget feature "fast-fmaf": per its description, f32 fma is assumed to be
// at least as fast as a separate mul + add.
22 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
25 "Assuming f32 fma is at least as fast as mul + add"
// Subtarget feature "half-rate-64-ops": per its description, most fp64
// instructions run at half rate instead of quarter rate.
// NOTE(review): unlike the neighbouring defs, this record name has no
// "Feature" prefix.
28 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
31 "Most fp64 instructions are half rate instead of quarter"
// Subtarget feature "R600ALUInst": per its description, selects the older
// version of the ALU instruction encoding.
34 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
37 "Older version of ALU instructions encoding"
// Subtarget feature "HasVertexCache": per its description, specifies use of a
// dedicated vertex cache.
40 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
43 "Specify use of dedicated vertex cache"
// Subtarget feature "caymanISA".
// NOTE(review): no description or further arguments are visible for this def
// in this listing - confirm against the upstream file.
46 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
// Subtarget feature "cfalubug".
// NOTE(review): no description or further arguments are visible for this def
// in this listing - confirm against the upstream file.
52 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
// Flat memory features. Each comment paraphrases the def's description string.

// "flat-address-space": the flat address space is supported.
58 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
61 "Support flat address space"
// "flat-inst-offsets": flat instructions have an immediate offset addressing
// mode.
64 def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets",
67 "Flat instructions have immediate offset addressing mode"
// "flat-global-insts": the global_* flat memory instructions are available.
70 def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts",
73 "Have global_* flat memory instructions"
// "flat-scratch-insts": the scratch_* flat memory instructions are available.
76 def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
79 "Have scratch_* flat memory instructions"
// Subtarget feature "unaligned-buffer-access"; the second argument names
// "UnalignedBufferAccess" (presumably the subtarget field it sets - confirm
// against SubtargetFeature in Target.td). Per the description it covers
// unaligned global loads and stores.
82 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
83 "UnalignedBufferAccess",
85 "Support unaligned global loads and stores"
// Subtarget feature "trap-handler": per its description, trap handler support.
88 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
91 "Trap handler support"
// Subtarget feature "unaligned-scratch-access"; the second argument names
// "UnalignedScratchAccess" (presumably the subtarget field it sets - confirm
// against SubtargetFeature in Target.td). Per the description it covers
// unaligned scratch loads and stores.
94 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
95 "UnalignedScratchAccess",
97 "Support unaligned scratch loads and stores"
// Subtarget feature "aperture-regs": per its description, the device has
// memory aperture base and size registers.
100 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
103 "Has Memory Aperture Base and Size Registers"
// Subtarget feature "xnack": per its description, enables XNACK support.
// The notes below describe when XNACK is active and what it costs.
106 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
107 // XNACK. The current default kernel driver setting is:
108 // - graphics ring: XNACK disabled
109 // - compute ring: XNACK enabled
111 // If XNACK is enabled, the VMEM latency can be worse.
112 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
113 def FeatureXNACK : SubtargetFeature<"xnack",
116 "Enable XNACK support"
// Subtarget feature "sgpr-init-bug": marks the VI SGPR initialization bug
// which, per the description, requires a fixed SGPR allocation size.
119 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
122 "VI SGPR initialization bug requiring a fixed SGPR allocation size"
// Feature class parameterized by a string Value. The feature string is
// "fetch" # Value, and the description names Value as the maximum number of
// fetches allowed in a clause.
125 class SubtargetFeatureFetchLimit <string Value> :
126 SubtargetFeature <"fetch"#Value,
129 "Limit the maximum number of fetches in a clause to "#Value
// Fetch-limit features for clause limits of 8 and 16 fetches.
132 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
133 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
// Feature class parameterized by an integer Value. The feature string is
// "wavefrontsize" # Value, and Value is also passed on as a string via
// !cast<string>. The description calls it the number of threads per wavefront.
135 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
136 "wavefrontsize"#Value,
138 !cast<string>(Value),
139 "The number of threads per wavefront"
// Wavefront-size features for 16, 32 and 64 threads per wavefront.
142 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
143 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
144 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
// Feature class parameterized by an integer Value. The feature string is
// "ldsbankcount" # Value, and Value is also passed on as a string via
// !cast<string>. The description calls it the number of LDS banks per compute
// unit.
146 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
147 "ldsbankcount"#Value,
149 !cast<string>(Value),
150 "The number of LDS banks per compute unit."
// LDS bank count features for 16 and 32 banks.
153 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
154 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
// Feature class parameterized by an integer Value. The feature string is
// "localmemorysize" # Value, and Value is also passed on as a string via
// !cast<string>. The description gives the unit: size of local memory in bytes.
156 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
157 "localmemorysize"#Value,
159 !cast<string>(Value),
160 "The size of local memory in bytes"
// Subtarget feature "gcn".
// NOTE(review): no description or further arguments are visible for this def
// in this listing - confirm against the upstream file.
163 def FeatureGCN : SubtargetFeature<"gcn",
// Instruction encoding features. Per their descriptions, "gcn1-encoding" is
// the encoding format for SI and CI, and "gcn3-encoding" is the one for VI.
169 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
172 "Encoding format for SI and CI"
175 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
178 "Encoding format for VI"
// Subtarget feature "ci-insts": per its description, the additional
// instructions available on CI and later.
181 def FeatureCIInsts : SubtargetFeature<"ci-insts",
184 "Additional instructions for CI+"
// Subtarget feature "gfx9-insts": per its description, the additional
// instructions available on GFX9 and later.
187 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
190 "Additional instructions for GFX9+"
// Subtarget feature "s-memrealtime": the s_memrealtime instruction is
// available (per the description).
193 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
196 "Has s_memrealtime instruction"
// Subtarget feature "inv-2pi-inline-imm"; the second argument names
// "HasInv2PiInlineImm". Per the description, 1 / (2 * pi) is available as an
// inline immediate.
199 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
200 "HasInv2PiInlineImm",
202 "Has 1 / (2 * pi) as inline immediate"
// Subtarget feature "16-bit-insts": i16/f16 instructions are available (per
// the description).
205 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
208 "Has i16/f16 instructions"
// Subtarget feature "vop3p": VOP3P packed instructions are available (per the
// description).
211 def FeatureVOP3P : SubtargetFeature<"vop3p",
214 "Has VOP3P packed instructions"
// Subtarget feature "movrel": the v_movrel*_b32 instructions are available
// (per the description).
217 def FeatureMovrel : SubtargetFeature<"movrel",
220 "Has v_movrel*_b32 instructions"
// Subtarget feature "vgpr-index-mode": VGPR mode register indexing is
// available (per the description).
223 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
226 "Has VGPR mode register indexing"
// Subtarget feature "scalar-stores": scalar memory store instructions are
// available (per the description).
229 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
232 "Has store scalar memory instructions"
// Subtarget feature "sdwa": the SDWA (Sub-DWORD Addressing) extension is
// supported (per the description).
235 def FeatureSDWA : SubtargetFeature<"sdwa",
238 "Support SDWA (Sub-DWORD Addressing) extension"
// Subtarget feature "dpp": the DPP (Data Parallel Primitives) extension is
// supported (per the description).
241 def FeatureDPP : SubtargetFeature<"dpp",
244 "Support DPP (Data Parallel Primitives) extension"
247 //===------------------------------------------------------------===//
248 // Subtarget Features (options and debugging)
249 //===------------------------------------------------------------===//
// Subtarget feature "fp32-denormals": enables single precision denormal
// handling (per the description). Caveats follow.
251 // Some instructions do not support denormals despite this flag. Using
252 // fp32 denormals also causes instructions to run at the double
253 // precision rate for the device.
254 def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
257 "Enable single precision denormal handling"
// Subtarget feature "fp64-fp16-denormals": one switch for both double and
// half precision denormal handling (per the description).
// NOTE(review): the description line ends with a comma, so at least one more
// argument follows that is not visible in this listing.
260 // Denormal handling for fp64 and fp16 is controlled by the same
261 // config register when fp16 is supported.
262 // TODO: Do we need a separate f16 setting when not legal?
263 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
266 "Enable double and half precision denormal handling",
// Subtarget feature "fp64-denormals". Its trailing feature list names
// FeatureFP64 and FeatureFP64FP16Denormals.
// NOTE(review): the description text is the same as the one on
// FeatureFP64FP16Denormals ("double and half"), not fp64-specific.
270 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
273 "Enable double and half precision denormal handling",
274 [FeatureFP64, FeatureFP64FP16Denormals]
// Subtarget feature "fp16-denormals": half precision denormal handling (per
// the description). Its trailing feature list names FeatureFP64FP16Denormals.
277 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
280 "Enable half precision denormal handling",
281 [FeatureFP64FP16Denormals]
// Subtarget feature "dx10-clamp": per its description, the clamp modifier
// clamps NaNs to 0.0.
284 def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
287 "clamp modifier clamps NaNs to 0.0"
// Subtarget feature "fp-exceptions": enables floating point exceptions (per
// the description).
290 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
293 "Enable floating point exceptions"
// Feature class parameterized by an integer size. The feature string is
// "max-private-element-size-" # size, the second argument names
// "MaxPrivateElementSize", and the description reports size as the maximum
// private access size.
296 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
297 "max-private-element-size-"#size,
298 "MaxPrivateElementSize",
300 "Maximum private access size may be "#size
// Maximum private element size features for sizes 4, 8 and 16.
303 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
304 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
305 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
// Subtarget feature "vgpr-spilling"; the second argument names
// "EnableVGPRSpilling". Per the description it enables spilling of VGPRs to
// scratch memory.
307 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
308 "EnableVGPRSpilling",
310 "Enable spilling of VGPRs to scratch memory"
// Two spellings of the same debugging option, "DumpCode" and "dumpcode". Both
// carry the identical description: dump MachineInstrs in the CodeEmitter.
313 def FeatureDumpCode : SubtargetFeature <"DumpCode",
316 "Dump MachineInstrs in the CodeEmitter"
319 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
322 "Dump MachineInstrs in the CodeEmitter"
// Subtarget feature "promote-alloca"; the second argument names
// "EnablePromoteAlloca". Per the description it enables the promote alloca
// pass.
325 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
326 "EnablePromoteAlloca",
328 "Enable promote alloca pass"
// Subtarget feature "load-store-opt"; the second argument names
// "EnableLoadStoreOpt". Per the description it enables the SI load/store
// optimizer pass.
331 // XXX - This should probably be removed once enabled by default
332 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
333 "EnableLoadStoreOpt",
335 "Enable SI load/store optimizer pass"
// Subtarget feature "unsafe-ds-offset-folding"; the second argument names
// "EnableUnsafeDSOffsetFolding". Per the description it forces use of DS
// instruction immediate offsets on SI.
338 // Performance debugging feature. Allow using DS instruction immediate
339 // offsets even if the base pointer can't be proven to be base. On SI,
340 // base pointer values that won't give the same result as a 16-bit add
341 // are not safe to fold, but this will override the conservative test
342 // for the base pointer.
343 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
344 "unsafe-ds-offset-folding",
345 "EnableUnsafeDSOffsetFolding",
347 "Force using DS instruction immediate offsets on SI"
// Subtarget feature "si-scheduler": enables the SI Machine Scheduler (per the
// description).
350 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
353 "Enable SI Machine Scheduler"
356 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
357 // all OS-es on VI and newer hardware to avoid assertion failures due
358 // to missing ADDR64 variants of MUBUF instructions.
359 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF instructions.
// Subtarget feature "flat-for-global": per its description, forces generation
// of flat instructions for global accesses.
362 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
365 "Force to generate flat instruction for global"
// The feature string here is empty; the remaining arguments are
// "FeatureDisable", "true" and the description.
368 // Dummy feature used to disable assembler instructions.
369 def FeatureDisable : SubtargetFeature<"",
370 "FeatureDisable","true",
371 "Dummy feature to disable assembler instructions"
// Feature class for a GPU generation.
//   Value   - generation name; used as the feature string, appended to
//             "AMDGPUSubtarget::" for the third argument, and prepended to
//             " GPU generation" for the description.
//   Implies - list of SubtargetFeature records forwarded unchanged as the
//             final SubtargetFeature argument.
// The second argument is the fixed string "Gen" (presumably the subtarget
// field that receives the AMDGPUSubtarget:: value - confirm in Target.td).
374 class SubtargetFeatureGeneration <string Value,
375 list<SubtargetFeature> Implies> :
376 SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
377 Value#" GPU generation", Implies>;
// Local memory size features for 0, 32768 and 65536 (bytes, per the
// SubtargetFeatureLocalMemorySize description).
379 def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
380 def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
381 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
// Generation records for the "R600", "R700", "EVERGREEN" and
// "NORTHERN_ISLANDS" generations. Each passes its generation name and the list
// of features handed to SubtargetFeatureGeneration's Implies parameter.

// R600: R600 ALU encoding, fetch limit 8, local memory size 0.
383 def FeatureR600 : SubtargetFeatureGeneration<"R600",
384 [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
// R700: fetch limit 16, local memory size 0.
387 def FeatureR700 : SubtargetFeatureGeneration<"R700",
388 [FeatureFetchLimit16, FeatureLocalMemorySize0]
// EVERGREEN: fetch limit 16, local memory size 32768.
391 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
392 [FeatureFetchLimit16, FeatureLocalMemorySize32768]
// NORTHERN_ISLANDS: fetch limit 16, wavefront size 64, local memory size 32768.
395 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
396 [FeatureFetchLimit16, FeatureWavefrontSize64,
397 FeatureLocalMemorySize32768]
// "SOUTHERN_ISLANDS" generation record. Its feature list has FP64, local
// memory size 32768, wavefront size 64, GCN with the GCN1 encoding, 32 LDS
// banks and movrel.
400 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
401 [FeatureFP64, FeatureLocalMemorySize32768,
402 FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
403 FeatureLDSBankCount32, FeatureMovrel]
// "SEA_ISLANDS" generation record. Its feature list has FP64, local memory
// size 65536, wavefront size 64, GCN with the GCN1 encoding, flat address
// space, the CI instructions and movrel. No LDS bank count appears in this
// list.
406 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
407 [FeatureFP64, FeatureLocalMemorySize65536,
408 FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
409 FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
// "VOLCANIC_ISLANDS" generation record; this generation uses the GCN3 encoding.
// NOTE(review): the feature list ends on a trailing comma after FeatureSDWA,
// so the remaining entries and the closing "]" are not visible in this listing.
412 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
413 [FeatureFP64, FeatureLocalMemorySize65536,
414 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
415 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
416 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
417 FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
// "GFX9" generation record. Its feature list includes the GCN3 encoding, the
// GFX9 instructions, VOP3P, aperture registers, fast f32 fma, DPP and the
// flat offset/global/scratch instruction features.
// FeatureMovrel and FeatureSDWA do not appear in the visible list.
422 def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
423 [FeatureFP64, FeatureLocalMemorySize65536,
424 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
425 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
426 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
427 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
428 FeatureFastFMAF32, FeatureDPP,
429 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
// Feature class for an instruction set version, parameterized by Major, Minor
// and Stepping integers plus an Implies feature list. The visible arguments
// build "isaver<Major>.<Minor>.<Stepping>" and
// "ISAVersion<Major>_<Minor>_<Stepping>", with the description
// "Instruction set version number".
// NOTE(review): the parent-class line and the trailing arguments are not
// visible in this listing - confirm against the upstream file.
433 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
434 list<SubtargetFeature> Implies>
436 "isaver"#Major#"."#Minor#"."#Stepping,
438 "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
439 "Instruction set version number",
// ISA version records 7.0.0, 7.0.1 and 7.0.2. The visible entries give 32 LDS
// banks to 7.0.0 and 7.0.1, and 16 to 7.0.2.
// NOTE(review): the first entry of each feature list (and the rest of the
// 7.0.1 list) is not visible in this listing.
443 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
445 FeatureLDSBankCount32]>;
447 def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
450 FeatureLDSBankCount32,
453 def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
455 FeatureLDSBankCount16]>;
// ISA version records 8.0.0 through 8.1.0. All list FeatureVolcanicIslands.
// 8.0.x list 32 LDS banks and 8.1.0 lists 16. 8.0.0 and 8.0.2 additionally
// list FeatureSGPRInitBug.
// NOTE(review): the 8.0.1 and 8.1.0 lists end on a trailing comma, so their
// final entries are not visible in this listing.
457 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
458 [FeatureVolcanicIslands,
459 FeatureLDSBankCount32,
460 FeatureSGPRInitBug]>;
462 def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
463 [FeatureVolcanicIslands,
464 FeatureLDSBankCount32,
467 def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
468 [FeatureVolcanicIslands,
469 FeatureLDSBankCount32,
470 FeatureSGPRInitBug]>;
472 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
473 [FeatureVolcanicIslands,
474 FeatureLDSBankCount32]>;
476 def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
477 [FeatureVolcanicIslands,
478 FeatureLDSBankCount32]>;
480 def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
481 [FeatureVolcanicIslands,
482 FeatureLDSBankCount16,
// ISA version records 9.0.0 and 9.0.1; both pass an empty feature list.
485 def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
486 def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
488 //===----------------------------------------------------------------------===//
489 // Debugger related subtarget features.
490 //===----------------------------------------------------------------------===//
// Debugger features. Each comment paraphrases the def's description string.
// The second argument of each names the matching Debugger* identifier.

// "amdgpu-debugger-insert-nops": insert one nop instruction per high level
// source statement.
492 def FeatureDebuggerInsertNops : SubtargetFeature<
493 "amdgpu-debugger-insert-nops",
494 "DebuggerInsertNops",
496 "Insert one nop instruction for each high level source statement"
// "amdgpu-debugger-reserve-regs": reserve registers for debugger usage.
499 def FeatureDebuggerReserveRegs : SubtargetFeature<
500 "amdgpu-debugger-reserve-regs",
501 "DebuggerReserveRegs",
503 "Reserve registers for debugger usage"
// "amdgpu-debugger-emit-prologue": emit the debugger prologue.
506 def FeatureDebuggerEmitPrologue : SubtargetFeature<
507 "amdgpu-debugger-emit-prologue",
508 "DebuggerEmitPrologue",
510 "Emit debugger prologue"
513 //===----------------------------------------------------------------------===//
// InstrInfo record for the target. It sets guessInstructionProperties and
// noNamedPositionallyEncodedOperands to 1.
// NOTE(review): the closing "}" is not visible in this listing.
515 def AMDGPUInstrInfo : InstrInfo {
516 let guessInstructionProperties = 1;
517 let noNamedPositionallyEncodedOperands = 1;
// AsmParser record for the target. ShouldEmitMatchRegisterName is set to 0
// for the reason given in the comment inside.
// NOTE(review): the closing "}" is not visible in this listing.
520 def AMDGPUAsmParser : AsmParser {
521 // Some of the R600 registers have the same name, so this crashes.
522 // For example T0_XYZW and T0_XY both have the asm name T0.
523 let ShouldEmitMatchRegisterName = 0;
// AsmWriter record for the target; declares PassSubtarget = 1.
// NOTE(review): the closing "}" is not visible in this listing.
526 def AMDGPUAsmWriter : AsmWriter {
527 int PassSubtarget = 1;
// Names of the assembler variants, referenced by the AsmParserVariant defs in
// this file.
// NOTE(review): those defs also read Default_ID, VOP3_ID, SDWA_ID, DPP_ID and
// DPP from this record, none of which is visible in this listing. The closing
// "}" is missing too.
530 def AMDGPUAsmVariants {
531 string Default = "Default";
533 string VOP3 = "VOP3";
535 string SDWA = "SDWA";
539 string Disable = "Disable";
// One AsmParserVariant record per assembler variant (Default, VOP3, SDWA,
// DPP). Each takes its Variant from the matching *_ID field of
// AMDGPUAsmVariants and its Name from the matching string field.
// NOTE(review): the closing "}" of each def is not visible in this listing.
543 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
544 let Variant = AMDGPUAsmVariants.Default_ID;
545 let Name = AMDGPUAsmVariants.Default;
548 def VOP3AsmParserVariant : AsmParserVariant {
549 let Variant = AMDGPUAsmVariants.VOP3_ID;
550 let Name = AMDGPUAsmVariants.VOP3;
553 def SDWAAsmParserVariant : AsmParserVariant {
554 let Variant = AMDGPUAsmVariants.SDWA_ID;
555 let Name = AMDGPUAsmVariants.SDWA;
558 def DPPAsmParserVariant : AsmParserVariant {
559 let Variant = AMDGPUAsmVariants.DPP_ID;
560 let Name = AMDGPUAsmVariants.DPP;
// Top-level Target record. It ties together the instruction info, the single
// assembly parser, the four parser variants (Default, VOP3, SDWA, DPP) and the
// assembly writer defined in this file.
// NOTE(review): the closing "}" is not visible in this listing.
563 def AMDGPU : Target {
564 // Pull in Instruction Info:
565 let InstructionSet = AMDGPUInstrInfo;
566 let AssemblyParsers = [AMDGPUAsmParser];
567 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
568 VOP3AsmParserVariant,
569 SDWAAsmParserVariant,
570 DPPAsmParserVariant];
571 let AssemblyWriters = [AMDGPUAsmWriter];
// ALU_NULL is a FuncUnit record and NullALU an InstrItinClass record; neither
// has a body.
574 // Dummy Instruction itineraries for pseudo instructions
575 def ALU_NULL : FuncUnit;
576 def NullALU : InstrItinClass;
578 //===----------------------------------------------------------------------===//
579 // Predicate helper class
580 //===----------------------------------------------------------------------===//
// Predicate whose condition string is the literal "true"; used in this file as
// the default AssemblerPredicate of PredicateControl.
582 def TruePredicate : Predicate<"true">;
// Predicate for the SOUTHERN_ISLANDS or SEA_ISLANDS generation (exact match on
// getGeneration()). The condition text is split across two adjacent string
// literals. The assembler side keys on FeatureGCN1Encoding.
584 def isSICI : Predicate<
585 "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
586 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
587 >, AssemblerPredicate<"FeatureGCN1Encoding">;
// Predicate for generation >= VOLCANIC_ISLANDS. Because the comparison is >=
// rather than ==, this is not limited to VI itself. The assembler side keys on
// FeatureGCN3Encoding.
589 def isVI : Predicate <
590 "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
591 AssemblerPredicate<"FeatureGCN3Encoding">;
// Predicate for generation >= GFX9. The assembler side keys on
// FeatureGFX9Insts.
593 def isGFX9 : Predicate <
594 "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
595 AssemblerPredicate<"FeatureGFX9Insts">;
597 // TODO: Either the name should be changed or we should simply use IsCI!
// Predicate for generation >= SEA_ISLANDS. Despite the "CIVI" name the
// comparison has no upper bound. The assembler side keys on FeatureCIInsts.
598 def isCIVI : Predicate <
599 "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
600 AssemblerPredicate<"FeatureCIInsts">;
// Predicate on Subtarget->hasFlatAddressSpace(). Unlike the other Has*
// predicates in this file, it has no AssemblerPredicate.
602 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
// Capability predicates. Each pairs a Subtarget->has*() query with an
// AssemblerPredicate on a feature record: Feature16BitInsts, FeatureVOP3P,
// FeatureSDWA and FeatureDPP respectively.
604 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
605 AssemblerPredicate<"Feature16BitInsts">;
606 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
607 AssemblerPredicate<"FeatureVOP3P">;
609 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
610 AssemblerPredicate<"FeatureSDWA">;
612 def HasDPP : Predicate<"Subtarget->hasDPP()">,
613 AssemblerPredicate<"FeatureDPP">;
// Helper class that collects the predicate fields for a record.
//   SubtargetPredicate   - declared without a default.
//   SIAssemblerPredicate - defaults to isSICI.
//   VIAssemblerPredicate - defaults to isVI.
//   AssemblerPredicates  - defaults to an empty list.
//   AssemblerPredicate   - defaults to TruePredicate.
//   OtherPredicates      - defaults to an empty list.
//   Predicates           - built with !listconcat, starting from
//                          [SubtargetPredicate, AssemblerPredicate].
// NOTE(review): the remaining !listconcat operands and the closing "}" are
// not visible in this listing - confirm against the upstream file.
615 class PredicateControl {
616 Predicate SubtargetPredicate;
617 Predicate SIAssemblerPredicate = isSICI;
618 Predicate VIAssemblerPredicate = isVI;
619 list<Predicate> AssemblerPredicates = [];
620 Predicate AssemblerPredicate = TruePredicate;
621 list<Predicate> OtherPredicates = [];
622 list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
627 // Include AMDGPU TD files
628 include "R600Schedule.td"
629 include "SISchedule.td"
630 include "Processors.td"
631 include "AMDGPUInstrInfo.td"
632 include "AMDGPUIntrinsics.td"
633 include "AMDGPURegisterInfo.td"
634 include "AMDGPURegisterBanks.td"
635 include "AMDGPUInstructions.td"
636 include "AMDGPUCallingConv.td"