1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===------------------------------------------------------------===//
10 include "llvm/Target/Target.td"
12 //===------------------------------------------------------------===//
13 // Subtarget Features (device properties)
14 //===------------------------------------------------------------===//
16 def FeatureFP64 : SubtargetFeature<"fp64",
19 "Enable double precision operations"
22 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
25 "Assuming f32 fma is at least as fast as mul + add"
28 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
31 "Most fp64 instructions are half rate instead of quarter"
34 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
37 "Older version of ALU instructions encoding"
40 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
43 "Specify use of dedicated vertex cache"
46 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
52 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
58 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
61 "Support flat address space"
64 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
65 "UnalignedBufferAccess",
67 "Support unaligned global loads and stores"
70 def FeatureTrapHandler: SubtargetFeature<"trap-handler",
73 "Trap handler support"
76 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
77 "UnalignedScratchAccess",
79 "Support unaligned scratch loads and stores"
82 def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
85 "Has Memory Aperture Base and Size Registers"
88 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
89 // XNACK. The current default kernel driver setting is:
90 // - graphics ring: XNACK disabled
91 // - compute ring: XNACK enabled
93 // If XNACK is enabled, the VMEM latency can be worse.
94 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
95 def FeatureXNACK : SubtargetFeature<"xnack",
98 "Enable XNACK support"
101 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
104 "VI SGPR initilization bug requiring a fixed SGPR allocation size"
107 class SubtargetFeatureFetchLimit <string Value> :
108 SubtargetFeature <"fetch"#Value,
111 "Limit the maximum number of fetches in a clause to "#Value
// Concrete fetch-limit features. The string argument is spliced by
// SubtargetFeatureFetchLimit into both the feature name ("fetch" + value) and
// the description of the maximum number of fetches allowed in a clause.
114 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
115 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
117 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
118 "wavefrontsize"#Value,
120 !cast<string>(Value),
121 "The number of threads per wavefront"
// Selectable wavefront sizes (number of threads per wavefront): 16, 32 and 64.
// Each feature is named "wavefrontsize" followed by its value.
124 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
125 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
126 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
128 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
129 "ldsbankcount"#Value,
131 !cast<string>(Value),
132 "The number of LDS banks per compute unit."
// Selectable LDS bank counts per compute unit: 16 and 32.
// Each feature is named "ldsbankcount" followed by its value.
135 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
136 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
138 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
139 "localmemorysize"#Value,
141 !cast<string>(Value),
142 "The size of local memory in bytes"
145 def FeatureGCN : SubtargetFeature<"gcn",
151 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
154 "Encoding format for SI and CI"
157 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
160 "Encoding format for VI"
163 def FeatureCIInsts : SubtargetFeature<"ci-insts",
166 "Additional intstructions for CI+"
169 def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
172 "Additional intstructions for GFX9+"
175 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
178 "Has s_memrealtime instruction"
181 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
182 "HasInv2PiInlineImm",
184 "Has 1 / (2 * pi) as inline immediate"
187 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
190 "Has i16/f16 instructions"
193 def FeatureVOP3P : SubtargetFeature<"vop3p",
196 "Has VOP3P packed instructions"
199 def FeatureMovrel : SubtargetFeature<"movrel",
202 "Has v_movrel*_b32 instructions"
205 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
208 "Has VGPR mode register indexing"
211 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
214 "Has store scalar memory instructions"
217 def FeatureSDWA : SubtargetFeature<"sdwa",
220 "Support SDWA (Sub-DWORD Addressing) extension"
223 def FeatureDPP : SubtargetFeature<"dpp",
226 "Support DPP (Data Parallel Primitives) extension"
229 //===------------------------------------------------------------===//
230 // Subtarget Features (options and debugging)
231 //===------------------------------------------------------------===//
233 // Some instructions do not support denormals despite this flag. Using
234 // fp32 denormals also causes instructions to run at the double
235 // precision rate for the device.
236 def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
239 "Enable single precision denormal handling"
242 // Denormal handling for fp64 and fp16 is controlled by the same
243 // config register when fp16 is supported.
244 // TODO: Do we need a separate f16 setting when not legal?
245 def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
248 "Enable double and half precision denormal handling",
252 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
255 "Enable double and half precision denormal handling",
256 [FeatureFP64, FeatureFP64FP16Denormals]
259 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
262 "Enable half precision denormal handling",
263 [FeatureFP64FP16Denormals]
266 def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
269 "clamp modifier clamps NaNs to 0.0"
272 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
275 "Enable floating point exceptions"
278 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
279 "max-private-element-size-"#size,
280 "MaxPrivateElementSize",
282 "Maximum private access size may be "#size
// Selectable maximum private access sizes: 4, 8 and 16. All three go through
// FeatureMaxPrivateElementSize and therefore target the same
// "MaxPrivateElementSize" subtarget attribute.
285 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
286 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
287 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
289 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
290 "EnableVGPRSpilling",
292 "Enable spilling of VGPRs to scratch memory"
295 def FeatureDumpCode : SubtargetFeature <"DumpCode",
298 "Dump MachineInstrs in the CodeEmitter"
301 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
304 "Dump MachineInstrs in the CodeEmitter"
307 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
308 "EnablePromoteAlloca",
310 "Enable promote alloca pass"
313 // XXX - This should probably be removed once enabled by default
314 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
315 "EnableLoadStoreOpt",
317 "Enable SI load/store optimizer pass"
320 // Performance debugging feature. Allow using DS instruction immediate
321 // offsets even if the base pointer can't be proven to be base. On SI,
322 // base pointer values that won't give the same result as a 16-bit add
323 // are not safe to fold, but this will override the conservative test
324 // for the base pointer.
325 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
326 "unsafe-ds-offset-folding",
327 "EnableUnsafeDSOffsetFolding",
329 "Force using DS instruction immediate offsets on SI"
332 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
335 "Enable SI Machine Scheduler"
338 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
339 // all OS-es on VI and newer hardware to avoid assertion failures due
340 // to missing ADDR64 variants of MUBUF instructions.
341 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
344 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
347 "Force to generate flat instruction for global"
350 // Dummy feature used to disable assembler instructions.
351 def FeatureDisable : SubtargetFeature<"",
352 "FeatureDisable","true",
353 "Dummy feature to disable assembler instructions"
// Base class for the per-generation subtarget features.
//   Value   - generation identifier; used verbatim as the feature name, as the
//             enumerator suffix in "AMDGPUSubtarget::<Value>", and as the
//             prefix of the description "<Value> GPU generation".
//   Implies - features that are forwarded to SubtargetFeature as the implied
//             feature list of this generation.
// Every instance passes "Gen" as the second SubtargetFeature argument, so all
// generations share that one attribute.
356 class SubtargetFeatureGeneration <string Value,
357 list<SubtargetFeature> Implies> :
358 SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
359 Value#" GPU generation", Implies>;
// Selectable local memory sizes in bytes: 0, 32768 and 65536. These are
// referenced from the implied-feature lists of the generation features.
361 def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
362 def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
363 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
365 def FeatureR600 : SubtargetFeatureGeneration<"R600",
366 [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
369 def FeatureR700 : SubtargetFeatureGeneration<"R700",
370 [FeatureFetchLimit16, FeatureLocalMemorySize0]
373 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
374 [FeatureFetchLimit16, FeatureLocalMemorySize32768]
377 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
378 [FeatureFetchLimit16, FeatureWavefrontSize64,
379 FeatureLocalMemorySize32768]
382 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
383 [FeatureFP64, FeatureLocalMemorySize32768,
384 FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
385 FeatureLDSBankCount32, FeatureMovrel]
388 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
389 [FeatureFP64, FeatureLocalMemorySize65536,
390 FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
391 FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
394 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
395 [FeatureFP64, FeatureLocalMemorySize65536,
396 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
397 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
398 FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
399 FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
404 def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
405 [FeatureFP64, FeatureLocalMemorySize65536,
406 FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
407 FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
408 FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
409 FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
410 FeatureFastFMAF32, FeatureDPP
414 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
415 list<SubtargetFeature> Implies>
417 "isaver"#Major#"."#Minor#"."#Stepping,
419 "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
420 "Instruction set version number",
424 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
426 FeatureLDSBankCount32]>;
428 def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
431 FeatureLDSBankCount32,
434 def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
436 FeatureLDSBankCount16]>;
438 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
439 [FeatureVolcanicIslands,
440 FeatureLDSBankCount32,
441 FeatureSGPRInitBug]>;
443 def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
444 [FeatureVolcanicIslands,
445 FeatureLDSBankCount32,
448 def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
449 [FeatureVolcanicIslands,
450 FeatureLDSBankCount32,
451 FeatureSGPRInitBug]>;
453 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
454 [FeatureVolcanicIslands,
455 FeatureLDSBankCount32]>;
457 def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
458 [FeatureVolcanicIslands,
459 FeatureLDSBankCount32]>;
461 def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
462 [FeatureVolcanicIslands,
463 FeatureLDSBankCount16,
// Instruction set versions 9.0.0 and 9.0.1. Both pass an empty implied-feature
// list.
// NOTE(review): the 8.x versions in this file imply a generation feature and
// an LDS bank count; these imply nothing - confirm that this is intentional.
466 def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
467 def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
469 //===----------------------------------------------------------------------===//
470 // Debugger related subtarget features.
471 //===----------------------------------------------------------------------===//
473 def FeatureDebuggerInsertNops : SubtargetFeature<
474 "amdgpu-debugger-insert-nops",
475 "DebuggerInsertNops",
477 "Insert one nop instruction for each high level source statement"
480 def FeatureDebuggerReserveRegs : SubtargetFeature<
481 "amdgpu-debugger-reserve-regs",
482 "DebuggerReserveRegs",
484 "Reserve registers for debugger usage"
487 def FeatureDebuggerEmitPrologue : SubtargetFeature<
488 "amdgpu-debugger-emit-prologue",
489 "DebuggerEmitPrologue",
491 "Emit debugger prologue"
494 //===----------------------------------------------------------------------===//
496 def AMDGPUInstrInfo : InstrInfo {
497 let guessInstructionProperties = 1;
498 let noNamedPositionallyEncodedOperands = 1;
501 def AMDGPUAsmParser : AsmParser {
502 // Some of the R600 registers have the same name, so this crashes.
503 // For example T0_XYZW and T0_XY both have the asm name T0.
504 let ShouldEmitMatchRegisterName = 0;
507 def AMDGPUAsmWriter : AsmWriter {
508 int PassSubtarget = 1;
511 def AMDGPUAsmVariants {
512 string Default = "Default";
514 string VOP3 = "VOP3";
516 string SDWA = "SDWA";
520 string Disable = "Disable";
524 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
525 let Variant = AMDGPUAsmVariants.Default_ID;
526 let Name = AMDGPUAsmVariants.Default;
529 def VOP3AsmParserVariant : AsmParserVariant {
530 let Variant = AMDGPUAsmVariants.VOP3_ID;
531 let Name = AMDGPUAsmVariants.VOP3;
534 def SDWAAsmParserVariant : AsmParserVariant {
535 let Variant = AMDGPUAsmVariants.SDWA_ID;
536 let Name = AMDGPUAsmVariants.SDWA;
539 def DPPAsmParserVariant : AsmParserVariant {
540 let Variant = AMDGPUAsmVariants.DPP_ID;
541 let Name = AMDGPUAsmVariants.DPP;
544 def AMDGPU : Target {
545 // Pull in Instruction Info:
546 let InstructionSet = AMDGPUInstrInfo;
547 let AssemblyParsers = [AMDGPUAsmParser];
548 let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
549 VOP3AsmParserVariant,
550 SDWAAsmParserVariant,
551 DPPAsmParserVariant];
552 let AssemblyWriters = [AMDGPUAsmWriter];
555 // Dummy Instruction itineraries for pseudo instructions
// ALU_NULL is declared as a functional unit and NullALU as an itinerary class.
556 def ALU_NULL : FuncUnit;
557 def NullALU : InstrItinClass;
559 //===----------------------------------------------------------------------===//
560 // Predicate helper class
561 //===----------------------------------------------------------------------===//
// Predicate whose condition string is the literal "true". PredicateControl
// uses it as the default value of its AssemblerPredicate field.
563 def TruePredicate : Predicate<"true">;
// Predicate for the SI and CI generations: the condition compares the
// subtarget generation for equality with SOUTHERN_ISLANDS or SEA_ISLANDS.
// The condition is written as two adjacent string literals that together form
// one expression. The assembler-side predicate is FeatureGCN1Encoding.
565 def isSICI : Predicate<
566 "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
567 "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
568 >, AssemblerPredicate<"FeatureGCN1Encoding">;
// Predicate for the VI encoding. Note that the condition uses ">=", so it is
// not restricted to a generation value exactly equal to VOLCANIC_ISLANDS.
// The assembler-side predicate is FeatureGCN3Encoding.
570 def isVI : Predicate <
571 "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
572 AssemblerPredicate<"FeatureGCN3Encoding">;
// Predicate that holds when the subtarget generation compares ">=" to GFX9.
// The assembler-side predicate is FeatureGFX9Insts.
574 def isGFX9 : Predicate <
575 "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
576 AssemblerPredicate<"FeatureGFX9Insts">;
578 // TODO: Either rename this predicate or simply use isCI.
// Predicate that holds when the subtarget generation compares ">=" to
// SEA_ISLANDS. The assembler-side predicate is FeatureCIInsts.
579 def isCIVI : Predicate <
580 "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
581 AssemblerPredicate<"FeatureCIInsts">;
// Predicate backed by Subtarget->hasFlatAddressSpace(). Unlike the
// neighbouring Has* predicates, no AssemblerPredicate is attached here.
583 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
// Predicates for 16-bit and VOP3P instruction support. Each pairs a subtarget
// query (has16BitInsts() / hasVOP3PInsts()) with the matching assembler
// feature (Feature16BitInsts / FeatureVOP3P).
585 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
586 AssemblerPredicate<"Feature16BitInsts">;
587 def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
588 AssemblerPredicate<"FeatureVOP3P">;
// Predicates for the SDWA and DPP extensions. Each pairs a subtarget query
// (hasSDWA() / hasDPP()) with the matching assembler feature
// (FeatureSDWA / FeatureDPP).
590 def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
591 AssemblerPredicate<"FeatureSDWA">;

593 def HasDPP : Predicate<"Subtarget->hasDPP()">,
594 AssemblerPredicate<"FeatureDPP">;
596 class PredicateControl {
597 Predicate SubtargetPredicate;
598 Predicate SIAssemblerPredicate = isSICI;
599 Predicate VIAssemblerPredicate = isVI;
600 list<Predicate> AssemblerPredicates = [];
601 Predicate AssemblerPredicate = TruePredicate;
602 list<Predicate> OtherPredicates = [];
603 list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
608 // Include AMDGPU TD files
609 include "R600Schedule.td"
610 include "SISchedule.td"
611 include "Processors.td"
612 include "AMDGPUInstrInfo.td"
613 include "AMDGPUIntrinsics.td"
614 include "AMDGPURegisterInfo.td"
615 include "AMDGPURegisterBanks.td"
616 include "AMDGPUInstructions.td"
617 include "AMDGPUCallingConv.td"