contrib/llvm/lib/Target/AMDGPU/AMDGPU.td

   1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===------------------------------------------------------------===//
   9
  10 include "llvm/Target/Target.td"
  11
  12 //===------------------------------------------------------------===//
  13 // Subtarget Features (device properties)
  14 //===------------------------------------------------------------===//
  15
  // Subtarget feature "fp64": when enabled, sets the FP64 subtarget attribute
  // to "true". Also listed as a prerequisite by FeatureFP64Denormals.
  16 def FeatureFP64 : SubtargetFeature<"fp64",
  17   "FP64",
  18   "true",
  19   "Enable double precision operations"
  20 >;
  21
  // Subtarget feature "fast-fmaf": when enabled, sets the FastFMAF32 attribute
  // to "true", i.e. f32 fma is assumed to be at least as fast as mul + add.
  22 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  23   "FastFMAF32",
  24   "true",
  25   "Assuming f32 fma is at least as fast as mul + add"
  26 >;
  27
  // Subtarget feature "half-rate-64-ops": when enabled, sets the HalfRate64Ops
  // attribute to "true". Note that this record name has no "Feature" prefix,
  // unlike the other feature records in this file; it is referenced by that
  // exact name (e.g. in FeatureISAVersion7_0_1).
  28 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  29   "HalfRate64Ops",
  30   "true",
  31   "Most fp64 instructions are half rate instead of quarter"
  32 >;
  33
  // Subtarget feature "R600ALUInst". Unlike most features here, the value is
  // "false": enabling this feature assigns false to the R600ALUInst attribute.
  // NOTE(review): the attribute's default is declared outside this file --
  // confirm it defaults to true so that this assignment has an effect.
  34 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  35   "R600ALUInst",
  36   "false",
  37   "Older version of ALU instructions encoding"
  38 >;
  39
  // Subtarget feature "HasVertexCache": when enabled, sets the HasVertexCache
  // attribute to "true" (use of a dedicated vertex cache).
  40 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  41   "HasVertexCache",
  42   "true",
  43   "Specify use of dedicated vertex cache"
  44 >;
  45
  // Subtarget feature "caymanISA": when enabled, sets the CaymanISA attribute
  // to "true".
  46 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  47   "CaymanISA",
  48   "true",
  49   "Use Cayman ISA"
  50 >;
  51
  // Subtarget feature "cfalubug": when enabled, sets the CFALUBug attribute to
  // "true", marking the GPU as having the CF_ALU bug.
  52 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  53   "CFALUBug",
  54   "true",
  55   "GPU has CF_ALU bug"
  56 >;
  57
  // Subtarget feature "flat-address-space": when enabled, sets the
  // FlatAddressSpace attribute to "true". Implied by the Sea Islands and
  // Volcanic Islands generation features defined later in this file.
  58 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  59   "FlatAddressSpace",
  60   "true",
  61   "Support flat address space"
  62 >;
  63
  // Subtarget feature "unaligned-buffer-access": when enabled, sets the
  // UnalignedBufferAccess attribute to "true". The description speaks of
  // unaligned global loads and stores.
  64 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
  65   "UnalignedBufferAccess",
  66   "true",
  67   "Support unaligned global loads and stores"
  68 >;
  69
  // Subtarget feature "unaligned-scratch-access": when enabled, sets the
  // UnalignedScratchAccess attribute to "true".
  70 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
  71   "UnalignedScratchAccess",
  72   "true",
  73   "Support unaligned scratch loads and stores"
  74 >;
  75
  76 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
  77 // XNACK. The current default kernel driver setting is:
  78 // - graphics ring: XNACK disabled
  79 // - compute ring: XNACK enabled
  80 //
  81 // If XNACK is enabled, the VMEM latency can be worse.
  82 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
  // Subtarget feature "xnack": when enabled, sets the EnableXNACK attribute to
  // "true". Implied by FeatureISAVersion8_0_1 and FeatureISAVersion8_1_0.
  83 def FeatureXNACK : SubtargetFeature<"xnack",
  84   "EnableXNACK",
  85   "true",
  86   "Enable XNACK support"
  87 >;
  88
  // Subtarget feature "sgpr-init-bug": when enabled, sets the SGPRInitBug
  // attribute to "true". Implied by FeatureISAVersion8_0_0 and
  // FeatureISAVersion8_0_2.
  89 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  90   "SGPRInitBug",
  91   "true",
  92   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
  93 >;
  94
  // Helper class producing a fetch-limit feature. For a given string Value it
  // builds a feature named "fetch"#Value that assigns Value to the
  // TexVTXClauseSize attribute; Value is also appended to the description.
  95 class SubtargetFeatureFetchLimit <string Value> :
  96                           SubtargetFeature <"fetch"#Value,
  97   "TexVTXClauseSize",
  98   Value,
  99   "Limit the maximum number of fetches in a clause to "#Value
 100 >;
 101
 // Concrete fetch-limit features ("fetch8", "fetch16"). FeatureFetchLimit8 is
 // implied by FeatureR600; FeatureFetchLimit16 by the R700, Evergreen and
 // Northern Islands generation features.
 102 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
 103 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
 104
 // Helper class producing a wavefront-size feature. For an integer Value it
 // builds a feature named "wavefrontsize"#Value that assigns Value (cast to a
 // string, as the value field is a string) to the WavefrontSize attribute.
 105 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
 106   "wavefrontsize"#Value,
 107   "WavefrontSize",
 108   !cast<string>(Value),
 109   "The number of threads per wavefront"
 110 >;
 111
 // Concrete wavefront-size features for 16, 32 and 64 threads. Only
 // FeatureWavefrontSize64 is referenced by the generation features in this
 // file.
 112 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
 113 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 114 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
 115
 // Helper class producing an LDS-bank-count feature. For an integer Value it
 // builds a feature named "ldsbankcount"#Value that assigns Value (cast to a
 // string) to the LDSBankCount attribute.
 116 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
 117   "ldsbankcount"#Value,
 118   "LDSBankCount",
 119   !cast<string>(Value),
 120   "The number of LDS banks per compute unit."
 121 >;
 122
 // Concrete LDS-bank-count features (16 and 32 banks). They are selected by
 // the ISA version features, and FeatureLDSBankCount32 additionally by
 // FeatureSouthernIslands.
 123 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 124 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
 125
 // Helper class producing a local-memory-size feature. For an integer Value
 // (a size in bytes, per the description) it builds a feature named
 // "localmemorysize"#Value that assigns Value (cast to a string) to the
 // LocalMemorySize attribute. The concrete records are defined further down,
 // next to the generation features that use them.
 126 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
 127   "localmemorysize"#Value,
 128   "LocalMemorySize",
 129   !cast<string>(Value),
 130   "The size of local memory in bytes"
 131 >;
 132
 // Subtarget feature "gcn": when enabled, sets the IsGCN attribute to "true".
 // Implied by the Southern, Sea and Volcanic Islands generation features.
 133 def FeatureGCN : SubtargetFeature<"gcn",
 134   "IsGCN",
 135   "true",
 136   "GCN or newer GPU"
 137 >;
 138
 // Subtarget feature "gcn1-encoding": when enabled, sets the GCN1Encoding
 // attribute to "true". Used as the assembler predicate of isSICI below.
 139 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
 140   "GCN1Encoding",
 141   "true",
 142   "Encoding format for SI and CI"
 143 >;
 144
 // Subtarget feature "gcn3-encoding": when enabled, sets the GCN3Encoding
 // attribute to "true". Used as the assembler predicate of isVI below.
 145 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
 146   "GCN3Encoding",
 147   "true",
 148   "Encoding format for VI"
 149 >;
 150
 // Subtarget feature "ci-insts": when enabled, sets the CIInsts attribute to
 // "true". Implied by the Sea Islands and Volcanic Islands generation
 // features and used as the assembler predicate of isCIVI below.
 151 def FeatureCIInsts : SubtargetFeature<"ci-insts",
 152   "CIInsts",
 153   "true",
 154   "Additional instructions for CI+"
 155 >;
 156
 // Subtarget feature "s-memrealtime": when enabled, sets the HasSMemRealTime
 // attribute to "true". Implied by FeatureVolcanicIslands.
 157 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
 158   "HasSMemRealTime",
 159   "true",
 160   "Has s_memrealtime instruction"
 161 >;
 162
 // Subtarget feature "inv-2pi-inline-imm": when enabled, sets the
 // HasInv2PiInlineImm attribute to "true". Implied by FeatureVolcanicIslands.
 163 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
 164   "HasInv2PiInlineImm",
 165   "true",
 166   "Has 1 / (2 * pi) as inline immediate"
 167 >;
 168
 // Subtarget feature "16-bit-insts": when enabled, sets the Has16BitInsts
 // attribute to "true". Implied by FeatureVolcanicIslands. Not to be confused
 // with the Predicate record named Has16BitInsts defined later in this file.
 169 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
 170   "Has16BitInsts",
 171   "true",
 172   "Has i16/f16 instructions"
 173 >;
 174
 // Subtarget feature "movrel": when enabled, sets the HasMovrel attribute to
 // "true". Implied by all three GCN generation features in this file.
 175 def FeatureMovrel : SubtargetFeature<"movrel",
 176   "HasMovrel",
 177   "true",
 178   "Has v_movrel*_b32 instructions"
 179 >;
 180
 // Subtarget feature "vgpr-index-mode": when enabled, sets the
 // HasVGPRIndexMode attribute to "true". Implied by FeatureVolcanicIslands.
 181 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
 182   "HasVGPRIndexMode",
 183   "true",
 184   "Has VGPR mode register indexing"
 185 >;
 186
 // Subtarget feature "scalar-stores": when enabled, sets the HasScalarStores
 // attribute to "true". Implied by FeatureVolcanicIslands.
 187 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
 188   "HasScalarStores",
 189   "true",
 190   "Has store scalar memory instructions"
 191 >;
 192
 193 //===------------------------------------------------------------===//
 194 // Subtarget Features (options and debugging)
 195 //===------------------------------------------------------------===//
 196
 // Subtarget feature "fp16-denormals": when enabled, sets the FP16Denormals
 // attribute to "true". No other feature is listed as a prerequisite.
 197 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
 198   "FP16Denormals",
 199   "true",
 200   "Enable half precision denormal handling"
 201 >;
 202
 203 // Some instructions do not support denormals despite this flag. Using
 204 // fp32 denormals also causes instructions to run at the double
 205 // precision rate for the device.
 // Subtarget feature "fp32-denormals": when enabled, sets the FP32Denormals
 // attribute to "true".
 206 def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
 207   "FP32Denormals",
 208   "true",
 209   "Enable single precision denormal handling"
 210 >;
 211
 // Subtarget feature "fp64-denormals": when enabled, sets the FP64Denormals
 // attribute to "true". The trailing list makes FeatureFP64 an implied
 // feature, so enabling fp64 denormals also enables fp64.
 212 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
 213   "FP64Denormals",
 214   "true",
 215   "Enable double precision denormal handling",
 216   [FeatureFP64]
 217 >;
 218
 // Subtarget feature "fp-exceptions": when enabled, sets the FPExceptions
 // attribute to "true".
 219 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
 220   "FPExceptions",
 221   "true",
 222   "Enable floating point exceptions"
 223 >;
 224
 // Helper class producing a max-private-element-size feature. For an integer
 // size it builds a feature named "max-private-element-size-"#size that
 // assigns size (cast to a string) to the MaxPrivateElementSize attribute.
 // NOTE(review): the unit of size is not stated here -- presumably bytes.
 225 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
 226   "max-private-element-size-"#size,
 227   "MaxPrivateElementSize",
 228   !cast<string>(size),
 229   "Maximum private access size may be "#size
 230 >;
 231
 // Concrete max-private-element-size features for sizes 4, 8 and 16. None of
 // them is implied by another feature within this file.
 232 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
 233 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
 234 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
 235
 // Subtarget feature "vgpr-spilling": when enabled, sets the
 // EnableVGPRSpilling attribute to "true".
 236 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
 237   "EnableVGPRSpilling",
 238   "true",
 239   "Enable spilling of VGPRs to scratch memory"
 240 >;
 241
 // Subtarget feature "DumpCode" (mixed-case spelling): when enabled, sets the
 // DumpCode attribute to "true". FeatureDumpCodeLower sets the same attribute
 // under the lower-case name "dumpcode".
 242 def FeatureDumpCode : SubtargetFeature <"DumpCode",
 243   "DumpCode",
 244   "true",
 245   "Dump MachineInstrs in the CodeEmitter"
 246 >;
 247
 // Subtarget feature "dumpcode" (lower-case spelling): sets the same DumpCode
 // attribute to "true" as FeatureDumpCode; only the feature name differs.
 248 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
 249   "DumpCode",
 250   "true",
 251   "Dump MachineInstrs in the CodeEmitter"
 252 >;
 253
 // Subtarget feature "promote-alloca": when enabled, sets the
 // EnablePromoteAlloca attribute to "true".
 254 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
 255   "EnablePromoteAlloca",
 256   "true",
 257   "Enable promote alloca pass"
 258 >;
 259
 260 // XXX - This should probably be removed once enabled by default
 // Subtarget feature "load-store-opt": when enabled, sets the
 // EnableLoadStoreOpt attribute to "true".
 261 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
 262   "EnableLoadStoreOpt",
 263   "true",
 264   "Enable SI load/store optimizer pass"
 265 >;
 266
 267 // Performance debugging feature. Allow using DS instruction immediate
 268 // offsets even if the base pointer can't be proven to be base. On SI,
 269 // base pointer values that won't give the same result as a 16-bit add
 270 // are not safe to fold, but this will override the conservative test
 271 // for the base pointer.
 // Subtarget feature "unsafe-ds-offset-folding": when enabled, sets the
 // EnableUnsafeDSOffsetFolding attribute to "true".
 272 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
 273   "unsafe-ds-offset-folding",
 274   "EnableUnsafeDSOffsetFolding",
 275   "true",
 276   "Force using DS instruction immediate offsets on SI"
 277 >;
 278
 // Subtarget feature "si-scheduler": when enabled, sets the EnableSIScheduler
 // attribute to "true".
 279 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
 280   "EnableSIScheduler",
 281   "true",
 282   "Enable SI Machine Scheduler"
 283 >;
 284
 285 // Unless +-flat-for-global is specified, turn on FlatForGlobal for
 286 // all OS-es on VI and newer hardware to avoid assertion failures due
 287 // to missing ADDR64 variants of MUBUF instructions.
 288 // FIXME: moveToVALU should be able to handle converting addr64 MUBUF
 289 // instructions.
 290
 // Subtarget feature "flat-for-global": when enabled, sets the FlatForGlobal
 // attribute to "true". No generation or ISA version feature in this file
 // implies it; any default enabling happens outside this file.
 291 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
 292   "FlatForGlobal",
 293   "true",
 294   "Force to generate flat instruction for global"
 295 >;
 296
 297 // Dummy feature used to disable assembler instructions.
 // Placeholder feature with an empty feature name string, so it has no
 // command-line spelling of its own. It assigns "true" to an attribute called
 // FeatureDisable.
 // NOTE(review): how this record is consumed is not visible in this file;
 // presumably it is referenced by predicates elsewhere in the backend.
 298 def FeatureDisable : SubtargetFeature<"",
 299   "FeatureDisable","true",
 300   "Dummy feature to disable assembler instructions"
 301 >;
 302
 // Helper class producing a GPU-generation feature. Value is used three ways:
 // as the feature name, as the enumerator assigned to the Gen attribute
 // ("AMDGPUSubtarget::"#Value), and as the prefix of the description. Implies
 // is the list of features switched on together with the generation.
 303 class SubtargetFeatureGeneration <string Value,
 304                                   list<SubtargetFeature> Implies> :
 305         SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
 306                           Value#" GPU generation", Implies>;
 307
 // Concrete local-memory-size features (0, 32768 and 65536 bytes) used by the
 // generation features that follow.
 308 def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
 309 def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
 310 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
 311
 // R600 generation: sets Gen to AMDGPUSubtarget::R600 and implies the R600 ALU
 // instruction feature, a fetch limit of 8 and a local memory size of 0.
 312 def FeatureR600 : SubtargetFeatureGeneration<"R600",
 313   [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
 314 >;
 315
 // R700 generation: sets Gen to AMDGPUSubtarget::R700 and implies a fetch
 // limit of 16 and a local memory size of 0.
 316 def FeatureR700 : SubtargetFeatureGeneration<"R700",
 317   [FeatureFetchLimit16, FeatureLocalMemorySize0]
 318 >;
 319
 // Evergreen generation: sets Gen to AMDGPUSubtarget::EVERGREEN and implies a
 // fetch limit of 16 and 32768 bytes of local memory.
 320 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
 321   [FeatureFetchLimit16, FeatureLocalMemorySize32768]
 322 >;
 323
 // Northern Islands generation: sets Gen to AMDGPUSubtarget::NORTHERN_ISLANDS
 // and implies a fetch limit of 16, wavefront size 64 and 32768 bytes of local
 // memory. It is the only pre-GCN generation here that implies a wavefront
 // size feature.
 324 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
 325   [FeatureFetchLimit16, FeatureWavefrontSize64,
 326    FeatureLocalMemorySize32768]
 327 >;
 328
 // Southern Islands generation: sets Gen to AMDGPUSubtarget::SOUTHERN_ISLANDS
 // and implies fp64, 32768 bytes of local memory, wavefront size 64, GCN, the
 // GCN1 encoding, 32 LDS banks and movrel. Unlike the later generations, the
 // LDS bank count is implied here directly rather than by an ISA version
 // feature.
 329 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
 330   [FeatureFP64, FeatureLocalMemorySize32768,
 331   FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
 332   FeatureLDSBankCount32, FeatureMovrel]
 333 >;
 334
 // Sea Islands generation: sets Gen to AMDGPUSubtarget::SEA_ISLANDS and
 // implies fp64, 65536 bytes of local memory, wavefront size 64, GCN, flat
 // address space, the GCN1 encoding, the CI instructions and movrel. The LDS
 // bank count is supplied by the FeatureISAVersion7_* records.
 335 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
 336   [FeatureFP64, FeatureLocalMemorySize65536,
 337   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
 338   FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
 339 >;
 340
 // Volcanic Islands generation: sets Gen to AMDGPUSubtarget::VOLCANIC_ISLANDS
 // and implies fp64, 65536 bytes of local memory, wavefront size 64, flat
 // address space, GCN, the GCN3 encoding, the CI instructions, 16-bit
 // instructions, s_memrealtime, VGPR index mode, movrel, scalar stores and the
 // 1/(2*pi) inline immediate. The LDS bank count is supplied by the
 // FeatureISAVersion8_* records.
 341 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
 342   [FeatureFP64, FeatureLocalMemorySize65536,
 343    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
 344    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
 345    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
 346    FeatureScalarStores, FeatureInv2PiInlineImm
 347   ]
 348 >;
 349
 // Helper class producing an ISA-version feature. From Major, Minor and
 // Stepping it builds the feature name "isaver<Major>.<Minor>.<Stepping>" and
 // assigns the identifier "ISAVersion<Major>_<Minor>_<Stepping>" to the
 // IsaVersion attribute. Implies lists the features switched on with it
 // (in this file: a generation feature plus per-chip properties).
 350 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
 351                                   list<SubtargetFeature> Implies>
 352                                  : SubtargetFeature <
 353   "isaver"#Major#"."#Minor#"."#Stepping,
 354   "IsaVersion",
 355   "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
 356   "Instruction set version number",
 357   Implies
 358 >;
 359
 // ISA version 7.0.0: Sea Islands with 32 LDS banks.
 360 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
 361   [FeatureSeaIslands,
 362    FeatureLDSBankCount32]>;
 363
 // ISA version 7.0.1: Sea Islands with 32 LDS banks, half-rate 64-bit ops and
 // fast f32 fma. It is the only ISA version in this file that implies
 // HalfRate64Ops or FeatureFastFMAF32.
 364 def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
 365   [FeatureSeaIslands,
 366    HalfRate64Ops,
 367    FeatureLDSBankCount32,
 368    FeatureFastFMAF32]>;
 369
 // ISA version 7.0.2: Sea Islands with 16 LDS banks.
 370 def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
 371   [FeatureSeaIslands,
 372    FeatureLDSBankCount16]>;
 373
 // ISA version 8.0.0: Volcanic Islands with 32 LDS banks and the SGPR init
 // bug. The implied feature list is identical to that of version 8.0.2.
 374 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
 375   [FeatureVolcanicIslands,
 376    FeatureLDSBankCount32,
 377    FeatureSGPRInitBug]>;
 378
 // ISA version 8.0.1: Volcanic Islands with 32 LDS banks and XNACK enabled.
 379 def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
 380   [FeatureVolcanicIslands,
 381    FeatureLDSBankCount32,
 382    FeatureXNACK]>;
 383
 // ISA version 8.0.2: Volcanic Islands with 32 LDS banks and the SGPR init
 // bug. The implied feature list is identical to that of version 8.0.0.
 384 def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
 385   [FeatureVolcanicIslands,
 386    FeatureLDSBankCount32,
 387    FeatureSGPRInitBug]>;
 388
 // ISA version 8.0.3: Volcanic Islands with 32 LDS banks and no extra
 // per-chip features. The implied feature list is identical to that of 8.0.4.
 389 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
 390   [FeatureVolcanicIslands,
 391    FeatureLDSBankCount32]>;
 392
 // ISA version 8.0.4: Volcanic Islands with 32 LDS banks and no extra
 // per-chip features. The implied feature list is identical to that of 8.0.3.
 393 def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
 394   [FeatureVolcanicIslands,
 395    FeatureLDSBankCount32]>;
 396
 // ISA version 8.1.0: Volcanic Islands with 16 LDS banks and XNACK enabled.
 // It is the only Volcanic Islands version here with 16 rather than 32 banks.
 397 def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
 398   [FeatureVolcanicIslands,
 399    FeatureLDSBankCount16,
 400    FeatureXNACK]>;
 401
 402 //===----------------------------------------------------------------------===//
 403 // Debugger related subtarget features.
 404 //===----------------------------------------------------------------------===//
 405
 // Subtarget feature "amdgpu-debugger-insert-nops": when enabled, sets the
 // DebuggerInsertNops attribute to "true".
 406 def FeatureDebuggerInsertNops : SubtargetFeature<
 407   "amdgpu-debugger-insert-nops",
 408   "DebuggerInsertNops",
 409   "true",
 410   "Insert one nop instruction for each high level source statement"
 411 >;
 412
 // Subtarget feature "amdgpu-debugger-reserve-regs": when enabled, sets the
 // DebuggerReserveRegs attribute to "true".
 413 def FeatureDebuggerReserveRegs : SubtargetFeature<
 414   "amdgpu-debugger-reserve-regs",
 415   "DebuggerReserveRegs",
 416   "true",
 417   "Reserve registers for debugger usage"
 418 >;
 419
 // Subtarget feature "amdgpu-debugger-emit-prologue": when enabled, sets the
 // DebuggerEmitPrologue attribute to "true".
 420 def FeatureDebuggerEmitPrologue : SubtargetFeature<
 421   "amdgpu-debugger-emit-prologue",
 422   "DebuggerEmitPrologue",
 423   "true",
 424   "Emit debugger prologue"
 425 >;
 426
 427 //===----------------------------------------------------------------------===//
 428
 // Instruction-set record for the target; referenced as InstructionSet by the
 // AMDGPU Target record. Overrides two InstrInfo fields:
 // guessInstructionProperties and noNamedPositionallyEncodedOperands are both
 // set to 1.
 429 def AMDGPUInstrInfo : InstrInfo {
 430   let guessInstructionProperties = 1;
 431   let noNamedPositionallyEncodedOperands = 1;
 432 }
 433
 // Assembly parser record for the target; listed in AssemblyParsers of the
 // AMDGPU Target record. Emission of the register-name matcher is turned off
 // (ShouldEmitMatchRegisterName = 0) for the reason given in the comment
 // inside the body.
 434 def AMDGPUAsmParser : AsmParser {
 435   // Some of the R600 registers have the same name, so this crashes.
 436   // For example T0_XYZW and T0_XY both have the asm name T0.
 437   let ShouldEmitMatchRegisterName = 0;
 438 }
 439
 // Assembly writer record for the target; listed in AssemblyWriters of the
 // AMDGPU Target record. Sets PassSubtarget to 1; note this is written as a
 // field definition ("int PassSubtarget = 1") rather than with "let".
 440 def AMDGPUAsmWriter : AsmWriter {
 441   int PassSubtarget = 1;
 442 }
 443
 // Central table of assembler variant names and their numeric IDs (0..4). The
 // AsmParserVariant records below read their Name and Variant from here.
 // The Disable entry (ID 4) has no matching AsmParserVariant record in this
 // file and is not listed in the Target's AssemblyParserVariants.
 // NOTE(review): presumably Disable is used to mark instructions that no
 // parser variant should match -- confirm against its users.
 444 def AMDGPUAsmVariants {
 445   string Default = "Default";
 446   int Default_ID = 0;
 447   string VOP3 = "VOP3";
 448   int VOP3_ID = 1;
 449   string SDWA = "SDWA";
 450   int SDWA_ID = 2;
 451   string DPP = "DPP";
 452   int DPP_ID = 3;
 453   string Disable = "Disable";
 454   int Disable_ID = 4;
 455 }
 456
 // Parser variant "Default" (ID 0); name and ID are taken from
 // AMDGPUAsmVariants.
 457 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
 458   let Variant = AMDGPUAsmVariants.Default_ID;
 459   let Name = AMDGPUAsmVariants.Default;
 460 }
 461
 // Parser variant "VOP3" (ID 1); name and ID are taken from
 // AMDGPUAsmVariants.
 462 def VOP3AsmParserVariant : AsmParserVariant {
 463   let Variant = AMDGPUAsmVariants.VOP3_ID;
 464   let Name = AMDGPUAsmVariants.VOP3;
 465 }
 466
 // Parser variant "SDWA" (ID 2); name and ID are taken from
 // AMDGPUAsmVariants.
 467 def SDWAAsmParserVariant : AsmParserVariant {
 468   let Variant = AMDGPUAsmVariants.SDWA_ID;
 469   let Name = AMDGPUAsmVariants.SDWA;
 470 }
 471
 // Parser variant "DPP" (ID 3); name and ID are taken from AMDGPUAsmVariants.
 472 def DPPAsmParserVariant : AsmParserVariant {
 473   let Variant = AMDGPUAsmVariants.DPP_ID;
 474   let Name = AMDGPUAsmVariants.DPP;
 475 }
 476
 // Top-level Target record. It ties together the instruction set record, the
 // single assembly parser, the four parser variants (Default, VOP3, SDWA, DPP,
 // listed in the same order as their IDs 0..3) and the assembly writer
 // defined above.
 477 def AMDGPU : Target {
 478   // Pull in Instruction Info:
 479   let InstructionSet = AMDGPUInstrInfo;
 480   let AssemblyParsers = [AMDGPUAsmParser];
 481   let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
 482                                 VOP3AsmParserVariant,
 483                                 SDWAAsmParserVariant,
 484                                 DPPAsmParserVariant];
 485   let AssemblyWriters = [AMDGPUAsmWriter];
 486 }
 487
 488 // Dummy Instruction itineraries for pseudo instructions
 // ALU_NULL is a bare functional unit and NullALU a bare itinerary class;
 // neither carries any fields of its own. They are defined ahead of the
 // schedule and instruction includes at the end of this file.
 489 def ALU_NULL : FuncUnit;
 490 def NullALU : InstrItinClass;
 491
 492 //===----------------------------------------------------------------------===//
 493 // Predicate helper class
 494 //===----------------------------------------------------------------------===//
 495
 // Predicate whose condition string is the constant "true"; used as the
 // default AssemblerPredicate in PredicateControl.
 496 def TruePredicate : Predicate<"true">;
 497
 // Predicate for Southern Islands or Sea Islands. The record derives from both
 // Predicate (a condition string comparing the subtarget generation for
 // equality with each of the two) and AssemblerPredicate (keyed on
 // FeatureGCN1Encoding). The condition is written as two adjacent string
 // literals; the first ends in "||" with no trailing space.
 498 def isSICI : Predicate<
 499   "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
 500   "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
 501 >, AssemblerPredicate<"FeatureGCN1Encoding">;
 502
 // Predicate for Volcanic Islands and anything ordered after it: the
 // condition uses ">=" on the generation value. The assembler side is keyed
 // on FeatureGCN3Encoding.
 503 def isVI : Predicate <
 504   "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
 505   AssemblerPredicate<"FeatureGCN3Encoding">;
 506
 // Predicate for Sea Islands or Volcanic Islands, with the assembler side
 // keyed on FeatureCIInsts.
 // NOTE(review): the condition tests for equality with VOLCANIC_ISLANDS,
 // whereas isVI uses ">="; if a generation newer than VI is ever added, this
 // predicate would exclude it while isVI would not -- confirm this is
 // intended.
 507 def isCIVI : Predicate <
 508   "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
 509   "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
 510 >, AssemblerPredicate<"FeatureCIInsts">;
 511
 // Predicate that holds when Subtarget->hasFlatAddressSpace() is true. It has
 // no assembler predicate attached.
 512 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
 513
 // Predicate that holds when Subtarget->has16BitInsts() is true. It has no
 // assembler predicate attached. Distinct from the SubtargetFeature record
 // Feature16BitInsts defined earlier in this file.
 514 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
 515
 // Mixin class that assembles the Predicates list for a record from separate,
 // individually overridable pieces:
 //   SubtargetPredicate   - declared without a default value here.
 //   AssemblerPredicate   - defaults to TruePredicate.
 //   AssemblerPredicates  - extra list, empty by default.
 //   OtherPredicates      - extra list, empty by default.
 // Predicates is the concatenation [SubtargetPredicate, AssemblerPredicate],
 // then AssemblerPredicates, then OtherPredicates.
 // SIAssemblerPredicate and VIAssemblerPredicate (defaulting to isSICI and
 // isVI) are declared here but are not part of the Predicates list built in
 // this class.
 // NOTE(review): presumably users are expected to assign SubtargetPredicate
 // before Predicates is consumed -- confirm against the classes that use it.
 516 class PredicateControl {
 517   Predicate SubtargetPredicate;
 518   Predicate SIAssemblerPredicate = isSICI;
 519   Predicate VIAssemblerPredicate = isVI;
 520   list<Predicate> AssemblerPredicates = [];
 521   Predicate AssemblerPredicate = TruePredicate;
 522   list<Predicate> OtherPredicates = [];
 523   list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
 524                                             AssemblerPredicates,
 525                                             OtherPredicates);
 526 }
 527
 528 // Include AMDGPU TD files
 529 include "R600Schedule.td"
 530 include "SISchedule.td"
 531 include "Processors.td"
 532 include "AMDGPUInstrInfo.td"
 533 include "AMDGPUIntrinsics.td"
 534 include "AMDGPURegisterInfo.td"
 535 include "AMDGPUInstructions.td"
 536 include "AMDGPUCallingConv.td"