contrib/llvm/lib/Target/AMDGPU/AMDGPU.td

   1 //===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===------------------------------------------------------------===//
   9
  10 include "llvm/Target/Target.td"
  11
  12 //===------------------------------------------------------------===//
  13 // Subtarget Features (device properties)
  14 //===------------------------------------------------------------===//
     //
     // Each def of the form SubtargetFeature<a, b, c, d[, e]> instantiates the
     // SubtargetFeature class from the Target.td include above. In order, the
     // arguments are the feature string, the subtarget field the feature sets,
     // the value written to that field, a help description, and an optional
     // list of other features that this one implies.
  15
     // Implied by FeatureFP64Denormals and by the SOUTHERN_ISLANDS, SEA_ISLANDS
     // and VOLCANIC_ISLANDS generation features defined later in this file.
  16 def FeatureFP64 : SubtargetFeature<"fp64",
  17   "FP64",
  18   "true",
  19   "Enable double precision operations"
  20 >;
  21
     // Within this file, implied only by FeatureISAVersion7_0_1.
  22 def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  23   "FastFMAF32",
  24   "true",
  25   "Assuming f32 fma is at least as fast as mul + add"
  26 >;
  27
     // Note: unlike its neighbours this record has no "Feature" prefix; it is
     // referenced under this exact name by FeatureISAVersion7_0_1.
  28 def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  29   "HalfRate64Ops",
  30   "true",
  31   "Most fp64 instructions are half rate instead of quarter"
  32 >;
  33
     // Enabling this feature writes "false" (not "true") into R600ALUInst,
     // unlike the other boolean features in this section. Implied by
     // FeatureR600.
  34 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  35   "R600ALUInst",
  36   "false",
  37   "Older version of ALU instructions encoding"
  38 >;
  39
     // The three features below are not implied by any other feature in this
     // file.
  40 def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  41   "HasVertexCache",
  42   "true",
  43   "Specify use of dedicated vertex cache"
  44 >;
  45
  46 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  47   "CaymanISA",
  48   "true",
  49   "Use Cayman ISA"
  50 >;
  51
  52 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  53   "CFALUBug",
  54   "true",
  55   "GPU has CF_ALU bug"
  56 >;
  57
     // Implied by the SEA_ISLANDS and VOLCANIC_ISLANDS generation features.
  58 def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  59   "FlatAddressSpace",
  60   "true",
  61   "Support flat address space"
  62 >;
  63
     // NOTE(review): the feature string and field say "buffer" while the help
     // text says "global loads and stores" -- confirm which wording is intended.
  64 def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
  65   "UnalignedBufferAccess",
  66   "true",
  67   "Support unaligned global loads and stores"
  68 >;
  69
     // Scratch-memory counterpart of the feature above; not implied by any
     // other feature in this file.
  70 def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
  71   "UnalignedScratchAccess",
  72   "true",
  73   "Support unaligned scratch loads and stores"
  74 >;
  75
  76 // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
  77 // XNACK. The current default kernel driver setting is:
  78 // - graphics ring: XNACK disabled
  79 // - compute ring: XNACK enabled
  80 //
  81 // If XNACK is enabled, the VMEM latency can be worse.
  82 // If XNACK is disabled, the 2 SGPRs can be used for general purposes.
     //
     // Within this file the feature is implied by FeatureISAVersion8_0_1 and
     // FeatureISAVersion8_1_0.
  83 def FeatureXNACK : SubtargetFeature<"xnack",
  84   "EnableXNACK",
  85   "true",
  86   "Enable XNACK support"
  87 >;
  88
     // Hardware-bug flag. Within this file it is implied by
     // FeatureISAVersion8_0_0 and FeatureISAVersion8_0_2.
  89 def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  90   "SGPRInitBug",
  91   "true",
  92   "VI SGPR initialization bug requiring a fixed SGPR allocation size"
  93 >;
  94
     // Parameterized feature: the feature string is "fetch" followed by Value
     // (e.g. "fetch8"), and Value is the text written to TexVTXClauseSize.
     // Value is a string here, so no !cast is needed.
  95 class SubtargetFeatureFetchLimit <string Value> :
  96                           SubtargetFeature <"fetch"#Value,
  97   "TexVTXClauseSize",
  98   Value,
  99   "Limit the maximum number of fetches in a clause to "#Value
 100 >;
 101
     // FeatureFetchLimit8 is implied by FeatureR600; FeatureFetchLimit16 by
     // FeatureR700, FeatureEvergreen and FeatureNorthernIslands.
 102 def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
 103 def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
 104
     // Parameterized feature: the feature string is "wavefrontsize" followed by
     // Value, and the integer is converted with !cast<string> to form the text
     // written to WavefrontSize.
 105 class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
 106   "wavefrontsize"#Value,
 107   "WavefrontSize",
 108   !cast<string>(Value),
 109   "The number of threads per wavefront"
 110 >;
 111
     // Only the 64-wide variant is implied in this file (by the
     // NORTHERN_ISLANDS, SOUTHERN_ISLANDS, SEA_ISLANDS and VOLCANIC_ISLANDS
     // generation features); the 16 and 32 variants are not implied here.
 112 def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
 113 def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
 114 def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
 115
     // Parameterized feature: the feature string is "ldsbankcount" followed by
     // Value; the integer is converted with !cast<string> to form the text
     // written to LDSBankCount.
 116 class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
 117   "ldsbankcount"#Value,
 118   "LDSBankCount",
 119   !cast<string>(Value),
 120   "The number of LDS banks per compute unit."
 121 >;
 122
     // The 16-bank variant is implied by FeatureISAVersion7_0_2 and
     // FeatureISAVersion8_1_0; the 32-bank variant by FeatureSouthernIslands and
     // by every other FeatureISAVersion* record in this file.
 123 def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
 124 def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
 125
     // Parameterized feature: the feature string is "localmemorysize" followed
     // by Value; the integer is converted with !cast<string> to form the text
     // written to LocalMemorySize. The concrete records
     // (FeatureLocalMemorySize0/32768/65536) are defined further down, next to
     // the generation features that use them.
 126 class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
 127   "localmemorysize"#Value,
 128   "LocalMemorySize",
 129   !cast<string>(Value),
 130   "The size of local memory in bytes"
 131 >;
 132
     // Implied by the SOUTHERN_ISLANDS, SEA_ISLANDS and VOLCANIC_ISLANDS
     // generation features.
 133 def FeatureGCN : SubtargetFeature<"gcn",
 134   "IsGCN",
 135   "true",
 136   "GCN or newer GPU"
 137 >;
 138
     // Implied by FeatureSouthernIslands and FeatureSeaIslands; also the
     // assembler predicate named by isSICI later in this file.
 139 def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
 140   "GCN1Encoding",
 141   "true",
 142   "Encoding format for SI and CI"
 143 >;
 144
     // Implied by FeatureVolcanicIslands; also the assembler predicate named by
     // isVI later in this file.
 145 def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
 146   "GCN3Encoding",
 147   "true",
 148   "Encoding format for VI"
 149 >;
 150
     // Implied by FeatureSeaIslands and FeatureVolcanicIslands; also the
     // assembler predicate named by isCIVI later in this file.
 151 def FeatureCIInsts : SubtargetFeature<"ci-insts",
 152   "CIInsts",
 153   "true",
 154   "Additional instructions for CI+"
 155 >;
 156
     // Instruction-availability features. Within this file all of them are
     // implied by FeatureVolcanicIslands; FeatureMovrel is additionally implied
     // by FeatureSouthernIslands and FeatureSeaIslands.
 157 def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
 158   "HasSMemRealTime",
 159   "true",
 160   "Has s_memrealtime instruction"
 161 >;
 162
 163 def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
 164   "HasInv2PiInlineImm",
 165   "true",
 166   "Has 1 / (2 * pi) as inline immediate"
 167 >;
 168
     // Note the def name starts with "Feature16" (no separator), matching the
     // reference in FeatureVolcanicIslands.
 169 def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
 170   "Has16BitInsts",
 171   "true",
 172   "Has i16/f16 instructions"
 173 >;
 174
 175 def FeatureMovrel : SubtargetFeature<"movrel",
 176   "HasMovrel",
 177   "true",
 178   "Has v_movrel*_b32 instructions"
 179 >;
 180
 181 def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
 182   "HasVGPRIndexMode",
 183   "true",
 184   "Has VGPR mode register indexing"
 185 >;
 186
 187 def FeatureScalarStores : SubtargetFeature<"scalar-stores",
 188   "HasScalarStores",
 189   "true",
 190   "Has store scalar memory instructions"
 191 >;
 192
 193 //===------------------------------------------------------------===//
 194 // Subtarget Features (options and debugging)
 195 //===------------------------------------------------------------===//
     //
     // None of the features in this group is implied by a generation or ISA
     // version feature in this file.
 196
 197 def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
 198   "FP16Denormals",
 199   "true",
 200   "Enable half precision denormal handling"
 201 >;
 202
 203 // Some instructions do not support denormals despite this flag. Using
 204 // fp32 denormals also causes instructions to run at the double
 205 // precision rate for the device.
 206 def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
 207   "FP32Denormals",
 208   "true",
 209   "Enable single precision denormal handling"
 210 >;
 211
     // The only denormal feature with an implied-features list: enabling it
     // also enables FeatureFP64.
 212 def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
 213   "FP64Denormals",
 214   "true",
 215   "Enable double precision denormal handling",
 216   [FeatureFP64]
 217 >;
 218
 219 def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
 220   "FPExceptions",
 221   "true",
 222   "Enable floating point exceptions"
 223 >;
 224
     // Parameterized feature: the feature string is
     // "max-private-element-size-" followed by size (e.g.
     // "max-private-element-size-4"); the integer is converted with
     // !cast<string> to form the text written to MaxPrivateElementSize.
     // NOTE(review): the unit of `size` is not stated here -- presumably bytes;
     // confirm against the subtarget code.
 225 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
 226   "max-private-element-size-"#size,
 227   "MaxPrivateElementSize",
 228   !cast<string>(size),
 229   "Maximum private access size may be "#size
 230 >;
 231
     // The three supported settings; none is implied elsewhere in this file.
 232 def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
 233 def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
 234 def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
 235
     // Optional behaviour switches; none is implied by another feature in this
     // file.
 236 def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
 237   "EnableVGPRSpilling",
 238   "true",
 239   "Enable spilling of VGPRs to scratch memory"
 240 >;
 241
     // FeatureDumpCode and FeatureDumpCodeLower set the same field (DumpCode)
     // with the same value and help text; they differ only in the spelling of
     // the feature string ("DumpCode" vs "dumpcode").
 242 def FeatureDumpCode : SubtargetFeature <"DumpCode",
 243   "DumpCode",
 244   "true",
 245   "Dump MachineInstrs in the CodeEmitter"
 246 >;
 247
 248 def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
 249   "DumpCode",
 250   "true",
 251   "Dump MachineInstrs in the CodeEmitter"
 252 >;
 253
 254 def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
 255   "EnablePromoteAlloca",
 256   "true",
 257   "Enable promote alloca pass"
 258 >;
 259
 260 // XXX - This should probably be removed once enabled by default
 261 def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
 262   "EnableLoadStoreOpt",
 263   "true",
 264   "Enable SI load/store optimizer pass"
 265 >;
 266
 267 // Performance debugging feature. Allow using DS instruction immediate
 268 // offsets even if the base pointer can't be proven to be base. On SI,
 269 // base pointer values that won't give the same result as a 16-bit add
 270 // are not safe to fold, but this will override the conservative test
 271 // for the base pointer.
     // NOTE(review): "can't be proven to be base" above reads like a slip in
     // the original wording -- confirm the intended property of the base
     // pointer against the code that consumes EnableUnsafeDSOffsetFolding.
 272 def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
 273   "unsafe-ds-offset-folding",
 274   "EnableUnsafeDSOffsetFolding",
 275   "true",
 276   "Force using DS instruction immediate offsets on SI"
 277 >;
 278
     // Opt-in switch for the SI machine scheduler; not implied elsewhere in
     // this file.
 279 def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
 280   "EnableSIScheduler",
 281   "true",
 282   "Enable SI Machine Scheduler"
 283 >;
 284
     // Not implied by any other feature in this file.
 285 def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
 286   "FlatForGlobal",
 287   "true",
 288   "Force to generate flat instruction for global"
 289 >;
 290
 291 // Dummy feature used to disable assembler instructions.
     // Its feature string is deliberately empty, and the field it names is
     // "FeatureDisable" itself.
 292 def FeatureDisable : SubtargetFeature<"",
 293   "FeatureDisable","true",
 294   "Dummy feature to disable assembler instructions"
 295 >;
 296
     // Generation feature helper. For a given Value (e.g. "R600"):
     //   - the feature string is Value itself,
     //   - the field set is "Gen",
     //   - the value written is "AMDGPUSubtarget::" followed by Value,
     //   - the help text is Value followed by " GPU generation",
     //   - Implies lists the features that the generation turns on.
 297 class SubtargetFeatureGeneration <string Value,
 298                                   list<SubtargetFeature> Implies> :
 299         SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
 300                           Value#" GPU generation", Implies>;
 301
     // Concrete local-memory sizes, used in the Implies lists of the
     // generation features below: 0 by R600/R700, 32768 by Evergreen,
     // Northern Islands and Southern Islands, 65536 by Sea Islands and
     // Volcanic Islands.
 302 def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
 303 def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
 304 def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
 305
     // Pre-GCN generations (none of them implies FeatureGCN). R600 is the only
     // one that implies FeatureR600ALUInst and the 8-entry fetch limit; the
     // others use the 16-entry fetch limit.
 306 def FeatureR600 : SubtargetFeatureGeneration<"R600",
 307   [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
 308 >;
 309
 310 def FeatureR700 : SubtargetFeatureGeneration<"R700",
 311   [FeatureFetchLimit16, FeatureLocalMemorySize0]
 312 >;
 313
 314 def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
 315   [FeatureFetchLimit16, FeatureLocalMemorySize32768]
 316 >;
 317
     // Unlike the three generations above, this one also implies
     // FeatureWavefrontSize64.
 318 def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
 319   [FeatureFetchLimit16, FeatureWavefrontSize64,
 320    FeatureLocalMemorySize32768]
 321 >;
 322
     // GCN generations. All three imply FeatureFP64, FeatureWavefrontSize64,
     // FeatureGCN and FeatureMovrel.
     //
     // SOUTHERN_ISLANDS carries FeatureLDSBankCount32 directly; the two later
     // generations leave the LDS bank count to the FeatureISAVersion* records.
 323 def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
 324   [FeatureFP64, FeatureLocalMemorySize32768,
 325   FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
 326   FeatureLDSBankCount32, FeatureMovrel]
 327 >;
 328
     // Adds the flat address space and CI instructions on top of the GCN1
     // encoding, with 65536 bytes of local memory.
 329 def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
 330   [FeatureFP64, FeatureLocalMemorySize65536,
 331   FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
 332   FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
 333 >;
 334
     // Switches to the GCN3 encoding and adds the 16-bit, s_memrealtime, VGPR
     // index mode, scalar store and 1/(2*pi) inline-immediate features.
 335 def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
 336   [FeatureFP64, FeatureLocalMemorySize65536,
 337    FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
 338    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
 339    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
 340    FeatureScalarStores, FeatureInv2PiInlineImm
 341   ]
 342 >;
 343
     // ISA version helper. For <Major, Minor, Stepping> = <7, 0, 1>:
     //   - the feature string is "isaver7.0.1",
     //   - the field set is "IsaVersion",
     //   - the value written is "ISAVersion7_0_1",
     //   - Implies lists the generation feature plus any per-chip features.
 344 class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
 345                                   list<SubtargetFeature> Implies>
 346                                  : SubtargetFeature <
 347   "isaver"#Major#"."#Minor#"."#Stepping,
 348   "IsaVersion",
 349   "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
 350   "Instruction set version number",
 351   Implies
 352 >;
 353
     // 7.x versions build on FeatureSeaIslands and each select an LDS bank
     // count.
 354 def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
 355   [FeatureSeaIslands,
 356    FeatureLDSBankCount32]>;
 357
     // The only version in this file that implies HalfRate64Ops and
     // FeatureFastFMAF32.
 358 def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
 359   [FeatureSeaIslands,
 360    HalfRate64Ops,
 361    FeatureLDSBankCount32,
 362    FeatureFastFMAF32]>;
 363
     // Uses 16 LDS banks rather than 32.
 364 def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
 365   [FeatureSeaIslands,
 366    FeatureLDSBankCount16]>;
 367
     // 8.x versions build on FeatureVolcanicIslands. 8.0.0 and 8.0.2 also imply
     // FeatureSGPRInitBug; 8.0.1 and 8.1.0 also imply FeatureXNACK.
 368 def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
 369   [FeatureVolcanicIslands,
 370    FeatureLDSBankCount32,
 371    FeatureSGPRInitBug]>;
 372
 373 def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
 374   [FeatureVolcanicIslands,
 375    FeatureLDSBankCount32,
 376    FeatureXNACK]>;
 377
 378 def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
 379   [FeatureVolcanicIslands,
 380    FeatureLDSBankCount32,
 381    FeatureSGPRInitBug]>;
 382
     // 8.0.3 and 8.0.4 have identical Implies lists; they differ only in the
     // version number recorded.
 383 def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
 384   [FeatureVolcanicIslands,
 385    FeatureLDSBankCount32]>;
 386
 387 def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
 388   [FeatureVolcanicIslands,
 389    FeatureLDSBankCount32]>;
 390
     // The only 8.x version here with 16 LDS banks.
 391 def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
 392   [FeatureVolcanicIslands,
 393    FeatureLDSBankCount16,
 394    FeatureXNACK]>;
 395
 396 //===----------------------------------------------------------------------===//
 397 // Debugger related subtarget features.
 398 //===----------------------------------------------------------------------===//
     //
     // All three feature strings share the "amdgpu-debugger-" prefix. None of
     // them is implied by another feature in this file.
 399
 400 def FeatureDebuggerInsertNops : SubtargetFeature<
 401   "amdgpu-debugger-insert-nops",
 402   "DebuggerInsertNops",
 403   "true",
 404   "Insert one nop instruction for each high level source statement"
 405 >;
 406
 407 def FeatureDebuggerReserveRegs : SubtargetFeature<
 408   "amdgpu-debugger-reserve-regs",
 409   "DebuggerReserveRegs",
 410   "true",
 411   "Reserve registers for debugger usage"
 412 >;
 413
 414 def FeatureDebuggerEmitPrologue : SubtargetFeature<
 415   "amdgpu-debugger-emit-prologue",
 416   "DebuggerEmitPrologue",
 417   "true",
 418   "Emit debugger prologue"
 419 >;
 420
 421 //===----------------------------------------------------------------------===//
     // Target-level records: instruction info, assembly parser and assembly
     // writer. They are wired together by the AMDGPU Target def further down.
 422
     // Overrides two InstrInfo settings; referenced as InstructionSet by the
     // AMDGPU Target def.
 423 def AMDGPUInstrInfo : InstrInfo {
 424   let guessInstructionProperties = 1;
 425   let noNamedPositionallyEncodedOperands = 1;
 426 }
 427
 428 def AMDGPUAsmParser : AsmParser {
 429   // Some of the R600 registers have the same name, so this crashes.
 430   // For example T0_XYZW and T0_XY both have the asm name T0.
 431   let ShouldEmitMatchRegisterName = 0;
 432 }
 433
     // NOTE(review): PassSubtarget is written as a field declaration (`int ...`)
     // rather than a `let` override as in the two records above -- presumably
     // it replaces a default inherited from AsmWriter; confirm against
     // Target.td.
 434 def AMDGPUAsmWriter : AsmWriter {
 435   int PassSubtarget = 1;
 436 }
 437
     // Name/ID table for assembler variants. Each variant has a string name and
     // a matching <name>_ID integer. The Disable variant (ID 4) has no
     // AsmParserVariant record below and is not listed in the AMDGPU Target's
     // AssemblyParserVariants.
 438 def AMDGPUAsmVariants {
 439   string Default = "Default";
 440   int Default_ID = 0;
 441   string VOP3 = "VOP3";
 442   int VOP3_ID = 1;
 443   string SDWA = "SDWA";
 444   int SDWA_ID = 2;
 445   string DPP = "DPP";
 446   int DPP_ID = 3;
 447   string Disable = "Disable";
 448   int Disable_ID = 4;
 449 }
 450
     // One AsmParserVariant per parsed variant; Variant and Name are taken from
     // the AMDGPUAsmVariants table so the two always stay in step.
 451 def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
 452   let Variant = AMDGPUAsmVariants.Default_ID;
 453   let Name = AMDGPUAsmVariants.Default;
 454 }
 455
 456 def VOP3AsmParserVariant : AsmParserVariant {
 457   let Variant = AMDGPUAsmVariants.VOP3_ID;
 458   let Name = AMDGPUAsmVariants.VOP3;
 459 }
 460
 461 def SDWAAsmParserVariant : AsmParserVariant {
 462   let Variant = AMDGPUAsmVariants.SDWA_ID;
 463   let Name = AMDGPUAsmVariants.SDWA;
 464 }
 465
 466 def DPPAsmParserVariant : AsmParserVariant {
 467   let Variant = AMDGPUAsmVariants.DPP_ID;
 468   let Name = AMDGPUAsmVariants.DPP;
 469 }
 470
     // The target record: ties together the instruction info, the single
     // assembly parser, its four parser variants (Default, VOP3, SDWA, DPP) and
     // the assembly writer defined above.
 471 def AMDGPU : Target {
 472   // Pull in Instruction Info:
 473   let InstructionSet = AMDGPUInstrInfo;
 474   let AssemblyParsers = [AMDGPUAsmParser];
 475   let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
 476                                 VOP3AsmParserVariant,
 477                                 SDWAAsmParserVariant,
 478                                 DPPAsmParserVariant];
 479   let AssemblyWriters = [AMDGPUAsmWriter];
 480 }
 481
 482 // Dummy Instruction itineraries for pseudo instructions
     // ALU_NULL is a functional unit and NullALU an itinerary class; neither is
     // referenced again in this file.
 483 def ALU_NULL : FuncUnit;
 484 def NullALU : InstrItinClass;
 485
 486 //===----------------------------------------------------------------------===//
 487 // Predicate helper class
 488 //===----------------------------------------------------------------------===//
     //
     // Each Predicate<"..."> below carries a condition written as a string. The
     // isSICI, isVI and isCIVI records additionally derive from
     // AssemblerPredicate<"..."> naming a subtarget feature defined earlier in
     // this file.
 489
     // Always-true predicate; used as the default AssemblerPredicate in
     // PredicateControl.
 490 def TruePredicate : Predicate<"true">;
 491
     // Generation is exactly SOUTHERN_ISLANDS or SEA_ISLANDS; the assembler side
     // keys off FeatureGCN1Encoding. The condition is split across two adjacent
     // string literals.
 492 def isSICI : Predicate<
 493   "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
 494   "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
 495 >, AssemblerPredicate<"FeatureGCN1Encoding">;
 496
     // Uses >= rather than ==, so it also matches any generation that compares
     // greater than VOLCANIC_ISLANDS; the assembler side keys off
     // FeatureGCN3Encoding.
 497 def isVI : Predicate <
 498   "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
 499   AssemblerPredicate<"FeatureGCN3Encoding">;
 500
     // Generation is exactly SEA_ISLANDS or VOLCANIC_ISLANDS; the assembler side
     // keys off FeatureCIInsts.
 501 def isCIVI : Predicate <
 502   "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
 503   "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
 504 >, AssemblerPredicate<"FeatureCIInsts">;
 505
     // The two predicates below query the subtarget through accessor calls and
     // have no AssemblerPredicate component.
 506 def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
 507
 508 def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
 509
     // Mix-in that assembles a record's Predicates list from separate pieces.
     //   - SubtargetPredicate has no default here, so it must be supplied by
     //     whatever uses this class.
     //   - AssemblerPredicate defaults to TruePredicate.
     //   - AssemblerPredicates and OtherPredicates default to empty lists.
     //   - Predicates is [SubtargetPredicate, AssemblerPredicate] followed by
     //     AssemblerPredicates and then OtherPredicates.
     // SIAssemblerPredicate and VIAssemblerPredicate default to isSICI and isVI
     // but are not part of the Predicates list built in this class.
 510 class PredicateControl {
 511   Predicate SubtargetPredicate;
 512   Predicate SIAssemblerPredicate = isSICI;
 513   Predicate VIAssemblerPredicate = isVI;
 514   list<Predicate> AssemblerPredicates = [];
 515   Predicate AssemblerPredicate = TruePredicate;
 516   list<Predicate> OtherPredicates = [];
 517   list<Predicate> Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate],
 518                                             AssemblerPredicates,
 519                                             OtherPredicates);
 520 }
 521
 522 // Include AMDGPU TD files
 523 include "R600Schedule.td"
 524 include "SISchedule.td"
 525 include "Processors.td"
 526 include "AMDGPUInstrInfo.td"
 527 include "AMDGPUIntrinsics.td"
 528 include "AMDGPURegisterInfo.td"
 529 include "AMDGPUInstructions.td"
 530 include "AMDGPUCallingConv.td"