contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h

   1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //==-----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// AMD GCN specific subclass of TargetSubtarget.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
  15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
  16
  17 #include "AMDGPUCallLowering.h"
  18 #include "AMDGPUSubtarget.h"
  19 #include "SIFrameLowering.h"
  20 #include "SIISelLowering.h"
  21 #include "SIInstrInfo.h"
  22 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  23
  24 #define GET_SUBTARGETINFO_HEADER
  25 #include "AMDGPUGenSubtargetInfo.inc"
  26
  27 namespace llvm {
  28
  29 class GCNTargetMachine;
  30
  31 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
  32                            public AMDGPUSubtarget {
  33 public:
  34   using AMDGPUSubtarget::getMaxWavesPerEU;
  35
  36   // Following 2 enums are documented at:
  37   //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
  enum class TrapHandlerAbi {
    NONE = 0x00,   // No trap handler ABI selected.
    AMDHSA = 0x01, // Trap handler ABI selected for the AMDHSA OS.
  };
  42
  enum class TrapID {
    LLVMAMDHSATrap = 0x02,      // Trap ID 2.
    LLVMAMDHSADebugTrap = 0x03, // Trap ID 3.
  };
  47
  48 private:
  49   /// GlobalISel related APIs.
  50   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  51   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  52   std::unique_ptr<InstructionSelector> InstSelector;
  53   std::unique_ptr<LegalizerInfo> Legalizer;
  54   std::unique_ptr<RegisterBankInfo> RegBankInfo;
  55
  56 protected:
  57   // Basic subtarget description.
  58   Triple TargetTriple;
  59   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
  60   unsigned Gen = INVALID;
  61   InstrItineraryData InstrItins;
  62   int LDSBankCount = 0;
  63   unsigned MaxPrivateElementSize = 0;
  64
  65   // Possibly statically set by tablegen, but may want to be overridden.
  66   bool FastFMAF32 = false;
  67   bool FastDenormalF32 = false;
  68   bool HalfRate64Ops = false;
  69   bool FullRate64Ops = false;
  70
  71   // Dynamically set bits that enable features.
  72   bool FlatForGlobal = false;
  73   bool AutoWaitcntBeforeBarrier = false;
  74   bool BackOffBarrier = false;
  75   bool UnalignedScratchAccess = false;
  76   bool UnalignedAccessMode = false;
  77   bool HasApertureRegs = false;
  78   bool SupportsXNACK = false;
  79
  80   // This should not be used directly. 'TargetID' tracks the dynamic settings
  81   // for XNACK.
  82   bool EnableXNACK = false;
  83
  84   bool EnableTgSplit = false;
  85   bool EnableCuMode = false;
  86   bool TrapHandler = false;
  87
  88   // Used as options.
  89   bool EnableLoadStoreOpt = false;
  90   bool EnableUnsafeDSOffsetFolding = false;
  91   bool EnableSIScheduler = false;
  92   bool EnableDS128 = false;
  93   bool EnablePRTStrictNull = false;
  94   bool DumpCode = false;
  95
  // Subtarget properties statically set by tablegen
  97   bool FP64 = false;
  98   bool FMA = false;
  99   bool MIMG_R128 = false;
 100   bool CIInsts = false;
 101   bool GFX8Insts = false;
 102   bool GFX9Insts = false;
 103   bool GFX90AInsts = false;
 104   bool GFX940Insts = false;
 105   bool GFX10Insts = false;
 106   bool GFX11Insts = false;
 107   bool GFX10_3Insts = false;
 108   bool GFX7GFX8GFX9Insts = false;
 109   bool SGPRInitBug = false;
 110   bool UserSGPRInit16Bug = false;
 111   bool NegativeScratchOffsetBug = false;
 112   bool NegativeUnalignedScratchOffsetBug = false;
 113   bool HasSMemRealTime = false;
 114   bool HasIntClamp = false;
 115   bool HasFmaMixInsts = false;
 116   bool HasMovrel = false;
 117   bool HasVGPRIndexMode = false;
 118   bool HasScalarStores = false;
 119   bool HasScalarAtomics = false;
 120   bool HasSDWAOmod = false;
 121   bool HasSDWAScalar = false;
 122   bool HasSDWASdst = false;
 123   bool HasSDWAMac = false;
 124   bool HasSDWAOutModsVOPC = false;
 125   bool HasDPP = false;
 126   bool HasDPP8 = false;
 127   bool Has64BitDPP = false;
 128   bool HasPackedFP32Ops = false;
 129   bool HasImageInsts = false;
 130   bool HasExtendedImageInsts = false;
 131   bool HasR128A16 = false;
 132   bool HasA16 = false;
 133   bool HasG16 = false;
 134   bool HasNSAEncoding = false;
 135   unsigned NSAMaxSize = 0;
 136   bool GFX10_AEncoding = false;
 137   bool GFX10_BEncoding = false;
 138   bool HasDLInsts = false;
 139   bool HasFmacF64Inst = false;
 140   bool HasDot1Insts = false;
 141   bool HasDot2Insts = false;
 142   bool HasDot3Insts = false;
 143   bool HasDot4Insts = false;
 144   bool HasDot5Insts = false;
 145   bool HasDot6Insts = false;
 146   bool HasDot7Insts = false;
 147   bool HasDot8Insts = false;
 148   bool HasDot9Insts = false;
 149   bool HasMAIInsts = false;
 150   bool HasFP8Insts = false;
 151   bool HasPkFmacF16Inst = false;
 152   bool HasAtomicFaddRtnInsts = false;
 153   bool HasAtomicFaddNoRtnInsts = false;
 154   bool HasAtomicPkFaddNoRtnInsts = false;
 155   bool HasFlatAtomicFaddF32Inst = false;
 156   bool SupportsSRAMECC = false;
 157
 158   // This should not be used directly. 'TargetID' tracks the dynamic settings
 159   // for SRAMECC.
 160   bool EnableSRAMECC = false;
 161
 162   bool HasNoSdstCMPX = false;
 163   bool HasVscnt = false;
 164   bool HasGetWaveIdInst = false;
 165   bool HasSMemTimeInst = false;
 166   bool HasShaderCyclesRegister = false;
 167   bool HasVOP3Literal = false;
 168   bool HasNoDataDepHazard = false;
 169   bool FlatAddressSpace = false;
 170   bool FlatInstOffsets = false;
 171   bool FlatGlobalInsts = false;
 172   bool FlatScratchInsts = false;
 173   bool ScalarFlatScratchInsts = false;
 174   bool HasArchitectedFlatScratch = false;
 175   bool EnableFlatScratch = false;
 176   bool AddNoCarryInsts = false;
 177   bool HasUnpackedD16VMem = false;
 178   bool LDSMisalignedBug = false;
 179   bool HasMFMAInlineLiteralBug = false;
 180   bool UnalignedBufferAccess = false;
 181   bool UnalignedDSAccess = false;
 182   bool HasPackedTID = false;
 183   bool ScalarizeGlobal = false;
 184
 185   bool HasVcmpxPermlaneHazard = false;
 186   bool HasVMEMtoScalarWriteHazard = false;
 187   bool HasSMEMtoVectorWriteHazard = false;
 188   bool HasInstFwdPrefetchBug = false;
 189   bool HasVcmpxExecWARHazard = false;
 190   bool HasLdsBranchVmemWARHazard = false;
 191   bool HasNSAtoVMEMBug = false;
 192   bool HasNSAClauseBug = false;
 193   bool HasOffset3fBug = false;
 194   bool HasFlatSegmentOffsetBug = false;
 195   bool HasImageStoreD16Bug = false;
 196   bool HasImageGather4D16Bug = false;
 197   bool HasGFX11FullVGPRs = false;
 198   bool HasMADIntraFwdBug = false;
 199   bool HasVOPDInsts = false;
 200   bool HasVALUTransUseHazard = false;
 201
 202   // Dummy feature to use for assembler in tablegen.
 203   bool FeatureDisable = false;
 204
 205   SelectionDAGTargetInfo TSInfo;
 206 private:
 207   SIInstrInfo InstrInfo;
 208   SITargetLowering TLInfo;
 209   SIFrameLowering FrameLowering;
 210
 211 public:
 212   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 213                const GCNTargetMachine &TM);
 214   ~GCNSubtarget() override;
 215
 216   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
 217                                                    StringRef GPU, StringRef FS);
 218
 219   const SIInstrInfo *getInstrInfo() const override {
 220     return &InstrInfo;
 221   }
 222
 223   const SIFrameLowering *getFrameLowering() const override {
 224     return &FrameLowering;
 225   }
 226
 227   const SITargetLowering *getTargetLowering() const override {
 228     return &TLInfo;
 229   }
 230
 231   const SIRegisterInfo *getRegisterInfo() const override {
 232     return &InstrInfo.getRegisterInfo();
 233   }
 234
 235   const CallLowering *getCallLowering() const override {
 236     return CallLoweringInfo.get();
 237   }
 238
 239   const InlineAsmLowering *getInlineAsmLowering() const override {
 240     return InlineAsmLoweringInfo.get();
 241   }
 242
 243   InstructionSelector *getInstructionSelector() const override {
 244     return InstSelector.get();
 245   }
 246
 247   const LegalizerInfo *getLegalizerInfo() const override {
 248     return Legalizer.get();
 249   }
 250
 251   const RegisterBankInfo *getRegBankInfo() const override {
 252     return RegBankInfo.get();
 253   }
 254
 255   const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
 256     return TargetID;
 257   }
 258
 259   // Nothing implemented, just prevent crashes on use.
 260   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 261     return &TSInfo;
 262   }
 263
 264   const InstrItineraryData *getInstrItineraryData() const override {
 265     return &InstrItins;
 266   }
 267
 268   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 269
 270   Generation getGeneration() const {
 271     return (Generation)Gen;
 272   }
 273
 274   unsigned getMaxWaveScratchSize() const {
 275     // See COMPUTE_TMPRING_SIZE.WAVESIZE.
 276     if (getGeneration() < GFX11) {
 277       // 13-bit field in units of 256-dword.
 278       return (256 * 4) * ((1 << 13) - 1);
 279     }
 280     // 15-bit field in units of 64-dword.
 281     return (64 * 4) * ((1 << 15) - 1);
 282   }
 283
 284   /// Return the number of high bits known to be zero for a frame index.
 285   unsigned getKnownHighZeroBitsForFrameIndex() const {
 286     return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
 287   }
 288
 289   int getLDSBankCount() const {
 290     return LDSBankCount;
 291   }
 292
 293   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
 294     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
 295   }
 296
 297   unsigned getConstantBusLimit(unsigned Opcode) const;
 298
 299   /// Returns if the result of this instruction with a 16-bit result returned in
 300   /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
 301   /// the original value.
 302   bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
 303
 304   bool supportsWGP() const { return getGeneration() >= GFX10; }
 305
 306   bool hasIntClamp() const {
 307     return HasIntClamp;
 308   }
 309
 310   bool hasFP64() const {
 311     return FP64;
 312   }
 313
 314   bool hasMIMG_R128() const {
 315     return MIMG_R128;
 316   }
 317
 318   bool hasHWFP64() const {
 319     return FP64;
 320   }
 321
 322   bool hasFastFMAF32() const {
 323     return FastFMAF32;
 324   }
 325
 326   bool hasHalfRate64Ops() const {
 327     return HalfRate64Ops;
 328   }
 329
 330   bool hasFullRate64Ops() const {
 331     return FullRate64Ops;
 332   }
 333
 334   bool hasAddr64() const {
 335     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
 336   }
 337
 338   bool hasFlat() const {
 339     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
 340   }
 341
 342   // Return true if the target only has the reverse operand versions of VALU
 343   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
 344   bool hasOnlyRevVALUShifts() const {
 345     return getGeneration() >= VOLCANIC_ISLANDS;
 346   }
 347
 348   bool hasFractBug() const {
 349     return getGeneration() == SOUTHERN_ISLANDS;
 350   }
 351
 352   bool hasBFE() const {
 353     return true;
 354   }
 355
 356   bool hasBFI() const {
 357     return true;
 358   }
 359
 360   bool hasBFM() const {
 361     return hasBFE();
 362   }
 363
 364   bool hasBCNT(unsigned Size) const {
 365     return true;
 366   }
 367
 368   bool hasFFBL() const {
 369     return true;
 370   }
 371
 372   bool hasFFBH() const {
 373     return true;
 374   }
 375
 376   bool hasMed3_16() const {
 377     return getGeneration() >= AMDGPUSubtarget::GFX9;
 378   }
 379
 380   bool hasMin3Max3_16() const {
 381     return getGeneration() >= AMDGPUSubtarget::GFX9;
 382   }
 383
 384   bool hasFmaMixInsts() const {
 385     return HasFmaMixInsts;
 386   }
 387
 388   bool hasCARRY() const {
 389     return true;
 390   }
 391
 392   bool hasFMA() const {
 393     return FMA;
 394   }
 395
 396   bool hasSwap() const {
 397     return GFX9Insts;
 398   }
 399
 400   bool hasScalarPackInsts() const {
 401     return GFX9Insts;
 402   }
 403
 404   bool hasScalarMulHiInsts() const {
 405     return GFX9Insts;
 406   }
 407
 408   TrapHandlerAbi getTrapHandlerAbi() const {
 409     return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
 410   }
 411
 412   bool supportsGetDoorbellID() const {
 413     // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
 414     return getGeneration() >= GFX9;
 415   }
 416
 417   /// True if the offset field of DS instructions works as expected. On SI, the
 418   /// offset uses a 16-bit adder and does not always wrap properly.
 419   bool hasUsableDSOffset() const {
 420     return getGeneration() >= SEA_ISLANDS;
 421   }
 422
 423   bool unsafeDSOffsetFoldingEnabled() const {
 424     return EnableUnsafeDSOffsetFolding;
 425   }
 426
 427   /// Condition output from div_scale is usable.
 428   bool hasUsableDivScaleConditionOutput() const {
 429     return getGeneration() != SOUTHERN_ISLANDS;
 430   }
 431
 432   /// Extra wait hazard is needed in some cases before
 433   /// s_cbranch_vccnz/s_cbranch_vccz.
 434   bool hasReadVCCZBug() const {
 435     return getGeneration() <= SEA_ISLANDS;
 436   }
 437
 438   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
 439   bool partialVCCWritesUpdateVCCZ() const {
 440     return getGeneration() >= GFX10;
 441   }
 442
 443   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
 444   /// was written by a VALU instruction.
 445   bool hasSMRDReadVALUDefHazard() const {
 446     return getGeneration() == SOUTHERN_ISLANDS;
 447   }
 448
 449   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
 450   /// SGPR was written by a VALU Instruction.
 451   bool hasVMEMReadSGPRVALUDefHazard() const {
 452     return getGeneration() >= VOLCANIC_ISLANDS;
 453   }
 454
 455   bool hasRFEHazards() const {
 456     return getGeneration() >= VOLCANIC_ISLANDS;
 457   }
 458
 459   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
 460   unsigned getSetRegWaitStates() const {
 461     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
 462   }
 463
 464   bool dumpCode() const {
 465     return DumpCode;
 466   }
 467
 468   /// Return the amount of LDS that can be used that will not restrict the
 469   /// occupancy lower than WaveCount.
 470   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 471                                            const Function &) const;
 472
 473   bool supportsMinMaxDenormModes() const {
 474     return getGeneration() >= AMDGPUSubtarget::GFX9;
 475   }
 476
 477   /// \returns If target supports S_DENORM_MODE.
 478   bool hasDenormModeInst() const {
 479     return getGeneration() >= AMDGPUSubtarget::GFX10;
 480   }
 481
 482   bool useFlatForGlobal() const {
 483     return FlatForGlobal;
 484   }
 485
 486   /// \returns If target supports ds_read/write_b128 and user enables generation
 487   /// of ds_read/write_b128.
 488   bool useDS128() const {
 489     return CIInsts && EnableDS128;
 490   }
 491
 492   /// \return If target supports ds_read/write_b96/128.
 493   bool hasDS96AndDS128() const {
 494     return CIInsts;
 495   }
 496
 497   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
 498   bool haveRoundOpsF64() const {
 499     return CIInsts;
 500   }
 501
 502   /// \returns If MUBUF instructions always perform range checking, even for
 503   /// buffer resources used for private memory access.
 504   bool privateMemoryResourceIsRangeChecked() const {
 505     return getGeneration() < AMDGPUSubtarget::GFX9;
 506   }
 507
 508   /// \returns If target requires PRT Struct NULL support (zero result registers
 509   /// for sparse texture support).
 510   bool usePRTStrictNull() const {
 511     return EnablePRTStrictNull;
 512   }
 513
 514   bool hasAutoWaitcntBeforeBarrier() const {
 515     return AutoWaitcntBeforeBarrier;
 516   }
 517
 518   /// \returns true if the target supports backing off of s_barrier instructions
 519   /// when an exception is raised.
 520   bool supportsBackOffBarrier() const {
 521     return BackOffBarrier;
 522   }
 523
 524   bool hasUnalignedBufferAccess() const {
 525     return UnalignedBufferAccess;
 526   }
 527
 528   bool hasUnalignedBufferAccessEnabled() const {
 529     return UnalignedBufferAccess && UnalignedAccessMode;
 530   }
 531
 532   bool hasUnalignedDSAccess() const {
 533     return UnalignedDSAccess;
 534   }
 535
 536   bool hasUnalignedDSAccessEnabled() const {
 537     return UnalignedDSAccess && UnalignedAccessMode;
 538   }
 539
 540   bool hasUnalignedScratchAccess() const {
 541     return UnalignedScratchAccess;
 542   }
 543
 544   bool hasUnalignedAccessMode() const {
 545     return UnalignedAccessMode;
 546   }
 547
 548   bool hasApertureRegs() const {
 549     return HasApertureRegs;
 550   }
 551
 552   bool isTrapHandlerEnabled() const {
 553     return TrapHandler;
 554   }
 555
 556   bool isXNACKEnabled() const {
 557     return TargetID.isXnackOnOrAny();
 558   }
 559
 560   bool isTgSplitEnabled() const {
 561     return EnableTgSplit;
 562   }
 563
 564   bool isCuModeEnabled() const {
 565     return EnableCuMode;
 566   }
 567
 568   bool hasFlatAddressSpace() const {
 569     return FlatAddressSpace;
 570   }
 571
 572   bool hasFlatScrRegister() const {
 573     return hasFlatAddressSpace();
 574   }
 575
 576   bool hasFlatInstOffsets() const {
 577     return FlatInstOffsets;
 578   }
 579
 580   bool hasFlatGlobalInsts() const {
 581     return FlatGlobalInsts;
 582   }
 583
 584   bool hasFlatScratchInsts() const {
 585     return FlatScratchInsts;
 586   }
 587
 588   // Check if target supports ST addressing mode with FLAT scratch instructions.
 589   // The ST addressing mode means no registers are used, either VGPR or SGPR,
 590   // but only immediate offset is swizzled and added to the FLAT scratch base.
 591   bool hasFlatScratchSTMode() const {
 592     return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
 593   }
 594
 595   bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
 596
 597   bool hasScalarFlatScratchInsts() const {
 598     return ScalarFlatScratchInsts;
 599   }
 600
 601   bool enableFlatScratch() const {
 602     return flatScratchIsArchitected() ||
 603            (EnableFlatScratch && hasFlatScratchInsts());
 604   }
 605
 606   bool hasGlobalAddTidInsts() const {
 607     return GFX10_BEncoding;
 608   }
 609
 610   bool hasAtomicCSub() const {
 611     return GFX10_BEncoding;
 612   }
 613
 614   bool hasMultiDwordFlatScratchAddressing() const {
 615     return getGeneration() >= GFX9;
 616   }
 617
 618   bool hasFlatSegmentOffsetBug() const {
 619     return HasFlatSegmentOffsetBug;
 620   }
 621
 622   bool hasFlatLgkmVMemCountInOrder() const {
 623     return getGeneration() > GFX9;
 624   }
 625
 626   bool hasD16LoadStore() const {
 627     return getGeneration() >= GFX9;
 628   }
 629
 630   bool d16PreservesUnusedBits() const {
 631     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
 632   }
 633
 634   bool hasD16Images() const {
 635     return getGeneration() >= VOLCANIC_ISLANDS;
 636   }
 637
 638   /// Return if most LDS instructions have an m0 use that require m0 to be
 639   /// initialized.
 640   bool ldsRequiresM0Init() const {
 641     return getGeneration() < GFX9;
 642   }
 643
 644   // True if the hardware rewinds and replays GWS operations if a wave is
 645   // preempted.
 646   //
 647   // If this is false, a GWS operation requires testing if a nack set the
 648   // MEM_VIOL bit, and repeating if so.
 649   bool hasGWSAutoReplay() const {
 650     return getGeneration() >= GFX9;
 651   }
 652
 653   /// \returns if target has ds_gws_sema_release_all instruction.
 654   bool hasGWSSemaReleaseAll() const {
 655     return CIInsts;
 656   }
 657
 658   /// \returns true if the target has integer add/sub instructions that do not
 659   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
 660   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
 661   /// for saturation.
 662   bool hasAddNoCarry() const {
 663     return AddNoCarryInsts;
 664   }
 665
 666   bool hasUnpackedD16VMem() const {
 667     return HasUnpackedD16VMem;
 668   }
 669
 670   // Covers VS/PS/CS graphics shaders
 671   bool isMesaGfxShader(const Function &F) const {
 672     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
 673   }
 674
 675   bool hasMad64_32() const {
 676     return getGeneration() >= SEA_ISLANDS;
 677   }
 678
 679   bool hasSDWAOmod() const {
 680     return HasSDWAOmod;
 681   }
 682
 683   bool hasSDWAScalar() const {
 684     return HasSDWAScalar;
 685   }
 686
 687   bool hasSDWASdst() const {
 688     return HasSDWASdst;
 689   }
 690
 691   bool hasSDWAMac() const {
 692     return HasSDWAMac;
 693   }
 694
 695   bool hasSDWAOutModsVOPC() const {
 696     return HasSDWAOutModsVOPC;
 697   }
 698
 699   bool hasDLInsts() const {
 700     return HasDLInsts;
 701   }
 702
 703   bool hasFmacF64Inst() const { return HasFmacF64Inst; }
 704
 705   bool hasDot1Insts() const {
 706     return HasDot1Insts;
 707   }
 708
 709   bool hasDot2Insts() const {
 710     return HasDot2Insts;
 711   }
 712
 713   bool hasDot3Insts() const {
 714     return HasDot3Insts;
 715   }
 716
 717   bool hasDot4Insts() const {
 718     return HasDot4Insts;
 719   }
 720
 721   bool hasDot5Insts() const {
 722     return HasDot5Insts;
 723   }
 724
 725   bool hasDot6Insts() const {
 726     return HasDot6Insts;
 727   }
 728
 729   bool hasDot7Insts() const {
 730     return HasDot7Insts;
 731   }
 732
 733   bool hasDot8Insts() const {
 734     return HasDot8Insts;
 735   }
 736
 737   bool hasDot9Insts() const {
 738     return HasDot9Insts;
 739   }
 740
 741   bool hasMAIInsts() const {
 742     return HasMAIInsts;
 743   }
 744
 745   bool hasFP8Insts() const {
 746     return HasFP8Insts;
 747   }
 748
 749   bool hasPkFmacF16Inst() const {
 750     return HasPkFmacF16Inst;
 751   }
 752
 753   bool hasAtomicFaddInsts() const {
 754     return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
 755   }
 756
 757   bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }
 758
 759   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
 760
 761   bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
 762
 763   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 764
 765   bool hasNoSdstCMPX() const {
 766     return HasNoSdstCMPX;
 767   }
 768
 769   bool hasVscnt() const {
 770     return HasVscnt;
 771   }
 772
 773   bool hasGetWaveIdInst() const {
 774     return HasGetWaveIdInst;
 775   }
 776
 777   bool hasSMemTimeInst() const {
 778     return HasSMemTimeInst;
 779   }
 780
 781   bool hasShaderCyclesRegister() const {
 782     return HasShaderCyclesRegister;
 783   }
 784
 785   bool hasVOP3Literal() const {
 786     return HasVOP3Literal;
 787   }
 788
 789   bool hasNoDataDepHazard() const {
 790     return HasNoDataDepHazard;
 791   }
 792
 793   bool vmemWriteNeedsExpWaitcnt() const {
 794     return getGeneration() < SEA_ISLANDS;
 795   }
 796
 797   bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
 798
 799   // Scratch is allocated in 256 dword per wave blocks for the entire
 800   // wavefront. When viewed from the perspective of an arbitrary workitem, this
 801   // is 4-byte aligned.
 802   //
 803   // Only 4-byte alignment is really needed to access anything. Transformations
 804   // on the pointer value itself may rely on the alignment / known low bits of
 805   // the pointer. Set this to something above the minimum to avoid needing
 806   // dynamic realignment in common cases.
 807   Align getStackAlignment() const { return Align(16); }
 808
 809   bool enableMachineScheduler() const override {
 810     return true;
 811   }
 812
 813   bool useAA() const override;
 814
 815   bool enableSubRegLiveness() const override {
 816     return true;
 817   }
 818
 819   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
 820   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
 821
 822   // static wrappers
 823   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
 824
 825   // XXX - Why is this here if it isn't in the default pass set?
 826   bool enableEarlyIfConversion() const override {
 827     return true;
 828   }
 829
 830   void overrideSchedPolicy(MachineSchedPolicy &Policy,
 831                            unsigned NumRegionInstrs) const override;
 832
 833   unsigned getMaxNumUserSGPRs() const {
 834     return 16;
 835   }
 836
 837   bool hasSMemRealTime() const {
 838     return HasSMemRealTime;
 839   }
 840
 841   bool hasMovrel() const {
 842     return HasMovrel;
 843   }
 844
 845   bool hasVGPRIndexMode() const {
 846     return HasVGPRIndexMode;
 847   }
 848
 849   bool useVGPRIndexMode() const;
 850
 851   bool hasScalarCompareEq64() const {
 852     return getGeneration() >= VOLCANIC_ISLANDS;
 853   }
 854
 855   bool hasScalarStores() const {
 856     return HasScalarStores;
 857   }
 858
 859   bool hasScalarAtomics() const {
 860     return HasScalarAtomics;
 861   }
 862
 863   bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
 864
 865   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
 866   bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
 867
 868   /// \returns true if the subtarget has the v_permlane64_b32 instruction.
 869   bool hasPermLane64() const { return getGeneration() >= GFX11; }
 870
 871   bool hasDPP() const {
 872     return HasDPP;
 873   }
 874
 875   bool hasDPPBroadcasts() const {
 876     return HasDPP && getGeneration() < GFX10;
 877   }
 878
 879   bool hasDPPWavefrontShifts() const {
 880     return HasDPP && getGeneration() < GFX10;
 881   }
 882
 883   bool hasDPP8() const {
 884     return HasDPP8;
 885   }
 886
 887   bool has64BitDPP() const {
 888     return Has64BitDPP;
 889   }
 890
 891   bool hasPackedFP32Ops() const {
 892     return HasPackedFP32Ops;
 893   }
 894
 895   bool hasFmaakFmamkF32Insts() const {
 896     return getGeneration() >= GFX10 || hasGFX940Insts();
 897   }
 898
 899   bool hasImageInsts() const {
 900     return HasImageInsts;
 901   }
 902
 903   bool hasExtendedImageInsts() const {
 904     return HasExtendedImageInsts;
 905   }
 906
 907   bool hasR128A16() const {
 908     return HasR128A16;
 909   }
 910
 911   bool hasA16() const { return HasA16; }
 912
 913   bool hasG16() const { return HasG16; }
 914
 915   bool hasOffset3fBug() const {
 916     return HasOffset3fBug;
 917   }
 918
 919   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
 920
 921   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
 922
 923   bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
 924
 925   bool hasNSAEncoding() const { return HasNSAEncoding; }
 926
 927   unsigned getNSAMaxSize() const { return NSAMaxSize; }
 928
 929   bool hasGFX10_AEncoding() const {
 930     return GFX10_AEncoding;
 931   }
 932
 933   bool hasGFX10_BEncoding() const {
 934     return GFX10_BEncoding;
 935   }
 936
 937   bool hasGFX10_3Insts() const {
 938     return GFX10_3Insts;
 939   }
 940
 941   bool hasMadF16() const;
 942
 943   bool hasMovB64() const { return GFX940Insts; }
 944
 945   bool hasLshlAddB64() const { return GFX940Insts; }
 946
 947   bool enableSIScheduler() const {
 948     return EnableSIScheduler;
 949   }
 950
 951   bool loadStoreOptEnabled() const {
 952     return EnableLoadStoreOpt;
 953   }
 954
 955   bool hasSGPRInitBug() const {
 956     return SGPRInitBug;
 957   }
 958
 959   bool hasUserSGPRInit16Bug() const {
 960     return UserSGPRInit16Bug && isWave32();
 961   }
 962
 963   bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
 964
 965   bool hasNegativeUnalignedScratchOffsetBug() const {
 966     return NegativeUnalignedScratchOffsetBug;
 967   }
 968
 969   bool hasMFMAInlineLiteralBug() const {
 970     return HasMFMAInlineLiteralBug;
 971   }
 972
 973   bool has12DWordStoreHazard() const {
 974     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
 975   }
 976
 977   // \returns true if the subtarget supports DWORDX3 load/store instructions.
 978   bool hasDwordx3LoadStores() const {
 979     return CIInsts;
 980   }
 981
 982   bool hasReadM0MovRelInterpHazard() const {
 983     return getGeneration() == AMDGPUSubtarget::GFX9;
 984   }
 985
 986   bool hasReadM0SendMsgHazard() const {
 987     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
 988            getGeneration() <= AMDGPUSubtarget::GFX9;
 989   }
 990
 991   bool hasReadM0LdsDmaHazard() const {
 992     return getGeneration() == AMDGPUSubtarget::GFX9;
 993   }
 994
 995   bool hasReadM0LdsDirectHazard() const {
 996     return getGeneration() == AMDGPUSubtarget::GFX9;
 997   }
 998
 999   bool hasVcmpxPermlaneHazard() const {
1000     return HasVcmpxPermlaneHazard;
1001   }
1002
1003   bool hasVMEMtoScalarWriteHazard() const {
1004     return HasVMEMtoScalarWriteHazard;
1005   }
1006
1007   bool hasSMEMtoVectorWriteHazard() const {
1008     return HasSMEMtoVectorWriteHazard;
1009   }
1010
1011   bool hasLDSMisalignedBug() const {
1012     return LDSMisalignedBug && !EnableCuMode;
1013   }
1014
1015   bool hasInstFwdPrefetchBug() const {
1016     return HasInstFwdPrefetchBug;
1017   }
1018
1019   bool hasVcmpxExecWARHazard() const {
1020     return HasVcmpxExecWARHazard;
1021   }
1022
1023   bool hasLdsBranchVmemWARHazard() const {
1024     return HasLdsBranchVmemWARHazard;
1025   }
1026
1027   // Shift amount of a 64 bit shift cannot be a highest allocated register
1028   // if also at the end of the allocation block.
1029   bool hasShift64HighRegBug() const {
1030     return GFX90AInsts && !GFX940Insts;
1031   }
1032
1033   // Has one cycle hazard on transcendental instruction feeding a
1034   // non transcendental VALU.
1035   bool hasTransForwardingHazard() const { return GFX940Insts; }
1036
1037   // Has one cycle hazard on a VALU instruction partially writing dst with
1038   // a shift of result bits feeding another VALU instruction.
1039   bool hasDstSelForwardingHazard() const { return GFX940Insts; }
1040
1041   // Cannot use op_sel with v_dot instructions.
1042   bool hasDOTOpSelHazard() const { return GFX940Insts; }
1043
1044   // Does not have HW interlocs for VALU writing and then reading SGPRs.
1045   bool hasVDecCoExecHazard() const {
1046     return GFX940Insts;
1047   }
1048
1049   bool hasNSAtoVMEMBug() const {
1050     return HasNSAtoVMEMBug;
1051   }
1052
1053   bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1054
1055   bool hasHardClauses() const { return getGeneration() >= GFX10; }
1056
1057   bool hasGFX90AInsts() const { return GFX90AInsts; }
1058
1059   bool hasFPAtomicToDenormModeHazard() const {
1060     return getGeneration() == GFX10;
1061   }
1062
1063   bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1064
1065   bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1066
1067   bool hasVALUPartialForwardingHazard() const {
1068     return getGeneration() >= GFX11;
1069   }
1070
1071   bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
1072
1073   bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1074
1075   /// Return if operations acting on VGPR tuples require even alignment.
1076   bool needsAlignedVGPRs() const { return GFX90AInsts; }
1077
1078   /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1079   bool hasSPackHL() const { return GFX11Insts; }
1080
1081   /// Return true if the target's EXP instruction has the COMPR flag, which
1082   /// affects the meaning of the EN (enable) bits.
1083   bool hasCompressedExport() const { return !GFX11Insts; }
1084
1085   /// Return true if the target's EXP instruction supports the NULL export
1086   /// target.
1087   bool hasNullExportTarget() const { return !GFX11Insts; }
1088
1089   bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1090
1091   bool hasVOPDInsts() const { return HasVOPDInsts; }
1092
1093   bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
1094
1095   /// Return true if the target has the S_DELAY_ALU instruction.
1096   bool hasDelayAlu() const { return GFX11Insts; }
1097
1098   bool hasPackedTID() const { return HasPackedTID; }
1099
1100   // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1101   // hasGFX90AInsts is also true.
1102   bool hasGFX940Insts() const { return GFX940Insts; }
1103
  /// \returns the maximum number of waves per SIMD for kernels that use
  /// \p SGPRs scalar registers (SGPRs). Defined out of line.
  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1107
  /// \returns the maximum number of waves per SIMD for kernels that use
  /// \p VGPRs vector registers (VGPRs). Defined out of line.
  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1111
  /// \returns the occupancy for the function \p F. The LDS size and the
  /// SGPR/VGPR counts are used if provided; each defaults to 0.
  /// Note, occupancy can be affected by the scratch allocation as well, but
  /// we do not have enough information to compute it.
  unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
                            unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1118
1119   /// \returns true if the flat_scratch register should be initialized with the
1120   /// pointer to the wave's scratch memory rather than a size and offset.
1121   bool flatScratchIsPointer() const {
1122     return getGeneration() >= AMDGPUSubtarget::GFX9;
1123   }
1124
1125   /// \returns true if the flat_scratch register is initialized by the HW.
1126   /// In this case it is readonly.
1127   bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
1128
1129   /// \returns true if the machine has merged shaders in which s0-s7 are
1130   /// reserved by the hardware and user SGPRs start at s8
1131   bool hasMergedShaders() const {
1132     return getGeneration() >= GFX9;
1133   }
1134
1135   // \returns true if the target supports the pre-NGG legacy geometry path.
1136   bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1137
1138   /// \returns SGPR allocation granularity supported by the subtarget.
1139   unsigned getSGPRAllocGranule() const {
1140     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
1141   }
1142
1143   /// \returns SGPR encoding granularity supported by the subtarget.
1144   unsigned getSGPREncodingGranule() const {
1145     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
1146   }
1147
1148   /// \returns Total number of SGPRs supported by the subtarget.
1149   unsigned getTotalNumSGPRs() const {
1150     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1151   }
1152
1153   /// \returns Addressable number of SGPRs supported by the subtarget.
1154   unsigned getAddressableNumSGPRs() const {
1155     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
1156   }
1157
1158   /// \returns Minimum number of SGPRs that meets the given number of waves per
1159   /// execution unit requirement supported by the subtarget.
1160   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1161     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1162   }
1163
1164   /// \returns Maximum number of SGPRs that meets the given number of waves per
1165   /// execution unit requirement supported by the subtarget.
1166   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1167     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1168   }
1169
  /// \returns the reserved number of SGPRs. This is the common utility
  /// function called by the MachineFunction and Function variants of
  /// getReservedNumSGPRs. \p HasFlatScratch is supplied by those callers;
  /// see the out-of-line definition for how it affects the result.
  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
  /// \returns the reserved number of SGPRs for the given machine function
  /// \p MF. Defined out of line.
  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1176
  /// \returns the reserved number of SGPRs for the given IR function \p F.
  /// Defined out of line.
  unsigned getReservedNumSGPRs(const Function &F) const;
1179
  /// \returns the maximum number of SGPRs. This is the common utility
  /// function called by the MachineFunction and Function variants of
  /// getMaxNumSGPRs, which supply the waves-per-EU pair \p WavesPerEU, the
  /// preloaded SGPR count \p PreloadedSGPRs and the reserved SGPR count
  /// \p ReservedNumSGPRs for the function \p F.
  unsigned getBaseMaxNumSGPRs(const Function &F,
                              std::pair<unsigned, unsigned> WavesPerEU,
                              unsigned PreloadedSGPRs,
                              unsigned ReservedNumSGPRs) const;
1187
  /// \returns Maximum number of SGPRs that meets the number of waves per
  /// execution unit requirement for function \p MF, or the number of SGPRs
  /// explicitly requested using the "amdgpu-num-sgpr" attribute attached to
  /// function \p MF.
  ///
  /// If the explicitly requested value cannot be converted to an integer,
  /// violates the subtarget's specifications, or does not meet the number of
  /// waves per execution unit requirement, the value that meets the
  /// requirement is returned instead.
  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1197
  /// \returns Maximum number of SGPRs that meets the number of waves per
  /// execution unit requirement for function \p F, or the number of SGPRs
  /// explicitly requested using the "amdgpu-num-sgpr" attribute attached to
  /// function \p F.
  ///
  /// If the explicitly requested value cannot be converted to an integer,
  /// violates the subtarget's specifications, or does not meet the number of
  /// waves per execution unit requirement, the value that meets the
  /// requirement is returned instead.
  unsigned getMaxNumSGPRs(const Function &F) const;
1207
1208   /// \returns VGPR allocation granularity supported by the subtarget.
1209   unsigned getVGPRAllocGranule() const {
1210     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
1211   }
1212
1213   /// \returns VGPR encoding granularity supported by the subtarget.
1214   unsigned getVGPREncodingGranule() const {
1215     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
1216   }
1217
1218   /// \returns Total number of VGPRs supported by the subtarget.
1219   unsigned getTotalNumVGPRs() const {
1220     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1221   }
1222
1223   /// \returns Addressable number of VGPRs supported by the subtarget.
1224   unsigned getAddressableNumVGPRs() const {
1225     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
1226   }
1227
1228   /// \returns the minimum number of VGPRs that will prevent achieving more than
1229   /// the specified number of waves \p WavesPerEU.
1230   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1231     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1232   }
1233
1234   /// \returns the maximum number of VGPRs that can be used and still achieved
1235   /// at least the specified number of waves \p WavesPerEU.
1236   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1237     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1238   }
1239
  /// \returns the maximum number of VGPRs. This is the common utility
  /// function called by the MachineFunction and Function variants of
  /// getMaxNumVGPRs, which supply the waves-per-EU pair \p WavesPerEU for the
  /// function \p F.
  unsigned getBaseMaxNumVGPRs(const Function &F,
                              std::pair<unsigned, unsigned> WavesPerEU) const;
  /// \returns Maximum number of VGPRs that meets the number of waves per
  /// execution unit requirement for function \p F, or the number of VGPRs
  /// explicitly requested using the "amdgpu-num-vgpr" attribute attached to
  /// function \p F.
  ///
  /// If the explicitly requested value cannot be converted to an integer,
  /// violates the subtarget's specifications, or does not meet the number of
  /// waves per execution unit requirement, the value that meets the
  /// requirement is returned instead.
  unsigned getMaxNumVGPRs(const Function &F) const;
1253
1254   unsigned getMaxNumAGPRs(const Function &F) const {
1255     return getMaxNumVGPRs(F);
1256   }
1257
  /// \returns Maximum number of VGPRs that meets the number of waves per
  /// execution unit requirement for function \p MF, or the number of VGPRs
  /// explicitly requested using the "amdgpu-num-vgpr" attribute attached to
  /// function \p MF.
  ///
  /// If the explicitly requested value cannot be converted to an integer,
  /// violates the subtarget's specifications, or does not meet the number of
  /// waves per execution unit requirement, the value that meets the
  /// requirement is returned instead.
  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1267
  /// Override of the base-class hook that receives the \p Mutations vector by
  /// reference. Defined out of line.
  /// NOTE(review): presumably appends this subtarget's post-RA scheduling DAG
  /// mutations to \p Mutations -- confirm against the definition.
  void getPostRAMutations(
      std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
      const override;
1271
  /// \returns a newly created ScheduleDAGMutation owned by the caller, built
  /// with access to the instruction info \p TII. Defined out of line.
  /// NOTE(review): judging by the name, the mutation concerns scheduling
  /// around MFMA instructions -- confirm against the definition.
  std::unique_ptr<ScheduleDAGMutation>
  createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
1274
1275   bool isWave32() const {
1276     return getWavefrontSize() == 32;
1277   }
1278
1279   bool isWave64() const {
1280     return getWavefrontSize() == 64;
1281   }
1282
1283   const TargetRegisterClass *getBoolRC() const {
1284     return getRegisterInfo()->getBoolRC();
1285   }
1286
1287   /// \returns Maximum number of work groups per compute unit supported by the
1288   /// subtarget and limited by given \p FlatWorkGroupSize.
1289   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1290     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1291   }
1292
1293   /// \returns Minimum flat work group size supported by the subtarget.
1294   unsigned getMinFlatWorkGroupSize() const override {
1295     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1296   }
1297
1298   /// \returns Maximum flat work group size supported by the subtarget.
1299   unsigned getMaxFlatWorkGroupSize() const override {
1300     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1301   }
1302
1303   /// \returns Number of waves per execution unit required to support the given
1304   /// \p FlatWorkGroupSize.
1305   unsigned
1306   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1307     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1308   }
1309
1310   /// \returns Minimum number of waves per execution unit supported by the
1311   /// subtarget.
1312   unsigned getMinWavesPerEU() const override {
1313     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1314   }
1315
  /// Override of the base-class scheduling hook. It receives the defining
  /// unit \p Def with operand index \p DefOpIdx, the using unit \p Use with
  /// operand index \p UseOpIdx, and the dependency \p Dep by mutable
  /// reference. Defined out of line.
  /// NOTE(review): presumably adjusts \p Dep (e.g. its latency) for this
  /// subtarget -- confirm against the definition.
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep) const override;
1318
1319   // \returns true if it's beneficial on this subtarget for the scheduler to
1320   // cluster stores as well as loads.
1321   bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1322
  /// \returns the number of address arguments from which to enable MIMG NSA
  /// on supported architectures, for the machine function \p MF. Defined out
  /// of line.
  unsigned getNSAThreshold(const MachineFunction &MF) const;
1326 };
1327
1328 } // end namespace llvm
1329
1330 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H