contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

   1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief AMDGPU specific subclass of TargetSubtarget.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  17
  18 #include "AMDGPU.h"
  19 #include "R600InstrInfo.h"
  20 #include "R600ISelLowering.h"
  21 #include "R600FrameLowering.h"
  22 #include "SIInstrInfo.h"
  23 #include "SIISelLowering.h"
  24 #include "SIFrameLowering.h"
  25 #include "SIMachineFunctionInfo.h"
  26 #include "Utils/AMDGPUBaseInfo.h"
  27 #include "llvm/ADT/Triple.h"
  28 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
  29 #include "llvm/CodeGen/MachineFunction.h"
  30 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  31 #include "llvm/MC/MCInstrItineraries.h"
  32 #include "llvm/Support/MathExtras.h"
  33 #include <cassert>
  34 #include <cstdint>
  35 #include <memory>
  36 #include <utility>
  37
  38 #define GET_SUBTARGETINFO_HEADER
  39 #include "AMDGPUGenSubtargetInfo.inc"
  40
  41 namespace llvm {
  42
  43 class StringRef;
  44
  45 class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
  46 public:
  47   enum Generation {
  48     R600 = 0,
  49     R700,
  50     EVERGREEN,
  51     NORTHERN_ISLANDS,
  52     SOUTHERN_ISLANDS,
  53     SEA_ISLANDS,
  54     VOLCANIC_ISLANDS,
  55     GFX9,
  56   };
  57
  58   enum {
  59     ISAVersion0_0_0,
  60     ISAVersion7_0_0,
  61     ISAVersion7_0_1,
  62     ISAVersion7_0_2,
  63     ISAVersion8_0_0,
  64     ISAVersion8_0_1,
  65     ISAVersion8_0_2,
  66     ISAVersion8_0_3,
  67     ISAVersion8_0_4,
  68     ISAVersion8_1_0,
  69     ISAVersion9_0_0,
  70     ISAVersion9_0_1
  71   };
  72
  73   enum TrapHandlerAbi {
  74     TrapHandlerAbiNone = 0,
  75     TrapHandlerAbiHsa = 1
  76   };
  77
  78   enum TrapID {
  79     TrapIDHardwareReserved = 0,
  80     TrapIDHSADebugTrap = 1,
  81     TrapIDLLVMTrap = 2,
  82     TrapIDLLVMDebugTrap = 3,
  83     TrapIDDebugBreakpoint = 7,
  84     TrapIDDebugReserved8 = 8,
  85     TrapIDDebugReservedFE = 0xfe,
  86     TrapIDDebugReservedFF = 0xff
  87   };
  88
  89   enum TrapRegValues {
  90     LLVMTrapHandlerRegValue = 1
  91   };
  92
  93 protected:
  94   // Basic subtarget description.
  95   Triple TargetTriple;
  96   Generation Gen;
  97   unsigned IsaVersion;
  98   unsigned WavefrontSize;
  99   int LocalMemorySize;
 100   int LDSBankCount;
 101   unsigned MaxPrivateElementSize;
 102
 103   // Possibly statically set by tablegen, but may want to be overridden.
 104   bool FastFMAF32;
 105   bool HalfRate64Ops;
 106
 107   // Dynamially set bits that enable features.
 108   bool FP32Denormals;
 109   bool FP64FP16Denormals;
 110   bool FPExceptions;
 111   bool DX10Clamp;
 112   bool FlatForGlobal;
 113   bool UnalignedScratchAccess;
 114   bool UnalignedBufferAccess;
 115   bool HasApertureRegs;
 116   bool EnableXNACK;
 117   bool TrapHandler;
 118   bool DebuggerInsertNops;
 119   bool DebuggerReserveRegs;
 120   bool DebuggerEmitPrologue;
 121
 122   // Used as options.
 123   bool EnableVGPRSpilling;
 124   bool EnablePromoteAlloca;
 125   bool EnableLoadStoreOpt;
 126   bool EnableUnsafeDSOffsetFolding;
 127   bool EnableSIScheduler;
 128   bool DumpCode;
 129
 130   // Subtarget statically properties set by tablegen
 131   bool FP64;
 132   bool IsGCN;
 133   bool GCN1Encoding;
 134   bool GCN3Encoding;
 135   bool CIInsts;
 136   bool GFX9Insts;
 137   bool SGPRInitBug;
 138   bool HasSMemRealTime;
 139   bool Has16BitInsts;
 140   bool HasVOP3PInsts;
 141   bool HasMovrel;
 142   bool HasVGPRIndexMode;
 143   bool HasScalarStores;
 144   bool HasInv2PiInlineImm;
 145   bool HasSDWA;
 146   bool HasDPP;
 147   bool FlatAddressSpace;
 148   bool R600ALUInst;
 149   bool CaymanISA;
 150   bool CFALUBug;
 151   bool HasVertexCache;
 152   short TexVTXClauseSize;
 153   bool ScalarizeGlobal;
 154
 155   // Dummy feature to use for assembler in tablegen.
 156   bool FeatureDisable;
 157
 158   InstrItineraryData InstrItins;
 159   SelectionDAGTargetInfo TSInfo;
 160   AMDGPUAS AS;
 161
 162 public:
 163   AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 164                   const TargetMachine &TM);
 165   ~AMDGPUSubtarget() override;
 166
 167   AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
 168                                                    StringRef GPU, StringRef FS);
 169
 170   const AMDGPUInstrInfo *getInstrInfo() const override = 0;
 171   const AMDGPUFrameLowering *getFrameLowering() const override = 0;
 172   const AMDGPUTargetLowering *getTargetLowering() const override = 0;
 173   const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
 174
 175   const InstrItineraryData *getInstrItineraryData() const override {
 176     return &InstrItins;
 177   }
 178
 179   // Nothing implemented, just prevent crashes on use.
 180   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 181     return &TSInfo;
 182   }
 183
 184   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 185
 186   bool isAmdHsaOS() const {
 187     return TargetTriple.getOS() == Triple::AMDHSA;
 188   }
 189
 190   bool isMesa3DOS() const {
 191     return TargetTriple.getOS() == Triple::Mesa3D;
 192   }
 193
 194   bool isOpenCLEnv() const {
 195     return TargetTriple.getEnvironment() == Triple::OpenCL;
 196   }
 197
 198   Generation getGeneration() const {
 199     return Gen;
 200   }
 201
 202   unsigned getWavefrontSize() const {
 203     return WavefrontSize;
 204   }
 205
 206   int getLocalMemorySize() const {
 207     return LocalMemorySize;
 208   }
 209
 210   int getLDSBankCount() const {
 211     return LDSBankCount;
 212   }
 213
 214   unsigned getMaxPrivateElementSize() const {
 215     return MaxPrivateElementSize;
 216   }
 217
 218   AMDGPUAS getAMDGPUAS() const {
 219     return AS;
 220   }
 221
 222   bool has16BitInsts() const {
 223     return Has16BitInsts;
 224   }
 225
 226   bool hasVOP3PInsts() const {
 227     return HasVOP3PInsts;
 228   }
 229
 230   bool hasHWFP64() const {
 231     return FP64;
 232   }
 233
 234   bool hasFastFMAF32() const {
 235     return FastFMAF32;
 236   }
 237
 238   bool hasHalfRate64Ops() const {
 239     return HalfRate64Ops;
 240   }
 241
 242   bool hasAddr64() const {
 243     return (getGeneration() < VOLCANIC_ISLANDS);
 244   }
 245
 246   bool hasBFE() const {
 247     return (getGeneration() >= EVERGREEN);
 248   }
 249
 250   bool hasBFI() const {
 251     return (getGeneration() >= EVERGREEN);
 252   }
 253
 254   bool hasBFM() const {
 255     return hasBFE();
 256   }
 257
 258   bool hasBCNT(unsigned Size) const {
 259     if (Size == 32)
 260       return (getGeneration() >= EVERGREEN);
 261
 262     if (Size == 64)
 263       return (getGeneration() >= SOUTHERN_ISLANDS);
 264
 265     return false;
 266   }
 267
 268   bool hasMulU24() const {
 269     return (getGeneration() >= EVERGREEN);
 270   }
 271
 272   bool hasMulI24() const {
 273     return (getGeneration() >= SOUTHERN_ISLANDS ||
 274             hasCaymanISA());
 275   }
 276
 277   bool hasFFBL() const {
 278     return (getGeneration() >= EVERGREEN);
 279   }
 280
 281   bool hasFFBH() const {
 282     return (getGeneration() >= EVERGREEN);
 283   }
 284
 285   bool hasMed3_16() const {
 286     return getGeneration() >= GFX9;
 287   }
 288
 289   bool hasCARRY() const {
 290     return (getGeneration() >= EVERGREEN);
 291   }
 292
 293   bool hasBORROW() const {
 294     return (getGeneration() >= EVERGREEN);
 295   }
 296
 297   bool hasCaymanISA() const {
 298     return CaymanISA;
 299   }
 300
 301   TrapHandlerAbi getTrapHandlerAbi() const {
 302     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
 303   }
 304
 305   bool isPromoteAllocaEnabled() const {
 306     return EnablePromoteAlloca;
 307   }
 308
 309   bool unsafeDSOffsetFoldingEnabled() const {
 310     return EnableUnsafeDSOffsetFolding;
 311   }
 312
 313   bool dumpCode() const {
 314     return DumpCode;
 315   }
 316
 317   /// Return the amount of LDS that can be used that will not restrict the
 318   /// occupancy lower than WaveCount.
 319   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 320                                            const Function &) const;
 321
 322   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
 323   /// the given LDS memory size is the only constraint.
 324   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
 325
 326   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
 327     const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
 328     return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
 329   }
 330
 331   bool hasFP16Denormals() const {
 332     return FP64FP16Denormals;
 333   }
 334
 335   bool hasFP32Denormals() const {
 336     return FP32Denormals;
 337   }
 338
 339   bool hasFP64Denormals() const {
 340     return FP64FP16Denormals;
 341   }
 342
 343   bool hasFPExceptions() const {
 344     return FPExceptions;
 345   }
 346
 347   bool enableDX10Clamp() const {
 348     return DX10Clamp;
 349   }
 350
 351   bool enableIEEEBit(const MachineFunction &MF) const {
 352     return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
 353   }
 354
 355   bool useFlatForGlobal() const {
 356     return FlatForGlobal;
 357   }
 358
 359   bool hasUnalignedBufferAccess() const {
 360     return UnalignedBufferAccess;
 361   }
 362
 363   bool hasUnalignedScratchAccess() const {
 364     return UnalignedScratchAccess;
 365   }
 366
 367   bool hasApertureRegs() const {
 368    return HasApertureRegs;
 369   }
 370
 371   bool isTrapHandlerEnabled() const {
 372     return TrapHandler;
 373   }
 374
 375   bool isXNACKEnabled() const {
 376     return EnableXNACK;
 377   }
 378
 379   bool hasFlatAddressSpace() const {
 380     return FlatAddressSpace;
 381   }
 382
 383   bool isMesaKernel(const MachineFunction &MF) const {
 384     return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
 385   }
 386
 387   // Covers VS/PS/CS graphics shaders
 388   bool isMesaGfxShader(const MachineFunction &MF) const {
 389     return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
 390   }
 391
 392   bool isAmdCodeObjectV2(const MachineFunction &MF) const {
 393     return isAmdHsaOS() || isMesaKernel(MF);
 394   }
 395
 396   bool hasFminFmaxLegacy() const {
 397     return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
 398   }
 399
 400   /// \brief Returns the offset in bytes from the start of the input buffer
 401   ///        of the first explicit kernel argument.
 402   unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
 403     return isAmdCodeObjectV2(MF) ? 0 : 36;
 404   }
 405
 406   unsigned getAlignmentForImplicitArgPtr() const {
 407     return isAmdHsaOS() ? 8 : 4;
 408   }
 409
 410   unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
 411     if (isMesaKernel(MF))
 412       return 16;
 413     if (isAmdHsaOS() && isOpenCLEnv())
 414       return 32;
 415     return 0;
 416   }
 417
 418   // Scratch is allocated in 256 dword per wave blocks for the entire
 419   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
 420   // is 4-byte aligned.
 421   unsigned getStackAlignment() const {
 422     return 4;
 423   }
 424
 425   bool enableMachineScheduler() const override {
 426     return true;
 427   }
 428
 429   bool enableSubRegLiveness() const override {
 430     return true;
 431   }
 432
 433   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
 434   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
 435
 436   /// \returns Number of execution units per compute unit supported by the
 437   /// subtarget.
 438   unsigned getEUsPerCU() const {
 439     return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
 440   }
 441
 442   /// \returns Maximum number of work groups per compute unit supported by the
 443   /// subtarget and limited by given \p FlatWorkGroupSize.
 444   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
 445     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
 446                                                   FlatWorkGroupSize);
 447   }
 448
 449   /// \returns Maximum number of waves per compute unit supported by the
 450   /// subtarget without any kind of limitation.
 451   unsigned getMaxWavesPerCU() const {
 452     return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
 453   }
 454
 455   /// \returns Maximum number of waves per compute unit supported by the
 456   /// subtarget and limited by given \p FlatWorkGroupSize.
 457   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
 458     return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
 459                                              FlatWorkGroupSize);
 460   }
 461
 462   /// \returns Minimum number of waves per execution unit supported by the
 463   /// subtarget.
 464   unsigned getMinWavesPerEU() const {
 465     return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
 466   }
 467
 468   /// \returns Maximum number of waves per execution unit supported by the
 469   /// subtarget without any kind of limitation.
 470   unsigned getMaxWavesPerEU() const {
 471     return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
 472   }
 473
 474   /// \returns Maximum number of waves per execution unit supported by the
 475   /// subtarget and limited by given \p FlatWorkGroupSize.
 476   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
 477     return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
 478                                              FlatWorkGroupSize);
 479   }
 480
 481   /// \returns Minimum flat work group size supported by the subtarget.
 482   unsigned getMinFlatWorkGroupSize() const {
 483     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
 484   }
 485
 486   /// \returns Maximum flat work group size supported by the subtarget.
 487   unsigned getMaxFlatWorkGroupSize() const {
 488     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
 489   }
 490
 491   /// \returns Number of waves per work group supported by the subtarget and
 492   /// limited by given \p FlatWorkGroupSize.
 493   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
 494     return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
 495                                                  FlatWorkGroupSize);
 496   }
 497
 498   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
 499   /// for function \p F, or minimum/maximum flat work group sizes explicitly
 500   /// requested using "amdgpu-flat-work-group-size" attribute attached to
 501   /// function \p F.
 502   ///
 503   /// \returns Subtarget's default values if explicitly requested values cannot
 504   /// be converted to integer, or violate subtarget's specifications.
 505   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
 506
 507   /// \returns Subtarget's default pair of minimum/maximum number of waves per
 508   /// execution unit for function \p F, or minimum/maximum number of waves per
 509   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
 510   /// attached to function \p F.
 511   ///
 512   /// \returns Subtarget's default values if explicitly requested values cannot
 513   /// be converted to integer, violate subtarget's specifications, or are not
 514   /// compatible with minimum/maximum number of waves limited by flat work group
 515   /// size, register usage, and/or lds usage.
 516   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
 517
 518   /// Creates value range metadata on an workitemid.* inrinsic call or load.
 519   bool makeLIDRangeMetadata(Instruction *I) const;
 520 };
 521
 522 class R600Subtarget final : public AMDGPUSubtarget {
 523 private:
 524   R600InstrInfo InstrInfo;
 525   R600FrameLowering FrameLowering;
 526   R600TargetLowering TLInfo;
 527
 528 public:
 529   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
 530                 const TargetMachine &TM);
 531
 532   const R600InstrInfo *getInstrInfo() const override {
 533     return &InstrInfo;
 534   }
 535
 536   const R600FrameLowering *getFrameLowering() const override {
 537     return &FrameLowering;
 538   }
 539
 540   const R600TargetLowering *getTargetLowering() const override {
 541     return &TLInfo;
 542   }
 543
 544   const R600RegisterInfo *getRegisterInfo() const override {
 545     return &InstrInfo.getRegisterInfo();
 546   }
 547
 548   bool hasCFAluBug() const {
 549     return CFALUBug;
 550   }
 551
 552   bool hasVertexCache() const {
 553     return HasVertexCache;
 554   }
 555
 556   short getTexVTXClauseSize() const {
 557     return TexVTXClauseSize;
 558   }
 559 };
 560
 561 class SISubtarget final : public AMDGPUSubtarget {
 562 private:
 563   SIInstrInfo InstrInfo;
 564   SIFrameLowering FrameLowering;
 565   SITargetLowering TLInfo;
 566   std::unique_ptr<GISelAccessor> GISel;
 567
 568 public:
 569   SISubtarget(const Triple &TT, StringRef CPU, StringRef FS,
 570               const TargetMachine &TM);
 571
 572   const SIInstrInfo *getInstrInfo() const override {
 573     return &InstrInfo;
 574   }
 575
 576   const SIFrameLowering *getFrameLowering() const override {
 577     return &FrameLowering;
 578   }
 579
 580   const SITargetLowering *getTargetLowering() const override {
 581     return &TLInfo;
 582   }
 583
 584   const CallLowering *getCallLowering() const override {
 585     assert(GISel && "Access to GlobalISel APIs not set");
 586     return GISel->getCallLowering();
 587   }
 588
 589   const InstructionSelector *getInstructionSelector() const override {
 590     assert(GISel && "Access to GlobalISel APIs not set");
 591     return GISel->getInstructionSelector();
 592   }
 593
 594   const LegalizerInfo *getLegalizerInfo() const override {
 595     assert(GISel && "Access to GlobalISel APIs not set");
 596     return GISel->getLegalizerInfo();
 597   }
 598
 599   const RegisterBankInfo *getRegBankInfo() const override {
 600     assert(GISel && "Access to GlobalISel APIs not set");
 601     return GISel->getRegBankInfo();
 602   }
 603
 604   const SIRegisterInfo *getRegisterInfo() const override {
 605     return &InstrInfo.getRegisterInfo();
 606   }
 607
 608   void setGISelAccessor(GISelAccessor &GISel) {
 609     this->GISel.reset(&GISel);
 610   }
 611
 612   // XXX - Why is this here if it isn't in the default pass set?
 613   bool enableEarlyIfConversion() const override {
 614     return true;
 615   }
 616
 617   void overrideSchedPolicy(MachineSchedPolicy &Policy,
 618                            unsigned NumRegionInstrs) const override;
 619
 620   bool isVGPRSpillingEnabled(const Function& F) const;
 621
 622   unsigned getMaxNumUserSGPRs() const {
 623     return 16;
 624   }
 625
 626   bool hasSMemRealTime() const {
 627     return HasSMemRealTime;
 628   }
 629
 630   bool hasMovrel() const {
 631     return HasMovrel;
 632   }
 633
 634   bool hasVGPRIndexMode() const {
 635     return HasVGPRIndexMode;
 636   }
 637
 638   bool useVGPRIndexMode(bool UserEnable) const {
 639     return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
 640   }
 641
 642   bool hasScalarCompareEq64() const {
 643     return getGeneration() >= VOLCANIC_ISLANDS;
 644   }
 645
 646   bool hasScalarStores() const {
 647     return HasScalarStores;
 648   }
 649
 650   bool hasInv2PiInlineImm() const {
 651     return HasInv2PiInlineImm;
 652   }
 653
 654   bool hasSDWA() const {
 655     return HasSDWA;
 656   }
 657
 658   bool hasDPP() const {
 659     return HasDPP;
 660   }
 661
 662   bool enableSIScheduler() const {
 663     return EnableSIScheduler;
 664   }
 665
 666   bool debuggerSupported() const {
 667     return debuggerInsertNops() && debuggerReserveRegs() &&
 668       debuggerEmitPrologue();
 669   }
 670
 671   bool debuggerInsertNops() const {
 672     return DebuggerInsertNops;
 673   }
 674
 675   bool debuggerReserveRegs() const {
 676     return DebuggerReserveRegs;
 677   }
 678
 679   bool debuggerEmitPrologue() const {
 680     return DebuggerEmitPrologue;
 681   }
 682
 683   bool loadStoreOptEnabled() const {
 684     return EnableLoadStoreOpt;
 685   }
 686
 687   bool hasSGPRInitBug() const {
 688     return SGPRInitBug;
 689   }
 690
 691   bool has12DWordStoreHazard() const {
 692     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
 693   }
 694
 695   bool hasSMovFedHazard() const {
 696     return getGeneration() >= AMDGPUSubtarget::GFX9;
 697   }
 698
 699   bool hasReadM0Hazard() const {
 700     return getGeneration() >= AMDGPUSubtarget::GFX9;
 701   }
 702
 703   unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
 704
 705   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
 706   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 707
 708   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
 709   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
 710
 711   /// \returns True if waitcnt instruction is needed before barrier instruction,
 712   /// false otherwise.
 713   bool needWaitcntBeforeBarrier() const {
 714     return getGeneration() < GFX9;
 715   }
 716
 717   /// \returns true if the flat_scratch register should be initialized with the
 718   /// pointer to the wave's scratch memory rather than a size and offset.
 719   bool flatScratchIsPointer() const {
 720     return getGeneration() >= GFX9;
 721   }
 722
 723   /// \returns SGPR allocation granularity supported by the subtarget.
 724   unsigned getSGPRAllocGranule() const {
 725     return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
 726   }
 727
 728   /// \returns SGPR encoding granularity supported by the subtarget.
 729   unsigned getSGPREncodingGranule() const {
 730     return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
 731   }
 732
 733   /// \returns Total number of SGPRs supported by the subtarget.
 734   unsigned getTotalNumSGPRs() const {
 735     return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
 736   }
 737
 738   /// \returns Addressable number of SGPRs supported by the subtarget.
 739   unsigned getAddressableNumSGPRs() const {
 740     return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
 741   }
 742
 743   /// \returns Minimum number of SGPRs that meets the given number of waves per
 744   /// execution unit requirement supported by the subtarget.
 745   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
 746     return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
 747   }
 748
 749   /// \returns Maximum number of SGPRs that meets the given number of waves per
 750   /// execution unit requirement supported by the subtarget.
 751   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
 752     return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
 753                                            Addressable);
 754   }
 755
 756   /// \returns Reserved number of SGPRs for given function \p MF.
 757   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
 758
 759   /// \returns Maximum number of SGPRs that meets number of waves per execution
 760   /// unit requirement for function \p MF, or number of SGPRs explicitly
 761   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
 762   ///
 763   /// \returns Value that meets number of waves per execution unit requirement
 764   /// if explicitly requested value cannot be converted to integer, violates
 765   /// subtarget's specifications, or does not meet number of waves per execution
 766   /// unit requirement.
 767   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
 768
 769   /// \returns VGPR allocation granularity supported by the subtarget.
 770   unsigned getVGPRAllocGranule() const {
 771     return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
 772   }
 773
 774   /// \returns VGPR encoding granularity supported by the subtarget.
 775   unsigned getVGPREncodingGranule() const {
 776     return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
 777   }
 778
 779   /// \returns Total number of VGPRs supported by the subtarget.
 780   unsigned getTotalNumVGPRs() const {
 781     return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
 782   }
 783
 784   /// \returns Addressable number of VGPRs supported by the subtarget.
 785   unsigned getAddressableNumVGPRs() const {
 786     return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
 787   }
 788
 789   /// \returns Minimum number of VGPRs that meets given number of waves per
 790   /// execution unit requirement supported by the subtarget.
 791   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
 792     return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
 793   }
 794
 795   /// \returns Maximum number of VGPRs that meets given number of waves per
 796   /// execution unit requirement supported by the subtarget.
 797   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
 798     return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
 799   }
 800
 801   /// \returns Reserved number of VGPRs for given function \p MF.
 802   unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
 803     return debuggerReserveRegs() ? 4 : 0;
 804   }
 805
 806   /// \returns Maximum number of VGPRs that meets number of waves per execution
 807   /// unit requirement for function \p MF, or number of VGPRs explicitly
 808   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
 809   ///
 810   /// \returns Value that meets number of waves per execution unit requirement
 811   /// if explicitly requested value cannot be converted to integer, violates
 812   /// subtarget's specifications, or does not meet number of waves per execution
 813   /// unit requirement.
 814   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 815 };
 816
 817 } // end namespace llvm
 818
 819 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H