contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

   1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// AMDGPU specific subclass of TargetSubtarget.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
  17
  18 #include "AMDGPU.h"
  19 #include "AMDGPUCallLowering.h"
  20 #include "R600FrameLowering.h"
  21 #include "R600ISelLowering.h"
  22 #include "R600InstrInfo.h"
  23 #include "SIFrameLowering.h"
  24 #include "SIISelLowering.h"
  25 #include "SIInstrInfo.h"
  26 #include "Utils/AMDGPUBaseInfo.h"
  27 #include "llvm/ADT/Triple.h"
  28 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  29 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  30 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
  31 #include "llvm/CodeGen/MachineFunction.h"
  32 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  33 #include "llvm/MC/MCInstrItineraries.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include <cassert>
  36 #include <cstdint>
  37 #include <memory>
  38 #include <utility>
  39
  40 #define GET_SUBTARGETINFO_HEADER
  41 #include "AMDGPUGenSubtargetInfo.inc"
  42 #define GET_SUBTARGETINFO_HEADER
  43 #include "R600GenSubtargetInfo.inc"
  44
  45 namespace llvm {
  46
  47 class StringRef;
  48
  49 class AMDGPUSubtarget {
       // Subtarget state and queries shared by GCNSubtarget and R600Subtarget,
       // which both derive from this class later in this header.
  50 public:
       /// Hardware generations in ascending order. Accessors in the derived
       /// classes compare these values with relational operators, so the numeric
       /// order is significant.
  51   enum Generation {
  52     R600 = 0,
  53     R700 = 1,
  54     EVERGREEN = 2,
  55     NORTHERN_ISLANDS = 3,
  56     SOUTHERN_ISLANDS = 4,
  57     SEA_ISLANDS = 5,
  58     VOLCANIC_ISLANDS = 6,
  59     GFX9 = 7
  60   };
  61
  62 private:
       // Triple whose OS field is inspected by the is*OS() helpers below.
  63   Triple TargetTriple;
  64
  65 protected:
       // Feature bits forwarded to the AMDGPU::IsaInfo queries below.
  66   const FeatureBitset &SubtargetFeatureBits;
       // Flags and sizes returned unchanged by the accessors below.
  67   bool Has16BitInsts;
  68   bool HasMadMixInsts;
  69   bool FP32Denormals;
  70   bool FPExceptions;
  71   bool HasSDWA;
  72   bool HasVOP3PInsts;
  73   bool HasMulI24;
  74   bool HasMulU24;
  75   bool HasFminFmaxLegacy;
  76   bool EnablePromoteAlloca;
  77   int LocalMemorySize;
  78   unsigned WavefrontSize;
  79
  80 public:
  81   AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
  82
       /// Look up the AMDGPUSubtarget for a machine function, or for function
       /// \p F under target machine \p TM. Defined out of line.
  83   static const AMDGPUSubtarget &get(const MachineFunction &MF);
  84   static const AMDGPUSubtarget &get(const TargetMachine &TM,
  85                                     const Function &F);
  86
  87   /// \returns Default flat work group size range for calling convention \p CC.
  88   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
  89
  90   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
  91   /// for function \p F, or minimum/maximum flat work group sizes explicitly
  92   /// requested using "amdgpu-flat-work-group-size" attribute attached to
  93   /// function \p F.
  94   ///
  95   /// \returns Subtarget's default values if explicitly requested values cannot
  96   /// be converted to integer, or violate subtarget's specifications.
  97   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
  98
  99   /// \returns Subtarget's default pair of minimum/maximum number of waves per
 100   /// execution unit for function \p F, or minimum/maximum number of waves per
 101   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
 102   /// attached to function \p F.
 103   ///
 104   /// \returns Subtarget's default values if explicitly requested values cannot
 105   /// be converted to integer, violate subtarget's specifications, or are not
 106   /// compatible with minimum/maximum number of waves limited by flat work group
 107   /// size, register usage, and/or lds usage.
 108   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
 109
 110   /// Return the amount of LDS that can be used that will not restrict the
 111   /// occupancy lower than WaveCount.
 112   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 113                                            const Function &) const;
 114
 115   /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount
 116   /// if the given LDS memory size is the only constraint.
 117   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
 118
       /// Overload taking a machine function instead of an explicit byte count.
       /// NOTE(review): presumably derives the LDS size from \p MF - confirm in
       /// the out-of-line definition.
 119   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
 120
       /// \returns true if the target triple's OS is AMDHSA.
 121   bool isAmdHsaOS() const {
 122     return TargetTriple.getOS() == Triple::AMDHSA;
 123   }
 124
       /// \returns true if the target triple's OS is AMDPAL.
 125   bool isAmdPalOS() const {
 126     return TargetTriple.getOS() == Triple::AMDPAL;
 127   }
 128
       /// \returns true if the target triple's OS is Mesa3D.
 129   bool isMesa3DOS() const {
 130     return TargetTriple.getOS() == Triple::Mesa3D;
 131   }
 132
       /// \returns true if the OS is Mesa3D and the calling convention of \p F is
       /// not a shader calling convention.
 133   bool isMesaKernel(const Function &F) const {
 134     return isMesa3DOS() && !AMDGPU::isShader(F.getCallingConv());
 135   }
 136
       /// \returns true for any function on AMDHSA, and for Mesa kernels (see
       /// isMesaKernel()).
 137   bool isAmdCodeObjectV2(const Function &F) const {
 138     return isAmdHsaOS() || isMesaKernel(F);
 139   }
 140
 141   bool has16BitInsts() const {
 142     return Has16BitInsts;
 143   }
 144
 145   bool hasMadMixInsts() const {
 146     return HasMadMixInsts;
 147   }
 148
 149   bool hasFP32Denormals() const {
 150     return FP32Denormals;
 151   }
 152
 153   bool hasFPExceptions() const {
 154     return FPExceptions;
 155   }
 156
 157   bool hasSDWA() const {
 158     return HasSDWA;
 159   }
 160
 161   bool hasVOP3PInsts() const {
 162     return HasVOP3PInsts;
 163   }
 164
 165   bool hasMulI24() const {
 166     return HasMulI24;
 167   }
 168
 169   bool hasMulU24() const {
 170     return HasMulU24;
 171   }
 172
 173   bool hasFminFmaxLegacy() const {
 174     return HasFminFmaxLegacy;
 175   }
 176
 177   bool isPromoteAllocaEnabled() const {
 178     return EnablePromoteAlloca;
 179   }
 180
 181   unsigned getWavefrontSize() const {
 182     return WavefrontSize;
 183   }
 184
 185   int getLocalMemorySize() const {
 186     return LocalMemorySize;
 187   }
 188
       /// \returns 8 when targeting AMDHSA and 4 otherwise.
 189   unsigned getAlignmentForImplicitArgPtr() const {
 190     return isAmdHsaOS() ? 8 : 4;
 191   }
 192
 193   /// \returns The offset in bytes from the start of the input buffer of the
 194   /// first explicit kernel argument: 0 for code object v2, 36 otherwise.
 195   unsigned getExplicitKernelArgOffset(const Function &F) const {
 196     return isAmdCodeObjectV2(F) ? 0 : 36;
 197   }
 198
 199   /// \returns Maximum number of work groups per compute unit supported by the
 200   /// subtarget and limited by given \p FlatWorkGroupSize.
 201   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
 202     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
 203                                                   FlatWorkGroupSize);
 204   }
 205
 206   /// \returns Minimum flat work group size supported by the subtarget.
 207   unsigned getMinFlatWorkGroupSize() const {
 208     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
 209   }
 210
 211   /// \returns Maximum flat work group size supported by the subtarget.
 212   unsigned getMaxFlatWorkGroupSize() const {
 213     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
 214   }
 215
 216   /// \returns Maximum number of waves per execution unit supported by the
 217   /// subtarget and limited by given \p FlatWorkGroupSize.
 218   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
 219     return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
 220                                              FlatWorkGroupSize);
 221   }
 222
 223   /// \returns Minimum number of waves per execution unit supported by the
 224   /// subtarget.
 225   unsigned getMinWavesPerEU() const {
 226     return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
 227   }
 228
       /// \returns The constant 10, without consulting the feature bits.
       /// GCNSubtarget declares its own no-argument getMaxWavesPerEU() that
       /// queries AMDGPU::IsaInfo instead.
 229   unsigned getMaxWavesPerEU() const { return 10; }
 230
 231   /// Creates value range metadata on a workitemid.* intrinsic call or load.
 232   bool makeLIDRangeMetadata(Instruction *I) const;
 233
 234   /// \returns Number of bytes of arguments that are passed to a shader or
 235   /// kernel in addition to the explicit ones declared for the function.
       /// Mesa kernels report a fixed 16; every other function reads the integer
       /// attribute "amdgpu-implicitarg-num-bytes" (the trailing 0 is presumably
       /// the value used when the attribute is absent).
 236   unsigned getImplicitArgNumBytes(const Function &F) const {
 237     if (isMesaKernel(F))
 238       return 16;
 239     return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 0);
 240   }
       /// Size queries for the kernel arguments of \p F, defined out of line.
       /// \p MaxAlign is a non-const reference, so the callee can write to it.
       /// NOTE(review): presumably it receives the largest argument alignment
       /// seen - confirm against the definitions.
 241   uint64_t getExplicitKernArgSize(const Function &F,
 242                                   unsigned &MaxAlign) const;
 243   unsigned getKernArgSegmentSize(const Function &F,
 244                                  unsigned &MaxAlign) const;
 245
       // Virtual so that derived subtargets can be destroyed through this type.
 246   virtual ~AMDGPUSubtarget() {}
 247 };
 248
 249 class GCNSubtarget : public AMDGPUGenSubtargetInfo,
 250                      public AMDGPUSubtarget {
       // Subtarget built on AMDGPUGenSubtargetInfo plus the shared
       // AMDGPUSubtarget state. It owns the SI instruction info, target
       // lowering and frame lowering objects returned by the getters below.
 251 public:
 252   enum {
 253     ISAVersion0_0_0,
 254     ISAVersion6_0_0,
 255     ISAVersion6_0_1,
 256     ISAVersion7_0_0,
 257     ISAVersion7_0_1,
 258     ISAVersion7_0_2,
 259     ISAVersion7_0_3,
 260     ISAVersion7_0_4,
 261     ISAVersion8_0_1,
 262     ISAVersion8_0_2,
 263     ISAVersion8_0_3,
 264     ISAVersion8_1_0,
 265     ISAVersion9_0_0,
 266     ISAVersion9_0_2,
 267     ISAVersion9_0_4,
 268     ISAVersion9_0_6,
 269   };
 270
 271   enum TrapHandlerAbi {
 272     TrapHandlerAbiNone = 0,
 273     TrapHandlerAbiHsa = 1
 274   };
 275
 276   enum TrapID {
 277     TrapIDHardwareReserved = 0,
 278     TrapIDHSADebugTrap = 1,
 279     TrapIDLLVMTrap = 2,
 280     TrapIDLLVMDebugTrap = 3,
 281     TrapIDDebugBreakpoint = 7,
 282     TrapIDDebugReserved8 = 8,
 283     TrapIDDebugReservedFE = 0xfe,
 284     TrapIDDebugReservedFF = 0xff
 285   };
 286
 287   enum TrapRegValues {
 288     LLVMTrapHandlerRegValue = 1
 289   };
 290
 291 private:
 292   /// GlobalISel related APIs.
 293   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
 294   std::unique_ptr<InstructionSelector> InstSelector;
 295   std::unique_ptr<LegalizerInfo> Legalizer;
 296   std::unique_ptr<RegisterBankInfo> RegBankInfo;
 297
 298 protected:
 299   // Basic subtarget description.
       // NOTE(review): AMDGPUSubtarget already declares a private member named
       // TargetTriple; this is a second, separate field with the same name.
 300   Triple TargetTriple;
       // Holds a Generation value; getGeneration() casts it back to the enum.
 301   unsigned Gen;
 302   unsigned IsaVersion;
 303   int LDSBankCount;
 304   unsigned MaxPrivateElementSize;
 305
 306   // Possibly statically set by tablegen, but may want to be overridden.
 307   bool FastFMAF32;
 308   bool HalfRate64Ops;
 309
 310   // Dynamically set bits that enable features.
 311   bool FP64FP16Denormals;
 312   bool DX10Clamp;
 313   bool FlatForGlobal;
 314   bool AutoWaitcntBeforeBarrier;
 315   bool CodeObjectV3;
 316   bool UnalignedScratchAccess;
 317   bool UnalignedBufferAccess;
 318   bool HasApertureRegs;
 319   bool EnableXNACK;
 320   bool TrapHandler;
 321   bool DebuggerInsertNops;
 322   bool DebuggerEmitPrologue;
 323
 324   // Used as options.
 325   bool EnableHugePrivateBuffer;
 326   bool EnableVGPRSpilling;
 327   bool EnableLoadStoreOpt;
 328   bool EnableUnsafeDSOffsetFolding;
 329   bool EnableSIScheduler;
 330   bool EnableDS128;
 331   bool DumpCode;
 332
 333   // Static subtarget properties set by tablegen.
 334   bool FP64;
 335   bool FMA;
 336   bool MIMG_R128;
 337   bool IsGCN;
 338   bool GCN3Encoding;
 339   bool CIInsts;
 340   bool GFX9Insts;
 341   bool SGPRInitBug;
 342   bool HasSMemRealTime;
 343   bool HasIntClamp;
 344   bool HasFmaMixInsts;
 345   bool HasMovrel;
 346   bool HasVGPRIndexMode;
 347   bool HasScalarStores;
 348   bool HasScalarAtomics;
 349   bool HasInv2PiInlineImm;
 350   bool HasSDWAOmod;
 351   bool HasSDWAScalar;
 352   bool HasSDWASdst;
 353   bool HasSDWAMac;
 354   bool HasSDWAOutModsVOPC;
 355   bool HasDPP;
 356   bool HasDLInsts;
 357   bool D16PreservesUnusedBits;
 358   bool FlatAddressSpace;
 359   bool FlatInstOffsets;
 360   bool FlatGlobalInsts;
 361   bool FlatScratchInsts;
 362   bool AddNoCarryInsts;
 363   bool HasUnpackedD16VMem;
       // NOTE(review): the next five fields are not read by any inline accessor
       // of this class in this header; R600Subtarget declares fields with the
       // same names. Confirm whether they are still needed here.
 364   bool R600ALUInst;
 365   bool CaymanISA;
 366   bool CFALUBug;
 367   bool HasVertexCache;
 368   short TexVTXClauseSize;
 369   bool ScalarizeGlobal;
 370
 371   // Dummy feature to use for assembler in tablegen.
 372   bool FeatureDisable;
 373
 374   SelectionDAGTargetInfo TSInfo;
 375   AMDGPUAS AS;
 376 private:
       // Declared after every feature field above; C++ initializes members in
       // declaration order. NOTE(review): presumably these objects read the
       // subtarget during construction - confirm in the constructor.
 377   SIInstrInfo InstrInfo;
 378   SITargetLowering TLInfo;
 379   SIFrameLowering FrameLowering;
 380
 381 public:
 382   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
 383                const GCNTargetMachine &TM);
 384   ~GCNSubtarget() override;
 385
 386   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
 387                                                    StringRef GPU, StringRef FS);
 388
 389   const SIInstrInfo *getInstrInfo() const override {
 390     return &InstrInfo;
 391   }
 392
 393   const SIFrameLowering *getFrameLowering() const override {
 394     return &FrameLowering;
 395   }
 396
 397   const SITargetLowering *getTargetLowering() const override {
 398     return &TLInfo;
 399   }
 400
 401   const SIRegisterInfo *getRegisterInfo() const override {
 402     return &InstrInfo.getRegisterInfo();
 403   }
 404
       // The four GlobalISel getters below return the raw pointer held by the
       // matching unique_ptr member.
 405   const CallLowering *getCallLowering() const override {
 406     return CallLoweringInfo.get();
 407   }
 408
 409   const InstructionSelector *getInstructionSelector() const override {
 410     return InstSelector.get();
 411   }
 412
 413   const LegalizerInfo *getLegalizerInfo() const override {
 414     return Legalizer.get();
 415   }
 416
 417   const RegisterBankInfo *getRegBankInfo() const override {
 418     return RegBankInfo.get();
 419   }
 420
 421   // Nothing implemented, just prevent crashes on use.
 422   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 423     return &TSInfo;
 424   }
 425
 426   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
 427
       /// \returns The stored Gen value converted to the Generation enum.
 428   Generation getGeneration() const {
 429     return (Generation)Gen;
 430   }
 431
 432   unsigned getWavefrontSizeLog2() const {
 433     return Log2_32(WavefrontSize);
 434   }
 435
 436   int getLDSBankCount() const {
 437     return LDSBankCount;
 438   }
 439
 440   unsigned getMaxPrivateElementSize() const {
 441     return MaxPrivateElementSize;
 442   }
 443
 444   AMDGPUAS getAMDGPUAS() const {
 445     return AS;
 446   }
 447
 448   bool hasIntClamp() const {
 449     return HasIntClamp;
 450   }
 451
 452   bool hasFP64() const {
 453     return FP64;
 454   }
 455
 456   bool hasMIMG_R128() const {
 457     return MIMG_R128;
 458   }
 459
       // Returns the same field as hasFP64().
 460   bool hasHWFP64() const {
 461     return FP64;
 462   }
 463
 464   bool hasFastFMAF32() const {
 465     return FastFMAF32;
 466   }
 467
 468   bool hasHalfRate64Ops() const {
 469     return HalfRate64Ops;
 470   }
 471
       /// \returns true for generations before VOLCANIC_ISLANDS.
 472   bool hasAddr64() const {
 473     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
 474   }
 475
       // The bit-manipulation queries below are unconditionally true for this
       // subtarget; R600Subtarget gates the same-named queries on generation.
 476   bool hasBFE() const {
 477     return true;
 478   }
 479
 480   bool hasBFI() const {
 481     return true;
 482   }
 483
 484   bool hasBFM() const {
 485     return hasBFE();
 486   }
 487
       // \p Size is ignored here.
 488   bool hasBCNT(unsigned Size) const {
 489     return true;
 490   }
 491
 492   bool hasFFBL() const {
 493     return true;
 494   }
 495
 496   bool hasFFBH() const {
 497     return true;
 498   }
 499
 500   bool hasMed3_16() const {
 501     return getGeneration() >= AMDGPUSubtarget::GFX9;
 502   }
 503
 504   bool hasMin3Max3_16() const {
 505     return getGeneration() >= AMDGPUSubtarget::GFX9;
 506   }
 507
 508   bool hasFmaMixInsts() const {
 509     return HasFmaMixInsts;
 510   }
 511
 512   bool hasCARRY() const {
 513     return true;
 514   }
 515
 516   bool hasFMA() const {
 517     return FMA;
 518   }
 519
       /// \returns TrapHandlerAbiHsa on AMDHSA, TrapHandlerAbiNone otherwise.
 520   TrapHandlerAbi getTrapHandlerAbi() const {
 521     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
 522   }
 523
 524   bool enableHugePrivateBuffer() const {
 525     return EnableHugePrivateBuffer;
 526   }
 527
 528   bool unsafeDSOffsetFoldingEnabled() const {
 529     return EnableUnsafeDSOffsetFolding;
 530   }
 531
 532   bool dumpCode() const {
 533     return DumpCode;
 534   }
 535
 536   /// Return the amount of LDS that can be used that will not restrict the
 537   /// occupancy lower than WaveCount.
       // NOTE(review): same name and signature as the declaration in
       // AMDGPUSubtarget, so this one hides the base-class member.
 538   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
 539                                            const Function &) const;
 540
       // FP16 and FP64 denormal support are reported from one shared flag.
 541   bool hasFP16Denormals() const {
 542     return FP64FP16Denormals;
 543   }
 544
 545   bool hasFP64Denormals() const {
 546     return FP64FP16Denormals;
 547   }
 548
 549   bool supportsMinMaxDenormModes() const {
 550     return getGeneration() >= AMDGPUSubtarget::GFX9;
 551   }
 552
 553   bool enableDX10Clamp() const {
 554     return DX10Clamp;
 555   }
 556
       /// \returns The result of AMDGPU::isCompute() on the calling convention of
       /// the function behind \p MF.
 557   bool enableIEEEBit(const MachineFunction &MF) const {
 558     return AMDGPU::isCompute(MF.getFunction().getCallingConv());
 559   }
 560
 561   bool useFlatForGlobal() const {
 562     return FlatForGlobal;
 563   }
 564
 565   /// \returns If target supports ds_read/write_b128 and user enables generation
 566   /// of ds_read/write_b128.
 567   bool useDS128() const {
 568     return CIInsts && EnableDS128;
 569   }
 570
 571   /// \returns If MUBUF instructions always perform range checking, even for
 572   /// buffer resources used for private memory access.
 573   bool privateMemoryResourceIsRangeChecked() const {
 574     return getGeneration() < AMDGPUSubtarget::GFX9;
 575   }
 576
 577   bool hasAutoWaitcntBeforeBarrier() const {
 578     return AutoWaitcntBeforeBarrier;
 579   }
 580
 581   bool hasCodeObjectV3() const {
 582     return CodeObjectV3;
 583   }
 584
 585   bool hasUnalignedBufferAccess() const {
 586     return UnalignedBufferAccess;
 587   }
 588
 589   bool hasUnalignedScratchAccess() const {
 590     return UnalignedScratchAccess;
 591   }
 592
 593   bool hasApertureRegs() const {
 594     return HasApertureRegs;
 595   }
 596
 597   bool isTrapHandlerEnabled() const {
 598     return TrapHandler;
 599   }
 600
 601   bool isXNACKEnabled() const {
 602     return EnableXNACK;
 603   }
 604
 605   bool hasFlatAddressSpace() const {
 606     return FlatAddressSpace;
 607   }
 608
 609   bool hasFlatInstOffsets() const {
 610     return FlatInstOffsets;
 611   }
 612
 613   bool hasFlatGlobalInsts() const {
 614     return FlatGlobalInsts;
 615   }
 616
 617   bool hasFlatScratchInsts() const {
 618     return FlatScratchInsts;
 619   }
 620
       // GFX9 is the largest Generation enumerator in this header, so this is
       // false for every generation currently listed.
 621   bool hasFlatLgkmVMemCountInOrder() const {
 622     return getGeneration() > GFX9;
 623   }
 624
 625   bool hasD16LoadStore() const {
 626     return getGeneration() >= GFX9;
 627   }
 628
 629   /// Return if most LDS instructions have an m0 use that requires m0 to be
 630   /// initialized.
 631   bool ldsRequiresM0Init() const {
 632     return getGeneration() < GFX9;
 633   }
 634
 635   bool hasAddNoCarry() const {
 636     return AddNoCarryInsts;
 637   }
 638
 639   bool hasUnpackedD16VMem() const {
 640     return HasUnpackedD16VMem;
 641   }
 642
 643   // Covers VS/PS/CS graphics shaders
 644   bool isMesaGfxShader(const Function &F) const {
 645     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
 646   }
 647
 648   bool hasMad64_32() const {
 649     return getGeneration() >= SEA_ISLANDS;
 650   }
 651
 652   bool hasSDWAOmod() const {
 653     return HasSDWAOmod;
 654   }
 655
 656   bool hasSDWAScalar() const {
 657     return HasSDWAScalar;
 658   }
 659
 660   bool hasSDWASdst() const {
 661     return HasSDWASdst;
 662   }
 663
 664   bool hasSDWAMac() const {
 665     return HasSDWAMac;
 666   }
 667
 668   bool hasSDWAOutModsVOPC() const {
 669     return HasSDWAOutModsVOPC;
 670   }
 671
 672   bool vmemWriteNeedsExpWaitcnt() const {
 673     return getGeneration() < SEA_ISLANDS;
 674   }
 675
 676   bool hasDLInsts() const {
 677     return HasDLInsts;
 678   }
 679
 680   bool d16PreservesUnusedBits() const {
 681     return D16PreservesUnusedBits;
 682   }
 683
 684   // Scratch is allocated in 256 dword per wave blocks for the entire
 685   // wavefront. When viewed from the perspective of an arbitrary workitem, this
 686   // is 4-byte aligned.
 687   //
 688   // Only 4-byte alignment is really needed to access anything. Transformations
 689   // on the pointer value itself may rely on the alignment / known low bits of
 690   // the pointer. Set this to something above the minimum to avoid needing
 691   // dynamic realignment in common cases.
 692   unsigned getStackAlignment() const {
 693     return 16;
 694   }
 695
 696   bool enableMachineScheduler() const override {
 697     return true;
 698   }
 699
 700   bool enableSubRegLiveness() const override {
 701     return true;
 702   }
 703
 704   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
 705   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
 706
 707   /// \returns Number of execution units per compute unit supported by the
 708   /// subtarget.
 709   unsigned getEUsPerCU() const {
 710     return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
 711   }
 712
 713   /// \returns Maximum number of waves per compute unit supported by the
 714   /// subtarget without any kind of limitation.
 715   unsigned getMaxWavesPerCU() const {
 716     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
 717   }
 718
 719   /// \returns Maximum number of waves per compute unit supported by the
 720   /// subtarget and limited by given \p FlatWorkGroupSize.
 721   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
 722     return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
 723                                              FlatWorkGroupSize);
 724   }
 725
 726   /// \returns Maximum number of waves per execution unit supported by the
 727   /// subtarget without any kind of limitation.
       // NOTE(review): declaring this name here hides both getMaxWavesPerEU
       // overloads of AMDGPUSubtarget for calls made through a GCNSubtarget.
 728   unsigned getMaxWavesPerEU() const {
 729     return AMDGPU::IsaInfo::getMaxWavesPerEU();
 730   }
 731
 732   /// \returns Number of waves per work group supported by the subtarget and
 733   /// limited by given \p FlatWorkGroupSize.
 734   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
 735     return AMDGPU::IsaInfo::getWavesPerWorkGroup(
 736         MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
 737   }
 738
 739   // static wrappers
 740   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
 741
 742   // XXX - Why is this here if it isn't in the default pass set?
 743   bool enableEarlyIfConversion() const override {
 744     return true;
 745   }
 746
 747   void overrideSchedPolicy(MachineSchedPolicy &Policy,
 748                            unsigned NumRegionInstrs) const override;
 749
 750   bool isVGPRSpillingEnabled(const Function &F) const;
 751
 752   unsigned getMaxNumUserSGPRs() const {
 753     return 16;
 754   }
 755
 756   bool hasSMemRealTime() const {
 757     return HasSMemRealTime;
 758   }
 759
 760   bool hasMovrel() const {
 761     return HasMovrel;
 762   }
 763
 764   bool hasVGPRIndexMode() const {
 765     return HasVGPRIndexMode;
 766   }
 767
       /// \returns true when movrel is unavailable, or when \p UserEnable is set
       /// and the subtarget has VGPR index mode.
 768   bool useVGPRIndexMode(bool UserEnable) const {
 769     return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
 770   }
 771
 772   bool hasScalarCompareEq64() const {
 773     return getGeneration() >= VOLCANIC_ISLANDS;
 774   }
 775
 776   bool hasScalarStores() const {
 777     return HasScalarStores;
 778   }
 779
 780   bool hasScalarAtomics() const {
 781     return HasScalarAtomics;
 782   }
 783
 784   bool hasInv2PiInlineImm() const {
 785     return HasInv2PiInlineImm;
 786   }
 787
 788   bool hasDPP() const {
 789     return HasDPP;
 790   }
 791
 792   bool enableSIScheduler() const {
 793     return EnableSIScheduler;
 794   }
 795
       /// \returns true only when both debugger options (insert nops and emit
       /// prologue) are enabled.
 796   bool debuggerSupported() const {
 797     return debuggerInsertNops() && debuggerEmitPrologue();
 798   }
 799
 800   bool debuggerInsertNops() const {
 801     return DebuggerInsertNops;
 802   }
 803
 804   bool debuggerEmitPrologue() const {
 805     return DebuggerEmitPrologue;
 806   }
 807
 808   bool loadStoreOptEnabled() const {
 809     return EnableLoadStoreOpt;
 810   }
 811
 812   bool hasSGPRInitBug() const {
 813     return SGPRInitBug;
 814   }
 815
       /// \returns true for every generation except SOUTHERN_ISLANDS.
 816   bool has12DWordStoreHazard() const {
 817     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
 818   }
 819
 820   bool hasSMovFedHazard() const {
 821     return getGeneration() >= AMDGPUSubtarget::GFX9;
 822   }
 823
 824   bool hasReadM0MovRelInterpHazard() const {
 825     return getGeneration() >= AMDGPUSubtarget::GFX9;
 826   }
 827
 828   bool hasReadM0SendMsgHazard() const {
 829     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
 830   }
 831
 832   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
 833   /// SGPRs
 834   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
 835
 836   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
 837   /// VGPRs
 838   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
 839
 840   /// \returns true if the flat_scratch register should be initialized with the
 841   /// pointer to the wave's scratch memory rather than a size and offset.
 842   bool flatScratchIsPointer() const {
 843     return getGeneration() >= AMDGPUSubtarget::GFX9;
 844   }
 845
 846   /// \returns true if the machine has merged shaders in which s0-s7 are
 847   /// reserved by the hardware and user SGPRs start at s8
 848   bool hasMergedShaders() const {
 849     return getGeneration() >= GFX9;
 850   }
 851
 852   /// \returns SGPR allocation granularity supported by the subtarget.
 853   unsigned getSGPRAllocGranule() const {
 854     return AMDGPU::IsaInfo::getSGPRAllocGranule(
 855         MCSubtargetInfo::getFeatureBits());
 856   }
 857
 858   /// \returns SGPR encoding granularity supported by the subtarget.
 859   unsigned getSGPREncodingGranule() const {
 860     return AMDGPU::IsaInfo::getSGPREncodingGranule(
 861         MCSubtargetInfo::getFeatureBits());
 862   }
 863
 864   /// \returns Total number of SGPRs supported by the subtarget.
 865   unsigned getTotalNumSGPRs() const {
 866     return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
 867   }
 868
 869   /// \returns Addressable number of SGPRs supported by the subtarget.
 870   unsigned getAddressableNumSGPRs() const {
 871     return AMDGPU::IsaInfo::getAddressableNumSGPRs(
 872         MCSubtargetInfo::getFeatureBits());
 873   }
 874
 875   /// \returns Minimum number of SGPRs that meets the given number of waves per
 876   /// execution unit requirement supported by the subtarget.
 877   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
 878     return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
 879                                            WavesPerEU);
 880   }
 881
 882   /// \returns Maximum number of SGPRs that meets the given number of waves per
 883   /// execution unit requirement supported by the subtarget.
 884   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
 885     return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
 886                                            WavesPerEU, Addressable);
 887   }
 888
 889   /// \returns Reserved number of SGPRs for given function \p MF.
 890   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
 891
 892   /// \returns Maximum number of SGPRs that meets number of waves per execution
 893   /// unit requirement for function \p MF, or number of SGPRs explicitly
 894   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
 895   ///
 896   /// \returns Value that meets number of waves per execution unit requirement
 897   /// if explicitly requested value cannot be converted to integer, violates
 898   /// subtarget's specifications, or does not meet number of waves per execution
 899   /// unit requirement.
 900   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
 901
 902   /// \returns VGPR allocation granularity supported by the subtarget.
 903   unsigned getVGPRAllocGranule() const {
 904     return AMDGPU::IsaInfo::getVGPRAllocGranule(
 905         MCSubtargetInfo::getFeatureBits());
 906   }
 907
 908   /// \returns VGPR encoding granularity supported by the subtarget.
 909   unsigned getVGPREncodingGranule() const {
 910     return AMDGPU::IsaInfo::getVGPREncodingGranule(
 911         MCSubtargetInfo::getFeatureBits());
 912   }
 913
 914   /// \returns Total number of VGPRs supported by the subtarget.
 915   unsigned getTotalNumVGPRs() const {
 916     return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
 917   }
 918
 919   /// \returns Addressable number of VGPRs supported by the subtarget.
 920   unsigned getAddressableNumVGPRs() const {
 921     return AMDGPU::IsaInfo::getAddressableNumVGPRs(
 922         MCSubtargetInfo::getFeatureBits());
 923   }
 924
 925   /// \returns Minimum number of VGPRs that meets given number of waves per
 926   /// execution unit requirement supported by the subtarget.
 927   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
 928     return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
 929                                            WavesPerEU);
 930   }
 931
 932   /// \returns Maximum number of VGPRs that meets given number of waves per
 933   /// execution unit requirement supported by the subtarget.
 934   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
 935     return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
 936                                            WavesPerEU);
 937   }
 938
 939   /// \returns Maximum number of VGPRs that meets number of waves per execution
 940   /// unit requirement for function \p MF, or number of VGPRs explicitly
 941   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
 942   ///
 943   /// \returns Value that meets number of waves per execution unit requirement
 944   /// if explicitly requested value cannot be converted to integer, violates
 945   /// subtarget's specifications, or does not meet number of waves per execution
 946   /// unit requirement.
 947   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 948
       // NOTE(review): uses std::vector, but <vector> is not among the includes
       // at the top of this header; it presumably arrives transitively.
 949   void getPostRAMutations(
 950       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
 951       const override;
 952 };
 953
 954 class R600Subtarget final : public R600GenSubtargetInfo,
 955                             public AMDGPUSubtarget {
       // Subtarget built on R600GenSubtargetInfo plus the shared AMDGPUSubtarget
       // state. It owns the R600 instruction info, frame lowering, target
       // lowering and itinerary data returned by the getters below.
 956 private:
 957   R600InstrInfo InstrInfo;
 958   R600FrameLowering FrameLowering;
       // Feature flags; the ones with accessors are returned unchanged below.
 959   bool FMA;
 960   bool CaymanISA;
 961   bool CFALUBug;
 962   bool DX10Clamp;
 963   bool HasVertexCache;
 964   bool R600ALUInst;
 965   bool FP64;
 966   short TexVTXClauseSize;
       // Stored directly as the enum here, so getGeneration() needs no cast.
 967   Generation Gen;
 968   R600TargetLowering TLInfo;
 969   InstrItineraryData InstrItins;
 970   SelectionDAGTargetInfo TSInfo;
 971   AMDGPUAS AS;
 972
 973 public:
 974   R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
 975                 const TargetMachine &TM);
 976
 977   const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
 978
 979   const R600FrameLowering *getFrameLowering() const override {
 980     return &FrameLowering;
 981   }
 982
 983   const R600TargetLowering *getTargetLowering() const override {
 984     return &TLInfo;
 985   }
 986
 987   const R600RegisterInfo *getRegisterInfo() const override {
 988     return &InstrInfo.getRegisterInfo();
 989   }
 990
 991   const InstrItineraryData *getInstrItineraryData() const override {
 992     return &InstrItins;
 993   }
 994
 995   // Nothing implemented, just prevent crashes on use.
 996   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
 997     return &TSInfo;
 998   }
 999
1000   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
1001
1002   Generation getGeneration() const {
1003     return Gen;
1004   }
1005
       /// \returns The constant 4 (GCNSubtarget returns 16 from the same-named
       /// query).
1006   unsigned getStackAlignment() const {
1007     return 4;
1008   }
1009
1010   R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
1011                                                  StringRef GPU, StringRef FS);
1012
       // The bit-manipulation and carry/borrow queries below are all true only
       // for EVERGREEN and later generations.
1013   bool hasBFE() const {
1014     return (getGeneration() >= EVERGREEN);
1015   }
1016
1017   bool hasBFI() const {
1018     return (getGeneration() >= EVERGREEN);
1019   }
1020
       /// \returns true only for \p Size == 32 on EVERGREEN or later; false for
       /// every other size.
1021   bool hasBCNT(unsigned Size) const {
1022     if (Size == 32)
1023       return (getGeneration() >= EVERGREEN);
1024
1025     return false;
1026   }
1027
1028   bool hasBORROW() const {
1029     return (getGeneration() >= EVERGREEN);
1030   }
1031
1032   bool hasCARRY() const {
1033     return (getGeneration() >= EVERGREEN);
1034   }
1035
1036   bool hasCaymanISA() const {
1037     return CaymanISA;
1038   }
1039
1040   bool hasFFBL() const {
1041     return (getGeneration() >= EVERGREEN);
1042   }
1043
1044   bool hasFFBH() const {
1045     return (getGeneration() >= EVERGREEN);
1046   }
1047
1048   bool hasFMA() const { return FMA; }
1049
1050   bool hasCFAluBug() const { return CFALUBug; }
1051
1052   bool hasVertexCache() const { return HasVertexCache; }
1053
1054   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
1055
1056   AMDGPUAS getAMDGPUAS() const { return AS; }
1057
1058   bool enableMachineScheduler() const override {
1059     return true;
1060   }
1061
1062   bool enableSubRegLiveness() const override {
1063     return true;
1064   }
1065 };
1066
1067 } // end namespace llvm
1068
1069 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H