contrib/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  14 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15
  16 #include "AMDGPUArgumentUsageInfo.h"
  17 #include "AMDGPUMachineFunction.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIInstrInfo.h"
  20 #include "SIRegisterInfo.h"
  21 #include "llvm/ADT/ArrayRef.h"
  22 #include "llvm/ADT/DenseMap.h"
  23 #include "llvm/ADT/Optional.h"
  24 #include "llvm/ADT/STLExtras.h"
  25 #include "llvm/ADT/SmallVector.h"
  26 #include "llvm/ADT/SparseBitVector.h"
  27 #include "llvm/CodeGen/MIRYamlMapping.h"
  28 #include "llvm/CodeGen/PseudoSourceValue.h"
  29 #include "llvm/CodeGen/TargetInstrInfo.h"
  30 #include "llvm/MC/MCRegisterInfo.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include <array>
  33 #include <cassert>
  34 #include <utility>
  35 #include <vector>
  36
  37 namespace llvm {
  38
  39 class MachineFrameInfo;
  40 class MachineFunction;
  41 class TargetRegisterClass;
  42
  43 class AMDGPUPseudoSourceValue : public PseudoSourceValue {
  44 public:
  45   enum AMDGPUPSVKind : unsigned {
  46     PSVBuffer = PseudoSourceValue::TargetCustom,
  47     PSVImage,
  48     GWSResource
  49   };
  50
  51 protected:
  52   AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
  53       : PseudoSourceValue(Kind, TII) {}
  54
  55 public:
  56   bool isConstant(const MachineFrameInfo *) const override {
  57     // This should probably be true for most images, but we will start by being
  58     // conservative.
  59     return false;
  60   }
  61
  62   bool isAliased(const MachineFrameInfo *) const override {
  63     return true;
  64   }
  65
  66   bool mayAlias(const MachineFrameInfo *) const override {
  67     return true;
  68   }
  69 };
  70
  71 class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
  72 public:
  73   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
  74       : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
  75
  76   static bool classof(const PseudoSourceValue *V) {
  77     return V->kind() == PSVBuffer;
  78   }
  79 };
  80
  81 class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  82 public:
  83   // TODO: Is the img rsrc useful?
  84   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
  85       : AMDGPUPseudoSourceValue(PSVImage, TII) {}
  86
  87   static bool classof(const PseudoSourceValue *V) {
  88     return V->kind() == PSVImage;
  89   }
  90 };
  91
  92 class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
  93 public:
  94   explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
  95       : AMDGPUPseudoSourceValue(GWSResource, TII) {}
  96
  97   static bool classof(const PseudoSourceValue *V) {
  98     return V->kind() == GWSResource;
  99   }
 100
 101   // These are inaccessible memory from IR.
 102   bool isAliased(const MachineFrameInfo *) const override {
 103     return false;
 104   }
 105
 106   // These are inaccessible memory from IR.
 107   bool mayAlias(const MachineFrameInfo *) const override {
 108     return false;
 109   }
 110
 111   void printCustom(raw_ostream &OS) const override {
 112     OS << "GWSResource";
 113   }
 114 };
 115
 116 namespace yaml {
 117
 118 struct SIArgument {
 119   bool IsRegister;
 120   union {
 121     StringValue RegisterName;
 122     unsigned StackOffset;
 123   };
 124   Optional<unsigned> Mask;
 125
 126   // Default constructor, which creates a stack argument.
 127   SIArgument() : IsRegister(false), StackOffset(0) {}
 128   SIArgument(const SIArgument &Other) {
 129     IsRegister = Other.IsRegister;
 130     if (IsRegister) {
 131       ::new ((void *)std::addressof(RegisterName))
 132           StringValue(Other.RegisterName);
 133     } else
 134       StackOffset = Other.StackOffset;
 135     Mask = Other.Mask;
 136   }
 137   SIArgument &operator=(const SIArgument &Other) {
 138     IsRegister = Other.IsRegister;
 139     if (IsRegister) {
 140       ::new ((void *)std::addressof(RegisterName))
 141           StringValue(Other.RegisterName);
 142     } else
 143       StackOffset = Other.StackOffset;
 144     Mask = Other.Mask;
 145     return *this;
 146   }
 147   ~SIArgument() {
 148     if (IsRegister)
 149       RegisterName.~StringValue();
 150   }
 151
 152   // Helper to create a register or stack argument.
 153   static inline SIArgument createArgument(bool IsReg) {
 154     if (IsReg)
 155       return SIArgument(IsReg);
 156     return SIArgument();
 157   }
 158
 159 private:
 160   // Construct a register argument.
 161   SIArgument(bool) : IsRegister(true), RegisterName() {}
 162 };
 163
 164 template <> struct MappingTraits<SIArgument> {
 165   static void mapping(IO &YamlIO, SIArgument &A) {
 166     if (YamlIO.outputting()) {
 167       if (A.IsRegister)
 168         YamlIO.mapRequired("reg", A.RegisterName);
 169       else
 170         YamlIO.mapRequired("offset", A.StackOffset);
 171     } else {
 172       auto Keys = YamlIO.keys();
 173       if (is_contained(Keys, "reg")) {
 174         A = SIArgument::createArgument(true);
 175         YamlIO.mapRequired("reg", A.RegisterName);
 176       } else if (is_contained(Keys, "offset"))
 177         YamlIO.mapRequired("offset", A.StackOffset);
 178       else
 179         YamlIO.setError("missing required key 'reg' or 'offset'");
 180     }
 181     YamlIO.mapOptional("mask", A.Mask);
 182   }
 183   static const bool flow = true;
 184 };
 185
 186 struct SIArgumentInfo {
 187   Optional<SIArgument> PrivateSegmentBuffer;
 188   Optional<SIArgument> DispatchPtr;
 189   Optional<SIArgument> QueuePtr;
 190   Optional<SIArgument> KernargSegmentPtr;
 191   Optional<SIArgument> DispatchID;
 192   Optional<SIArgument> FlatScratchInit;
 193   Optional<SIArgument> PrivateSegmentSize;
 194
 195   Optional<SIArgument> WorkGroupIDX;
 196   Optional<SIArgument> WorkGroupIDY;
 197   Optional<SIArgument> WorkGroupIDZ;
 198   Optional<SIArgument> WorkGroupInfo;
 199   Optional<SIArgument> PrivateSegmentWaveByteOffset;
 200
 201   Optional<SIArgument> ImplicitArgPtr;
 202   Optional<SIArgument> ImplicitBufferPtr;
 203
 204   Optional<SIArgument> WorkItemIDX;
 205   Optional<SIArgument> WorkItemIDY;
 206   Optional<SIArgument> WorkItemIDZ;
 207 };
 208
 209 template <> struct MappingTraits<SIArgumentInfo> {
 210   static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
 211     YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
 212     YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
 213     YamlIO.mapOptional("queuePtr", AI.QueuePtr);
 214     YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
 215     YamlIO.mapOptional("dispatchID", AI.DispatchID);
 216     YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
 217     YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
 218
 219     YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
 220     YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
 221     YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
 222     YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
 223     YamlIO.mapOptional("privateSegmentWaveByteOffset",
 224                        AI.PrivateSegmentWaveByteOffset);
 225
 226     YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
 227     YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
 228
 229     YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
 230     YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
 231     YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
 232   }
 233 };
 234
 235 // Default to default mode for default calling convention.
 236 struct SIMode {
 237   bool IEEE = true;
 238   bool DX10Clamp = true;
 239   bool FP32InputDenormals = true;
 240   bool FP32OutputDenormals = true;
 241   bool FP64FP16InputDenormals = true;
 242   bool FP64FP16OutputDenormals = true;
 243
 244   SIMode() = default;
 245
 246   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
 247     IEEE = Mode.IEEE;
 248     DX10Clamp = Mode.DX10Clamp;
 249     FP32InputDenormals = Mode.FP32InputDenormals;
 250     FP32OutputDenormals = Mode.FP32OutputDenormals;
 251     FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
 252     FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
 253   }
 254
 255   bool operator ==(const SIMode Other) const {
 256     return IEEE == Other.IEEE &&
 257            DX10Clamp == Other.DX10Clamp &&
 258            FP32InputDenormals == Other.FP32InputDenormals &&
 259            FP32OutputDenormals == Other.FP32OutputDenormals &&
 260            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
 261            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
 262   }
 263 };
 264
 265 template <> struct MappingTraits<SIMode> {
 266   static void mapping(IO &YamlIO, SIMode &Mode) {
 267     YamlIO.mapOptional("ieee", Mode.IEEE, true);
 268     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
 269     YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
 270     YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
 271     YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
 272     YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
 273   }
 274 };
 275
 276 struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
 277   uint64_t ExplicitKernArgSize = 0;
 278   unsigned MaxKernArgAlign = 0;
 279   unsigned LDSSize = 0;
 280   bool IsEntryFunction = false;
 281   bool NoSignedZerosFPMath = false;
 282   bool MemoryBound = false;
 283   bool WaveLimiter = false;
 284   uint32_t HighBitsOf32BitAddress = 0;
 285
 286   StringValue ScratchRSrcReg = "$private_rsrc_reg";
 287   StringValue FrameOffsetReg = "$fp_reg";
 288   StringValue StackPtrOffsetReg = "$sp_reg";
 289
 290   Optional<SIArgumentInfo> ArgInfo;
 291   SIMode Mode;
 292
 293   SIMachineFunctionInfo() = default;
 294   SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
 295                         const TargetRegisterInfo &TRI);
 296
 297   void mappingImpl(yaml::IO &YamlIO) override;
 298   ~SIMachineFunctionInfo() = default;
 299 };
 300
 301 template <> struct MappingTraits<SIMachineFunctionInfo> {
 302   static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
 303     YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
 304                        UINT64_C(0));
 305     YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
 306     YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
 307     YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
 308     YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
 309     YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
 310     YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
 311     YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
 312                        StringValue("$private_rsrc_reg"));
 313     YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
 314                        StringValue("$fp_reg"));
 315     YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
 316                        StringValue("$sp_reg"));
 317     YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
 318     YamlIO.mapOptional("mode", MFI.Mode, SIMode());
 319     YamlIO.mapOptional("highBitsOf32BitAddress",
 320                        MFI.HighBitsOf32BitAddress, 0u);
 321   }
 322 };
 323
 324 } // end namespace yaml
 325
 326 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 327 /// tells the hardware which interpolation parameters to load.
 328 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
 329   friend class GCNTargetMachine;
 330
 331   Register TIDReg = AMDGPU::NoRegister;
 332
 333   // Registers that may be reserved for spilling purposes. These may be the same
 334   // as the input registers.
 335   Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
 336
 337   // This is the the unswizzled offset from the current dispatch's scratch wave
 338   // base to the beginning of the current function's frame.
 339   Register FrameOffsetReg = AMDGPU::FP_REG;
 340
 341   // This is an ABI register used in the non-entry calling convention to
 342   // communicate the unswizzled offset from the current dispatch's scratch wave
 343   // base to the beginning of the new function's frame.
 344   Register StackPtrOffsetReg = AMDGPU::SP_REG;
 345
 346   AMDGPUFunctionArgInfo ArgInfo;
 347
 348   // Graphics info.
 349   unsigned PSInputAddr = 0;
 350   unsigned PSInputEnable = 0;
 351
 352   /// Number of bytes of arguments this function has on the stack. If the callee
 353   /// is expected to restore the argument stack this should be a multiple of 16,
 354   /// all usable during a tail call.
 355   ///
 356   /// The alternative would forbid tail call optimisation in some cases: if we
 357   /// want to transfer control from a function with 8-bytes of stack-argument
 358   /// space to a function with 16-bytes then misalignment of this value would
 359   /// make a stack adjustment necessary, which could not be undone by the
 360   /// callee.
 361   unsigned BytesInStackArgArea = 0;
 362
 363   bool ReturnsVoid = true;
 364
 365   // A pair of default/requested minimum/maximum flat work group sizes.
 366   // Minimum - first, maximum - second.
 367   std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
 368
 369   // A pair of default/requested minimum/maximum number of waves per execution
 370   // unit. Minimum - first, maximum - second.
 371   std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
 372
 373   DenseMap<const Value *,
 374            std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
 375   DenseMap<const Value *,
 376            std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
 377   std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
 378
 379 private:
 380   unsigned LDSWaveSpillSize = 0;
 381   unsigned NumUserSGPRs = 0;
 382   unsigned NumSystemSGPRs = 0;
 383
 384   bool HasSpilledSGPRs = false;
 385   bool HasSpilledVGPRs = false;
 386   bool HasNonSpillStackObjects = false;
 387   bool IsStackRealigned = false;
 388
 389   unsigned NumSpilledSGPRs = 0;
 390   unsigned NumSpilledVGPRs = 0;
 391
 392   // Feature bits required for inputs passed in user SGPRs.
 393   bool PrivateSegmentBuffer : 1;
 394   bool DispatchPtr : 1;
 395   bool QueuePtr : 1;
 396   bool KernargSegmentPtr : 1;
 397   bool DispatchID : 1;
 398   bool FlatScratchInit : 1;
 399
 400   // Feature bits required for inputs passed in system SGPRs.
 401   bool WorkGroupIDX : 1; // Always initialized.
 402   bool WorkGroupIDY : 1;
 403   bool WorkGroupIDZ : 1;
 404   bool WorkGroupInfo : 1;
 405   bool PrivateSegmentWaveByteOffset : 1;
 406
 407   bool WorkItemIDX : 1; // Always initialized.
 408   bool WorkItemIDY : 1;
 409   bool WorkItemIDZ : 1;
 410
 411   // Private memory buffer
 412   // Compute directly in sgpr[0:1]
 413   // Other shaders indirect 64-bits at sgpr[0:1]
 414   bool ImplicitBufferPtr : 1;
 415
 416   // Pointer to where the ABI inserts special kernel arguments separate from the
 417   // user arguments. This is an offset from the KernargSegmentPtr.
 418   bool ImplicitArgPtr : 1;
 419
 420   // The hard-wired high half of the address of the global information table
 421   // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
 422   // current hardware only allows a 16 bit value.
 423   unsigned GITPtrHigh;
 424
 425   unsigned HighBitsOf32BitAddress;
 426   unsigned GDSSize;
 427
 428   // Current recorded maximum possible occupancy.
 429   unsigned Occupancy;
 430
 431   MCPhysReg getNextUserSGPR() const;
 432
 433   MCPhysReg getNextSystemSGPR() const;
 434
 435 public:
 436   struct SpilledReg {
 437     Register VGPR;
 438     int Lane = -1;
 439
 440     SpilledReg() = default;
 441     SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
 442
 443     bool hasLane() { return Lane != -1;}
 444     bool hasReg() { return VGPR != 0;}
 445   };
 446
 447   struct SGPRSpillVGPRCSR {
 448     // VGPR used for SGPR spills
 449     Register VGPR;
 450
 451     // If the VGPR is a CSR, the stack slot used to save/restore it in the
 452     // prolog/epilog.
 453     Optional<int> FI;
 454
 455     SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
 456   };
 457
 458   struct VGPRSpillToAGPR {
 459     SmallVector<MCPhysReg, 32> Lanes;
 460     bool FullyAllocated = false;
 461   };
 462
 463   SparseBitVector<> WWMReservedRegs;
 464
 465   void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }
 466
 467 private:
 468   // Track VGPR + wave index for each subregister of the SGPR spilled to
 469   // frameindex key.
 470   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 471   unsigned NumVGPRSpillLanes = 0;
 472   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
 473
 474   DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
 475
 476   // AGPRs used for VGPR spills.
 477   SmallVector<MCPhysReg, 32> SpillAGPR;
 478
 479   // VGPRs used for AGPR spills.
 480   SmallVector<MCPhysReg, 32> SpillVGPR;
 481
 482 public: // FIXME
 483   /// If this is set, an SGPR used for save/restore of the register used for the
 484   /// frame pointer.
 485   Register SGPRForFPSaveRestoreCopy;
 486   Optional<int> FramePointerSaveIndex;
 487
 488   /// If this is set, an SGPR used for save/restore of the register used for the
 489   /// base pointer.
 490   Register SGPRForBPSaveRestoreCopy;
 491   Optional<int> BasePointerSaveIndex;
 492
 493   Register VGPRReservedForSGPRSpill;
 494   bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
 495
 496 public:
 497   SIMachineFunctionInfo(const MachineFunction &MF);
 498
 499   bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
 500
 501   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 502     auto I = SGPRToVGPRSpills.find(FrameIndex);
 503     return (I == SGPRToVGPRSpills.end()) ?
 504       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 505   }
 506
 507   ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
 508     return SpillVGPRs;
 509   }
 510
 511   void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
 512     SpillVGPRs[Index].VGPR = NewVGPR;
 513     SpillVGPRs[Index].FI = newFI;
 514     VGPRReservedForSGPRSpill = NewVGPR;
 515   }
 516
 517   bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
 518
 519   ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
 520     return SpillAGPR;
 521   }
 522
 523   ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
 524     return SpillVGPR;
 525   }
 526
 527   MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
 528     auto I = VGPRToAGPRSpills.find(FrameIndex);
 529     return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
 530                                          : I->second.Lanes[Lane];
 531   }
 532
 533   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
 534                                  unsigned NumLane) const;
 535   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 536   bool reserveVGPRforSGPRSpills(MachineFunction &MF);
 537   bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
 538   void removeDeadFrameIndices(MachineFrameInfo &MFI);
 539
 540   bool hasCalculatedTID() const { return TIDReg != 0; };
 541   Register getTIDReg() const { return TIDReg; };
 542   void setTIDReg(Register Reg) { TIDReg = Reg; }
 543
 544   unsigned getBytesInStackArgArea() const {
 545     return BytesInStackArgArea;
 546   }
 547
 548   void setBytesInStackArgArea(unsigned Bytes) {
 549     BytesInStackArgArea = Bytes;
 550   }
 551
 552   // Add user SGPRs.
 553   Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 554   Register addDispatchPtr(const SIRegisterInfo &TRI);
 555   Register addQueuePtr(const SIRegisterInfo &TRI);
 556   Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
 557   Register addDispatchID(const SIRegisterInfo &TRI);
 558   Register addFlatScratchInit(const SIRegisterInfo &TRI);
 559   Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
 560
 561   // Add system SGPRs.
 562   Register addWorkGroupIDX() {
 563     ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
 564     NumSystemSGPRs += 1;
 565     return ArgInfo.WorkGroupIDX.getRegister();
 566   }
 567
 568   Register addWorkGroupIDY() {
 569     ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
 570     NumSystemSGPRs += 1;
 571     return ArgInfo.WorkGroupIDY.getRegister();
 572   }
 573
 574   Register addWorkGroupIDZ() {
 575     ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
 576     NumSystemSGPRs += 1;
 577     return ArgInfo.WorkGroupIDZ.getRegister();
 578   }
 579
 580   Register addWorkGroupInfo() {
 581     ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
 582     NumSystemSGPRs += 1;
 583     return ArgInfo.WorkGroupInfo.getRegister();
 584   }
 585
 586   // Add special VGPR inputs
 587   void setWorkItemIDX(ArgDescriptor Arg) {
 588     ArgInfo.WorkItemIDX = Arg;
 589   }
 590
 591   void setWorkItemIDY(ArgDescriptor Arg) {
 592     ArgInfo.WorkItemIDY = Arg;
 593   }
 594
 595   void setWorkItemIDZ(ArgDescriptor Arg) {
 596     ArgInfo.WorkItemIDZ = Arg;
 597   }
 598
 599   Register addPrivateSegmentWaveByteOffset() {
 600     ArgInfo.PrivateSegmentWaveByteOffset
 601       = ArgDescriptor::createRegister(getNextSystemSGPR());
 602     NumSystemSGPRs += 1;
 603     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 604   }
 605
 606   void setPrivateSegmentWaveByteOffset(Register Reg) {
 607     ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
 608   }
 609
 610   bool hasPrivateSegmentBuffer() const {
 611     return PrivateSegmentBuffer;
 612   }
 613
 614   bool hasDispatchPtr() const {
 615     return DispatchPtr;
 616   }
 617
 618   bool hasQueuePtr() const {
 619     return QueuePtr;
 620   }
 621
 622   bool hasKernargSegmentPtr() const {
 623     return KernargSegmentPtr;
 624   }
 625
 626   bool hasDispatchID() const {
 627     return DispatchID;
 628   }
 629
 630   bool hasFlatScratchInit() const {
 631     return FlatScratchInit;
 632   }
 633
 634   bool hasWorkGroupIDX() const {
 635     return WorkGroupIDX;
 636   }
 637
 638   bool hasWorkGroupIDY() const {
 639     return WorkGroupIDY;
 640   }
 641
 642   bool hasWorkGroupIDZ() const {
 643     return WorkGroupIDZ;
 644   }
 645
 646   bool hasWorkGroupInfo() const {
 647     return WorkGroupInfo;
 648   }
 649
 650   bool hasPrivateSegmentWaveByteOffset() const {
 651     return PrivateSegmentWaveByteOffset;
 652   }
 653
 654   bool hasWorkItemIDX() const {
 655     return WorkItemIDX;
 656   }
 657
 658   bool hasWorkItemIDY() const {
 659     return WorkItemIDY;
 660   }
 661
 662   bool hasWorkItemIDZ() const {
 663     return WorkItemIDZ;
 664   }
 665
 666   bool hasImplicitArgPtr() const {
 667     return ImplicitArgPtr;
 668   }
 669
 670   bool hasImplicitBufferPtr() const {
 671     return ImplicitBufferPtr;
 672   }
 673
 674   AMDGPUFunctionArgInfo &getArgInfo() {
 675     return ArgInfo;
 676   }
 677
 678   const AMDGPUFunctionArgInfo &getArgInfo() const {
 679     return ArgInfo;
 680   }
 681
 682   std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
 683   getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 684     return ArgInfo.getPreloadedValue(Value);
 685   }
 686
 687   Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
 688     auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
 689     return Arg ? Arg->getRegister() : Register();
 690   }
 691
 692   unsigned getGITPtrHigh() const {
 693     return GITPtrHigh;
 694   }
 695
 696   Register getGITPtrLoReg(const MachineFunction &MF) const;
 697
 698   uint32_t get32BitAddressHighBits() const {
 699     return HighBitsOf32BitAddress;
 700   }
 701
 702   unsigned getGDSSize() const {
 703     return GDSSize;
 704   }
 705
 706   unsigned getNumUserSGPRs() const {
 707     return NumUserSGPRs;
 708   }
 709
 710   unsigned getNumPreloadedSGPRs() const {
 711     return NumUserSGPRs + NumSystemSGPRs;
 712   }
 713
 714   Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 715     return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
 716   }
 717
 718   /// Returns the physical register reserved for use as the resource
 719   /// descriptor for scratch accesses.
 720   Register getScratchRSrcReg() const {
 721     return ScratchRSrcReg;
 722   }
 723
 724   void setScratchRSrcReg(Register Reg) {
 725     assert(Reg != 0 && "Should never be unset");
 726     ScratchRSrcReg = Reg;
 727   }
 728
 729   Register getFrameOffsetReg() const {
 730     return FrameOffsetReg;
 731   }
 732
 733   void setFrameOffsetReg(Register Reg) {
 734     assert(Reg != 0 && "Should never be unset");
 735     FrameOffsetReg = Reg;
 736   }
 737
 738   void setStackPtrOffsetReg(Register Reg) {
 739     assert(Reg != 0 && "Should never be unset");
 740     StackPtrOffsetReg = Reg;
 741   }
 742
 743   // Note the unset value for this is AMDGPU::SP_REG rather than
 744   // NoRegister. This is mostly a workaround for MIR tests where state that
 745   // can't be directly computed from the function is not preserved in serialized
 746   // MIR.
 747   Register getStackPtrOffsetReg() const {
 748     return StackPtrOffsetReg;
 749   }
 750
 751   Register getQueuePtrUserSGPR() const {
 752     return ArgInfo.QueuePtr.getRegister();
 753   }
 754
 755   Register getImplicitBufferPtrUserSGPR() const {
 756     return ArgInfo.ImplicitBufferPtr.getRegister();
 757   }
 758
 759   bool hasSpilledSGPRs() const {
 760     return HasSpilledSGPRs;
 761   }
 762
 763   void setHasSpilledSGPRs(bool Spill = true) {
 764     HasSpilledSGPRs = Spill;
 765   }
 766
 767   bool hasSpilledVGPRs() const {
 768     return HasSpilledVGPRs;
 769   }
 770
 771   void setHasSpilledVGPRs(bool Spill = true) {
 772     HasSpilledVGPRs = Spill;
 773   }
 774
 775   bool hasNonSpillStackObjects() const {
 776     return HasNonSpillStackObjects;
 777   }
 778
 779   void setHasNonSpillStackObjects(bool StackObject = true) {
 780     HasNonSpillStackObjects = StackObject;
 781   }
 782
 783   bool isStackRealigned() const {
 784     return IsStackRealigned;
 785   }
 786
 787   void setIsStackRealigned(bool Realigned = true) {
 788     IsStackRealigned = Realigned;
 789   }
 790
 791   unsigned getNumSpilledSGPRs() const {
 792     return NumSpilledSGPRs;
 793   }
 794
 795   unsigned getNumSpilledVGPRs() const {
 796     return NumSpilledVGPRs;
 797   }
 798
 799   void addToSpilledSGPRs(unsigned num) {
 800     NumSpilledSGPRs += num;
 801   }
 802
 803   void addToSpilledVGPRs(unsigned num) {
 804     NumSpilledVGPRs += num;
 805   }
 806
 807   unsigned getPSInputAddr() const {
 808     return PSInputAddr;
 809   }
 810
 811   unsigned getPSInputEnable() const {
 812     return PSInputEnable;
 813   }
 814
 815   bool isPSInputAllocated(unsigned Index) const {
 816     return PSInputAddr & (1 << Index);
 817   }
 818
 819   void markPSInputAllocated(unsigned Index) {
 820     PSInputAddr |= 1 << Index;
 821   }
 822
 823   void markPSInputEnabled(unsigned Index) {
 824     PSInputEnable |= 1 << Index;
 825   }
 826
 827   bool returnsVoid() const {
 828     return ReturnsVoid;
 829   }
 830
 831   void setIfReturnsVoid(bool Value) {
 832     ReturnsVoid = Value;
 833   }
 834
 835   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 836   /// for this function.
 837   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 838     return FlatWorkGroupSizes;
 839   }
 840
 841   /// \returns Default/requested minimum flat work group size for this function.
 842   unsigned getMinFlatWorkGroupSize() const {
 843     return FlatWorkGroupSizes.first;
 844   }
 845
 846   /// \returns Default/requested maximum flat work group size for this function.
 847   unsigned getMaxFlatWorkGroupSize() const {
 848     return FlatWorkGroupSizes.second;
 849   }
 850
 851   /// \returns A pair of default/requested minimum/maximum number of waves per
 852   /// execution unit.
 853   std::pair<unsigned, unsigned> getWavesPerEU() const {
 854     return WavesPerEU;
 855   }
 856
 857   /// \returns Default/requested minimum number of waves per execution unit.
 858   unsigned getMinWavesPerEU() const {
 859     return WavesPerEU.first;
 860   }
 861
 862   /// \returns Default/requested maximum number of waves per execution unit.
 863   unsigned getMaxWavesPerEU() const {
 864     return WavesPerEU.second;
 865   }
 866
 867   /// \returns SGPR used for \p Dim's work group ID.
 868   Register getWorkGroupIDSGPR(unsigned Dim) const {
 869     switch (Dim) {
 870     case 0:
 871       assert(hasWorkGroupIDX());
 872       return ArgInfo.WorkGroupIDX.getRegister();
 873     case 1:
 874       assert(hasWorkGroupIDY());
 875       return ArgInfo.WorkGroupIDY.getRegister();
 876     case 2:
 877       assert(hasWorkGroupIDZ());
 878       return ArgInfo.WorkGroupIDZ.getRegister();
 879     }
 880     llvm_unreachable("unexpected dimension");
 881   }
 882
 883   unsigned getLDSWaveSpillSize() const {
 884     return LDSWaveSpillSize;
 885   }
 886
 887   const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
 888                                                     const Value *BufferRsrc) {
 889     assert(BufferRsrc);
 890     auto PSV = BufferPSVs.try_emplace(
 891       BufferRsrc,
 892       std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
 893     return PSV.first->second.get();
 894   }
 895
 896   const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
 897                                                   const Value *ImgRsrc) {
 898     assert(ImgRsrc);
 899     auto PSV = ImagePSVs.try_emplace(
 900       ImgRsrc,
 901       std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
 902     return PSV.first->second.get();
 903   }
 904
 905   const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
 906     if (!GWSResourcePSV) {
 907       GWSResourcePSV =
 908           std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
 909     }
 910
 911     return GWSResourcePSV.get();
 912   }
 913
 914   unsigned getOccupancy() const {
 915     return Occupancy;
 916   }
 917
 918   unsigned getMinAllowedOccupancy() const {
 919     if (!isMemoryBound() && !needsWaveLimiter())
 920       return Occupancy;
 921     return (Occupancy < 4) ? Occupancy : 4;
 922   }
 923
 924   void limitOccupancy(const MachineFunction &MF);
 925
 926   void limitOccupancy(unsigned Limit) {
 927     if (Occupancy > Limit)
 928       Occupancy = Limit;
 929   }
 930
 931   void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
 932     if (Occupancy < Limit)
 933       Occupancy = Limit;
 934     limitOccupancy(MF);
 935   }
 936 };
 937
 938 } // end namespace llvm
 939
 940 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H