contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  16
  17 #include "AMDGPUMachineFunction.h"
  18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  19 #include "SIRegisterInfo.h"
  20 #include "llvm/CodeGen/PseudoSourceValue.h"
  21 #include "llvm/MC/MCRegisterInfo.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include <array>
  24 #include <cassert>
  25 #include <map>
  26 #include <utility>
  27
  28 namespace llvm {
  29
  30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
  31 public:
  32   explicit AMDGPUImagePseudoSourceValue() :
  33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  34
  35   bool isConstant(const MachineFrameInfo *) const override {
  36     // This should probably be true for most images, but we will start by being
  37     // conservative.
  38     return false;
  39   }
  40
  41   bool isAliased(const MachineFrameInfo *) const override {
  42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  43     // this could be true for some cases.
  44     return false;
  45   }
  46
  47   bool mayAlias(const MachineFrameInfo*) const override {
  48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  49     // this could be true for some cases.
  50     return false;
  51   }
  52 };
  53
  54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
  55 public:
  56   explicit AMDGPUBufferPseudoSourceValue() :
  57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  58
  59   bool isConstant(const MachineFrameInfo *) const override {
  60     // This should probably be true for most images, but we will start by being
  61     // conservative.
  62     return false;
  63   }
  64
  65   bool isAliased(const MachineFrameInfo *) const override {
  66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  67     // this could be true for some cases.
  68     return false;
  69   }
  70
  71   bool mayAlias(const MachineFrameInfo*) const override {
  72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  73     // this could be true for some cases.
  74     return false;
  75   }
  76 };
  77
  78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
  79 /// tells the hardware which interpolation parameters to load.
  80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  81   // FIXME: This should be removed and getPreloadedValue moved here.
  82   friend class SIRegisterInfo;
  83
  84   unsigned TIDReg;
  85
  86   // Registers that may be reserved for spilling purposes. These may be the same
  87   // as the input registers.
  88   unsigned ScratchRSrcReg;
  89   unsigned ScratchWaveOffsetReg;
  90
  91   // This is the current function's incremented size from the kernel's scratch
  92   // wave offset register. For an entry function, this is exactly the same as
  93   // the ScratchWaveOffsetReg.
  94   unsigned FrameOffsetReg;
  95
  96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  97   unsigned StackPtrOffsetReg;
  98
  99   // Input registers for non-HSA ABI
 100   unsigned ImplicitBufferPtrUserSGPR;
 101
 102   // Input registers setup for the HSA ABI.
 103   // User SGPRs in allocation order.
 104   unsigned PrivateSegmentBufferUserSGPR;
 105   unsigned DispatchPtrUserSGPR;
 106   unsigned QueuePtrUserSGPR;
 107   unsigned KernargSegmentPtrUserSGPR;
 108   unsigned DispatchIDUserSGPR;
 109   unsigned FlatScratchInitUserSGPR;
 110   unsigned PrivateSegmentSizeUserSGPR;
 111   unsigned GridWorkGroupCountXUserSGPR;
 112   unsigned GridWorkGroupCountYUserSGPR;
 113   unsigned GridWorkGroupCountZUserSGPR;
 114
 115   // System SGPRs in allocation order.
 116   unsigned WorkGroupIDXSystemSGPR;
 117   unsigned WorkGroupIDYSystemSGPR;
 118   unsigned WorkGroupIDZSystemSGPR;
 119   unsigned WorkGroupInfoSystemSGPR;
 120   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
 121
 122   // VGPR inputs. These are always v0, v1 and v2 for entry functions.
 123   unsigned WorkItemIDXVGPR;
 124   unsigned WorkItemIDYVGPR;
 125   unsigned WorkItemIDZVGPR;
 126
 127   // Graphics info.
 128   unsigned PSInputAddr;
 129   unsigned PSInputEnable;
 130
 131   bool ReturnsVoid;
 132
 133   // A pair of default/requested minimum/maximum flat work group sizes.
 134   // Minimum - first, maximum - second.
 135   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
 136
 137   // A pair of default/requested minimum/maximum number of waves per execution
 138   // unit. Minimum - first, maximum - second.
 139   std::pair<unsigned, unsigned> WavesPerEU;
 140
 141   // Stack object indices for work group IDs.
 142   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
 143   // Stack object indices for work item IDs.
 144   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
 145
 146   AMDGPUBufferPseudoSourceValue BufferPSV;
 147   AMDGPUImagePseudoSourceValue ImagePSV;
 148
 149 private:
 150   unsigned LDSWaveSpillSize;
 151   unsigned ScratchOffsetReg;
 152   unsigned NumUserSGPRs;
 153   unsigned NumSystemSGPRs;
 154
 155   bool HasSpilledSGPRs;
 156   bool HasSpilledVGPRs;
 157   bool HasNonSpillStackObjects;
 158
 159   unsigned NumSpilledSGPRs;
 160   unsigned NumSpilledVGPRs;
 161
 162   // Feature bits required for inputs passed in user SGPRs.
 163   bool PrivateSegmentBuffer : 1;
 164   bool DispatchPtr : 1;
 165   bool QueuePtr : 1;
 166   bool KernargSegmentPtr : 1;
 167   bool DispatchID : 1;
 168   bool FlatScratchInit : 1;
 169   bool GridWorkgroupCountX : 1;
 170   bool GridWorkgroupCountY : 1;
 171   bool GridWorkgroupCountZ : 1;
 172
 173   // Feature bits required for inputs passed in system SGPRs.
 174   bool WorkGroupIDX : 1; // Always initialized.
 175   bool WorkGroupIDY : 1;
 176   bool WorkGroupIDZ : 1;
 177   bool WorkGroupInfo : 1;
 178   bool PrivateSegmentWaveByteOffset : 1;
 179
 180   bool WorkItemIDX : 1; // Always initialized.
 181   bool WorkItemIDY : 1;
 182   bool WorkItemIDZ : 1;
 183
 184   // Private memory buffer
 185   // Compute directly in sgpr[0:1]
 186   // Other shaders indirect 64-bits at sgpr[0:1]
 187   bool ImplicitBufferPtr : 1;
 188
 189   MCPhysReg getNextUserSGPR() const {
 190     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
 191     return AMDGPU::SGPR0 + NumUserSGPRs;
 192   }
 193
 194   MCPhysReg getNextSystemSGPR() const {
 195     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 196   }
 197
 198 public:
 199   struct SpilledReg {
 200     unsigned VGPR = AMDGPU::NoRegister;
 201     int Lane = -1;
 202
 203     SpilledReg() = default;
 204     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
 205
 206     bool hasLane() { return Lane != -1;}
 207     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
 208   };
 209
 210 private:
 211   // SGPR->VGPR spilling support.
 212   typedef std::pair<unsigned, unsigned> SpillRegMask;
 213
 214   // Track VGPR + wave index for each subregister of the SGPR spilled to
 215   // frameindex key.
 216   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 217   unsigned NumVGPRSpillLanes = 0;
 218   SmallVector<unsigned, 2> SpillVGPRs;
 219
 220 public:
 221
 222   SIMachineFunctionInfo(const MachineFunction &MF);
 223
 224   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 225     auto I = SGPRToVGPRSpills.find(FrameIndex);
 226     return (I == SGPRToVGPRSpills.end()) ?
 227       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 228   }
 229
 230   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 231   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
 232
 233   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
 234   unsigned getTIDReg() const { return TIDReg; };
 235   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
 236
 237   // Add user SGPRs.
 238   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 239   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
 240   unsigned addQueuePtr(const SIRegisterInfo &TRI);
 241   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
 242   unsigned addDispatchID(const SIRegisterInfo &TRI);
 243   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
 244   unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
 245
 246   // Add system SGPRs.
 247   unsigned addWorkGroupIDX() {
 248     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
 249     NumSystemSGPRs += 1;
 250     return WorkGroupIDXSystemSGPR;
 251   }
 252
 253   unsigned addWorkGroupIDY() {
 254     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
 255     NumSystemSGPRs += 1;
 256     return WorkGroupIDYSystemSGPR;
 257   }
 258
 259   unsigned addWorkGroupIDZ() {
 260     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
 261     NumSystemSGPRs += 1;
 262     return WorkGroupIDZSystemSGPR;
 263   }
 264
 265   unsigned addWorkGroupInfo() {
 266     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
 267     NumSystemSGPRs += 1;
 268     return WorkGroupInfoSystemSGPR;
 269   }
 270
 271   unsigned addPrivateSegmentWaveByteOffset() {
 272     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
 273     NumSystemSGPRs += 1;
 274     return PrivateSegmentWaveByteOffsetSystemSGPR;
 275   }
 276
 277   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
 278     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
 279   }
 280
 281   bool hasPrivateSegmentBuffer() const {
 282     return PrivateSegmentBuffer;
 283   }
 284
 285   bool hasDispatchPtr() const {
 286     return DispatchPtr;
 287   }
 288
 289   bool hasQueuePtr() const {
 290     return QueuePtr;
 291   }
 292
 293   bool hasKernargSegmentPtr() const {
 294     return KernargSegmentPtr;
 295   }
 296
 297   bool hasDispatchID() const {
 298     return DispatchID;
 299   }
 300
 301   bool hasFlatScratchInit() const {
 302     return FlatScratchInit;
 303   }
 304
 305   bool hasGridWorkgroupCountX() const {
 306     return GridWorkgroupCountX;
 307   }
 308
 309   bool hasGridWorkgroupCountY() const {
 310     return GridWorkgroupCountY;
 311   }
 312
 313   bool hasGridWorkgroupCountZ() const {
 314     return GridWorkgroupCountZ;
 315   }
 316
 317   bool hasWorkGroupIDX() const {
 318     return WorkGroupIDX;
 319   }
 320
 321   bool hasWorkGroupIDY() const {
 322     return WorkGroupIDY;
 323   }
 324
 325   bool hasWorkGroupIDZ() const {
 326     return WorkGroupIDZ;
 327   }
 328
 329   bool hasWorkGroupInfo() const {
 330     return WorkGroupInfo;
 331   }
 332
 333   bool hasPrivateSegmentWaveByteOffset() const {
 334     return PrivateSegmentWaveByteOffset;
 335   }
 336
 337   bool hasWorkItemIDX() const {
 338     return WorkItemIDX;
 339   }
 340
 341   bool hasWorkItemIDY() const {
 342     return WorkItemIDY;
 343   }
 344
 345   bool hasWorkItemIDZ() const {
 346     return WorkItemIDZ;
 347   }
 348
 349   bool hasImplicitBufferPtr() const {
 350     return ImplicitBufferPtr;
 351   }
 352
 353   unsigned getNumUserSGPRs() const {
 354     return NumUserSGPRs;
 355   }
 356
 357   unsigned getNumPreloadedSGPRs() const {
 358     return NumUserSGPRs + NumSystemSGPRs;
 359   }
 360
 361   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 362     return PrivateSegmentWaveByteOffsetSystemSGPR;
 363   }
 364
 365   /// \brief Returns the physical register reserved for use as the resource
 366   /// descriptor for scratch accesses.
 367   unsigned getScratchRSrcReg() const {
 368     return ScratchRSrcReg;
 369   }
 370
 371   void setScratchRSrcReg(unsigned Reg) {
 372     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 373     ScratchRSrcReg = Reg;
 374   }
 375
 376   unsigned getScratchWaveOffsetReg() const {
 377     return ScratchWaveOffsetReg;
 378   }
 379
 380   unsigned getFrameOffsetReg() const {
 381     return FrameOffsetReg;
 382   }
 383
 384   void setStackPtrOffsetReg(unsigned Reg) {
 385     StackPtrOffsetReg = Reg;
 386   }
 387
 388   // Note the unset value for this is AMDGPU::SP_REG rather than
 389   // NoRegister. This is mostly a workaround for MIR tests where state that
 390   // can't be directly computed from the function is not preserved in serialized
 391   // MIR.
 392   unsigned getStackPtrOffsetReg() const {
 393     return StackPtrOffsetReg;
 394   }
 395
 396   void setScratchWaveOffsetReg(unsigned Reg) {
 397     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 398     ScratchWaveOffsetReg = Reg;
 399     if (isEntryFunction())
 400       FrameOffsetReg = ScratchWaveOffsetReg;
 401   }
 402
 403   unsigned getQueuePtrUserSGPR() const {
 404     return QueuePtrUserSGPR;
 405   }
 406
 407   unsigned getImplicitBufferPtrUserSGPR() const {
 408     return ImplicitBufferPtrUserSGPR;
 409   }
 410
 411   bool hasSpilledSGPRs() const {
 412     return HasSpilledSGPRs;
 413   }
 414
 415   void setHasSpilledSGPRs(bool Spill = true) {
 416     HasSpilledSGPRs = Spill;
 417   }
 418
 419   bool hasSpilledVGPRs() const {
 420     return HasSpilledVGPRs;
 421   }
 422
 423   void setHasSpilledVGPRs(bool Spill = true) {
 424     HasSpilledVGPRs = Spill;
 425   }
 426
 427   bool hasNonSpillStackObjects() const {
 428     return HasNonSpillStackObjects;
 429   }
 430
 431   void setHasNonSpillStackObjects(bool StackObject = true) {
 432     HasNonSpillStackObjects = StackObject;
 433   }
 434
 435   unsigned getNumSpilledSGPRs() const {
 436     return NumSpilledSGPRs;
 437   }
 438
 439   unsigned getNumSpilledVGPRs() const {
 440     return NumSpilledVGPRs;
 441   }
 442
 443   void addToSpilledSGPRs(unsigned num) {
 444     NumSpilledSGPRs += num;
 445   }
 446
 447   void addToSpilledVGPRs(unsigned num) {
 448     NumSpilledVGPRs += num;
 449   }
 450
 451   unsigned getPSInputAddr() const {
 452     return PSInputAddr;
 453   }
 454
 455   unsigned getPSInputEnable() const {
 456     return PSInputEnable;
 457   }
 458
 459   bool isPSInputAllocated(unsigned Index) const {
 460     return PSInputAddr & (1 << Index);
 461   }
 462
 463   void markPSInputAllocated(unsigned Index) {
 464     PSInputAddr |= 1 << Index;
 465   }
 466
 467   void markPSInputEnabled(unsigned Index) {
 468     PSInputEnable |= 1 << Index;
 469   }
 470
 471   bool returnsVoid() const {
 472     return ReturnsVoid;
 473   }
 474
 475   void setIfReturnsVoid(bool Value) {
 476     ReturnsVoid = Value;
 477   }
 478
 479   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 480   /// for this function.
 481   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 482     return FlatWorkGroupSizes;
 483   }
 484
 485   /// \returns Default/requested minimum flat work group size for this function.
 486   unsigned getMinFlatWorkGroupSize() const {
 487     return FlatWorkGroupSizes.first;
 488   }
 489
 490   /// \returns Default/requested maximum flat work group size for this function.
 491   unsigned getMaxFlatWorkGroupSize() const {
 492     return FlatWorkGroupSizes.second;
 493   }
 494
 495   /// \returns A pair of default/requested minimum/maximum number of waves per
 496   /// execution unit.
 497   std::pair<unsigned, unsigned> getWavesPerEU() const {
 498     return WavesPerEU;
 499   }
 500
 501   /// \returns Default/requested minimum number of waves per execution unit.
 502   unsigned getMinWavesPerEU() const {
 503     return WavesPerEU.first;
 504   }
 505
 506   /// \returns Default/requested maximum number of waves per execution unit.
 507   unsigned getMaxWavesPerEU() const {
 508     return WavesPerEU.second;
 509   }
 510
 511   /// \returns Stack object index for \p Dim's work group ID.
 512   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
 513     assert(Dim < 3);
 514     return DebuggerWorkGroupIDStackObjectIndices[Dim];
 515   }
 516
 517   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
 518   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 519     assert(Dim < 3);
 520     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
 521   }
 522
 523   /// \returns Stack object index for \p Dim's work item ID.
 524   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
 525     assert(Dim < 3);
 526     return DebuggerWorkItemIDStackObjectIndices[Dim];
 527   }
 528
 529   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
 530   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 531     assert(Dim < 3);
 532     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
 533   }
 534
 535   /// \returns SGPR used for \p Dim's work group ID.
 536   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
 537     switch (Dim) {
 538     case 0:
 539       assert(hasWorkGroupIDX());
 540       return WorkGroupIDXSystemSGPR;
 541     case 1:
 542       assert(hasWorkGroupIDY());
 543       return WorkGroupIDYSystemSGPR;
 544     case 2:
 545       assert(hasWorkGroupIDZ());
 546       return WorkGroupIDZSystemSGPR;
 547     }
 548     llvm_unreachable("unexpected dimension");
 549   }
 550
 551   /// \returns VGPR used for \p Dim' work item ID.
 552   unsigned getWorkItemIDVGPR(unsigned Dim) const {
 553     switch (Dim) {
 554     case 0:
 555       assert(hasWorkItemIDX());
 556       return AMDGPU::VGPR0;
 557     case 1:
 558       assert(hasWorkItemIDY());
 559       return AMDGPU::VGPR1;
 560     case 2:
 561       assert(hasWorkItemIDZ());
 562       return AMDGPU::VGPR2;
 563     }
 564     llvm_unreachable("unexpected dimension");
 565   }
 566
 567   unsigned getLDSWaveSpillSize() const {
 568     return LDSWaveSpillSize;
 569   }
 570
 571   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
 572     return &BufferPSV;
 573   }
 574
 575   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
 576     return &ImagePSV;
 577   }
 578 };
 579
 580 } // end namespace llvm
 581
 582 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H