contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

   1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
  16
  17 #include "AMDGPUMachineFunction.h"
  18 #include "SIRegisterInfo.h"
  19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  20 #include "llvm/CodeGen/PseudoSourceValue.h"
  21 #include "llvm/MC/MCRegisterInfo.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include <array>
  24 #include <cassert>
  25 #include <map>
  26 #include <utility>
  27
  28 namespace llvm {
  29
  30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
  31 public:
  32   explicit AMDGPUImagePseudoSourceValue() :
  33     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  34
  35   bool isConstant(const MachineFrameInfo *) const override {
  36     // This should probably be true for most images, but we will start by being
  37     // conservative.
  38     return false;
  39   }
  40
  41   bool isAliased(const MachineFrameInfo *) const override {
  42     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  43     // this could be true for some cases.
  44     return false;
  45   }
  46
  47   bool mayAlias(const MachineFrameInfo*) const override {
  48     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  49     // this could be true for some cases.
  50     return false;
  51   }
  52 };
  53
  54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
  55 public:
  56   explicit AMDGPUBufferPseudoSourceValue() :
  57     PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
  58
  59   bool isConstant(const MachineFrameInfo *) const override {
  60     // This should probably be true for most images, but we will start by being
  61     // conservative.
  62     return false;
  63   }
  64
  65   bool isAliased(const MachineFrameInfo *) const override {
  66     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  67     // this could be true for some cases.
  68     return false;
  69   }
  70
  71   bool mayAlias(const MachineFrameInfo*) const override {
  72     // FIXME: If we ever change image intrinsics to accept fat pointers, then
  73     // this could be true for some cases.
  74     return false;
  75   }
  76 };
  77
  78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
  79 /// tells the hardware which interpolation parameters to load.
  80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  81   // FIXME: This should be removed and getPreloadedValue moved here.
  82   friend class SIRegisterInfo;
  83
  84   unsigned TIDReg;
  85
  86   // Registers that may be reserved for spilling purposes. These may be the same
  87   // as the input registers.
  88   unsigned ScratchRSrcReg;
  89   unsigned ScratchWaveOffsetReg;
  90
  91   // This is the current function's incremented size from the kernel's scratch
  92   // wave offset register. For an entry function, this is exactly the same as
  93   // the ScratchWaveOffsetReg.
  94   unsigned FrameOffsetReg;
  95
  96   // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  97   unsigned StackPtrOffsetReg;
  98
  99   // Input registers for non-HSA ABI
 100   unsigned PrivateMemoryPtrUserSGPR;
 101
 102   // Input registers setup for the HSA ABI.
 103   // User SGPRs in allocation order.
 104   unsigned PrivateSegmentBufferUserSGPR;
 105   unsigned DispatchPtrUserSGPR;
 106   unsigned QueuePtrUserSGPR;
 107   unsigned KernargSegmentPtrUserSGPR;
 108   unsigned DispatchIDUserSGPR;
 109   unsigned FlatScratchInitUserSGPR;
 110   unsigned PrivateSegmentSizeUserSGPR;
 111   unsigned GridWorkGroupCountXUserSGPR;
 112   unsigned GridWorkGroupCountYUserSGPR;
 113   unsigned GridWorkGroupCountZUserSGPR;
 114
 115   // System SGPRs in allocation order.
 116   unsigned WorkGroupIDXSystemSGPR;
 117   unsigned WorkGroupIDYSystemSGPR;
 118   unsigned WorkGroupIDZSystemSGPR;
 119   unsigned WorkGroupInfoSystemSGPR;
 120   unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
 121
 122   // Graphics info.
 123   unsigned PSInputAddr;
 124   unsigned PSInputEnable;
 125
 126   bool ReturnsVoid;
 127
 128   // A pair of default/requested minimum/maximum flat work group sizes.
 129   // Minimum - first, maximum - second.
 130   std::pair<unsigned, unsigned> FlatWorkGroupSizes;
 131
 132   // A pair of default/requested minimum/maximum number of waves per execution
 133   // unit. Minimum - first, maximum - second.
 134   std::pair<unsigned, unsigned> WavesPerEU;
 135
 136   // Stack object indices for work group IDs.
 137   std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
 138   // Stack object indices for work item IDs.
 139   std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
 140
 141   AMDGPUBufferPseudoSourceValue BufferPSV;
 142   AMDGPUImagePseudoSourceValue ImagePSV;
 143
 144 private:
 145   unsigned LDSWaveSpillSize;
 146   unsigned ScratchOffsetReg;
 147   unsigned NumUserSGPRs;
 148   unsigned NumSystemSGPRs;
 149
 150   bool HasSpilledSGPRs;
 151   bool HasSpilledVGPRs;
 152   bool HasNonSpillStackObjects;
 153
 154   unsigned NumSpilledSGPRs;
 155   unsigned NumSpilledVGPRs;
 156
 157   // Feature bits required for inputs passed in user SGPRs.
 158   bool PrivateSegmentBuffer : 1;
 159   bool DispatchPtr : 1;
 160   bool QueuePtr : 1;
 161   bool KernargSegmentPtr : 1;
 162   bool DispatchID : 1;
 163   bool FlatScratchInit : 1;
 164   bool GridWorkgroupCountX : 1;
 165   bool GridWorkgroupCountY : 1;
 166   bool GridWorkgroupCountZ : 1;
 167
 168   // Feature bits required for inputs passed in system SGPRs.
 169   bool WorkGroupIDX : 1; // Always initialized.
 170   bool WorkGroupIDY : 1;
 171   bool WorkGroupIDZ : 1;
 172   bool WorkGroupInfo : 1;
 173   bool PrivateSegmentWaveByteOffset : 1;
 174
 175   bool WorkItemIDX : 1; // Always initialized.
 176   bool WorkItemIDY : 1;
 177   bool WorkItemIDZ : 1;
 178
 179   // Private memory buffer
 180   // Compute directly in sgpr[0:1]
 181   // Other shaders indirect 64-bits at sgpr[0:1]
 182   bool PrivateMemoryInputPtr : 1;
 183
 184   MCPhysReg getNextUserSGPR() const {
 185     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
 186     return AMDGPU::SGPR0 + NumUserSGPRs;
 187   }
 188
 189   MCPhysReg getNextSystemSGPR() const {
 190     return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 191   }
 192
 193 public:
 194   struct SpilledReg {
 195     unsigned VGPR = AMDGPU::NoRegister;
 196     int Lane = -1;
 197
 198     SpilledReg() = default;
 199     SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
 200
 201     bool hasLane() { return Lane != -1;}
 202     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
 203   };
 204
 205 private:
 206   // SGPR->VGPR spilling support.
 207   typedef std::pair<unsigned, unsigned> SpillRegMask;
 208
 209   // Track VGPR + wave index for each subregister of the SGPR spilled to
 210   // frameindex key.
 211   DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
 212   unsigned NumVGPRSpillLanes = 0;
 213   SmallVector<unsigned, 2> SpillVGPRs;
 214
 215 public:
 216
 217   SIMachineFunctionInfo(const MachineFunction &MF);
 218
 219   ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
 220     auto I = SGPRToVGPRSpills.find(FrameIndex);
 221     return (I == SGPRToVGPRSpills.end()) ?
 222       ArrayRef<SpilledReg>() : makeArrayRef(I->second);
 223   }
 224
 225   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
 226   void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
 227
 228   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
 229   unsigned getTIDReg() const { return TIDReg; };
 230   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
 231
 232   // Add user SGPRs.
 233   unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
 234   unsigned addDispatchPtr(const SIRegisterInfo &TRI);
 235   unsigned addQueuePtr(const SIRegisterInfo &TRI);
 236   unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
 237   unsigned addDispatchID(const SIRegisterInfo &TRI);
 238   unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
 239   unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
 240
 241   // Add system SGPRs.
 242   unsigned addWorkGroupIDX() {
 243     WorkGroupIDXSystemSGPR = getNextSystemSGPR();
 244     NumSystemSGPRs += 1;
 245     return WorkGroupIDXSystemSGPR;
 246   }
 247
 248   unsigned addWorkGroupIDY() {
 249     WorkGroupIDYSystemSGPR = getNextSystemSGPR();
 250     NumSystemSGPRs += 1;
 251     return WorkGroupIDYSystemSGPR;
 252   }
 253
 254   unsigned addWorkGroupIDZ() {
 255     WorkGroupIDZSystemSGPR = getNextSystemSGPR();
 256     NumSystemSGPRs += 1;
 257     return WorkGroupIDZSystemSGPR;
 258   }
 259
 260   unsigned addWorkGroupInfo() {
 261     WorkGroupInfoSystemSGPR = getNextSystemSGPR();
 262     NumSystemSGPRs += 1;
 263     return WorkGroupInfoSystemSGPR;
 264   }
 265
 266   unsigned addPrivateSegmentWaveByteOffset() {
 267     PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
 268     NumSystemSGPRs += 1;
 269     return PrivateSegmentWaveByteOffsetSystemSGPR;
 270   }
 271
 272   void setPrivateSegmentWaveByteOffset(unsigned Reg) {
 273     PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
 274   }
 275
 276   bool hasPrivateSegmentBuffer() const {
 277     return PrivateSegmentBuffer;
 278   }
 279
 280   bool hasDispatchPtr() const {
 281     return DispatchPtr;
 282   }
 283
 284   bool hasQueuePtr() const {
 285     return QueuePtr;
 286   }
 287
 288   bool hasKernargSegmentPtr() const {
 289     return KernargSegmentPtr;
 290   }
 291
 292   bool hasDispatchID() const {
 293     return DispatchID;
 294   }
 295
 296   bool hasFlatScratchInit() const {
 297     return FlatScratchInit;
 298   }
 299
 300   bool hasGridWorkgroupCountX() const {
 301     return GridWorkgroupCountX;
 302   }
 303
 304   bool hasGridWorkgroupCountY() const {
 305     return GridWorkgroupCountY;
 306   }
 307
 308   bool hasGridWorkgroupCountZ() const {
 309     return GridWorkgroupCountZ;
 310   }
 311
 312   bool hasWorkGroupIDX() const {
 313     return WorkGroupIDX;
 314   }
 315
 316   bool hasWorkGroupIDY() const {
 317     return WorkGroupIDY;
 318   }
 319
 320   bool hasWorkGroupIDZ() const {
 321     return WorkGroupIDZ;
 322   }
 323
 324   bool hasWorkGroupInfo() const {
 325     return WorkGroupInfo;
 326   }
 327
 328   bool hasPrivateSegmentWaveByteOffset() const {
 329     return PrivateSegmentWaveByteOffset;
 330   }
 331
 332   bool hasWorkItemIDX() const {
 333     return WorkItemIDX;
 334   }
 335
 336   bool hasWorkItemIDY() const {
 337     return WorkItemIDY;
 338   }
 339
 340   bool hasWorkItemIDZ() const {
 341     return WorkItemIDZ;
 342   }
 343
 344   bool hasPrivateMemoryInputPtr() const {
 345     return PrivateMemoryInputPtr;
 346   }
 347
 348   unsigned getNumUserSGPRs() const {
 349     return NumUserSGPRs;
 350   }
 351
 352   unsigned getNumPreloadedSGPRs() const {
 353     return NumUserSGPRs + NumSystemSGPRs;
 354   }
 355
 356   unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
 357     return PrivateSegmentWaveByteOffsetSystemSGPR;
 358   }
 359
 360   /// \brief Returns the physical register reserved for use as the resource
 361   /// descriptor for scratch accesses.
 362   unsigned getScratchRSrcReg() const {
 363     return ScratchRSrcReg;
 364   }
 365
 366   void setScratchRSrcReg(unsigned Reg) {
 367     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 368     ScratchRSrcReg = Reg;
 369   }
 370
 371   unsigned getScratchWaveOffsetReg() const {
 372     return ScratchWaveOffsetReg;
 373   }
 374
 375   unsigned getFrameOffsetReg() const {
 376     return FrameOffsetReg;
 377   }
 378
 379   void setStackPtrOffsetReg(unsigned Reg) {
 380     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 381     StackPtrOffsetReg = Reg;
 382   }
 383
 384   unsigned getStackPtrOffsetReg() const {
 385     return StackPtrOffsetReg;
 386   }
 387
 388   void setScratchWaveOffsetReg(unsigned Reg) {
 389     assert(Reg != AMDGPU::NoRegister && "Should never be unset");
 390     ScratchWaveOffsetReg = Reg;
 391
 392     // FIXME: Only for entry functions.
 393     FrameOffsetReg = ScratchWaveOffsetReg;
 394   }
 395
 396   unsigned getQueuePtrUserSGPR() const {
 397     return QueuePtrUserSGPR;
 398   }
 399
 400   unsigned getPrivateMemoryPtrUserSGPR() const {
 401     return PrivateMemoryPtrUserSGPR;
 402   }
 403
 404   bool hasSpilledSGPRs() const {
 405     return HasSpilledSGPRs;
 406   }
 407
 408   void setHasSpilledSGPRs(bool Spill = true) {
 409     HasSpilledSGPRs = Spill;
 410   }
 411
 412   bool hasSpilledVGPRs() const {
 413     return HasSpilledVGPRs;
 414   }
 415
 416   void setHasSpilledVGPRs(bool Spill = true) {
 417     HasSpilledVGPRs = Spill;
 418   }
 419
 420   bool hasNonSpillStackObjects() const {
 421     return HasNonSpillStackObjects;
 422   }
 423
 424   void setHasNonSpillStackObjects(bool StackObject = true) {
 425     HasNonSpillStackObjects = StackObject;
 426   }
 427
 428   unsigned getNumSpilledSGPRs() const {
 429     return NumSpilledSGPRs;
 430   }
 431
 432   unsigned getNumSpilledVGPRs() const {
 433     return NumSpilledVGPRs;
 434   }
 435
 436   void addToSpilledSGPRs(unsigned num) {
 437     NumSpilledSGPRs += num;
 438   }
 439
 440   void addToSpilledVGPRs(unsigned num) {
 441     NumSpilledVGPRs += num;
 442   }
 443
 444   unsigned getPSInputAddr() const {
 445     return PSInputAddr;
 446   }
 447
 448   unsigned getPSInputEnable() const {
 449     return PSInputEnable;
 450   }
 451
 452   bool isPSInputAllocated(unsigned Index) const {
 453     return PSInputAddr & (1 << Index);
 454   }
 455
 456   void markPSInputAllocated(unsigned Index) {
 457     PSInputAddr |= 1 << Index;
 458   }
 459
 460   void markPSInputEnabled(unsigned Index) {
 461     PSInputEnable |= 1 << Index;
 462   }
 463
 464   bool returnsVoid() const {
 465     return ReturnsVoid;
 466   }
 467
 468   void setIfReturnsVoid(bool Value) {
 469     ReturnsVoid = Value;
 470   }
 471
 472   /// \returns A pair of default/requested minimum/maximum flat work group sizes
 473   /// for this function.
 474   std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
 475     return FlatWorkGroupSizes;
 476   }
 477
 478   /// \returns Default/requested minimum flat work group size for this function.
 479   unsigned getMinFlatWorkGroupSize() const {
 480     return FlatWorkGroupSizes.first;
 481   }
 482
 483   /// \returns Default/requested maximum flat work group size for this function.
 484   unsigned getMaxFlatWorkGroupSize() const {
 485     return FlatWorkGroupSizes.second;
 486   }
 487
 488   /// \returns A pair of default/requested minimum/maximum number of waves per
 489   /// execution unit.
 490   std::pair<unsigned, unsigned> getWavesPerEU() const {
 491     return WavesPerEU;
 492   }
 493
 494   /// \returns Default/requested minimum number of waves per execution unit.
 495   unsigned getMinWavesPerEU() const {
 496     return WavesPerEU.first;
 497   }
 498
 499   /// \returns Default/requested maximum number of waves per execution unit.
 500   unsigned getMaxWavesPerEU() const {
 501     return WavesPerEU.second;
 502   }
 503
 504   /// \returns Stack object index for \p Dim's work group ID.
 505   int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
 506     assert(Dim < 3);
 507     return DebuggerWorkGroupIDStackObjectIndices[Dim];
 508   }
 509
 510   /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
 511   void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 512     assert(Dim < 3);
 513     DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
 514   }
 515
 516   /// \returns Stack object index for \p Dim's work item ID.
 517   int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
 518     assert(Dim < 3);
 519     return DebuggerWorkItemIDStackObjectIndices[Dim];
 520   }
 521
 522   /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
 523   void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
 524     assert(Dim < 3);
 525     DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
 526   }
 527
 528   /// \returns SGPR used for \p Dim's work group ID.
 529   unsigned getWorkGroupIDSGPR(unsigned Dim) const {
 530     switch (Dim) {
 531     case 0:
 532       assert(hasWorkGroupIDX());
 533       return WorkGroupIDXSystemSGPR;
 534     case 1:
 535       assert(hasWorkGroupIDY());
 536       return WorkGroupIDYSystemSGPR;
 537     case 2:
 538       assert(hasWorkGroupIDZ());
 539       return WorkGroupIDZSystemSGPR;
 540     }
 541     llvm_unreachable("unexpected dimension");
 542   }
 543
 544   /// \returns VGPR used for \p Dim' work item ID.
 545   unsigned getWorkItemIDVGPR(unsigned Dim) const {
 546     switch (Dim) {
 547     case 0:
 548       assert(hasWorkItemIDX());
 549       return AMDGPU::VGPR0;
 550     case 1:
 551       assert(hasWorkItemIDY());
 552       return AMDGPU::VGPR1;
 553     case 2:
 554       assert(hasWorkItemIDZ());
 555       return AMDGPU::VGPR2;
 556     }
 557     llvm_unreachable("unexpected dimension");
 558   }
 559
 560   unsigned getLDSWaveSpillSize() const {
 561     return LDSWaveSpillSize;
 562   }
 563
 564   const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
 565     return &BufferPSV;
 566   }
 567
 568   const AMDGPUImagePseudoSourceValue *getImagePSV() const {
 569     return &ImagePSV;
 570   }
 571 };
 572
 573 } // end namespace llvm
 574
 575 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H