lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

   1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "SIMachineFunctionInfo.h"
  11 #include "AMDGPUArgumentUsageInfo.h"
  12 #include "AMDGPUSubtarget.h"
  13 #include "SIRegisterInfo.h"
  14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  15 #include "Utils/AMDGPUBaseInfo.h"
  16 #include "llvm/ADT/Optional.h"
  17 #include "llvm/CodeGen/MachineBasicBlock.h"
  18 #include "llvm/CodeGen/MachineFrameInfo.h"
  19 #include "llvm/CodeGen/MachineFunction.h"
  20 #include "llvm/CodeGen/MachineRegisterInfo.h"
  21 #include "llvm/IR/CallingConv.h"
  22 #include "llvm/IR/Function.h"
  23 #include <cassert>
  24 #include <vector>
  25
  26 #define MAX_LANES 64
  27
  28 using namespace llvm;
  29
  30 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  31   : AMDGPUMachineFunction(MF),
  32     PrivateSegmentBuffer(false),
  33     DispatchPtr(false),
  34     QueuePtr(false),
  35     KernargSegmentPtr(false),
  36     DispatchID(false),
  37     FlatScratchInit(false),
  38     WorkGroupIDX(false),
  39     WorkGroupIDY(false),
  40     WorkGroupIDZ(false),
  41     WorkGroupInfo(false),
  42     PrivateSegmentWaveByteOffset(false),
  43     WorkItemIDX(false),
  44     WorkItemIDY(false),
  45     WorkItemIDZ(false),
  46     ImplicitBufferPtr(false),
  47     ImplicitArgPtr(false),
  48     GITPtrHigh(0xffffffff),
  49     HighBitsOf32BitAddress(0) {
  50   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  51   const Function &F = MF.getFunction();
  52   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  53   WavesPerEU = ST.getWavesPerEU(F);
  54
  55   Occupancy = getMaxWavesPerEU();
  56   limitOccupancy(MF);
  57   CallingConv::ID CC = F.getCallingConv();
  58
  59   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
  60     if (!F.arg_empty())
  61       KernargSegmentPtr = true;
  62     WorkGroupIDX = true;
  63     WorkItemIDX = true;
  64   } else if (CC == CallingConv::AMDGPU_PS) {
  65     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  66   }
  67
  68   if (!isEntryFunction()) {
  69     // Non-entry functions have no special inputs for now, other registers
  70     // required for scratch access.
  71     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  72     ScratchWaveOffsetReg = AMDGPU::SGPR4;
  73     FrameOffsetReg = AMDGPU::SGPR5;
  74     StackPtrOffsetReg = AMDGPU::SGPR32;
  75
  76     ArgInfo.PrivateSegmentBuffer =
  77       ArgDescriptor::createRegister(ScratchRSrcReg);
  78     ArgInfo.PrivateSegmentWaveByteOffset =
  79       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
  80
  81     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  82       ImplicitArgPtr = true;
  83   } else {
  84     if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
  85       KernargSegmentPtr = true;
  86       MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
  87                                  MaxKernArgAlign);
  88     }
  89   }
  90
  91   if (ST.debuggerEmitPrologue()) {
  92     // Enable everything.
  93     WorkGroupIDX = true;
  94     WorkGroupIDY = true;
  95     WorkGroupIDZ = true;
  96     WorkItemIDX = true;
  97     WorkItemIDY = true;
  98     WorkItemIDZ = true;
  99   } else {
 100     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
 101       WorkGroupIDX = true;
 102
 103     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
 104       WorkGroupIDY = true;
 105
 106     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
 107       WorkGroupIDZ = true;
 108
 109     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
 110       WorkItemIDX = true;
 111
 112     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
 113       WorkItemIDY = true;
 114
 115     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
 116       WorkItemIDZ = true;
 117   }
 118
 119   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 120   bool HasStackObjects = FrameInfo.hasStackObjects();
 121
 122   if (isEntryFunction()) {
 123     // X, XY, and XYZ are the only supported combinations, so make sure Y is
 124     // enabled if Z is.
 125     if (WorkItemIDZ)
 126       WorkItemIDY = true;
 127
 128     PrivateSegmentWaveByteOffset = true;
 129
 130     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
 131     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
 132         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
 133       ArgInfo.PrivateSegmentWaveByteOffset =
 134           ArgDescriptor::createRegister(AMDGPU::SGPR5);
 135   }
 136
 137   bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
 138   if (isAmdHsaOrMesa) {
 139     PrivateSegmentBuffer = true;
 140
 141     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
 142       DispatchPtr = true;
 143
 144     if (F.hasFnAttribute("amdgpu-queue-ptr"))
 145       QueuePtr = true;
 146
 147     if (F.hasFnAttribute("amdgpu-dispatch-id"))
 148       DispatchID = true;
 149   } else if (ST.isMesaGfxShader(F)) {
 150     ImplicitBufferPtr = true;
 151   }
 152
 153   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
 154     KernargSegmentPtr = true;
 155
 156   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
 157     // TODO: This could be refined a lot. The attribute is a poor way of
 158     // detecting calls that may require it before argument lowering.
 159     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
 160       FlatScratchInit = true;
 161   }
 162
 163   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
 164   StringRef S = A.getValueAsString();
 165   if (!S.empty())
 166     S.consumeInteger(0, GITPtrHigh);
 167
 168   A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
 169   S = A.getValueAsString();
 170   if (!S.empty())
 171     S.consumeInteger(0, HighBitsOf32BitAddress);
 172 }
 173
 174 void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
 175   limitOccupancy(getMaxWavesPerEU());
 176   const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
 177   limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
 178                  MF.getFunction()));
 179 }
 180
 181 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
 182   const SIRegisterInfo &TRI) {
 183   ArgInfo.PrivateSegmentBuffer =
 184     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 185     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
 186   NumUserSGPRs += 4;
 187   return ArgInfo.PrivateSegmentBuffer.getRegister();
 188 }
 189
 190 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
 191   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 192     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 193   NumUserSGPRs += 2;
 194   return ArgInfo.DispatchPtr.getRegister();
 195 }
 196
 197 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
 198   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 199     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 200   NumUserSGPRs += 2;
 201   return ArgInfo.QueuePtr.getRegister();
 202 }
 203
 204 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
 205   ArgInfo.KernargSegmentPtr
 206     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 207     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 208   NumUserSGPRs += 2;
 209   return ArgInfo.KernargSegmentPtr.getRegister();
 210 }
 211
 212 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
 213   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 214     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 215   NumUserSGPRs += 2;
 216   return ArgInfo.DispatchID.getRegister();
 217 }
 218
 219 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
 220   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 221     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 222   NumUserSGPRs += 2;
 223   return ArgInfo.FlatScratchInit.getRegister();
 224 }
 225
 226 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
 227   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 228     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 229   NumUserSGPRs += 2;
 230   return ArgInfo.ImplicitBufferPtr.getRegister();
 231 }
 232
 233 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
 234   for (unsigned I = 0; CSRegs[I]; ++I) {
 235     if (CSRegs[I] == Reg)
 236       return true;
 237   }
 238
 239   return false;
 240 }
 241
 242 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 243 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 244                                                     int FI) {
 245   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 246
 247   // This has already been allocated.
 248   if (!SpillLanes.empty())
 249     return true;
 250
 251   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 252   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 253   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 254   MachineRegisterInfo &MRI = MF.getRegInfo();
 255   unsigned WaveSize = ST.getWavefrontSize();
 256
 257   unsigned Size = FrameInfo.getObjectSize(FI);
 258   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
 259   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 260
 261   int NumLanes = Size / 4;
 262
 263   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 264
 265   // Make sure to handle the case where a wide SGPR spill may span between two
 266   // VGPRs.
 267   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
 268     unsigned LaneVGPR;
 269     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 270
 271     if (VGPRIndex == 0) {
 272       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
 273       if (LaneVGPR == AMDGPU::NoRegister) {
 274         // We have no VGPRs left for spilling SGPRs. Reset because we will not
 275         // partially spill the SGPR to VGPRs.
 276         SGPRToVGPRSpills.erase(FI);
 277         NumVGPRSpillLanes -= I;
 278         return false;
 279       }
 280
 281       Optional<int> CSRSpillFI;
 282       if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
 283           isCalleeSavedReg(CSRegs, LaneVGPR)) {
 284         CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
 285       }
 286
 287       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
 288
 289       // Add this register as live-in to all blocks to avoid machine verifer
 290       // complaining about use of an undefined physical register.
 291       for (MachineBasicBlock &BB : MF)
 292         BB.addLiveIn(LaneVGPR);
 293     } else {
 294       LaneVGPR = SpillVGPRs.back().VGPR;
 295     }
 296
 297     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
 298   }
 299
 300   return true;
 301 }
 302
 303 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
 304   for (auto &R : SGPRToVGPRSpills)
 305     MFI.RemoveStackObject(R.first);
 306 }
 307
 308
 309 /// \returns VGPR used for \p Dim' work item ID.
 310 unsigned SIMachineFunctionInfo::getWorkItemIDVGPR(unsigned Dim) const {
 311   switch (Dim) {
 312   case 0:
 313     assert(hasWorkItemIDX());
 314     return AMDGPU::VGPR0;
 315   case 1:
 316     assert(hasWorkItemIDY());
 317     return AMDGPU::VGPR1;
 318   case 2:
 319     assert(hasWorkItemIDZ());
 320     return AMDGPU::VGPR2;
 321   }
 322   llvm_unreachable("unexpected dimension");
 323 }
 324
 325 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
 326   assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
 327   return AMDGPU::SGPR0 + NumUserSGPRs;
 328 }
 329
 330 MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
 331   return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
 332 }