contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

   1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "SIMachineFunctionInfo.h"
  11 #include "AMDGPUArgumentUsageInfo.h"
  12 #include "AMDGPUSubtarget.h"
  13 #include "SIRegisterInfo.h"
  14 #include "Utils/AMDGPUBaseInfo.h"
  15 #include "llvm/ADT/Optional.h"
  16 #include "llvm/CodeGen/MachineBasicBlock.h"
  17 #include "llvm/CodeGen/MachineFrameInfo.h"
  18 #include "llvm/CodeGen/MachineFunction.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/IR/CallingConv.h"
  21 #include "llvm/IR/Function.h"
  22 #include <cassert>
  23 #include <vector>
  24
  25 #define MAX_LANES 64
  26
  27 using namespace llvm;
  28
  29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  30   : AMDGPUMachineFunction(MF),
  31     PrivateSegmentBuffer(false),
  32     DispatchPtr(false),
  33     QueuePtr(false),
  34     KernargSegmentPtr(false),
  35     DispatchID(false),
  36     FlatScratchInit(false),
  37     GridWorkgroupCountX(false),
  38     GridWorkgroupCountY(false),
  39     GridWorkgroupCountZ(false),
  40     WorkGroupIDX(false),
  41     WorkGroupIDY(false),
  42     WorkGroupIDZ(false),
  43     WorkGroupInfo(false),
  44     PrivateSegmentWaveByteOffset(false),
  45     WorkItemIDX(false),
  46     WorkItemIDY(false),
  47     WorkItemIDZ(false),
  48     ImplicitBufferPtr(false),
  49     ImplicitArgPtr(false),
  50     GITPtrHigh(0xffffffff) {
  51   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  52   const Function &F = MF.getFunction();
  53   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  54   WavesPerEU = ST.getWavesPerEU(F);
  55
  56   if (!isEntryFunction()) {
  57     // Non-entry functions have no special inputs for now, other registers
  58     // required for scratch access.
  59     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  60     ScratchWaveOffsetReg = AMDGPU::SGPR4;
  61     FrameOffsetReg = AMDGPU::SGPR5;
  62     StackPtrOffsetReg = AMDGPU::SGPR32;
  63
  64     ArgInfo.PrivateSegmentBuffer =
  65       ArgDescriptor::createRegister(ScratchRSrcReg);
  66     ArgInfo.PrivateSegmentWaveByteOffset =
  67       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
  68
  69     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  70       ImplicitArgPtr = true;
  71   } else {
  72     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  73       KernargSegmentPtr = true;
  74   }
  75
  76   CallingConv::ID CC = F.getCallingConv();
  77   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
  78     if (!F.arg_empty())
  79       KernargSegmentPtr = true;
  80     WorkGroupIDX = true;
  81     WorkItemIDX = true;
  82   } else if (CC == CallingConv::AMDGPU_PS) {
  83     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  84   }
  85
  86   if (ST.debuggerEmitPrologue()) {
  87     // Enable everything.
  88     WorkGroupIDX = true;
  89     WorkGroupIDY = true;
  90     WorkGroupIDZ = true;
  91     WorkItemIDX = true;
  92     WorkItemIDY = true;
  93     WorkItemIDZ = true;
  94   } else {
  95     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
  96       WorkGroupIDX = true;
  97
  98     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
  99       WorkGroupIDY = true;
 100
 101     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
 102       WorkGroupIDZ = true;
 103
 104     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
 105       WorkItemIDX = true;
 106
 107     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
 108       WorkItemIDY = true;
 109
 110     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
 111       WorkItemIDZ = true;
 112   }
 113
 114   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 115   bool MaySpill = ST.isVGPRSpillingEnabled(F);
 116   bool HasStackObjects = FrameInfo.hasStackObjects();
 117
 118   if (isEntryFunction()) {
 119     // X, XY, and XYZ are the only supported combinations, so make sure Y is
 120     // enabled if Z is.
 121     if (WorkItemIDZ)
 122       WorkItemIDY = true;
 123
 124     if (HasStackObjects || MaySpill) {
 125       PrivateSegmentWaveByteOffset = true;
 126
 127     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
 128     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
 129         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
 130       ArgInfo.PrivateSegmentWaveByteOffset
 131         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
 132     }
 133   }
 134
 135   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
 136   if (IsCOV2) {
 137     if (HasStackObjects || MaySpill)
 138       PrivateSegmentBuffer = true;
 139
 140     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
 141       DispatchPtr = true;
 142
 143     if (F.hasFnAttribute("amdgpu-queue-ptr"))
 144       QueuePtr = true;
 145
 146     if (F.hasFnAttribute("amdgpu-dispatch-id"))
 147       DispatchID = true;
 148   } else if (ST.isMesaGfxShader(MF)) {
 149     if (HasStackObjects || MaySpill)
 150       ImplicitBufferPtr = true;
 151   }
 152
 153   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
 154     KernargSegmentPtr = true;
 155
 156   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
 157     // TODO: This could be refined a lot. The attribute is a poor way of
 158     // detecting calls that may require it before argument lowering.
 159     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
 160       FlatScratchInit = true;
 161   }
 162
 163   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
 164   StringRef S = A.getValueAsString();
 165   if (!S.empty())
 166     S.consumeInteger(0, GITPtrHigh);
 167 }
 168
 169 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
 170   const SIRegisterInfo &TRI) {
 171   ArgInfo.PrivateSegmentBuffer =
 172     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 173     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
 174   NumUserSGPRs += 4;
 175   return ArgInfo.PrivateSegmentBuffer.getRegister();
 176 }
 177
 178 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
 179   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 180     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 181   NumUserSGPRs += 2;
 182   return ArgInfo.DispatchPtr.getRegister();
 183 }
 184
 185 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
 186   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 187     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 188   NumUserSGPRs += 2;
 189   return ArgInfo.QueuePtr.getRegister();
 190 }
 191
 192 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
 193   ArgInfo.KernargSegmentPtr
 194     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 195     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 196   NumUserSGPRs += 2;
 197   return ArgInfo.KernargSegmentPtr.getRegister();
 198 }
 199
 200 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
 201   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 202     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 203   NumUserSGPRs += 2;
 204   return ArgInfo.DispatchID.getRegister();
 205 }
 206
 207 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
 208   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 209     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 210   NumUserSGPRs += 2;
 211   return ArgInfo.FlatScratchInit.getRegister();
 212 }
 213
 214 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
 215   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 216     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 217   NumUserSGPRs += 2;
 218   return ArgInfo.ImplicitBufferPtr.getRegister();
 219 }
 220
 221 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
 222   for (unsigned I = 0; CSRegs[I]; ++I) {
 223     if (CSRegs[I] == Reg)
 224       return true;
 225   }
 226
 227   return false;
 228 }
 229
 230 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 231 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 232                                                     int FI) {
 233   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 234
 235   // This has already been allocated.
 236   if (!SpillLanes.empty())
 237     return true;
 238
 239   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 240   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 241   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 242   MachineRegisterInfo &MRI = MF.getRegInfo();
 243   unsigned WaveSize = ST.getWavefrontSize();
 244
 245   unsigned Size = FrameInfo.getObjectSize(FI);
 246   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
 247   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 248
 249   int NumLanes = Size / 4;
 250
 251   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 252
 253   // Make sure to handle the case where a wide SGPR spill may span between two
 254   // VGPRs.
 255   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
 256     unsigned LaneVGPR;
 257     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 258
 259     if (VGPRIndex == 0) {
 260       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
 261       if (LaneVGPR == AMDGPU::NoRegister) {
 262         // We have no VGPRs left for spilling SGPRs. Reset because we will not
 263         // partially spill the SGPR to VGPRs.
 264         SGPRToVGPRSpills.erase(FI);
 265         NumVGPRSpillLanes -= I;
 266         return false;
 267       }
 268
 269       Optional<int> CSRSpillFI;
 270       if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
 271         // TODO: Should this be a CreateSpillStackObject? This is technically a
 272         // weird CSR spill.
 273         CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
 274       }
 275
 276       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
 277
 278       // Add this register as live-in to all blocks to avoid machine verifer
 279       // complaining about use of an undefined physical register.
 280       for (MachineBasicBlock &BB : MF)
 281         BB.addLiveIn(LaneVGPR);
 282     } else {
 283       LaneVGPR = SpillVGPRs.back().VGPR;
 284     }
 285
 286     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
 287   }
 288
 289   return true;
 290 }
 291
 292 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
 293   for (auto &R : SGPRToVGPRSpills)
 294     MFI.RemoveStackObject(R.first);
 295 }