contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

   1 //===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "SIMachineFunctionInfo.h"
  11 #include "AMDGPUArgumentUsageInfo.h"
  12 #include "AMDGPUSubtarget.h"
  13 #include "SIRegisterInfo.h"
  14 #include "Utils/AMDGPUBaseInfo.h"
  15 #include "llvm/ADT/Optional.h"
  16 #include "llvm/CodeGen/MachineBasicBlock.h"
  17 #include "llvm/CodeGen/MachineFrameInfo.h"
  18 #include "llvm/CodeGen/MachineFunction.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/IR/CallingConv.h"
  21 #include "llvm/IR/Function.h"
  22 #include <cassert>
  23 #include <vector>
  24
  25 #define MAX_LANES 64
  26
  27 using namespace llvm;
  28
  29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  30   : AMDGPUMachineFunction(MF),
  31     BufferPSV(*(MF.getSubtarget().getInstrInfo())),
  32     ImagePSV(*(MF.getSubtarget().getInstrInfo())),
  33     PrivateSegmentBuffer(false),
  34     DispatchPtr(false),
  35     QueuePtr(false),
  36     KernargSegmentPtr(false),
  37     DispatchID(false),
  38     FlatScratchInit(false),
  39     GridWorkgroupCountX(false),
  40     GridWorkgroupCountY(false),
  41     GridWorkgroupCountZ(false),
  42     WorkGroupIDX(false),
  43     WorkGroupIDY(false),
  44     WorkGroupIDZ(false),
  45     WorkGroupInfo(false),
  46     PrivateSegmentWaveByteOffset(false),
  47     WorkItemIDX(false),
  48     WorkItemIDY(false),
  49     WorkItemIDZ(false),
  50     ImplicitBufferPtr(false),
  51     ImplicitArgPtr(false),
  52     GITPtrHigh(0xffffffff) {
  53   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  54   const Function &F = MF.getFunction();
  55   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  56   WavesPerEU = ST.getWavesPerEU(F);
  57
  58   if (!isEntryFunction()) {
  59     // Non-entry functions have no special inputs for now, other registers
  60     // required for scratch access.
  61     ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
  62     ScratchWaveOffsetReg = AMDGPU::SGPR4;
  63     FrameOffsetReg = AMDGPU::SGPR5;
  64     StackPtrOffsetReg = AMDGPU::SGPR32;
  65
  66     ArgInfo.PrivateSegmentBuffer =
  67       ArgDescriptor::createRegister(ScratchRSrcReg);
  68     ArgInfo.PrivateSegmentWaveByteOffset =
  69       ArgDescriptor::createRegister(ScratchWaveOffsetReg);
  70
  71     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  72       ImplicitArgPtr = true;
  73   } else {
  74     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
  75       KernargSegmentPtr = true;
  76   }
  77
  78   CallingConv::ID CC = F.getCallingConv();
  79   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
  80     if (!F.arg_empty())
  81       KernargSegmentPtr = true;
  82     WorkGroupIDX = true;
  83     WorkItemIDX = true;
  84   } else if (CC == CallingConv::AMDGPU_PS) {
  85     PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  86   }
  87
  88   if (ST.debuggerEmitPrologue()) {
  89     // Enable everything.
  90     WorkGroupIDX = true;
  91     WorkGroupIDY = true;
  92     WorkGroupIDZ = true;
  93     WorkItemIDX = true;
  94     WorkItemIDY = true;
  95     WorkItemIDZ = true;
  96   } else {
  97     if (F.hasFnAttribute("amdgpu-work-group-id-x"))
  98       WorkGroupIDX = true;
  99
 100     if (F.hasFnAttribute("amdgpu-work-group-id-y"))
 101       WorkGroupIDY = true;
 102
 103     if (F.hasFnAttribute("amdgpu-work-group-id-z"))
 104       WorkGroupIDZ = true;
 105
 106     if (F.hasFnAttribute("amdgpu-work-item-id-x"))
 107       WorkItemIDX = true;
 108
 109     if (F.hasFnAttribute("amdgpu-work-item-id-y"))
 110       WorkItemIDY = true;
 111
 112     if (F.hasFnAttribute("amdgpu-work-item-id-z"))
 113       WorkItemIDZ = true;
 114   }
 115
 116   const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 117   bool MaySpill = ST.isVGPRSpillingEnabled(F);
 118   bool HasStackObjects = FrameInfo.hasStackObjects();
 119
 120   if (isEntryFunction()) {
 121     // X, XY, and XYZ are the only supported combinations, so make sure Y is
 122     // enabled if Z is.
 123     if (WorkItemIDZ)
 124       WorkItemIDY = true;
 125
 126     if (HasStackObjects || MaySpill) {
 127       PrivateSegmentWaveByteOffset = true;
 128
 129     // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
 130     if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
 131         (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
 132       ArgInfo.PrivateSegmentWaveByteOffset
 133         = ArgDescriptor::createRegister(AMDGPU::SGPR5);
 134     }
 135   }
 136
 137   bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
 138   if (IsCOV2) {
 139     if (HasStackObjects || MaySpill)
 140       PrivateSegmentBuffer = true;
 141
 142     if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
 143       DispatchPtr = true;
 144
 145     if (F.hasFnAttribute("amdgpu-queue-ptr"))
 146       QueuePtr = true;
 147
 148     if (F.hasFnAttribute("amdgpu-dispatch-id"))
 149       DispatchID = true;
 150   } else if (ST.isMesaGfxShader(MF)) {
 151     if (HasStackObjects || MaySpill)
 152       ImplicitBufferPtr = true;
 153   }
 154
 155   if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
 156     KernargSegmentPtr = true;
 157
 158   if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
 159     // TODO: This could be refined a lot. The attribute is a poor way of
 160     // detecting calls that may require it before argument lowering.
 161     if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
 162       FlatScratchInit = true;
 163   }
 164
 165   Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
 166   StringRef S = A.getValueAsString();
 167   if (!S.empty())
 168     S.consumeInteger(0, GITPtrHigh);
 169 }
 170
 171 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
 172   const SIRegisterInfo &TRI) {
 173   ArgInfo.PrivateSegmentBuffer =
 174     ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 175     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass));
 176   NumUserSGPRs += 4;
 177   return ArgInfo.PrivateSegmentBuffer.getRegister();
 178 }
 179
 180 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
 181   ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 182     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 183   NumUserSGPRs += 2;
 184   return ArgInfo.DispatchPtr.getRegister();
 185 }
 186
 187 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
 188   ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 189     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 190   NumUserSGPRs += 2;
 191   return ArgInfo.QueuePtr.getRegister();
 192 }
 193
 194 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
 195   ArgInfo.KernargSegmentPtr
 196     = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 197     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 198   NumUserSGPRs += 2;
 199   return ArgInfo.KernargSegmentPtr.getRegister();
 200 }
 201
 202 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
 203   ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 204     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 205   NumUserSGPRs += 2;
 206   return ArgInfo.DispatchID.getRegister();
 207 }
 208
 209 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
 210   ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 211     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 212   NumUserSGPRs += 2;
 213   return ArgInfo.FlatScratchInit.getRegister();
 214 }
 215
 216 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
 217   ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
 218     getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
 219   NumUserSGPRs += 2;
 220   return ArgInfo.ImplicitBufferPtr.getRegister();
 221 }
 222
 223 static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
 224   for (unsigned I = 0; CSRegs[I]; ++I) {
 225     if (CSRegs[I] == Reg)
 226       return true;
 227   }
 228
 229   return false;
 230 }
 231
 232 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
 233 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
 234                                                     int FI) {
 235   std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 236
 237   // This has already been allocated.
 238   if (!SpillLanes.empty())
 239     return true;
 240
 241   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 242   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 243   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
 244   MachineRegisterInfo &MRI = MF.getRegInfo();
 245   unsigned WaveSize = ST.getWavefrontSize();
 246
 247   unsigned Size = FrameInfo.getObjectSize(FI);
 248   assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
 249   assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 250
 251   int NumLanes = Size / 4;
 252
 253   const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
 254
 255   // Make sure to handle the case where a wide SGPR spill may span between two
 256   // VGPRs.
 257   for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
 258     unsigned LaneVGPR;
 259     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
 260
 261     if (VGPRIndex == 0) {
 262       LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
 263       if (LaneVGPR == AMDGPU::NoRegister) {
 264         // We have no VGPRs left for spilling SGPRs. Reset because we will not
 265         // partially spill the SGPR to VGPRs.
 266         SGPRToVGPRSpills.erase(FI);
 267         NumVGPRSpillLanes -= I;
 268         return false;
 269       }
 270
 271       Optional<int> CSRSpillFI;
 272       if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) {
 273         // TODO: Should this be a CreateSpillStackObject? This is technically a
 274         // weird CSR spill.
 275         CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false);
 276       }
 277
 278       SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));
 279
 280       // Add this register as live-in to all blocks to avoid machine verifer
 281       // complaining about use of an undefined physical register.
 282       for (MachineBasicBlock &BB : MF)
 283         BB.addLiveIn(LaneVGPR);
 284     } else {
 285       LaneVGPR = SpillVGPRs.back().VGPR;
 286     }
 287
 288     SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
 289   }
 290
 291   return true;
 292 }
 293
 294 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
 295   for (auto &R : SGPRToVGPRSpills)
 296     MFI.RemoveStackObject(R.first);
 297 }