1 //===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIInstrInfo.h"
13 #include "llvm/CodeGen/MachineFrameInfo.h"
14 #include "llvm/CodeGen/MachineInstrBuilder.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/LLVMContext.h"
/// Builds the SI per-function info for \p MF.
///
/// Every register member visible in the initializer list starts out as
/// AMDGPU::NoRegister and every visible input flag starts out as false. The
/// body then turns on the inputs this function needs, based on its calling
/// convention, its function attributes, the subtarget and the frame info.
///
/// NOTE(review): the numbering embedded in these lines skips values, so some
/// initializers, several `if` bodies and the closing braces are not visible
/// in this view. The comments below describe only the statements that are
/// shown; confirm the elided parts against the complete file.
23 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
24 : AMDGPUMachineFunction(MF),
25 TIDReg(AMDGPU::NoRegister),
26 ScratchRSrcReg(AMDGPU::NoRegister),
27 ScratchWaveOffsetReg(AMDGPU::NoRegister),
28 FrameOffsetReg(AMDGPU::NoRegister),
29 StackPtrOffsetReg(AMDGPU::NoRegister),
30 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
31 DispatchPtrUserSGPR(AMDGPU::NoRegister),
32 QueuePtrUserSGPR(AMDGPU::NoRegister),
33 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
34 DispatchIDUserSGPR(AMDGPU::NoRegister),
35 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
36 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
37 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
38 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
39 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
40 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
41 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
42 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
43 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
44 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
48 FlatWorkGroupSizes(0, 0),
50 DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
51 DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
55 HasSpilledSGPRs(false),
56 HasSpilledVGPRs(false),
57 HasNonSpillStackObjects(false),
60 PrivateSegmentBuffer(false),
63 KernargSegmentPtr(false),
65 FlatScratchInit(false),
66 GridWorkgroupCountX(false),
67 GridWorkgroupCountY(false),
68 GridWorkgroupCountZ(false),
73 PrivateSegmentWaveByteOffset(false),
77 PrivateMemoryInputPtr(false) {
// Replace the placeholder work-group size and fill in the waves-per-EU value
// with what the subtarget reports for this particular function.
78 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
79 const Function *F = MF.getFunction();
80 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
81 WavesPerEU = ST.getWavesPerEU(*F);
83 // Non-entry functions have no special inputs for now.
84 // TODO: Return early for non-entry CCs.
// Only the AMDGPU_PS calling convention seeds PSInputAddr here.
86 CallingConv::ID CC = F->getCallingConv();
87 if (CC == CallingConv::AMDGPU_PS)
88 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
// Kernel calling conventions always request the kernarg segment pointer.
90 if (AMDGPU::isKernel(CC)) {
91 KernargSegmentPtr = true;
// NOTE(review): the bodies of the following attribute/debugger checks are
// elided in this view; presumably each one enables the matching work-group /
// work-item ID input -- confirm against the complete file.
96 if (ST.debuggerEmitPrologue()) {
103 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
106 if (F->hasFnAttribute("amdgpu-work-group-id-z"))
109 if (F->hasFnAttribute("amdgpu-work-item-id-y"))
112 if (F->hasFnAttribute("amdgpu-work-item-id-z"))
116 // X, XY, and XYZ are the only supported combinations, so make sure Y is
// Scratch-related inputs are requested when the function already has stack
// objects or when VGPR spilling is enabled for it.
121 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
122 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
123 bool HasStackObjects = FrameInfo.hasStackObjects();
125 if (HasStackObjects || MaySpill) {
126 PrivateSegmentWaveByteOffset = true;
128 // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
129 if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
130 (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
131 PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
// Code object v2 functions use the private segment buffer for scratch; Mesa
// graphics shaders use the private memory input pointer instead.
134 if (ST.isAmdCodeObjectV2(MF)) {
135 if (HasStackObjects || MaySpill)
136 PrivateSegmentBuffer = true;
// NOTE(review): the bodies of these three attribute checks are elided in this
// view.
138 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
141 if (F->hasFnAttribute("amdgpu-queue-ptr"))
144 if (F->hasFnAttribute("amdgpu-dispatch-id"))
146 } else if (ST.isMesaGfxShader(MF)) {
147 if (HasStackObjects || MaySpill)
148 PrivateMemoryInputPtr = true;
151 // We don't need to worry about accessing spills with flat instructions.
152 // TODO: On VI where we must use flat for global, we should be able to omit
153 // this if it is never used for generic access.
// Note that this test uses HasStackObjects only; MaySpill alone does not
// request flat scratch init.
154 if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
155 FlatScratchInit = true;
/// Sets PrivateSegmentBufferUserSGPR to the SReg_128 super-register whose sub0
/// is the register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
158 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
159 const SIRegisterInfo &TRI) {
160 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
161 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
163 return PrivateSegmentBufferUserSGPR;
/// Sets DispatchPtrUserSGPR to the SReg_64 super-register whose sub0 is the
/// register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
166 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
167 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
168 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
170 return DispatchPtrUserSGPR;
/// Sets QueuePtrUserSGPR to the SReg_64 super-register whose sub0 is the
/// register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
173 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
174 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
175 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
177 return QueuePtrUserSGPR;
/// Sets KernargSegmentPtrUserSGPR to the SReg_64 super-register whose sub0 is
/// the register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
180 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
181 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
182 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
184 return KernargSegmentPtrUserSGPR;
/// Sets DispatchIDUserSGPR to the SReg_64 super-register whose sub0 is the
/// register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
187 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
188 DispatchIDUserSGPR = TRI.getMatchingSuperReg(
189 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
191 return DispatchIDUserSGPR;
/// Sets FlatScratchInitUserSGPR to the SReg_64 super-register whose sub0 is
/// the register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
194 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
195 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
196 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
198 return FlatScratchInitUserSGPR;
/// Sets PrivateMemoryPtrUserSGPR to the SReg_64 super-register whose sub0 is
/// the register returned by getNextUserSGPR(), and returns that register.
///
/// NOTE(review): the embedded numbering skips a line between the assignment
/// and the return (and the closing brace). Presumably the user-SGPR count is
/// advanced there -- confirm against the complete file.
201 unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
202 PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
203 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
205 return PrivateMemoryPtrUserSGPR;
208 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// One (VGPR, lane) pair is recorded in SGPRToVGPRSpills[FI] for every 4 bytes
/// of the stack object. Lanes are handed out from a running counter,
/// NumVGPRSpillLanes; a fresh VGPR is claimed each time that counter wraps
/// around the wavefront size.
///
/// NOTE(review): the embedded numbering skips values, so the rest of the
/// signature, the return statements and the closing braces are not visible
/// here. Presumably the function returns true on success (or when \p FI was
/// already allocated) and false when no VGPR is free -- confirm against the
/// complete file.
209 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
// operator[] creates an empty entry for FI if none exists yet; the failure
// path below erases it again.
211 std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
213 // This has already been allocated.
214 if (!SpillLanes.empty())
217 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
218 const SIRegisterInfo *TRI = ST.getRegisterInfo();
219 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
220 MachineRegisterInfo &MRI = MF.getRegInfo();
221 unsigned WaveSize = ST.getWavefrontSize();
223 unsigned Size = FrameInfo.getObjectSize(FI);
224 assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
225 assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
// One lane is needed per 4 bytes of the spilled object.
227 int NumLanes = Size / 4;
229 // Make sure to handle the case where a wide SGPR spill may span between two
231 for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
// Lane index within the current spill VGPR; index 0 means a new VGPR has to
// be claimed before this lane can be recorded.
233 unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
235 if (VGPRIndex == 0) {
236 LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
237 if (LaneVGPR == AMDGPU::NoRegister) {
238 // We have no VGPRs left for spilling SGPRs. Reset because we won't
239 // partially spill the SGPR to VGPRs.
// Undo the I lanes this call has already counted.
240 SGPRToVGPRSpills.erase(FI);
241 NumVGPRSpillLanes -= I;
245 SpillVGPRs.push_back(LaneVGPR);
247 // Add this register as live-in to all blocks to avoid machine verifier
248 // complaining about use of an undefined physical register.
249 for (MachineBasicBlock &BB : MF)
250 BB.addLiveIn(LaneVGPR);
// Otherwise keep filling lanes of the most recently claimed spill VGPR.
252 LaneVGPR = SpillVGPRs.back();
255 SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
/// Calls MFI.RemoveStackObject() on the frame index (the map key) of every
/// entry currently held in SGPRToVGPRSpills. The map itself is not modified
/// by the statements visible here.
///
/// NOTE(review): the closing brace is elided in this view.
261 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
262 for (auto &R : SGPRToVGPRSpills)
263 MFI.RemoveStackObject(R.first);