1 //===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIInstrInfo.h"
13 #include "llvm/CodeGen/MachineFrameInfo.h"
14 #include "llvm/CodeGen/MachineInstrBuilder.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/LLVMContext.h"
// Build the per-function SI state for \p MF. All special-input registers
// start out unallocated (AMDGPU::NoRegister) or as placeholder registers
// (PRIVATE_RSRC_REG / SCRATCH_WAVE_OFFSET_REG / FP_REG / SP_REG), and all
// argument-info flags start false; the constructor body then enables inputs
// based on calling convention, subtarget features, and "amdgpu-*" function
// attributes.
23 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
24 : AMDGPUMachineFunction(MF),
25 TIDReg(AMDGPU::NoRegister),
26 ScratchRSrcReg(AMDGPU::PRIVATE_RSRC_REG),
27 ScratchWaveOffsetReg(AMDGPU::SCRATCH_WAVE_OFFSET_REG),
28 FrameOffsetReg(AMDGPU::FP_REG),
29 StackPtrOffsetReg(AMDGPU::SP_REG),
30 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
31 DispatchPtrUserSGPR(AMDGPU::NoRegister),
32 QueuePtrUserSGPR(AMDGPU::NoRegister),
33 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
34 DispatchIDUserSGPR(AMDGPU::NoRegister),
35 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
36 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
37 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
38 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
39 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
40 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
41 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
42 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
43 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
44 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
45 WorkItemIDXVGPR(AMDGPU::NoRegister),
46 WorkItemIDYVGPR(AMDGPU::NoRegister),
47 WorkItemIDZVGPR(AMDGPU::NoRegister),
48 FlatWorkGroupSizes(0, 0),
// The three debugger stack-object index arrays are zero-initialized.
53 DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
54 DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
58 HasSpilledSGPRs(false),
59 HasSpilledVGPRs(false),
60 HasNonSpillStackObjects(false),
63 PrivateSegmentBuffer(false),
66 KernargSegmentPtr(false),
68 FlatScratchInit(false),
69 GridWorkgroupCountX(false),
70 GridWorkgroupCountY(false),
71 GridWorkgroupCountZ(false),
76 PrivateSegmentWaveByteOffset(false),
80 ImplicitBufferPtr(false) {
81 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
82 const Function *F = MF.getFunction();
// Flat work-group size and waves/EU limits come from function attributes
// via the subtarget helpers.
83 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
84 WavesPerEU = ST.getWavesPerEU(*F);
// Callable (non-entry) functions use a fixed ABI register assignment for
// scratch access instead of the dynamically-allocated user/system SGPRs.
86 if (!isEntryFunction()) {
87 // Non-entry functions have no special inputs for now, other registers
88 // required for scratch access.
89 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
90 ScratchWaveOffsetReg = AMDGPU::SGPR4;
91 FrameOffsetReg = AMDGPU::SGPR5;
92 StackPtrOffsetReg = AMDGPU::SGPR32;
94 // FIXME: Not really a system SGPR.
95 PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
// Calling-convention-specific setup.
98 CallingConv::ID CC = F->getCallingConv();
99 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
// Kernels only need the kernarg segment pointer if they actually take
// arguments.
100 KernargSegmentPtr = !F->arg_empty();
103 } else if (CC == CallingConv::AMDGPU_PS) {
// Pixel shaders: record which PS inputs are initially enabled.
104 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
107 if (ST.debuggerEmitPrologue()) {
108 // Enable everything.
// Enable individual work-group/work-item ID inputs requested via
// function attributes. NOTE(review): the then-clauses are not visible in
// this view; presumably each sets the corresponding WorkGroupID*/
// WorkItemID* flag — confirm against the full file.
116 if (F->hasFnAttribute("amdgpu-work-group-id-x"))
119 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
122 if (F->hasFnAttribute("amdgpu-work-group-id-z"))
125 if (F->hasFnAttribute("amdgpu-work-item-id-x"))
128 if (F->hasFnAttribute("amdgpu-work-item-id-y"))
131 if (F->hasFnAttribute("amdgpu-work-item-id-z"))
135 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
136 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
137 bool HasStackObjects = FrameInfo.hasStackObjects();
139 if (isEntryFunction()) {
140 // X, XY, and XYZ are the only supported combinations, so make sure Y is
// Entry functions need the per-wave scratch offset whenever there are
// stack objects or VGPR spilling may occur.
145 if (HasStackObjects || MaySpill) {
146 PrivateSegmentWaveByteOffset = true;
148 // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
149 if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
150 (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
151 PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
// Code-object-v2 targets get the private segment buffer plus
// attribute-driven dispatch/queue pointers; Mesa GFX shaders instead use
// an implicit buffer pointer for scratch.
155 bool IsCOV2 = ST.isAmdCodeObjectV2(MF);
157 if (HasStackObjects || MaySpill)
158 PrivateSegmentBuffer = true;
160 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
163 if (F->hasFnAttribute("amdgpu-queue-ptr"))
166 if (F->hasFnAttribute("amdgpu-dispatch-id"))
168 } else if (ST.isMesaGfxShader(MF)) {
169 if (HasStackObjects || MaySpill)
170 ImplicitBufferPtr = true;
173 if (F->hasFnAttribute("amdgpu-kernarg-segment-ptr"))
174 KernargSegmentPtr = true;
// Flat scratch initialization is only needed for entry functions on
// code-object-v2 targets with a flat address space.
176 if (ST.hasFlatAddressSpace() && isEntryFunction() && IsCOV2) {
177 // TODO: This could be refined a lot. The attribute is a poor way of
178 // detecting calls that may require it before argument lowering.
179 if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
180 FlatScratchInit = true;
/// Allocate user SGPRs for the private segment (scratch) buffer resource
/// descriptor: a 128-bit SGPR tuple starting at the next free user SGPR.
/// \returns the allocated SReg_128 register.
184 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
185 const SIRegisterInfo &TRI) {
186 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
187 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
189 return PrivateSegmentBufferUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the dispatch pointer. \returns the allocated SReg_64 register.
192 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
193 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
194 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
196 return DispatchPtrUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the queue pointer. \returns the allocated SReg_64 register.
199 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
200 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
201 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
203 return QueuePtrUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the kernel argument segment pointer. \returns the allocated SReg_64
/// register.
206 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
207 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
208 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
210 return KernargSegmentPtrUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the dispatch ID. \returns the allocated SReg_64 register.
213 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
214 DispatchIDUserSGPR = TRI.getMatchingSuperReg(
215 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
217 return DispatchIDUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the flat-scratch initialization value. \returns the allocated SReg_64
/// register.
220 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
221 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
222 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
224 return FlatScratchInitUserSGPR;
/// Allocate a 64-bit SGPR pair (starting at the next free user SGPR) to hold
/// the implicit buffer pointer (used by Mesa GFX shaders for scratch access —
/// see the constructor). \returns the allocated SReg_64 register.
227 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
228 ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
229 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
231 return ImplicitBufferPtrUserSGPR;
234 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
/// Reserve VGPR lanes to spill the SGPR(s) backing FrameIndex \p FI: one
/// 32-bit VGPR lane per 4 bytes of the frame object. Lanes are handed out
/// from a running counter (NumVGPRSpillLanes); every WaveSize lanes a fresh
/// unused VGPR is found and recorded in SpillVGPRs.
235 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
// The per-FI lane list; operator[] creates an empty entry on first use.
237 std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
239 // This has already been allocated.
240 if (!SpillLanes.empty())
243 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
244 const SIRegisterInfo *TRI = ST.getRegisterInfo();
245 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
246 MachineRegisterInfo &MRI = MF.getRegInfo();
247 unsigned WaveSize = ST.getWavefrontSize();
249 unsigned Size = FrameInfo.getObjectSize(FI);
250 assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
251 assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
// One lane per 32-bit SGPR in the spill.
253 int NumLanes = Size / 4;
255 // Make sure to handle the case where a wide SGPR spill may span between two
257 for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
// Lane index within the current spill VGPR (wraps every WaveSize lanes).
259 unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
// Wrapped around: the previous VGPR is full, grab a fresh one.
261 if (VGPRIndex == 0) {
262 LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
263 if (LaneVGPR == AMDGPU::NoRegister) {
264 // We have no VGPRs left for spilling SGPRs. Reset because we won't
265 // partially spill the SGPR to VGPRs.
266 SGPRToVGPRSpills.erase(FI);
// Roll the lane counter back so no lanes stay reserved for this FI.
267 NumVGPRSpillLanes -= I;
271 SpillVGPRs.push_back(LaneVGPR);
273 // Add this register as live-in to all blocks to avoid machine verifier
274 // complaining about use of an undefined physical register.
275 for (MachineBasicBlock &BB : MF)
276 BB.addLiveIn(LaneVGPR);
// Not at a VGPR boundary: continue filling the most recent spill VGPR.
278 LaneVGPR = SpillVGPRs.back();
281 SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
/// Remove the stack objects for every frame index that was redirected to a
/// VGPR spill by allocateSGPRSpillToVGPR — those slots no longer need real
/// stack space.
287 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
288 for (auto &R : SGPRToVGPRSpills)
289 MFI.RemoveStackObject(R.first);