1 //===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIInstrInfo.h"
13 #include "llvm/CodeGen/MachineFrameInfo.h"
14 #include "llvm/CodeGen/MachineInstrBuilder.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/LLVMContext.h"
/// Build the SI-specific per-function state for \p MF.
///
/// The initializer list sets every register field shown here to
/// AMDGPU::NoRegister and every flag shown here to false. The body then fills
/// in values derived from the subtarget, the function's calling convention,
/// its attributes, and its frame info.
23 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
24 : AMDGPUMachineFunction(MF),
25 TIDReg(AMDGPU::NoRegister),
26 ScratchRSrcReg(AMDGPU::NoRegister),
27 ScratchWaveOffsetReg(AMDGPU::NoRegister),
28 FrameOffsetReg(AMDGPU::NoRegister),
29 StackPtrOffsetReg(AMDGPU::NoRegister),
30 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
31 DispatchPtrUserSGPR(AMDGPU::NoRegister),
32 QueuePtrUserSGPR(AMDGPU::NoRegister),
33 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
34 DispatchIDUserSGPR(AMDGPU::NoRegister),
35 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
36 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
37 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
38 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
39 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
40 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
41 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
42 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
43 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
44 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
48 FlatWorkGroupSizes(0, 0),
50 DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
51 DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
55 HasSpilledSGPRs(false),
56 HasSpilledVGPRs(false),
57 HasNonSpillStackObjects(false),
60 PrivateSegmentBuffer(false),
63 KernargSegmentPtr(false),
65 FlatScratchInit(false),
66 GridWorkgroupCountX(false),
67 GridWorkgroupCountY(false),
68 GridWorkgroupCountZ(false),
73 PrivateSegmentWaveByteOffset(false),
77 ImplicitBufferPtr(false) {
78 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
79 const Function *F = MF.getFunction();
// Replace the (0, 0) placeholders with the limits the subtarget reports for
// this particular function.
80 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
81 WavesPerEU = ST.getWavesPerEU(*F);
83 if (!isEntryFunction()) {
84 // Non-entry functions have no special inputs for now, other registers
85 // required for scratch access.
86 ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
87 ScratchWaveOffsetReg = AMDGPU::SGPR4;
88 FrameOffsetReg = AMDGPU::SGPR5;
89 StackPtrOffsetReg = AMDGPU::SGPR32;
// The calling convention selects which inputs are enabled: kernels get the
// kernarg segment pointer, pixel shaders get their initial PS input address.
93 CallingConv::ID CC = F->getCallingConv();
94 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
95 KernargSegmentPtr = true;
98 } else if (CC == CallingConv::AMDGPU_PS) {
99 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
102 if (ST.debuggerEmitPrologue()) {
103 // Enable everything.
109 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
112 if (F->hasFnAttribute("amdgpu-work-group-id-z"))
115 if (F->hasFnAttribute("amdgpu-work-item-id-y"))
118 if (F->hasFnAttribute("amdgpu-work-item-id-z"))
122 // X, XY, and XYZ are the only supported combinations, so make sure Y is
// Scratch-related inputs below are requested when the frame has stack objects
// or calls (HasStackObjects), or when VGPR spilling is enabled for this
// function (MaySpill).
127 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
128 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
129 bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
131 if (HasStackObjects || MaySpill) {
132 PrivateSegmentWaveByteOffset = true;
134 // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
135 if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
136 (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
137 PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
// Code object v2 functions use the private segment buffer for scratch, while
// Mesa graphics shaders use the implicit buffer pointer instead.
140 if (ST.isAmdCodeObjectV2(MF)) {
141 if (HasStackObjects || MaySpill)
142 PrivateSegmentBuffer = true;
144 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
147 if (F->hasFnAttribute("amdgpu-queue-ptr"))
150 if (F->hasFnAttribute("amdgpu-dispatch-id"))
152 } else if (ST.isMesaGfxShader(MF)) {
153 if (HasStackObjects || MaySpill)
154 ImplicitBufferPtr = true;
157 // We don't need to worry about accessing spills with flat instructions.
158 // TODO: On VI where we must use flat for global, we should be able to omit
159 // this if it is never used for generic access.
// Note that this test uses HasStackObjects only; MaySpill is deliberately not
// part of the condition (see the comment above about spills).
160 if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
161 FlatScratchInit = true;
/// Record the user SGPR tuple that holds the private segment buffer.
///
/// Asks \p TRI for the SReg_128 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in
/// PrivateSegmentBufferUserSGPR, and returns it.
164 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
165 const SIRegisterInfo &TRI) {
166 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
167 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
169 return PrivateSegmentBufferUserSGPR;
/// Record the user SGPR pair that holds the dispatch pointer.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in DispatchPtrUserSGPR, and
/// returns it.
172 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
173 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
174 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
176 return DispatchPtrUserSGPR;
/// Record the user SGPR pair that holds the queue pointer.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in QueuePtrUserSGPR, and
/// returns it.
179 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
180 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
181 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
183 return QueuePtrUserSGPR;
/// Record the user SGPR pair that holds the kernarg segment pointer.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in
/// KernargSegmentPtrUserSGPR, and returns it.
186 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
187 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
188 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
190 return KernargSegmentPtrUserSGPR;
/// Record the user SGPR pair that holds the dispatch ID.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in DispatchIDUserSGPR, and
/// returns it.
193 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
194 DispatchIDUserSGPR = TRI.getMatchingSuperReg(
195 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
197 return DispatchIDUserSGPR;
/// Record the user SGPR pair that holds the flat scratch init value.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in FlatScratchInitUserSGPR,
/// and returns it.
200 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
201 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
202 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
204 return FlatScratchInitUserSGPR;
/// Record the user SGPR pair that holds the implicit buffer pointer.
///
/// Asks \p TRI for the SReg_64 super-register whose sub0 sub-register is the
/// value returned by getNextUserSGPR(), stores it in
/// ImplicitBufferPtrUserSGPR, and returns it.
207 unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
208 ImplicitBufferPtrUserSGPR = TRI.getMatchingSuperReg(
209 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
211 return ImplicitBufferPtrUserSGPR;
214 /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
///
/// One lane is reserved for every 4 bytes of the stack object, and each lane
/// is appended to the SGPRToVGPRSpills entry for \p FI as a
/// (VGPR, lane index) pair. A fresh VGPR is requested whenever the running
/// lane counter NumVGPRSpillLanes reaches a multiple of the wavefront size.
/// If no unused VGPR is available, the entry for \p FI is erased and the lane
/// counter is rolled back so that no partial allocation is left behind.
215 bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
217 std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
219 // This has already been allocated.
220 if (!SpillLanes.empty())
223 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
224 const SIRegisterInfo *TRI = ST.getRegisterInfo();
225 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
226 MachineRegisterInfo &MRI = MF.getRegInfo();
227 unsigned WaveSize = ST.getWavefrontSize();
229 unsigned Size = FrameInfo.getObjectSize(FI);
230 assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
231 assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
// One VGPR lane per 4 bytes of the spilled object.
233 int NumLanes = Size / 4;
235 // Make sure to handle the case where a wide SGPR spill may span between two
237 for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
// Lane position inside the VGPR currently being filled.
239 unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
// Lane 0 means the previous VGPR (if any) is full, so look for a new one.
241 if (VGPRIndex == 0) {
242 LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
243 if (LaneVGPR == AMDGPU::NoRegister) {
244 // We have no VGPRs left for spilling SGPRs. Reset because we won't
245 // partially spill the SGPR to VGPRs.
// I is the number of lanes this call has already counted for FI.
246 SGPRToVGPRSpills.erase(FI);
247 NumVGPRSpillLanes -= I;
251 SpillVGPRs.push_back(LaneVGPR);
253 // Add this register as live-in to all blocks to avoid machine verifier
254 // complaining about use of an undefined physical register.
255 for (MachineBasicBlock &BB : MF)
256 BB.addLiveIn(LaneVGPR);
// Otherwise keep filling the most recently reserved VGPR.
258 LaneVGPR = SpillVGPRs.back();
261 SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
/// Remove from \p MFI the stack object of every frame index that has an entry
/// in SGPRToVGPRSpills.
///
/// The SGPRToVGPRSpills map itself is not modified here.
267 void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
268 for (auto &R : SGPRToVGPRSpills)
// R.first is the frame index key of the spill entry.
269 MFI.RemoveStackObject(R.first);