1 //===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
23 static cl::opt<bool> EnableSpillSGPRToVGPR(
24 "amdgpu-spill-sgpr-to-vgpr",
25 cl::desc("Enable spilling VGPRs to SGPRs"),
29 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30 : AMDGPUMachineFunction(MF),
31 TIDReg(AMDGPU::NoRegister),
32 ScratchRSrcReg(AMDGPU::NoRegister),
33 ScratchWaveOffsetReg(AMDGPU::NoRegister),
34 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35 DispatchPtrUserSGPR(AMDGPU::NoRegister),
36 QueuePtrUserSGPR(AMDGPU::NoRegister),
37 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38 DispatchIDUserSGPR(AMDGPU::NoRegister),
39 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
44 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
51 FlatWorkGroupSizes(0, 0),
53 DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
54 DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
59 HasSpilledSGPRs(false),
60 HasSpilledVGPRs(false),
61 HasNonSpillStackObjects(false),
64 PrivateSegmentBuffer(false),
67 KernargSegmentPtr(false),
69 FlatScratchInit(false),
70 GridWorkgroupCountX(false),
71 GridWorkgroupCountY(false),
72 GridWorkgroupCountZ(false),
77 PrivateSegmentWaveByteOffset(false),
81 PrivateMemoryInputPtr(false) {
82 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
83 const Function *F = MF.getFunction();
85 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
87 const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
89 if (!AMDGPU::isShader(F->getCallingConv())) {
90 KernargSegmentPtr = true;
95 if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
98 if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
101 if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
104 if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
107 // X, XY, and XYZ are the only supported combinations, so make sure Y is
112 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
113 bool HasStackObjects = FrameInfo.hasStackObjects();
115 if (HasStackObjects || MaySpill)
116 PrivateSegmentWaveByteOffset = true;
118 if (ST.isAmdCodeObjectV2(MF)) {
119 if (HasStackObjects || MaySpill)
120 PrivateSegmentBuffer = true;
122 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
125 if (F->hasFnAttribute("amdgpu-queue-ptr"))
128 if (F->hasFnAttribute("amdgpu-dispatch-id"))
130 } else if (ST.isMesaGfxShader(MF)) {
131 if (HasStackObjects || MaySpill)
132 PrivateMemoryInputPtr = true;
135 // We don't need to worry about accessing spills with flat instructions.
136 // TODO: On VI where we must use flat for global, we should be able to omit
137 // this if it is never used for generic access.
138 if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
140 FlatScratchInit = true;
142 FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
143 WavesPerEU = ST.getWavesPerEU(*F);
146 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
147 const SIRegisterInfo &TRI) {
148 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
149 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
151 return PrivateSegmentBufferUserSGPR;
154 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
155 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
156 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
158 return DispatchPtrUserSGPR;
161 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
162 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
163 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
165 return QueuePtrUserSGPR;
168 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
169 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
170 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
172 return KernargSegmentPtrUserSGPR;
175 unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
176 DispatchIDUserSGPR = TRI.getMatchingSuperReg(
177 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
179 return DispatchIDUserSGPR;
182 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
183 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
184 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
186 return FlatScratchInitUserSGPR;
189 unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
190 PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
191 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
193 return PrivateMemoryPtrUserSGPR;
196 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
200 if (!EnableSpillSGPRToVGPR)
203 const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
204 const SIRegisterInfo *TRI = ST.getRegisterInfo();
206 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
207 MachineRegisterInfo &MRI = MF->getRegInfo();
208 int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
209 Offset += SubIdx * 4;
211 unsigned LaneVGPRIdx = Offset / (64 * 4);
212 unsigned Lane = (Offset / 4) % 64;
214 struct SpilledReg Spill;
217 if (!LaneVGPRs.count(LaneVGPRIdx)) {
218 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
221 if (LaneVGPR == AMDGPU::NoRegister)
222 // We have no VGPRs left for spilling SGPRs.
225 LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
227 // Add this register as live-in to all blocks to avoid machine verifer
228 // complaining about use of an undefined physical register.
229 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
231 BI->addLiveIn(LaneVGPR);
235 Spill.VGPR = LaneVGPRs[LaneVGPRIdx];