//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
10 #include "SIMachineFunctionInfo.h"
11 #include "AMDGPUSubtarget.h"
12 #include "SIInstrInfo.h"
13 #include "llvm/CodeGen/MachineFrameInfo.h"
14 #include "llvm/CodeGen/MachineInstrBuilder.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/LLVMContext.h"
23 static cl::opt<bool> EnableSpillSGPRToVGPR(
24 "amdgpu-spill-sgpr-to-vgpr",
25 cl::desc("Enable spilling VGPRs to SGPRs"),
29 // Pin the vtable to this file.
30 void SIMachineFunctionInfo::anchor() {}
32 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
33 : AMDGPUMachineFunction(MF),
34 TIDReg(AMDGPU::NoRegister),
35 ScratchRSrcReg(AMDGPU::NoRegister),
36 ScratchWaveOffsetReg(AMDGPU::NoRegister),
37 PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
38 DispatchPtrUserSGPR(AMDGPU::NoRegister),
39 QueuePtrUserSGPR(AMDGPU::NoRegister),
40 KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
41 DispatchIDUserSGPR(AMDGPU::NoRegister),
42 FlatScratchInitUserSGPR(AMDGPU::NoRegister),
43 PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
44 GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
45 GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
46 GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
47 WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
48 WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
49 WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
50 WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
51 PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
54 MaximumWorkGroupSize(0),
55 DebuggerReservedVGPRCount(0),
56 DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
57 DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
62 HasSpilledSGPRs(false),
63 HasSpilledVGPRs(false),
64 HasNonSpillStackObjects(false),
65 HasFlatInstructions(false),
68 PrivateSegmentBuffer(false),
72 KernargSegmentPtr(false),
73 FlatScratchInit(false),
74 GridWorkgroupCountX(false),
75 GridWorkgroupCountY(false),
76 GridWorkgroupCountZ(false),
81 PrivateSegmentWaveByteOffset(false),
85 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
86 const Function *F = MF.getFunction();
88 PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
90 const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
92 if (!AMDGPU::isShader(F->getCallingConv())) {
93 KernargSegmentPtr = true;
98 if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
101 if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
104 if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
107 if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
110 // X, XY, and XYZ are the only supported combinations, so make sure Y is
115 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
116 bool HasStackObjects = FrameInfo->hasStackObjects();
118 if (HasStackObjects || MaySpill)
119 PrivateSegmentWaveByteOffset = true;
121 if (ST.isAmdHsaOS()) {
122 if (HasStackObjects || MaySpill)
123 PrivateSegmentBuffer = true;
125 if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
128 if (F->hasFnAttribute("amdgpu-queue-ptr"))
132 // We don't need to worry about accessing spills with flat instructions.
133 // TODO: On VI where we must use flat for global, we should be able to omit
134 // this if it is never used for generic access.
135 if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
137 FlatScratchInit = true;
139 if (AMDGPU::isCompute(F->getCallingConv()))
140 MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
142 MaximumWorkGroupSize = ST.getWavefrontSize();
144 if (ST.debuggerReserveRegs())
145 DebuggerReservedVGPRCount = 4;
148 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
149 const SIRegisterInfo &TRI) {
150 PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
151 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
153 return PrivateSegmentBufferUserSGPR;
156 unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
157 DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
158 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
160 return DispatchPtrUserSGPR;
163 unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
164 QueuePtrUserSGPR = TRI.getMatchingSuperReg(
165 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
167 return QueuePtrUserSGPR;
170 unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
171 KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
172 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
174 return KernargSegmentPtrUserSGPR;
177 unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
178 FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
179 getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
181 return FlatScratchInitUserSGPR;
184 SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
188 if (!EnableSpillSGPRToVGPR)
191 const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
192 const SIRegisterInfo *TRI = ST.getRegisterInfo();
194 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
195 MachineRegisterInfo &MRI = MF->getRegInfo();
196 int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
197 Offset += SubIdx * 4;
199 unsigned LaneVGPRIdx = Offset / (64 * 4);
200 unsigned Lane = (Offset / 4) % 64;
202 struct SpilledReg Spill;
205 if (!LaneVGPRs.count(LaneVGPRIdx)) {
206 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
209 if (LaneVGPR == AMDGPU::NoRegister)
210 // We have no VGPRs left for spilling SGPRs.
213 LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
215 // Add this register as live-in to all blocks to avoid machine verifer
216 // complaining about use of an undefined physical register.
217 for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
219 BI->addLiveIn(LaneVGPR);
223 Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
227 unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
228 const MachineFunction &MF) const {
229 return MaximumWorkGroupSize;