//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
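
// When enabled, SGPR spills are lowered to lanes of otherwise unused VGPRs
// instead of being stored out to scratch memory.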
static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    FlatWorkGroupSizes(0, 0),
    WavesPerEU(0, 0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    NumUserSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
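
  // Functions that are not shaders (i.e. compute kernels) always need the
  // kernarg segment pointer and implicitly use the X work-group and
  // work-item IDs.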
  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;
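
  // The private segment buffer and the dispatch/queue/dispatch-id pointers
  // only exist for the AMD code object V2 ABI; the pointer inputs are added
  // when the corresponding function attributes ask for them.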
  if (ST.isAmdCodeObjectV2()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F->hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
  WavesPerEU = ST.getWavesPerEU(*F);
}
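
// Each of the following add* helpers reserves the next user SGPR(s) for one
// of the preloaded kernel inputs and records which register was assigned.
// The private segment buffer is a 128-bit resource descriptor (four SGPRs);
// the remaining inputs are 64-bit values (two SGPRs each).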
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  DispatchIDUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchIDUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}
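
// Map an SGPR spill slot (frame index plus 32-bit sub-register index) to a
// lane of a VGPR that has been set aside for SGPR spilling, allocating a new
// lane VGPR on first use.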
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
    MachineFunction *MF, unsigned FrameIndex, unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;
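
  // One 64-lane VGPR holds 64 spilled SGPR dwords, one per lane: the byte
  // offset selects the lane VGPR (Offset / 256) and the lane within it
  // ((Offset / 4) % 64).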
  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
                                                *MF);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];