1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUMachineFunction.h"
18 #include "SIRegisterInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
/// Target-specific pseudo source value for image accesses (per the class
/// name). It derives from PseudoSourceValue and is always constructed with the
/// PseudoSourceValue::TargetCustom kind.
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
// Default constructor; forwards the TargetCustom kind to the base class.
32 explicit AMDGPUImagePseudoSourceValue() :
33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
// Base-class override. The MachineFrameInfo parameter is unnamed, so the
// answer cannot depend on it.
35 bool isConstant(const MachineFrameInfo *) const override {
36 // This should probably be true for most images, but we will start by being
// Base-class override; the frame info parameter is likewise unnamed.
41 bool isAliased(const MachineFrameInfo *) const override {
42 // FIXME: If we ever change image intrinsics to accept fat pointers, then
43 // this could be true for some cases.
// Base-class override; the frame info parameter is likewise unnamed.
47 bool mayAlias(const MachineFrameInfo*) const override {
48 // FIXME: If we ever change image intrinsics to accept fat pointers, then
49 // this could be true for some cases.
/// Target-specific pseudo source value for buffer accesses (per the class
/// name). It derives from PseudoSourceValue and is always constructed with the
/// PseudoSourceValue::TargetCustom kind.
///
/// NOTE(review): the comments inside this class talk about "images" and
/// "image intrinsics" and are word-for-word identical to the ones in
/// AMDGPUImagePseudoSourceValue. Presumably they were copied and are meant to
/// describe buffers here -- confirm and reword.
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
// Default constructor; forwards the TargetCustom kind to the base class.
56 explicit AMDGPUBufferPseudoSourceValue() :
57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
// Base-class override. The MachineFrameInfo parameter is unnamed, so the
// answer cannot depend on it.
59 bool isConstant(const MachineFrameInfo *) const override {
60 // This should probably be true for most images, but we will start by being
// Base-class override; the frame info parameter is likewise unnamed.
65 bool isAliased(const MachineFrameInfo *) const override {
66 // FIXME: If we ever change image intrinsics to accept fat pointers, then
67 // this could be true for some cases.
// Base-class override; the frame info parameter is likewise unnamed.
71 bool mayAlias(const MachineFrameInfo*) const override {
72 // FIXME: If we ever change image intrinsics to accept fat pointers, then
73 // this could be true for some cases.
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81 // FIXME: This should be removed and getPreloadedValue moved here.
82 friend class SIRegisterInfo;
86 // Registers that may be reserved for spilling purposes. These may be the same
87 // as the input registers.
88 unsigned ScratchRSrcReg;
89 unsigned ScratchWaveOffsetReg;
91 // Input registers for non-HSA ABI
92 unsigned PrivateMemoryPtrUserSGPR;
94 // Input registers setup for the HSA ABI.
95 // User SGPRs in allocation order.
96 unsigned PrivateSegmentBufferUserSGPR;
97 unsigned DispatchPtrUserSGPR;
98 unsigned QueuePtrUserSGPR;
99 unsigned KernargSegmentPtrUserSGPR;
100 unsigned DispatchIDUserSGPR;
101 unsigned FlatScratchInitUserSGPR;
102 unsigned PrivateSegmentSizeUserSGPR;
103 unsigned GridWorkGroupCountXUserSGPR;
104 unsigned GridWorkGroupCountYUserSGPR;
105 unsigned GridWorkGroupCountZUserSGPR;
107 // System SGPRs in allocation order.
108 unsigned WorkGroupIDXSystemSGPR;
109 unsigned WorkGroupIDYSystemSGPR;
110 unsigned WorkGroupIDZSystemSGPR;
111 unsigned WorkGroupInfoSystemSGPR;
112 unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
// Bit masks indexed by PS input number; see isPSInputAllocated(),
// markPSInputAllocated() and markPSInputEnabled() below.
115 unsigned PSInputAddr;
116 unsigned PSInputEnable;
120 // A pair of default/requested minimum/maximum flat work group sizes.
121 // Minimum - first, maximum - second.
122 std::pair<unsigned, unsigned> FlatWorkGroupSizes;
124 // A pair of default/requested minimum/maximum number of waves per execution
125 // unit. Minimum - first, maximum - second.
126 std::pair<unsigned, unsigned> WavesPerEU;
128 // Stack object indices for work group IDs.
129 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
130 // Stack object indices for work item IDs.
131 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
// Pseudo source value instances handed out by getBufferPSV() / getImagePSV().
133 AMDGPUBufferPseudoSourceValue BufferPSV;
134 AMDGPUImagePseudoSourceValue ImagePSV;
137 unsigned LDSWaveSpillSize;
138 unsigned ScratchOffsetReg;
// Running counts of input SGPRs; they determine the register returned by
// getNextUserSGPR() / getNextSystemSGPR().
139 unsigned NumUserSGPRs;
140 unsigned NumSystemSGPRs;
142 bool HasSpilledSGPRs;
143 bool HasSpilledVGPRs;
144 bool HasNonSpillStackObjects;
146 unsigned NumSpilledSGPRs;
147 unsigned NumSpilledVGPRs;
149 // Feature bits required for inputs passed in user SGPRs.
150 bool PrivateSegmentBuffer : 1;
151 bool DispatchPtr : 1;
153 bool KernargSegmentPtr : 1;
155 bool FlatScratchInit : 1;
156 bool GridWorkgroupCountX : 1;
157 bool GridWorkgroupCountY : 1;
158 bool GridWorkgroupCountZ : 1;
160 // Feature bits required for inputs passed in system SGPRs.
161 bool WorkGroupIDX : 1; // Always initialized.
162 bool WorkGroupIDY : 1;
163 bool WorkGroupIDZ : 1;
164 bool WorkGroupInfo : 1;
165 bool PrivateSegmentWaveByteOffset : 1;
167 bool WorkItemIDX : 1; // Always initialized.
168 bool WorkItemIDY : 1;
169 bool WorkItemIDZ : 1;
171 // Private memory buffer
172 // Compute directly in sgpr[0:1]
173 // Other shaders indirect 64-bits at sgpr[0:1]
174 bool PrivateMemoryInputPtr : 1;
// Next unassigned user SGPR: SGPR0 offset by the number of user SGPRs added
// so far. Asserts that no system SGPR has been added yet.
176 MCPhysReg getNextUserSGPR() const {
177 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
178 return AMDGPU::SGPR0 + NumUserSGPRs;
// Next unassigned system SGPR: SGPR0 offset by the user and system SGPR
// counts combined.
181 MCPhysReg getNextSystemSGPR() const {
182 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
// Spill slot description: a VGPR plus a lane. VGPR defaults to NoRegister,
// which hasReg() tests for; hasLane() treats a Lane of -1 as "no lane".
187 unsigned VGPR = AMDGPU::NoRegister;
190 SpilledReg() = default;
191 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
193 bool hasLane() { return Lane != -1;}
194 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
198 // SGPR->VGPR spilling support.
199 typedef std::pair<unsigned, unsigned> SpillRegMask;
201 // Track VGPR + wave index for each subregister of the SGPR spilled to
203 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
204 unsigned NumVGPRSpillLanes = 0;
205 SmallVector<unsigned, 2> SpillVGPRs;
209 SIMachineFunctionInfo(const MachineFunction &MF);
/// \returns The spill entries recorded in SGPRToVGPRSpills for
/// \p FrameIndex, or an empty ArrayRef when there is no entry for it.
211 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
212 auto I = SGPRToVGPRSpills.find(FrameIndex);
213 return (I == SGPRToVGPRSpills.end()) ?
214 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
217 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
218 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
// hasCalculatedTID() is true once TIDReg holds something other than
// NoRegister.
220 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
221 unsigned getTIDReg() const { return TIDReg; };
222 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
// User SGPR inputs; each declaration returns an unsigned register value.
225 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
226 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
227 unsigned addQueuePtr(const SIRegisterInfo &TRI);
228 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
229 unsigned addDispatchID(const SIRegisterInfo &TRI);
230 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
231 unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
// System SGPR inputs. Each helper below stores getNextSystemSGPR() into the
// matching *SystemSGPR field and returns that register.
234 unsigned addWorkGroupIDX() {
235 WorkGroupIDXSystemSGPR = getNextSystemSGPR();
237 return WorkGroupIDXSystemSGPR;
240 unsigned addWorkGroupIDY() {
241 WorkGroupIDYSystemSGPR = getNextSystemSGPR();
243 return WorkGroupIDYSystemSGPR;
246 unsigned addWorkGroupIDZ() {
247 WorkGroupIDZSystemSGPR = getNextSystemSGPR();
249 return WorkGroupIDZSystemSGPR;
252 unsigned addWorkGroupInfo() {
253 WorkGroupInfoSystemSGPR = getNextSystemSGPR();
255 return WorkGroupInfoSystemSGPR;
258 unsigned addPrivateSegmentWaveByteOffset() {
259 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
261 return PrivateSegmentWaveByteOffsetSystemSGPR;
// Sets PrivateSegmentWaveByteOffsetSystemSGPR directly to \p Reg.
264 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
265 PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
// Read-only accessors for the input feature bits declared above.
268 bool hasPrivateSegmentBuffer() const {
269 return PrivateSegmentBuffer;
272 bool hasDispatchPtr() const {
276 bool hasQueuePtr() const {
280 bool hasKernargSegmentPtr() const {
281 return KernargSegmentPtr;
284 bool hasDispatchID() const {
288 bool hasFlatScratchInit() const {
289 return FlatScratchInit;
292 bool hasGridWorkgroupCountX() const {
293 return GridWorkgroupCountX;
296 bool hasGridWorkgroupCountY() const {
297 return GridWorkgroupCountY;
300 bool hasGridWorkgroupCountZ() const {
301 return GridWorkgroupCountZ;
304 bool hasWorkGroupIDX() const {
308 bool hasWorkGroupIDY() const {
312 bool hasWorkGroupIDZ() const {
316 bool hasWorkGroupInfo() const {
317 return WorkGroupInfo;
320 bool hasPrivateSegmentWaveByteOffset() const {
321 return PrivateSegmentWaveByteOffset;
324 bool hasWorkItemIDX() const {
328 bool hasWorkItemIDY() const {
332 bool hasWorkItemIDZ() const {
336 bool hasPrivateMemoryInputPtr() const {
337 return PrivateMemoryInputPtr;
340 unsigned getNumUserSGPRs() const {
/// \returns The total number of user and system SGPRs added so far.
344 unsigned getNumPreloadedSGPRs() const {
345 return NumUserSGPRs + NumSystemSGPRs;
348 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
349 return PrivateSegmentWaveByteOffsetSystemSGPR;
352 /// \brief Returns the physical register reserved for use as the resource
353 /// descriptor for scratch accesses.
354 unsigned getScratchRSrcReg() const {
355 return ScratchRSrcReg;
// \p Reg must not be AMDGPU::NoRegister (asserted).
358 void setScratchRSrcReg(unsigned Reg) {
359 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
360 ScratchRSrcReg = Reg;
363 unsigned getScratchWaveOffsetReg() const {
364 return ScratchWaveOffsetReg;
// \p Reg must not be AMDGPU::NoRegister (asserted).
367 void setScratchWaveOffsetReg(unsigned Reg) {
368 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
369 ScratchWaveOffsetReg = Reg;
372 unsigned getQueuePtrUserSGPR() const {
373 return QueuePtrUserSGPR;
376 unsigned getPrivateMemoryPtrUserSGPR() const {
377 return PrivateMemoryPtrUserSGPR;
// Spill bookkeeping: plain getters/setters for the spill flags and counters.
// The setters default their argument to true.
380 bool hasSpilledSGPRs() const {
381 return HasSpilledSGPRs;
384 void setHasSpilledSGPRs(bool Spill = true) {
385 HasSpilledSGPRs = Spill;
388 bool hasSpilledVGPRs() const {
389 return HasSpilledVGPRs;
392 void setHasSpilledVGPRs(bool Spill = true) {
393 HasSpilledVGPRs = Spill;
396 bool hasNonSpillStackObjects() const {
397 return HasNonSpillStackObjects;
400 void setHasNonSpillStackObjects(bool StackObject = true) {
401 HasNonSpillStackObjects = StackObject;
404 unsigned getNumSpilledSGPRs() const {
405 return NumSpilledSGPRs;
408 unsigned getNumSpilledVGPRs() const {
409 return NumSpilledVGPRs;
// Adds \p num to the running count of spilled SGPRs.
412 void addToSpilledSGPRs(unsigned num) {
413 NumSpilledSGPRs += num;
// Adds \p num to the running count of spilled VGPRs.
416 void addToSpilledVGPRs(unsigned num) {
417 NumSpilledVGPRs += num;
420 unsigned getPSInputAddr() const {
424 unsigned getPSInputEnable() const {
425 return PSInputEnable;
/// \returns true if bit \p Index of PSInputAddr is set.
428 bool isPSInputAllocated(unsigned Index) const {
429 return PSInputAddr & (1 << Index);
/// Sets bit \p Index of PSInputAddr.
432 void markPSInputAllocated(unsigned Index) {
433 PSInputAddr |= 1 << Index;
/// Sets bit \p Index of PSInputEnable.
436 void markPSInputEnabled(unsigned Index) {
437 PSInputEnable |= 1 << Index;
440 bool returnsVoid() const {
444 void setIfReturnsVoid(bool Value) {
448 /// \returns A pair of default/requested minimum/maximum flat work group sizes
449 /// for this function.
450 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
451 return FlatWorkGroupSizes;
454 /// \returns Default/requested minimum flat work group size for this function.
455 unsigned getMinFlatWorkGroupSize() const {
456 return FlatWorkGroupSizes.first;
459 /// \returns Default/requested maximum flat work group size for this function.
460 unsigned getMaxFlatWorkGroupSize() const {
461 return FlatWorkGroupSizes.second;
464 /// \returns A pair of default/requested minimum/maximum number of waves per execution unit.
466 std::pair<unsigned, unsigned> getWavesPerEU() const {
470 /// \returns Default/requested minimum number of waves per execution unit.
471 unsigned getMinWavesPerEU() const {
472 return WavesPerEU.first;
475 /// \returns Default/requested maximum number of waves per execution unit.
476 unsigned getMaxWavesPerEU() const {
477 return WavesPerEU.second;
480 /// \returns Stack object index for \p Dim's work group ID.
481 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
483 return DebuggerWorkGroupIDStackObjectIndices[Dim];
486 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
487 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
489 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
492 /// \returns Stack object index for \p Dim's work item ID.
493 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
495 return DebuggerWorkItemIDStackObjectIndices[Dim];
498 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
499 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
501 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
504 /// \returns SGPR used for \p Dim's work group ID.
// Each dimension asserts that the matching work group ID input is enabled
// before returning its system SGPR; any other value is llvm_unreachable.
505 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
508 assert(hasWorkGroupIDX());
509 return WorkGroupIDXSystemSGPR;
511 assert(hasWorkGroupIDY());
512 return WorkGroupIDYSystemSGPR;
514 assert(hasWorkGroupIDZ());
515 return WorkGroupIDZSystemSGPR;
517 llvm_unreachable("unexpected dimension");
520 /// \returns VGPR used for \p Dim's work item ID.
// The X, Y and Z work item IDs are returned as VGPR0, VGPR1 and VGPR2
// respectively, each after asserting that the matching input is enabled; any
// other value is llvm_unreachable.
521 unsigned getWorkItemIDVGPR(unsigned Dim) const {
524 assert(hasWorkItemIDX());
525 return AMDGPU::VGPR0;
527 assert(hasWorkItemIDY());
528 return AMDGPU::VGPR1;
530 assert(hasWorkItemIDZ());
531 return AMDGPU::VGPR2;
533 llvm_unreachable("unexpected dimension");
536 unsigned getLDSWaveSpillSize() const {
537 return LDSWaveSpillSize;
// Accessors returning pointers to const pseudo source value objects for
// buffer and image accesses.
540 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
544 const AMDGPUImagePseudoSourceValue *getImagePSV() const {
549 } // end namespace llvm
551 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H