1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUMachineFunction.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
/// Target-custom PseudoSourceValue subclass. Judging by its name and the
/// comments below, it stands for memory reached through image intrinsics
/// (NOTE(review): confirm against the users of getImagePSV()).
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
// Tags the value with the PseudoSourceValue::TargetCustom kind.
32 explicit AMDGPUImagePseudoSourceValue() :
33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
// Constant-memory query; the MachineFrameInfo argument is unnamed and unused.
35 bool isConstant(const MachineFrameInfo *) const override {
36 // This should probably be true for most images, but we will start by being
// conservative.
// Aliasing query; the MachineFrameInfo argument is unnamed and unused.
41 bool isAliased(const MachineFrameInfo *) const override {
42 // FIXME: If we ever change image intrinsics to accept fat pointers, then
43 // this could be true for some cases.
// May-alias query; the MachineFrameInfo argument is unnamed and unused.
47 bool mayAlias(const MachineFrameInfo*) const override {
48 // FIXME: If we ever change image intrinsics to accept fat pointers, then
49 // this could be true for some cases.
/// Target-custom PseudoSourceValue subclass. Judging by its name, it is the
/// buffer counterpart of AMDGPUImagePseudoSourceValue
/// (NOTE(review): confirm against the users of getBufferPSV()).
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
// Tags the value with the PseudoSourceValue::TargetCustom kind.
56 explicit AMDGPUBufferPseudoSourceValue() :
57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
// Constant-memory query; the MachineFrameInfo argument is unnamed and unused.
59 bool isConstant(const MachineFrameInfo *) const override {
60 // This should probably be true for most buffers, but we will start by being
// conservative.
// NOTE(review): this wording mirrors the image class; confirm that the same
// reasoning holds for buffer accesses.
// Aliasing query; the MachineFrameInfo argument is unnamed and unused.
65 bool isAliased(const MachineFrameInfo *) const override {
66 // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
67 // this could be true for some cases.
// May-alias query; the MachineFrameInfo argument is unnamed and unused.
71 bool mayAlias(const MachineFrameInfo*) const override {
72 // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
73 // this could be true for some cases.
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
///
/// The members declared below additionally record which registers hold the
/// scratch/frame/stack state, which user SGPRs, system SGPRs and work-item
/// VGPRs carry preloaded inputs, and per-function spill bookkeeping.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81 // FIXME: This should be removed and getPreloadedValue moved here.
82 friend class SIRegisterInfo;
86 // Registers that may be reserved for spilling purposes. These may be the same
87 // as the input registers.
// Both are written through setScratchRSrcReg / setScratchWaveOffsetReg, which
// assert that the new value is not AMDGPU::NoRegister.
88 unsigned ScratchRSrcReg;
89 unsigned ScratchWaveOffsetReg;
91 // This is the current function's incremented size from the kernel's scratch
92 // wave offset register. For an entry function, this is exactly the same as
93 // the ScratchWaveOffsetReg.
94 unsigned FrameOffsetReg;
96 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
97 unsigned StackPtrOffsetReg;
99 // Input registers for non-HSA ABI
100 unsigned ImplicitBufferPtrUserSGPR;
102 // Input registers setup for the HSA ABI.
103 // User SGPRs in allocation order.
// Each field holds a register number (plain unsigned).
104 unsigned PrivateSegmentBufferUserSGPR;
105 unsigned DispatchPtrUserSGPR;
106 unsigned QueuePtrUserSGPR;
107 unsigned KernargSegmentPtrUserSGPR;
108 unsigned DispatchIDUserSGPR;
109 unsigned FlatScratchInitUserSGPR;
110 unsigned PrivateSegmentSizeUserSGPR;
111 unsigned GridWorkGroupCountXUserSGPR;
112 unsigned GridWorkGroupCountYUserSGPR;
113 unsigned GridWorkGroupCountZUserSGPR;
115 // System SGPRs in allocation order.
// The add* helpers further down assign these from getNextSystemSGPR().
116 unsigned WorkGroupIDXSystemSGPR;
117 unsigned WorkGroupIDYSystemSGPR;
118 unsigned WorkGroupIDZSystemSGPR;
119 unsigned WorkGroupInfoSystemSGPR;
120 unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
122 // VGPR inputs. These are always v0, v1 and v2 for entry functions.
// NOTE(review): getWorkItemIDVGPR() returns AMDGPU::VGPR0/1/2 directly and
// does not read these fields.
123 unsigned WorkItemIDXVGPR;
124 unsigned WorkItemIDYVGPR;
125 unsigned WorkItemIDZVGPR;
// Bit masks indexed by PS input number: markPSInputAllocated() sets bit
// `Index` of PSInputAddr and markPSInputEnabled() sets bit `Index` of
// PSInputEnable. ("PS" presumably stands for pixel shader; confirm.)
128 unsigned PSInputAddr;
129 unsigned PSInputEnable;
133 // A pair of default/requested minimum/maximum flat work group sizes.
134 // Minimum - first, maximum - second.
135 std::pair<unsigned, unsigned> FlatWorkGroupSizes;
137 // A pair of default/requested minimum/maximum number of waves per execution
138 // unit. Minimum - first, maximum - second.
139 std::pair<unsigned, unsigned> WavesPerEU;
141 // Stack object indices for work group IDs.
// One slot per dimension; the accessors index this array with `Dim`.
142 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
143 // Stack object indices for work item IDs.
// One slot per dimension; the accessors index this array with `Dim`.
144 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
// Pseudo source value objects owned by this function info; presumably these
// are what getBufferPSV() / getImagePSV() hand out (confirm).
146 AMDGPUBufferPseudoSourceValue BufferPSV;
147 AMDGPUImagePseudoSourceValue ImagePSV;
// Value reported by getLDSWaveSpillSize().
150 unsigned LDSWaveSpillSize;
151 unsigned ScratchOffsetReg;
// Counts of preloaded SGPRs; getNextUserSGPR() / getNextSystemSGPR() add them
// to AMDGPU::SGPR0 to find the next free input register.
152 unsigned NumUserSGPRs;
153 unsigned NumSystemSGPRs;
// Flags read and written by the hasSpilled*/setHasSpilled* and
// hasNonSpillStackObjects/setHasNonSpillStackObjects accessors.
155 bool HasSpilledSGPRs;
156 bool HasSpilledVGPRs;
157 bool HasNonSpillStackObjects;
// Running totals increased by addToSpilledSGPRs() / addToSpilledVGPRs().
159 unsigned NumSpilledSGPRs;
160 unsigned NumSpilledVGPRs;
// The flags below are one-bit bit-fields; the like-named has*() predicates
// report them.
162 // Feature bits required for inputs passed in user SGPRs.
163 bool PrivateSegmentBuffer : 1;
164 bool DispatchPtr : 1;
166 bool KernargSegmentPtr : 1;
168 bool FlatScratchInit : 1;
169 bool GridWorkgroupCountX : 1;
170 bool GridWorkgroupCountY : 1;
171 bool GridWorkgroupCountZ : 1;
173 // Feature bits required for inputs passed in system SGPRs.
174 bool WorkGroupIDX : 1; // Always initialized.
175 bool WorkGroupIDY : 1;
176 bool WorkGroupIDZ : 1;
177 bool WorkGroupInfo : 1;
178 bool PrivateSegmentWaveByteOffset : 1;
// Feature bits for the work-item ID inputs.
180 bool WorkItemIDX : 1; // Always initialized.
181 bool WorkItemIDY : 1;
182 bool WorkItemIDZ : 1;
184 // Private memory buffer
185 // Compute directly in sgpr[0:1]
186 // Other shaders indirect 64-bits at sgpr[0:1]
187 bool ImplicitBufferPtr : 1;
/// \returns the register NumUserSGPRs positions after AMDGPU::SGPR0, i.e. the
/// next unassigned user SGPR. Asserts that no system SGPR has been added yet.
/// NOTE(review): the arithmetic assumes the SGPR register numbers are
/// consecutive; confirm against the generated register enum.
189 MCPhysReg getNextUserSGPR() const {
190 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
191 return AMDGPU::SGPR0 + NumUserSGPRs;
/// \returns the register that follows all user and system SGPRs counted so
/// far, i.e. AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs.
194 MCPhysReg getNextSystemSGPR() const {
195 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
// Members of SpilledReg: a VGPR number paired with a lane index.
// VGPR defaults to AMDGPU::NoRegister, which hasReg() treats as "unset".
200 unsigned VGPR = AMDGPU::NoRegister;
// Default construction keeps the in-class member initializers.
203 SpilledReg() = default;
// Builds an entry for VGPR R, lane L.
204 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
// True when a lane has been assigned; -1 is the "no lane" sentinel.
// NOTE(review): hasLane()/hasReg() only read members and could be const.
206 bool hasLane() { return Lane != -1;}
// True when VGPR holds something other than AMDGPU::NoRegister.
207 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
211 // SGPR->VGPR spilling support.
212 typedef std::pair<unsigned, unsigned> SpillRegMask;
214 // Track VGPR + wave index for each subregister of the SGPR spilled to
// the frame index that is used as the map key.
216 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
// Number of VGPR lanes handed out for SGPR spills so far; starts at zero.
// NOTE(review): it is presumably advanced by allocateSGPRSpillToVGPR(),
// which is defined out of line; confirm.
217 unsigned NumVGPRSpillLanes = 0;
// VGPRs set aside to receive SGPR spills (presumably; populated out of line).
218 SmallVector<unsigned, 2> SpillVGPRs;
// Constructor; defined out of line.
222 SIMachineFunctionInfo(const MachineFunction &MF);
/// Looks up the SGPR-to-VGPR spill entries recorded for \p FrameIndex.
/// \returns a non-owning view over the vector stored in SGPRToVGPRSpills, or
/// an empty ArrayRef when the frame index has no entry.
224 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
225 auto I = SGPRToVGPRSpills.find(FrameIndex);
226 return (I == SGPRToVGPRSpills.end()) ?
227 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
// Both functions below are defined out of line; judging by their names, the
// first reserves VGPR lanes for the SGPR spill at frame index FI and the
// second drops the frame indices recorded in SGPRToVGPRSpills (confirm).
230 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
231 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
// Accessors for TIDReg. AMDGPU::NoRegister means that no register has been
// recorded yet.
// NOTE(review): the trailing ';' after the first two bodies is a redundant
// empty declaration.
233 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
234 unsigned getTIDReg() const { return TIDReg; };
235 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
// Out-of-line helpers, one per user-SGPR input. Each takes the target
// register info and returns an unsigned; presumably that is the register
// assigned to the input (confirm in the implementation file).
238 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
239 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
240 unsigned addQueuePtr(const SIRegisterInfo &TRI);
241 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
242 unsigned addDispatchID(const SIRegisterInfo &TRI);
243 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
244 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
// System-SGPR helpers. Each one stores the result of getNextSystemSGPR() in
// the matching *SystemSGPR field and returns that register.
// NOTE(review): presumably NumSystemSGPRs is advanced as part of each call so
// that the next helper receives a fresh register; confirm.

/// Assigns the next system SGPR to the work group ID X input.
247 unsigned addWorkGroupIDX() {
248 WorkGroupIDXSystemSGPR = getNextSystemSGPR();
250 return WorkGroupIDXSystemSGPR;
/// Assigns the next system SGPR to the work group ID Y input.
253 unsigned addWorkGroupIDY() {
254 WorkGroupIDYSystemSGPR = getNextSystemSGPR();
256 return WorkGroupIDYSystemSGPR;
/// Assigns the next system SGPR to the work group ID Z input.
259 unsigned addWorkGroupIDZ() {
260 WorkGroupIDZSystemSGPR = getNextSystemSGPR();
262 return WorkGroupIDZSystemSGPR;
/// Assigns the next system SGPR to the work group info input.
265 unsigned addWorkGroupInfo() {
266 WorkGroupInfoSystemSGPR = getNextSystemSGPR();
268 return WorkGroupInfoSystemSGPR;
/// Assigns the next system SGPR to the private segment wave byte offset.
271 unsigned addPrivateSegmentWaveByteOffset() {
272 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
274 return PrivateSegmentWaveByteOffsetSystemSGPR;
/// Overrides the private segment wave byte offset register with \p Reg
/// without consulting getNextSystemSGPR().
277 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
278 PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
// Read-only predicates, one per input. Where the body is shown, it returns
// the like-named feature bit declared above.
281 bool hasPrivateSegmentBuffer() const {
282 return PrivateSegmentBuffer;
285 bool hasDispatchPtr() const {
289 bool hasQueuePtr() const {
293 bool hasKernargSegmentPtr() const {
294 return KernargSegmentPtr;
297 bool hasDispatchID() const {
301 bool hasFlatScratchInit() const {
302 return FlatScratchInit;
305 bool hasGridWorkgroupCountX() const {
306 return GridWorkgroupCountX;
309 bool hasGridWorkgroupCountY() const {
310 return GridWorkgroupCountY;
313 bool hasGridWorkgroupCountZ() const {
314 return GridWorkgroupCountZ;
// Predicates for inputs passed in system SGPRs.
317 bool hasWorkGroupIDX() const {
321 bool hasWorkGroupIDY() const {
325 bool hasWorkGroupIDZ() const {
329 bool hasWorkGroupInfo() const {
330 return WorkGroupInfo;
333 bool hasPrivateSegmentWaveByteOffset() const {
334 return PrivateSegmentWaveByteOffset;
// Predicates for the work-item ID inputs.
337 bool hasWorkItemIDX() const {
341 bool hasWorkItemIDY() const {
345 bool hasWorkItemIDZ() const {
// Predicate for the implicit buffer pointer input.
349 bool hasImplicitBufferPtr() const {
350 return ImplicitBufferPtr;
/// User SGPR count getter.
353 unsigned getNumUserSGPRs() const {
/// \returns the total number of preloaded SGPRs: user plus system.
357 unsigned getNumPreloadedSGPRs() const {
358 return NumUserSGPRs + NumSystemSGPRs;
/// \returns the register recorded for the private segment wave byte offset.
361 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
362 return PrivateSegmentWaveByteOffsetSystemSGPR;
365 /// \brief Returns the physical register reserved for use as the resource
366 /// descriptor for scratch accesses.
367 unsigned getScratchRSrcReg() const {
368 return ScratchRSrcReg;
/// Records \p Reg as the scratch resource descriptor register. Asserts that
/// \p Reg is not AMDGPU::NoRegister.
371 void setScratchRSrcReg(unsigned Reg) {
372 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
373 ScratchRSrcReg = Reg;
/// \returns the register recorded as the scratch wave offset.
376 unsigned getScratchWaveOffsetReg() const {
377 return ScratchWaveOffsetReg;
/// \returns the register recorded as the frame offset.
380 unsigned getFrameOffsetReg() const {
381 return FrameOffsetReg;
/// Records \p Reg as the stack pointer offset register. There is no assertion
/// here, unlike the scratch register setters.
384 void setStackPtrOffsetReg(unsigned Reg) {
385 StackPtrOffsetReg = Reg;
388 // Note the unset value for this is AMDGPU::SP_REG rather than
389 // NoRegister. This is mostly a workaround for MIR tests where state that
390 // can't be directly computed from the function is not preserved in serialized
// MIR.
392 unsigned getStackPtrOffsetReg() const {
393 return StackPtrOffsetReg;
/// Records \p Reg as the scratch wave offset register. Asserts that \p Reg is
/// not AMDGPU::NoRegister. For entry functions the frame offset register is
/// kept identical to it.
396 void setScratchWaveOffsetReg(unsigned Reg) {
397 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
398 ScratchWaveOffsetReg = Reg;
399 if (isEntryFunction())
400 FrameOffsetReg = ScratchWaveOffsetReg;
/// \returns the user SGPR recorded for the queue pointer.
403 unsigned getQueuePtrUserSGPR() const {
404 return QueuePtrUserSGPR;
/// \returns the user SGPR recorded for the implicit buffer pointer.
407 unsigned getImplicitBufferPtrUserSGPR() const {
408 return ImplicitBufferPtrUserSGPR;
// Spill bookkeeping: plain getters and setters over the Has* flags and
// NumSpilled* counters. The setters default their argument to true.
411 bool hasSpilledSGPRs() const {
412 return HasSpilledSGPRs;
415 void setHasSpilledSGPRs(bool Spill = true) {
416 HasSpilledSGPRs = Spill;
419 bool hasSpilledVGPRs() const {
420 return HasSpilledVGPRs;
423 void setHasSpilledVGPRs(bool Spill = true) {
424 HasSpilledVGPRs = Spill;
427 bool hasNonSpillStackObjects() const {
428 return HasNonSpillStackObjects;
431 void setHasNonSpillStackObjects(bool StackObject = true) {
432 HasNonSpillStackObjects = StackObject;
/// \returns the running total of spilled SGPRs.
435 unsigned getNumSpilledSGPRs() const {
436 return NumSpilledSGPRs;
/// \returns the running total of spilled VGPRs.
439 unsigned getNumSpilledVGPRs() const {
440 return NumSpilledVGPRs;
/// Adds \p num to the spilled SGPR total.
443 void addToSpilledSGPRs(unsigned num) {
444 NumSpilledSGPRs += num;
/// Adds \p num to the spilled VGPR total.
447 void addToSpilledVGPRs(unsigned num) {
448 NumSpilledVGPRs += num;
// Accessors for the PS input bit masks.
// NOTE(review): the shifts below use the signed literal 1, so Index must stay
// below the bit width of int; `1u << Index` would match the unsigned masks.
// Confirm that callers never pass an Index of 31 or more.
451 unsigned getPSInputAddr() const {
/// \returns the PSInputEnable mask.
455 unsigned getPSInputEnable() const {
456 return PSInputEnable;
/// \returns true when bit \p Index is set in PSInputAddr.
459 bool isPSInputAllocated(unsigned Index) const {
460 return PSInputAddr & (1 << Index);
/// Sets bit \p Index in PSInputAddr.
463 void markPSInputAllocated(unsigned Index) {
464 PSInputAddr |= 1 << Index;
/// Sets bit \p Index in PSInputEnable.
467 void markPSInputEnabled(unsigned Index) {
468 PSInputEnable |= 1 << Index;
// Getter/setter pair for the "returns void" property of the function.
471 bool returnsVoid() const {
475 void setIfReturnsVoid(bool Value) {
479 /// \returns A pair of default/requested minimum/maximum flat work group sizes
480 /// for this function.
481 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
482 return FlatWorkGroupSizes;
485 /// \returns Default/requested minimum flat work group size for this function.
486 unsigned getMinFlatWorkGroupSize() const {
487 return FlatWorkGroupSizes.first;
490 /// \returns Default/requested maximum flat work group size for this function.
491 unsigned getMaxFlatWorkGroupSize() const {
492 return FlatWorkGroupSizes.second;
495 /// \returns A pair of default/requested minimum/maximum number of waves per
/// execution unit.
497 std::pair<unsigned, unsigned> getWavesPerEU() const {
501 /// \returns Default/requested minimum number of waves per execution unit.
502 unsigned getMinWavesPerEU() const {
503 return WavesPerEU.first;
506 /// \returns Default/requested maximum number of waves per execution unit.
507 unsigned getMaxWavesPerEU() const {
508 return WavesPerEU.second;
// The four accessors below index three-element std::array members with
// \p Dim, so \p Dim has to be 0, 1 or 2.
511 /// \returns Stack object index for \p Dim's work group ID.
512 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
514 return DebuggerWorkGroupIDStackObjectIndices[Dim];
517 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
518 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
520 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
523 /// \returns Stack object index for \p Dim's work item ID.
524 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
526 return DebuggerWorkItemIDStackObjectIndices[Dim];
529 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
530 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
532 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
535 /// \returns SGPR used for \p Dim's work group ID.
/// Each dimension asserts that the matching work group ID input is enabled
/// before returning its *SystemSGPR field; any other dimension is reported
/// through llvm_unreachable.
536 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
539 assert(hasWorkGroupIDX());
540 return WorkGroupIDXSystemSGPR;
542 assert(hasWorkGroupIDY());
543 return WorkGroupIDYSystemSGPR;
545 assert(hasWorkGroupIDZ());
546 return WorkGroupIDZSystemSGPR;
548 llvm_unreachable("unexpected dimension");
551 /// \returns VGPR used for \p Dim's work item ID.
/// The result is one of the fixed registers AMDGPU::VGPR0, VGPR1 or VGPR2,
/// after asserting that the matching work item ID input is enabled; any other
/// dimension is reported through llvm_unreachable.
552 unsigned getWorkItemIDVGPR(unsigned Dim) const {
555 assert(hasWorkItemIDX());
556 return AMDGPU::VGPR0;
558 assert(hasWorkItemIDY());
559 return AMDGPU::VGPR1;
561 assert(hasWorkItemIDZ());
562 return AMDGPU::VGPR2;
564 llvm_unreachable("unexpected dimension");
/// \returns the stored LDSWaveSpillSize value.
567 unsigned getLDSWaveSpillSize() const {
568 return LDSWaveSpillSize;
/// Accessor for a buffer pseudo source value; presumably the BufferPSV member
/// (confirm).
571 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
/// Accessor for an image pseudo source value; presumably the ImagePSV member
/// (confirm).
575 const AMDGPUImagePseudoSourceValue *getImagePSV() const {
580 } // end namespace llvm
582 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H