//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUMachineFunction.h"
18 #include "SIRegisterInfo.h"
19 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20 #include "llvm/CodeGen/PseudoSourceValue.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/Support/ErrorHandling.h"
30 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
32 explicit AMDGPUImagePseudoSourceValue() :
33 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
35 bool isConstant(const MachineFrameInfo *) const override {
36 // This should probably be true for most images, but we will start by being
41 bool isAliased(const MachineFrameInfo *) const override {
42 // FIXME: If we ever change image intrinsics to accept fat pointers, then
43 // this could be true for some cases.
47 bool mayAlias(const MachineFrameInfo*) const override {
48 // FIXME: If we ever change image intrinsics to accept fat pointers, then
49 // this could be true for some cases.
54 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
56 explicit AMDGPUBufferPseudoSourceValue() :
57 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
59 bool isConstant(const MachineFrameInfo *) const override {
60 // This should probably be true for most images, but we will start by being
65 bool isAliased(const MachineFrameInfo *) const override {
66 // FIXME: If we ever change image intrinsics to accept fat pointers, then
67 // this could be true for some cases.
71 bool mayAlias(const MachineFrameInfo*) const override {
72 // FIXME: If we ever change image intrinsics to accept fat pointers, then
73 // this could be true for some cases.
78 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
79 /// tells the hardware which interpolation parameters to load.
80 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
81 // FIXME: This should be removed and getPreloadedValue moved here.
82 friend class SIRegisterInfo;
86 // Registers that may be reserved for spilling purposes. These may be the same
87 // as the input registers.
88 unsigned ScratchRSrcReg;
89 unsigned ScratchWaveOffsetReg;
91 // Input registers for non-HSA ABI
92 unsigned PrivateMemoryPtrUserSGPR;
94 // Input registers setup for the HSA ABI.
95 // User SGPRs in allocation order.
96 unsigned PrivateSegmentBufferUserSGPR;
97 unsigned DispatchPtrUserSGPR;
98 unsigned QueuePtrUserSGPR;
99 unsigned KernargSegmentPtrUserSGPR;
100 unsigned DispatchIDUserSGPR;
101 unsigned FlatScratchInitUserSGPR;
102 unsigned PrivateSegmentSizeUserSGPR;
103 unsigned GridWorkGroupCountXUserSGPR;
104 unsigned GridWorkGroupCountYUserSGPR;
105 unsigned GridWorkGroupCountZUserSGPR;
107 // System SGPRs in allocation order.
108 unsigned WorkGroupIDXSystemSGPR;
109 unsigned WorkGroupIDYSystemSGPR;
110 unsigned WorkGroupIDZSystemSGPR;
111 unsigned WorkGroupInfoSystemSGPR;
112 unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
115 unsigned PSInputAddr;
116 unsigned PSInputEnable;
120 // A pair of default/requested minimum/maximum flat work group sizes.
121 // Minimum - first, maximum - second.
122 std::pair<unsigned, unsigned> FlatWorkGroupSizes;
124 // A pair of default/requested minimum/maximum number of waves per execution
125 // unit. Minimum - first, maximum - second.
126 std::pair<unsigned, unsigned> WavesPerEU;
128 // Stack object indices for work group IDs.
129 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
130 // Stack object indices for work item IDs.
131 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
133 AMDGPUBufferPseudoSourceValue BufferPSV;
134 AMDGPUImagePseudoSourceValue ImagePSV;
137 // FIXME: Make private
138 unsigned LDSWaveSpillSize;
139 unsigned ScratchOffsetReg;
140 unsigned NumUserSGPRs;
141 unsigned NumSystemSGPRs;
144 bool HasSpilledSGPRs;
145 bool HasSpilledVGPRs;
146 bool HasNonSpillStackObjects;
148 unsigned NumSpilledSGPRs;
149 unsigned NumSpilledVGPRs;
151 // Feature bits required for inputs passed in user SGPRs.
152 bool PrivateSegmentBuffer : 1;
153 bool DispatchPtr : 1;
155 bool KernargSegmentPtr : 1;
157 bool FlatScratchInit : 1;
158 bool GridWorkgroupCountX : 1;
159 bool GridWorkgroupCountY : 1;
160 bool GridWorkgroupCountZ : 1;
162 // Feature bits required for inputs passed in system SGPRs.
163 bool WorkGroupIDX : 1; // Always initialized.
164 bool WorkGroupIDY : 1;
165 bool WorkGroupIDZ : 1;
166 bool WorkGroupInfo : 1;
167 bool PrivateSegmentWaveByteOffset : 1;
169 bool WorkItemIDX : 1; // Always initialized.
170 bool WorkItemIDY : 1;
171 bool WorkItemIDZ : 1;
173 // Private memory buffer
174 // Compute directly in sgpr[0:1]
175 // Other shaders indirect 64-bits at sgpr[0:1]
176 bool PrivateMemoryInputPtr : 1;
178 MCPhysReg getNextUserSGPR() const {
179 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
180 return AMDGPU::SGPR0 + NumUserSGPRs;
183 MCPhysReg getNextSystemSGPR() const {
184 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
189 unsigned VGPR = AMDGPU::NoRegister;
192 SpilledReg() = default;
193 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
195 bool hasLane() { return Lane != -1;}
196 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
200 // SGPR->VGPR spilling support.
201 typedef std::pair<unsigned, unsigned> SpillRegMask;
203 // Track VGPR + wave index for each subregister of the SGPR spilled to
205 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
206 unsigned NumVGPRSpillLanes = 0;
207 SmallVector<unsigned, 2> SpillVGPRs;
211 SIMachineFunctionInfo(const MachineFunction &MF);
213 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
214 auto I = SGPRToVGPRSpills.find(FrameIndex);
215 return (I == SGPRToVGPRSpills.end()) ?
216 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
219 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
220 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
222 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
223 unsigned getTIDReg() const { return TIDReg; };
224 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
227 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
228 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
229 unsigned addQueuePtr(const SIRegisterInfo &TRI);
230 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
231 unsigned addDispatchID(const SIRegisterInfo &TRI);
232 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
233 unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
236 unsigned addWorkGroupIDX() {
237 WorkGroupIDXSystemSGPR = getNextSystemSGPR();
239 return WorkGroupIDXSystemSGPR;
242 unsigned addWorkGroupIDY() {
243 WorkGroupIDYSystemSGPR = getNextSystemSGPR();
245 return WorkGroupIDYSystemSGPR;
248 unsigned addWorkGroupIDZ() {
249 WorkGroupIDZSystemSGPR = getNextSystemSGPR();
251 return WorkGroupIDZSystemSGPR;
254 unsigned addWorkGroupInfo() {
255 WorkGroupInfoSystemSGPR = getNextSystemSGPR();
257 return WorkGroupInfoSystemSGPR;
260 unsigned addPrivateSegmentWaveByteOffset() {
261 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
263 return PrivateSegmentWaveByteOffsetSystemSGPR;
266 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
267 PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
270 bool hasPrivateSegmentBuffer() const {
271 return PrivateSegmentBuffer;
274 bool hasDispatchPtr() const {
278 bool hasQueuePtr() const {
282 bool hasKernargSegmentPtr() const {
283 return KernargSegmentPtr;
286 bool hasDispatchID() const {
290 bool hasFlatScratchInit() const {
291 return FlatScratchInit;
294 bool hasGridWorkgroupCountX() const {
295 return GridWorkgroupCountX;
298 bool hasGridWorkgroupCountY() const {
299 return GridWorkgroupCountY;
302 bool hasGridWorkgroupCountZ() const {
303 return GridWorkgroupCountZ;
306 bool hasWorkGroupIDX() const {
310 bool hasWorkGroupIDY() const {
314 bool hasWorkGroupIDZ() const {
318 bool hasWorkGroupInfo() const {
319 return WorkGroupInfo;
322 bool hasPrivateSegmentWaveByteOffset() const {
323 return PrivateSegmentWaveByteOffset;
326 bool hasWorkItemIDX() const {
330 bool hasWorkItemIDY() const {
334 bool hasWorkItemIDZ() const {
338 bool hasPrivateMemoryInputPtr() const {
339 return PrivateMemoryInputPtr;
342 unsigned getNumUserSGPRs() const {
346 unsigned getNumPreloadedSGPRs() const {
347 return NumUserSGPRs + NumSystemSGPRs;
350 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
351 return PrivateSegmentWaveByteOffsetSystemSGPR;
354 /// \brief Returns the physical register reserved for use as the resource
355 /// descriptor for scratch accesses.
356 unsigned getScratchRSrcReg() const {
357 return ScratchRSrcReg;
360 void setScratchRSrcReg(unsigned Reg) {
361 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
362 ScratchRSrcReg = Reg;
365 unsigned getScratchWaveOffsetReg() const {
366 return ScratchWaveOffsetReg;
369 void setScratchWaveOffsetReg(unsigned Reg) {
370 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
371 ScratchWaveOffsetReg = Reg;
374 unsigned getQueuePtrUserSGPR() const {
375 return QueuePtrUserSGPR;
378 unsigned getPrivateMemoryPtrUserSGPR() const {
379 return PrivateMemoryPtrUserSGPR;
382 bool hasSpilledSGPRs() const {
383 return HasSpilledSGPRs;
386 void setHasSpilledSGPRs(bool Spill = true) {
387 HasSpilledSGPRs = Spill;
390 bool hasSpilledVGPRs() const {
391 return HasSpilledVGPRs;
394 void setHasSpilledVGPRs(bool Spill = true) {
395 HasSpilledVGPRs = Spill;
398 bool hasNonSpillStackObjects() const {
399 return HasNonSpillStackObjects;
402 void setHasNonSpillStackObjects(bool StackObject = true) {
403 HasNonSpillStackObjects = StackObject;
406 unsigned getNumSpilledSGPRs() const {
407 return NumSpilledSGPRs;
410 unsigned getNumSpilledVGPRs() const {
411 return NumSpilledVGPRs;
414 void addToSpilledSGPRs(unsigned num) {
415 NumSpilledSGPRs += num;
418 void addToSpilledVGPRs(unsigned num) {
419 NumSpilledVGPRs += num;
422 unsigned getPSInputAddr() const {
426 unsigned getPSInputEnable() const {
427 return PSInputEnable;
430 bool isPSInputAllocated(unsigned Index) const {
431 return PSInputAddr & (1 << Index);
434 void markPSInputAllocated(unsigned Index) {
435 PSInputAddr |= 1 << Index;
438 void markPSInputEnabled(unsigned Index) {
439 PSInputEnable |= 1 << Index;
442 bool returnsVoid() const {
446 void setIfReturnsVoid(bool Value) {
450 /// \returns A pair of default/requested minimum/maximum flat work group sizes
451 /// for this function.
452 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
453 return FlatWorkGroupSizes;
456 /// \returns Default/requested minimum flat work group size for this function.
457 unsigned getMinFlatWorkGroupSize() const {
458 return FlatWorkGroupSizes.first;
461 /// \returns Default/requested maximum flat work group size for this function.
462 unsigned getMaxFlatWorkGroupSize() const {
463 return FlatWorkGroupSizes.second;
466 /// \returns A pair of default/requested minimum/maximum number of waves per
468 std::pair<unsigned, unsigned> getWavesPerEU() const {
472 /// \returns Default/requested minimum number of waves per execution unit.
473 unsigned getMinWavesPerEU() const {
474 return WavesPerEU.first;
477 /// \returns Default/requested maximum number of waves per execution unit.
478 unsigned getMaxWavesPerEU() const {
479 return WavesPerEU.second;
482 /// \returns Stack object index for \p Dim's work group ID.
483 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
485 return DebuggerWorkGroupIDStackObjectIndices[Dim];
488 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
489 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
491 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
494 /// \returns Stack object index for \p Dim's work item ID.
495 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
497 return DebuggerWorkItemIDStackObjectIndices[Dim];
500 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
501 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
503 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
506 /// \returns SGPR used for \p Dim's work group ID.
507 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
510 assert(hasWorkGroupIDX());
511 return WorkGroupIDXSystemSGPR;
513 assert(hasWorkGroupIDY());
514 return WorkGroupIDYSystemSGPR;
516 assert(hasWorkGroupIDZ());
517 return WorkGroupIDZSystemSGPR;
519 llvm_unreachable("unexpected dimension");
522 /// \returns VGPR used for \p Dim' work item ID.
523 unsigned getWorkItemIDVGPR(unsigned Dim) const {
526 assert(hasWorkItemIDX());
527 return AMDGPU::VGPR0;
529 assert(hasWorkItemIDY());
530 return AMDGPU::VGPR1;
532 assert(hasWorkItemIDZ());
533 return AMDGPU::VGPR2;
535 llvm_unreachable("unexpected dimension");
538 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
542 const AMDGPUImagePseudoSourceValue *getImagePSV() const {
547 } // end namespace llvm
549 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H