1 //===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
12 //===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
17 #include "AMDGPUMachineFunction.h"
18 #include "SIRegisterInfo.h"
24 class MachineRegisterInfo;
26 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
28 explicit AMDGPUImagePseudoSourceValue() :
29 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
31 bool isConstant(const MachineFrameInfo *) const override {
32 // This should probably be true for most images, but we will start by being
37 bool isAliased(const MachineFrameInfo *) const override {
38 // FIXME: If we ever change image intrinsics to accept fat pointers, then
39 // this could be true for some cases.
43 bool mayAlias(const MachineFrameInfo*) const override {
44 // FIXME: If we ever change image intrinsics to accept fat pointers, then
45 // this could be true for some cases.
50 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
52 explicit AMDGPUBufferPseudoSourceValue() :
53 PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
55 bool isConstant(const MachineFrameInfo *) const override {
56 // This should probably be true for most buffers, but we will start by being
61 bool isAliased(const MachineFrameInfo *) const override {
62 // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
63 // this could be true for some cases.
67 bool mayAlias(const MachineFrameInfo*) const override {
68 // FIXME: If we ever change buffer intrinsics to accept fat pointers, then
69 // this could be true for some cases.
74 /// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
75 /// tells the hardware which interpolation parameters to load.
76 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
77 // FIXME: This should be removed and getPreloadedValue moved here.
78 friend class SIRegisterInfo;
82 // Registers that may be reserved for spilling purposes. These may be the same
83 // as the input registers.
84 unsigned ScratchRSrcReg;
85 unsigned ScratchWaveOffsetReg;
87 // Input registers for non-HSA ABI
88 unsigned PrivateMemoryPtrUserSGPR;
90 // Input registers setup for the HSA ABI.
91 // User SGPRs in allocation order.
92 unsigned PrivateSegmentBufferUserSGPR;
93 unsigned DispatchPtrUserSGPR;
94 unsigned QueuePtrUserSGPR;
95 unsigned KernargSegmentPtrUserSGPR;
96 unsigned DispatchIDUserSGPR;
97 unsigned FlatScratchInitUserSGPR;
98 unsigned PrivateSegmentSizeUserSGPR;
99 unsigned GridWorkGroupCountXUserSGPR;
100 unsigned GridWorkGroupCountYUserSGPR;
101 unsigned GridWorkGroupCountZUserSGPR;
103 // System SGPRs in allocation order.
104 unsigned WorkGroupIDXSystemSGPR;
105 unsigned WorkGroupIDYSystemSGPR;
106 unsigned WorkGroupIDZSystemSGPR;
107 unsigned WorkGroupInfoSystemSGPR;
108 unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
111 unsigned PSInputAddr;
114 // A pair of default/requested minimum/maximum flat work group sizes.
115 // Minimum - first, maximum - second.
116 std::pair<unsigned, unsigned> FlatWorkGroupSizes;
118 // A pair of default/requested minimum/maximum number of waves per execution
119 // unit. Minimum - first, maximum - second.
120 std::pair<unsigned, unsigned> WavesPerEU;
122 // Stack object indices for work group IDs.
123 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
124 // Stack object indices for work item IDs.
125 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
127 AMDGPUBufferPseudoSourceValue BufferPSV;
128 AMDGPUImagePseudoSourceValue ImagePSV;
131 // FIXME: Make private
132 unsigned LDSWaveSpillSize;
134 std::map<unsigned, unsigned> LaneVGPRs;
135 unsigned ScratchOffsetReg;
136 unsigned NumUserSGPRs;
137 unsigned NumSystemSGPRs;
140 bool HasSpilledSGPRs;
141 bool HasSpilledVGPRs;
142 bool HasNonSpillStackObjects;
144 unsigned NumSpilledSGPRs;
145 unsigned NumSpilledVGPRs;
147 // Feature bits required for inputs passed in user SGPRs.
148 bool PrivateSegmentBuffer : 1;
149 bool DispatchPtr : 1;
151 bool KernargSegmentPtr : 1;
153 bool FlatScratchInit : 1;
154 bool GridWorkgroupCountX : 1;
155 bool GridWorkgroupCountY : 1;
156 bool GridWorkgroupCountZ : 1;
158 // Feature bits required for inputs passed in system SGPRs.
159 bool WorkGroupIDX : 1; // Always initialized.
160 bool WorkGroupIDY : 1;
161 bool WorkGroupIDZ : 1;
162 bool WorkGroupInfo : 1;
163 bool PrivateSegmentWaveByteOffset : 1;
165 bool WorkItemIDX : 1; // Always initialized.
166 bool WorkItemIDY : 1;
167 bool WorkItemIDZ : 1;
169 // Private memory buffer
170 // Compute directly in sgpr[0:1]
171 // Other shaders indirect 64-bits at sgpr[0:1]
172 bool PrivateMemoryInputPtr : 1;
174 MCPhysReg getNextUserSGPR() const {
175 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
176 return AMDGPU::SGPR0 + NumUserSGPRs;
179 MCPhysReg getNextSystemSGPR() const {
180 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
187 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } // Stores \p R as VGPR and \p L as Lane.
188 SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { } // Default state: VGPR is NoRegister and Lane is -1.
189 bool hasLane() const { return Lane != -1; } // True unless Lane is -1, the value the default constructor sets.
190 bool hasReg() const { return VGPR != AMDGPU::NoRegister; } // True unless VGPR is AMDGPU::NoRegister.
193 // SIMachineFunctionInfo definition
195 SIMachineFunctionInfo(const MachineFunction &MF);
196 SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
198 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; } // True when TIDReg is not AMDGPU::NoRegister.
199 unsigned getTIDReg() const { return TIDReg; } // Returns TIDReg as stored; check hasCalculatedTID() to see whether it is set.
200 void setTIDReg(unsigned Reg) { TIDReg = Reg; } // Stores \p Reg as TIDReg.
203 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
204 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
205 unsigned addQueuePtr(const SIRegisterInfo &TRI);
206 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
207 unsigned addDispatchID(const SIRegisterInfo &TRI);
208 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
209 unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
212 unsigned addWorkGroupIDX() {
213 WorkGroupIDXSystemSGPR = getNextSystemSGPR();
215 return WorkGroupIDXSystemSGPR;
218 unsigned addWorkGroupIDY() {
219 WorkGroupIDYSystemSGPR = getNextSystemSGPR();
221 return WorkGroupIDYSystemSGPR;
224 unsigned addWorkGroupIDZ() {
225 WorkGroupIDZSystemSGPR = getNextSystemSGPR();
227 return WorkGroupIDZSystemSGPR;
230 unsigned addWorkGroupInfo() {
231 WorkGroupInfoSystemSGPR = getNextSystemSGPR();
233 return WorkGroupInfoSystemSGPR;
236 unsigned addPrivateSegmentWaveByteOffset() {
237 PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
239 return PrivateSegmentWaveByteOffsetSystemSGPR;
242 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
243 PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
246 bool hasPrivateSegmentBuffer() const {
247 return PrivateSegmentBuffer;
250 bool hasDispatchPtr() const {
254 bool hasQueuePtr() const {
258 bool hasKernargSegmentPtr() const {
259 return KernargSegmentPtr;
262 bool hasDispatchID() const {
266 bool hasFlatScratchInit() const {
267 return FlatScratchInit;
270 bool hasGridWorkgroupCountX() const {
271 return GridWorkgroupCountX;
274 bool hasGridWorkgroupCountY() const {
275 return GridWorkgroupCountY;
278 bool hasGridWorkgroupCountZ() const {
279 return GridWorkgroupCountZ;
282 bool hasWorkGroupIDX() const {
286 bool hasWorkGroupIDY() const {
290 bool hasWorkGroupIDZ() const {
294 bool hasWorkGroupInfo() const {
295 return WorkGroupInfo;
298 bool hasPrivateSegmentWaveByteOffset() const {
299 return PrivateSegmentWaveByteOffset;
302 bool hasWorkItemIDX() const {
306 bool hasWorkItemIDY() const {
310 bool hasWorkItemIDZ() const {
314 bool hasPrivateMemoryInputPtr() const {
315 return PrivateMemoryInputPtr;
318 unsigned getNumUserSGPRs() const {
322 unsigned getNumPreloadedSGPRs() const {
323 return NumUserSGPRs + NumSystemSGPRs;
326 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
327 return PrivateSegmentWaveByteOffsetSystemSGPR;
330 /// \brief Returns the physical register reserved for use as the resource
331 /// descriptor for scratch accesses.
332 unsigned getScratchRSrcReg() const {
333 return ScratchRSrcReg;
336 void setScratchRSrcReg(unsigned Reg) {
337 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
338 ScratchRSrcReg = Reg;
341 unsigned getScratchWaveOffsetReg() const {
342 return ScratchWaveOffsetReg;
345 void setScratchWaveOffsetReg(unsigned Reg) {
346 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
347 ScratchWaveOffsetReg = Reg;
350 unsigned getQueuePtrUserSGPR() const {
351 return QueuePtrUserSGPR;
354 unsigned getPrivateMemoryPtrUserSGPR() const {
355 return PrivateMemoryPtrUserSGPR;
358 bool hasSpilledSGPRs() const {
359 return HasSpilledSGPRs;
362 void setHasSpilledSGPRs(bool Spill = true) {
363 HasSpilledSGPRs = Spill;
366 bool hasSpilledVGPRs() const {
367 return HasSpilledVGPRs;
370 void setHasSpilledVGPRs(bool Spill = true) {
371 HasSpilledVGPRs = Spill;
374 bool hasNonSpillStackObjects() const {
375 return HasNonSpillStackObjects;
378 void setHasNonSpillStackObjects(bool StackObject = true) {
379 HasNonSpillStackObjects = StackObject;
382 unsigned getNumSpilledSGPRs() const {
383 return NumSpilledSGPRs;
386 unsigned getNumSpilledVGPRs() const {
387 return NumSpilledVGPRs;
390 void addToSpilledSGPRs(unsigned num) {
391 NumSpilledSGPRs += num;
394 void addToSpilledVGPRs(unsigned num) {
395 NumSpilledVGPRs += num;
398 unsigned getPSInputAddr() const {
402 bool isPSInputAllocated(unsigned Index) const {
403 return PSInputAddr & (1 << Index);
406 void markPSInputAllocated(unsigned Index) {
407 PSInputAddr |= 1 << Index;
410 bool returnsVoid() const {
414 void setIfReturnsVoid(bool Value) {
418 /// \returns A pair of default/requested minimum/maximum flat work group sizes
419 /// for this function.
420 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
421 return FlatWorkGroupSizes;
424 /// \returns Default/requested minimum flat work group size for this function.
425 unsigned getMinFlatWorkGroupSize() const {
426 return FlatWorkGroupSizes.first;
429 /// \returns Default/requested maximum flat work group size for this function.
430 unsigned getMaxFlatWorkGroupSize() const {
431 return FlatWorkGroupSizes.second;
434 /// \returns A pair of default/requested minimum/maximum number of waves per
436 std::pair<unsigned, unsigned> getWavesPerEU() const {
440 /// \returns Default/requested minimum number of waves per execution unit.
441 unsigned getMinWavesPerEU() const {
442 return WavesPerEU.first;
445 /// \returns Default/requested maximum number of waves per execution unit.
446 unsigned getMaxWavesPerEU() const {
447 return WavesPerEU.second;
450 /// \returns Stack object index for \p Dim's work group ID.
451 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
453 return DebuggerWorkGroupIDStackObjectIndices[Dim];
456 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
457 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
459 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
462 /// \returns Stack object index for \p Dim's work item ID.
463 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
465 return DebuggerWorkItemIDStackObjectIndices[Dim];
468 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
469 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
471 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
474 /// \returns SGPR used for \p Dim's work group ID.
475 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
478 assert(hasWorkGroupIDX());
479 return WorkGroupIDXSystemSGPR;
481 assert(hasWorkGroupIDY());
482 return WorkGroupIDYSystemSGPR;
484 assert(hasWorkGroupIDZ());
485 return WorkGroupIDZSystemSGPR;
487 llvm_unreachable("unexpected dimension");
490 /// \returns VGPR used for \p Dim's work item ID.
491 unsigned getWorkItemIDVGPR(unsigned Dim) const {
494 assert(hasWorkItemIDX());
495 return AMDGPU::VGPR0;
497 assert(hasWorkItemIDY());
498 return AMDGPU::VGPR1;
500 assert(hasWorkItemIDZ());
501 return AMDGPU::VGPR2;
503 llvm_unreachable("unexpected dimension");
506 const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
510 const AMDGPUImagePseudoSourceValue *getImagePSV() const {
515 } // End namespace llvm