//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <memory>
#include <utility>
#include <vector>
35 class MachineFrameInfo;
36 class MachineFunction;
38 class TargetRegisterClass;
40 class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
42 // TODO: Is the img rsrc useful?
43 explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
44 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
46 bool isConstant(const MachineFrameInfo *) const override {
47 // This should probably be true for most images, but we will start by being
52 bool isAliased(const MachineFrameInfo *) const override {
53 // FIXME: If we ever change image intrinsics to accept fat pointers, then
54 // this could be true for some cases.
58 bool mayAlias(const MachineFrameInfo *) const override {
59 // FIXME: If we ever change image intrinsics to accept fat pointers, then
60 // this could be true for some cases.
65 class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
67 explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
68 PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
70 bool isConstant(const MachineFrameInfo *) const override {
71 // This should probably be true for most images, but we will start by being
76 bool isAliased(const MachineFrameInfo *) const override {
77 // FIXME: If we ever change image intrinsics to accept fat pointers, then
78 // this could be true for some cases.
82 bool mayAlias(const MachineFrameInfo *) const override {
83 // FIXME: If we ever change image intrinsics to accept fat pointers, then
84 // this could be true for some cases.
89 /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
90 /// tells the hardware which interpolation parameters to load.
91 class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
92 unsigned TIDReg = AMDGPU::NoRegister;
94 // Registers that may be reserved for spilling purposes. These may be the same
95 // as the input registers.
96 unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
97 unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
99 // This is the current function's incremented size from the kernel's scratch
100 // wave offset register. For an entry function, this is exactly the same as
101 // the ScratchWaveOffsetReg.
102 unsigned FrameOffsetReg = AMDGPU::FP_REG;
104 // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
105 unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
107 AMDGPUFunctionArgInfo ArgInfo;
110 unsigned PSInputAddr = 0;
111 unsigned PSInputEnable = 0;
113 /// Number of bytes of arguments this function has on the stack. If the callee
114 /// is expected to restore the argument stack this should be a multiple of 16,
115 /// all usable during a tail call.
117 /// The alternative would forbid tail call optimisation in some cases: if we
118 /// want to transfer control from a function with 8-bytes of stack-argument
119 /// space to a function with 16-bytes then misalignment of this value would
120 /// make a stack adjustment necessary, which could not be undone by the
122 unsigned BytesInStackArgArea = 0;
124 bool ReturnsVoid = true;
126 // A pair of default/requested minimum/maximum flat work group sizes.
127 // Minimum - first, maximum - second.
128 std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
130 // A pair of default/requested minimum/maximum number of waves per execution
131 // unit. Minimum - first, maximum - second.
132 std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
134 // Stack object indices for work group IDs.
135 std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices = {{0, 0, 0}};
137 // Stack object indices for work item IDs.
138 std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
140 DenseMap<const Value *,
141 std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
142 DenseMap<const Value *,
143 std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
146 unsigned LDSWaveSpillSize = 0;
147 unsigned NumUserSGPRs = 0;
148 unsigned NumSystemSGPRs = 0;
150 bool HasSpilledSGPRs = false;
151 bool HasSpilledVGPRs = false;
152 bool HasNonSpillStackObjects = false;
154 unsigned NumSpilledSGPRs = 0;
155 unsigned NumSpilledVGPRs = 0;
157 // Feature bits required for inputs passed in user SGPRs.
158 bool PrivateSegmentBuffer : 1;
159 bool DispatchPtr : 1;
161 bool KernargSegmentPtr : 1;
163 bool FlatScratchInit : 1;
164 bool GridWorkgroupCountX : 1;
165 bool GridWorkgroupCountY : 1;
166 bool GridWorkgroupCountZ : 1;
168 // Feature bits required for inputs passed in system SGPRs.
169 bool WorkGroupIDX : 1; // Always initialized.
170 bool WorkGroupIDY : 1;
171 bool WorkGroupIDZ : 1;
172 bool WorkGroupInfo : 1;
173 bool PrivateSegmentWaveByteOffset : 1;
175 bool WorkItemIDX : 1; // Always initialized.
176 bool WorkItemIDY : 1;
177 bool WorkItemIDZ : 1;
179 // Private memory buffer
180 // Compute directly in sgpr[0:1]
181 // Other shaders indirect 64-bits at sgpr[0:1]
182 bool ImplicitBufferPtr : 1;
184 // Pointer to where the ABI inserts special kernel arguments separate from the
185 // user arguments. This is an offset from the KernargSegmentPtr.
186 bool ImplicitArgPtr : 1;
188 // The hard-wired high half of the address of the global information table
189 // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
190 // current hardware only allows a 16 bit value.
193 MCPhysReg getNextUserSGPR() const {
194 assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
195 return AMDGPU::SGPR0 + NumUserSGPRs;
198 MCPhysReg getNextSystemSGPR() const {
199 return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
204 unsigned VGPR = AMDGPU::NoRegister;
207 SpilledReg() = default;
208 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
210 bool hasLane() { return Lane != -1;}
211 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
214 struct SGPRSpillVGPRCSR {
215 // VGPR used for SGPR spills
218 // If the VGPR is a CSR, the stack slot used to save/restore it in the
222 SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
226 // SGPR->VGPR spilling support.
227 using SpillRegMask = std::pair<unsigned, unsigned>;
229 // Track VGPR + wave index for each subregister of the SGPR spilled to
231 DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
232 unsigned NumVGPRSpillLanes = 0;
233 SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
236 SIMachineFunctionInfo(const MachineFunction &MF);
238 ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
239 auto I = SGPRToVGPRSpills.find(FrameIndex);
240 return (I == SGPRToVGPRSpills.end()) ?
241 ArrayRef<SpilledReg>() : makeArrayRef(I->second);
244 ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
248 bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
249 void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
251 bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }
252 unsigned getTIDReg() const { return TIDReg; }
253 void setTIDReg(unsigned Reg) { TIDReg = Reg; }
255 unsigned getBytesInStackArgArea() const {
256 return BytesInStackArgArea;
259 void setBytesInStackArgArea(unsigned Bytes) {
260 BytesInStackArgArea = Bytes;
264 unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
265 unsigned addDispatchPtr(const SIRegisterInfo &TRI);
266 unsigned addQueuePtr(const SIRegisterInfo &TRI);
267 unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
268 unsigned addDispatchID(const SIRegisterInfo &TRI);
269 unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
270 unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
273 unsigned addWorkGroupIDX() {
274 ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
276 return ArgInfo.WorkGroupIDX.getRegister();
279 unsigned addWorkGroupIDY() {
280 ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
282 return ArgInfo.WorkGroupIDY.getRegister();
285 unsigned addWorkGroupIDZ() {
286 ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
288 return ArgInfo.WorkGroupIDZ.getRegister();
291 unsigned addWorkGroupInfo() {
292 ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
294 return ArgInfo.WorkGroupInfo.getRegister();
297 // Add special VGPR inputs
298 void setWorkItemIDX(ArgDescriptor Arg) {
299 ArgInfo.WorkItemIDX = Arg;
302 void setWorkItemIDY(ArgDescriptor Arg) {
303 ArgInfo.WorkItemIDY = Arg;
306 void setWorkItemIDZ(ArgDescriptor Arg) {
307 ArgInfo.WorkItemIDZ = Arg;
310 unsigned addPrivateSegmentWaveByteOffset() {
311 ArgInfo.PrivateSegmentWaveByteOffset
312 = ArgDescriptor::createRegister(getNextSystemSGPR());
314 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
317 void setPrivateSegmentWaveByteOffset(unsigned Reg) {
318 ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
321 bool hasPrivateSegmentBuffer() const {
322 return PrivateSegmentBuffer;
325 bool hasDispatchPtr() const {
329 bool hasQueuePtr() const {
333 bool hasKernargSegmentPtr() const {
334 return KernargSegmentPtr;
337 bool hasDispatchID() const {
341 bool hasFlatScratchInit() const {
342 return FlatScratchInit;
345 bool hasGridWorkgroupCountX() const {
346 return GridWorkgroupCountX;
349 bool hasGridWorkgroupCountY() const {
350 return GridWorkgroupCountY;
353 bool hasGridWorkgroupCountZ() const {
354 return GridWorkgroupCountZ;
357 bool hasWorkGroupIDX() const {
361 bool hasWorkGroupIDY() const {
365 bool hasWorkGroupIDZ() const {
369 bool hasWorkGroupInfo() const {
370 return WorkGroupInfo;
373 bool hasPrivateSegmentWaveByteOffset() const {
374 return PrivateSegmentWaveByteOffset;
377 bool hasWorkItemIDX() const {
381 bool hasWorkItemIDY() const {
385 bool hasWorkItemIDZ() const {
389 bool hasImplicitArgPtr() const {
390 return ImplicitArgPtr;
393 bool hasImplicitBufferPtr() const {
394 return ImplicitBufferPtr;
397 AMDGPUFunctionArgInfo &getArgInfo() {
401 const AMDGPUFunctionArgInfo &getArgInfo() const {
405 std::pair<const ArgDescriptor *, const TargetRegisterClass *>
406 getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
407 return ArgInfo.getPreloadedValue(Value);
410 unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
411 return ArgInfo.getPreloadedValue(Value).first->getRegister();
414 unsigned getGITPtrHigh() const {
418 unsigned getNumUserSGPRs() const {
422 unsigned getNumPreloadedSGPRs() const {
423 return NumUserSGPRs + NumSystemSGPRs;
426 unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
427 return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
430 /// \brief Returns the physical register reserved for use as the resource
431 /// descriptor for scratch accesses.
432 unsigned getScratchRSrcReg() const {
433 return ScratchRSrcReg;
436 void setScratchRSrcReg(unsigned Reg) {
437 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
438 ScratchRSrcReg = Reg;
441 unsigned getScratchWaveOffsetReg() const {
442 return ScratchWaveOffsetReg;
445 unsigned getFrameOffsetReg() const {
446 return FrameOffsetReg;
449 void setStackPtrOffsetReg(unsigned Reg) {
450 StackPtrOffsetReg = Reg;
453 // Note the unset value for this is AMDGPU::SP_REG rather than
454 // NoRegister. This is mostly a workaround for MIR tests where state that
455 // can't be directly computed from the function is not preserved in serialized
457 unsigned getStackPtrOffsetReg() const {
458 return StackPtrOffsetReg;
461 void setScratchWaveOffsetReg(unsigned Reg) {
462 assert(Reg != AMDGPU::NoRegister && "Should never be unset");
463 ScratchWaveOffsetReg = Reg;
464 if (isEntryFunction())
465 FrameOffsetReg = ScratchWaveOffsetReg;
468 unsigned getQueuePtrUserSGPR() const {
469 return ArgInfo.QueuePtr.getRegister();
472 unsigned getImplicitBufferPtrUserSGPR() const {
473 return ArgInfo.ImplicitBufferPtr.getRegister();
476 bool hasSpilledSGPRs() const {
477 return HasSpilledSGPRs;
480 void setHasSpilledSGPRs(bool Spill = true) {
481 HasSpilledSGPRs = Spill;
484 bool hasSpilledVGPRs() const {
485 return HasSpilledVGPRs;
488 void setHasSpilledVGPRs(bool Spill = true) {
489 HasSpilledVGPRs = Spill;
492 bool hasNonSpillStackObjects() const {
493 return HasNonSpillStackObjects;
496 void setHasNonSpillStackObjects(bool StackObject = true) {
497 HasNonSpillStackObjects = StackObject;
500 unsigned getNumSpilledSGPRs() const {
501 return NumSpilledSGPRs;
504 unsigned getNumSpilledVGPRs() const {
505 return NumSpilledVGPRs;
508 void addToSpilledSGPRs(unsigned num) {
509 NumSpilledSGPRs += num;
512 void addToSpilledVGPRs(unsigned num) {
513 NumSpilledVGPRs += num;
516 unsigned getPSInputAddr() const {
520 unsigned getPSInputEnable() const {
521 return PSInputEnable;
524 bool isPSInputAllocated(unsigned Index) const {
525 return PSInputAddr & (1 << Index);
528 void markPSInputAllocated(unsigned Index) {
529 PSInputAddr |= 1 << Index;
532 void markPSInputEnabled(unsigned Index) {
533 PSInputEnable |= 1 << Index;
536 bool returnsVoid() const {
540 void setIfReturnsVoid(bool Value) {
544 /// \returns A pair of default/requested minimum/maximum flat work group sizes
545 /// for this function.
546 std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
547 return FlatWorkGroupSizes;
550 /// \returns Default/requested minimum flat work group size for this function.
551 unsigned getMinFlatWorkGroupSize() const {
552 return FlatWorkGroupSizes.first;
555 /// \returns Default/requested maximum flat work group size for this function.
556 unsigned getMaxFlatWorkGroupSize() const {
557 return FlatWorkGroupSizes.second;
560 /// \returns A pair of default/requested minimum/maximum number of waves per
562 std::pair<unsigned, unsigned> getWavesPerEU() const {
566 /// \returns Default/requested minimum number of waves per execution unit.
567 unsigned getMinWavesPerEU() const {
568 return WavesPerEU.first;
571 /// \returns Default/requested maximum number of waves per execution unit.
572 unsigned getMaxWavesPerEU() const {
573 return WavesPerEU.second;
576 /// \returns Stack object index for \p Dim's work group ID.
577 int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
579 return DebuggerWorkGroupIDStackObjectIndices[Dim];
582 /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
583 void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
585 DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
588 /// \returns Stack object index for \p Dim's work item ID.
589 int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
591 return DebuggerWorkItemIDStackObjectIndices[Dim];
594 /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
595 void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
597 DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
600 /// \returns SGPR used for \p Dim's work group ID.
601 unsigned getWorkGroupIDSGPR(unsigned Dim) const {
604 assert(hasWorkGroupIDX());
605 return ArgInfo.WorkGroupIDX.getRegister();
607 assert(hasWorkGroupIDY());
608 return ArgInfo.WorkGroupIDY.getRegister();
610 assert(hasWorkGroupIDZ());
611 return ArgInfo.WorkGroupIDZ.getRegister();
613 llvm_unreachable("unexpected dimension");
616 /// \returns VGPR used for \p Dim' work item ID.
617 unsigned getWorkItemIDVGPR(unsigned Dim) const {
620 assert(hasWorkItemIDX());
621 return AMDGPU::VGPR0;
623 assert(hasWorkItemIDY());
624 return AMDGPU::VGPR1;
626 assert(hasWorkItemIDZ());
627 return AMDGPU::VGPR2;
629 llvm_unreachable("unexpected dimension");
632 unsigned getLDSWaveSpillSize() const {
633 return LDSWaveSpillSize;
636 const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
637 const Value *BufferRsrc) {
639 auto PSV = BufferPSVs.try_emplace(
641 llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
642 return PSV.first->second.get();
645 const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
646 const Value *ImgRsrc) {
648 auto PSV = ImagePSVs.try_emplace(
650 llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
651 return PSV.first->second.get();
655 } // end namespace llvm
657 #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H