1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
14 #include "AMDKernelCodeT.h"
15 #include "SIDefines.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/IR/CallingConv.h"
18 #include "llvm/MC/MCInstrDesc.h"
19 #include "llvm/Support/Compiler.h"
20 #include "llvm/Support/ErrorHandling.h"
29 class MachineMemOperand;
31 class MCRegisterClass;
34 class MCSubtargetInfo;
41 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
42 // doesn't spill SGPRs as much as when 80 is set.
43 FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
46 /// \brief Instruction set architecture version.
53 /// \returns Isa version for given subtarget \p Features.
54 IsaVersion getIsaVersion(const FeatureBitset &Features);
56 /// \returns Wavefront size for given subtarget \p Features.
57 unsigned getWavefrontSize(const FeatureBitset &Features);
59 /// \returns Local memory size in bytes for given subtarget \p Features.
60 unsigned getLocalMemorySize(const FeatureBitset &Features);
62 /// \returns Number of execution units per compute unit for given subtarget \p
64 unsigned getEUsPerCU(const FeatureBitset &Features);
66 /// \returns Maximum number of work groups per compute unit for given subtarget
67 /// \p Features and limited by given \p FlatWorkGroupSize.
68 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
69 unsigned FlatWorkGroupSize);
71 /// \returns Maximum number of waves per compute unit for given subtarget \p
72 /// Features without any kind of limitation.
73 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
75 /// \returns Maximum number of waves per compute unit for given subtarget \p
76 /// Features and limited by given \p FlatWorkGroupSize.
77 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
78 unsigned FlatWorkGroupSize);
80 /// \returns Minimum number of waves per execution unit for given subtarget \p
82 unsigned getMinWavesPerEU(const FeatureBitset &Features);
84 /// \returns Maximum number of waves per execution unit for given subtarget \p
85 /// Features without any kind of limitation.
86 unsigned getMaxWavesPerEU(const FeatureBitset &Features);
88 /// \returns Maximum number of waves per execution unit for given subtarget \p
89 /// Features and limited by given \p FlatWorkGroupSize.
90 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
91 unsigned FlatWorkGroupSize);
93 /// \returns Minimum flat work group size for given subtarget \p Features.
94 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
96 /// \returns Maximum flat work group size for given subtarget \p Features.
97 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
99 /// \returns Number of waves per work group for given subtarget \p Features and
100 /// limited by given \p FlatWorkGroupSize.
101 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
102 unsigned FlatWorkGroupSize);
104 /// \returns SGPR allocation granularity for given subtarget \p Features.
105 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
107 /// \returns SGPR encoding granularity for given subtarget \p Features.
108 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
110 /// \returns Total number of SGPRs for given subtarget \p Features.
111 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
113 /// \returns Addressable number of SGPRs for given subtarget \p Features.
114 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
116 /// \returns Minimum number of SGPRs that meets the given number of waves per
117 /// execution unit requirement for given subtarget \p Features.
118 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
120 /// \returns Maximum number of SGPRs that meets the given number of waves per
121 /// execution unit requirement for given subtarget \p Features.
122 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
125 /// \returns VGPR allocation granularity for given subtarget \p Features.
126 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
128 /// \returns VGPR encoding granularity for given subtarget \p Features.
129 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
131 /// \returns Total number of VGPRs for given subtarget \p Features.
132 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
134 /// \returns Addressable number of VGPRs for given subtarget \p Features.
135 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
137 /// \returns Minimum number of VGPRs that meets given number of waves per
138 /// execution unit requirement for given subtarget \p Features.
139 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
141 /// \returns Maximum number of VGPRs that meets given number of waves per
142 /// execution unit requirement for given subtarget \p Features.
143 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
145 } // end namespace IsaInfo
148 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
150 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
151 const FeatureBitset &Features);
152 MCSection *getHSATextSection(MCContext &Ctx);
154 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
156 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
158 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
160 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
161 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
162 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
164 /// \returns True if constants should be emitted to .text section for given
165 /// target triple \p TT, false otherwise.
166 bool shouldEmitConstantsToTextSection(const Triple &TT);
168 /// \returns Integer value requested using \p F's \p Name attribute.
170 /// \returns \p Default if attribute is not present.
172 /// \returns \p Default and emits error if requested value cannot be converted
174 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
176 /// \returns A pair of integer values requested using \p F's \p Name attribute
177 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
180 /// \returns \p Default if attribute is not present.
182 /// \returns \p Default and emits error if one of the requested values cannot be
183 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
185 std::pair<int, int> getIntegerPairAttribute(const Function &F,
187 std::pair<int, int> Default,
188 bool OnlyFirstRequired = false);
190 /// \returns Vmcnt bit mask for given isa \p Version.
191 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
193 /// \returns Expcnt bit mask for given isa \p Version.
194 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
196 /// \returns Lgkmcnt bit mask for given isa \p Version.
197 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
199 /// \returns Waitcnt bit mask for given isa \p Version.
200 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
202 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
203 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
205 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
206 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
208 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
209 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
211 /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
212 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
213 /// \p Lgkmcnt respectively.
215 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
216 /// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
217 /// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
218 /// \p Expcnt = \p Waitcnt[6:4]
219 /// \p Lgkmcnt = \p Waitcnt[11:8]
220 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
221 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
223 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
224 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
227 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
228 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
231 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
232 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
235 /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
238 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
239 /// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
240 /// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
241 /// Waitcnt[6:4] = \p Expcnt
242 /// Waitcnt[11:8] = \p Lgkmcnt
243 /// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
245 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
247 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
248 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
250 unsigned getInitialPSInputAddr(const Function &F);
253 bool isShader(CallingConv::ID CC);
256 bool isCompute(CallingConv::ID CC);
259 bool isEntryFunctionCC(CallingConv::ID CC);
261 // FIXME: Remove this when calling conventions cleaned up
/// \brief Checks whether calling convention \p CC is a kernel entry-point
/// convention (native AMDGPU_KERNEL or OpenCL-style SPIR_KERNEL).
/// NOTE(review): the switch body's return statements fall outside this view;
/// presumably both cases return true and all others false — confirm.
263 inline bool isKernel(CallingConv::ID CC) {
265 case CallingConv::AMDGPU_KERNEL:
266 case CallingConv::SPIR_KERNEL:
273 bool isSI(const MCSubtargetInfo &STI);
274 bool isCI(const MCSubtargetInfo &STI);
275 bool isVI(const MCSubtargetInfo &STI);
276 bool isGFX9(const MCSubtargetInfo &STI);
278 /// \brief Is \p Reg a scalar (SGPR) register?
279 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
281 /// If \p Reg is a pseudo reg, return the correct hardware register given
282 /// \p STI otherwise return \p Reg.
283 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
285 /// \brief Convert hardware register \p Reg to a pseudo register
287 unsigned mc2PseudoReg(unsigned Reg);
289 /// \brief Can this operand also contain immediate values?
290 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
292 /// \brief Is this floating-point operand?
293 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
295 /// \brief Does this operand support only inlinable literals?
296 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
298 /// \brief Get the size in bits of a register from the register class \p RC.
299 unsigned getRegBitWidth(unsigned RCID);
301 /// \brief Get the size in bits of a register from the register class \p RC.
302 unsigned getRegBitWidth(const MCRegisterClass &RC);
304 /// \brief Get size of register operand
305 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
/// \brief Returns the size of the operand described by \p OpInfo, selected
/// by its AMDGPU operand-type encoding. The cases are grouped by operand
/// width (32-bit, 64-bit, 16-bit / packed 2x16-bit); the per-group return
/// values are outside this view.
309 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
310 switch (OpInfo.OperandType) {
  // 32-bit integer/FP operands (literal-immediate and inline-constant forms).
311 case AMDGPU::OPERAND_REG_IMM_INT32:
312 case AMDGPU::OPERAND_REG_IMM_FP32:
313 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
314 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  // 64-bit integer/FP operands.
317 case AMDGPU::OPERAND_REG_IMM_INT64:
318 case AMDGPU::OPERAND_REG_IMM_FP64:
319 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
320 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  // 16-bit operands, plus packed 2x16-bit vector inline constants.
323 case AMDGPU::OPERAND_REG_IMM_INT16:
324 case AMDGPU::OPERAND_REG_IMM_FP16:
325 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
326 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
327 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
328 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  // Any operand type not handled above is a programming error.
332 llvm_unreachable("unhandled operand type");
/// \brief Returns the size of operand \p OpNo of instruction description
/// \p Desc by forwarding its MCOperandInfo to the MCOperandInfo overload.
337 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
338 return getOperandSize(Desc.OpInfo[OpNo]);
341 /// \brief Is this literal inlinable
343 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
346 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
349 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
352 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
354 bool isUniformMMO(const MachineMemOperand *MMO);
356 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
358 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
360 /// \returns true if this offset is small enough to fit in the SMRD
361 /// offset field. \p ByteOffset should be the offset in bytes and
362 /// not the encoded offset.
363 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
365 } // end namespace AMDGPU
366 } // end namespace llvm
368 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H