1 //===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include <cassert>
#include <cstring>
22 #define GET_SUBTARGETINFO_ENUM
23 #include "AMDGPUGenSubtargetInfo.inc"
24 #undef GET_SUBTARGETINFO_ENUM
26 #define GET_REGINFO_ENUM
27 #include "AMDGPUGenRegisterInfo.inc"
28 #undef GET_REGINFO_ENUM
30 #define GET_INSTRINFO_NAMED_OPS
31 #define GET_INSTRINFO_ENUM
32 #include "AMDGPUGenInstrInfo.inc"
33 #undef GET_INSTRINFO_NAMED_OPS
34 #undef GET_INSTRINFO_ENUM
/// \returns Bit mask covering \p Width bits starting at bit \p Shift.
/// e.g. getBitMask(4, 3) == 0b0111'0000. Behavior is undefined when
/// Shift + Width exceeds the bit width of unsigned.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}
43 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
45 /// \returns Packed \p Dst.
46 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
47 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
48 Dst |= (Src << Shift) & getBitMask(Shift, Width);
52 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
54 /// \returns Unpacked bits.
55 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
56 return (Src & getBitMask(Shift, Width)) >> Shift;
/// \returns Bit position of the Vmcnt field in a s_waitcnt encoding.
unsigned getVmcntBitShift() {
  return 0;
}

/// \returns Width in bits of the Vmcnt field.
unsigned getVmcntBitWidth() {
  return 4;
}

/// \returns Bit position of the Expcnt field in a s_waitcnt encoding.
unsigned getExpcntBitShift() {
  return 4;
}

/// \returns Width in bits of the Expcnt field.
unsigned getExpcntBitWidth() {
  return 3;
}

/// \returns Bit position of the Lgkmcnt field in a s_waitcnt encoding.
unsigned getLgkmcntBitShift() {
  return 8;
}

/// \returns Width in bits of the Lgkmcnt field.
unsigned getLgkmcntBitWidth() {
  return 4;
}
77 } // anonymous namespace
82 IsaVersion getIsaVersion(const FeatureBitset &Features) {
84 if (Features.test(FeatureISAVersion7_0_0))
87 if (Features.test(FeatureISAVersion7_0_1))
90 if (Features.test(FeatureISAVersion7_0_2))
93 if (Features.test(FeatureISAVersion8_0_0))
96 if (Features.test(FeatureISAVersion8_0_1))
99 if (Features.test(FeatureISAVersion8_0_2))
102 if (Features.test(FeatureISAVersion8_0_3))
105 if (Features.test(FeatureISAVersion8_0_4))
108 if (Features.test(FeatureISAVersion8_1_0))
114 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
115 const FeatureBitset &Features) {
117 IsaVersion ISA = getIsaVersion(Features);
119 memset(&Header, 0, sizeof(Header));
121 Header.amd_kernel_code_version_major = 1;
122 Header.amd_kernel_code_version_minor = 0;
123 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
124 Header.amd_machine_version_major = ISA.Major;
125 Header.amd_machine_version_minor = ISA.Minor;
126 Header.amd_machine_version_stepping = ISA.Stepping;
127 Header.kernel_code_entry_byte_offset = sizeof(Header);
128 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
129 Header.wavefront_size = 6;
130 // These alignment values are specified in powers of two, so alignment =
131 // 2^n. The minimum alignment is 2^4 = 16.
132 Header.kernarg_segment_alignment = 4;
133 Header.group_segment_alignment = 4;
134 Header.private_segment_alignment = 4;
137 MCSection *getHSATextSection(MCContext &Ctx) {
138 return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
139 ELF::SHF_ALLOC | ELF::SHF_WRITE |
141 ELF::SHF_AMDGPU_HSA_AGENT |
142 ELF::SHF_AMDGPU_HSA_CODE);
145 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
146 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
147 ELF::SHF_ALLOC | ELF::SHF_WRITE |
148 ELF::SHF_AMDGPU_HSA_GLOBAL |
149 ELF::SHF_AMDGPU_HSA_AGENT);
152 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
153 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
154 ELF::SHF_ALLOC | ELF::SHF_WRITE |
155 ELF::SHF_AMDGPU_HSA_GLOBAL);
158 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
159 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
160 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
161 ELF::SHF_AMDGPU_HSA_AGENT);
164 bool isGroupSegment(const GlobalValue *GV) {
165 return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
168 bool isGlobalSegment(const GlobalValue *GV) {
169 return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
172 bool isReadOnlySegment(const GlobalValue *GV) {
173 return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
176 bool shouldEmitConstantsToTextSection(const Triple &TT) {
177 return TT.getOS() != Triple::AMDHSA;
180 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
181 Attribute A = F.getFnAttribute(Name);
182 int Result = Default;
184 if (A.isStringAttribute()) {
185 StringRef Str = A.getValueAsString();
186 if (Str.getAsInteger(0, Result)) {
187 LLVMContext &Ctx = F.getContext();
188 Ctx.emitError("can't parse integer attribute " + Name);
195 std::pair<int, int> getIntegerPairAttribute(const Function &F,
197 std::pair<int, int> Default,
198 bool OnlyFirstRequired) {
199 Attribute A = F.getFnAttribute(Name);
200 if (!A.isStringAttribute())
203 LLVMContext &Ctx = F.getContext();
204 std::pair<int, int> Ints = Default;
205 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
206 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
207 Ctx.emitError("can't parse first integer attribute " + Name);
210 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
211 if (!OnlyFirstRequired || Strs.second.trim().size()) {
212 Ctx.emitError("can't parse second integer attribute " + Name);
220 unsigned getWaitcntBitMask(IsaVersion Version) {
221 unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
222 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
223 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
224 return Vmcnt | Expcnt | Lgkmcnt;
227 unsigned getVmcntBitMask(IsaVersion Version) {
228 return (1 << getVmcntBitWidth()) - 1;
231 unsigned getExpcntBitMask(IsaVersion Version) {
232 return (1 << getExpcntBitWidth()) - 1;
235 unsigned getLgkmcntBitMask(IsaVersion Version) {
236 return (1 << getLgkmcntBitWidth()) - 1;
239 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
240 return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
243 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
244 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
247 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
248 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
251 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
252 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
253 Vmcnt = decodeVmcnt(Version, Waitcnt);
254 Expcnt = decodeExpcnt(Version, Waitcnt);
255 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
258 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
259 return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
262 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
263 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
266 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
267 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
270 unsigned encodeWaitcnt(IsaVersion Version,
271 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
272 unsigned Waitcnt = getWaitcntBitMask(Version);
273 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
274 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
275 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
279 unsigned getInitialPSInputAddr(const Function &F) {
280 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
283 bool isShader(CallingConv::ID cc) {
285 case CallingConv::AMDGPU_VS:
286 case CallingConv::AMDGPU_GS:
287 case CallingConv::AMDGPU_PS:
288 case CallingConv::AMDGPU_CS:
295 bool isCompute(CallingConv::ID cc) {
296 return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
299 bool isSI(const MCSubtargetInfo &STI) {
300 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
303 bool isCI(const MCSubtargetInfo &STI) {
304 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
307 bool isVI(const MCSubtargetInfo &STI) {
308 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
311 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
315 case AMDGPU::FLAT_SCR:
317 return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
319 case AMDGPU::FLAT_SCR_LO:
321 return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
323 case AMDGPU::FLAT_SCR_HI:
325 return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
330 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
331 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
332 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
333 OpType <= AMDGPU::OPERAND_SRC_LAST;
336 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
337 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
339 case AMDGPU::OPERAND_REG_IMM_FP32:
340 case AMDGPU::OPERAND_REG_IMM_FP64:
341 case AMDGPU::OPERAND_REG_IMM_FP16:
342 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
343 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
344 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
351 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
352 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
353 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
354 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
357 // Avoid using MCRegisterClass::getSize, since that function will go away
358 // (move from MC* level to Target* level). Return size in bits.
359 unsigned getRegBitWidth(unsigned RCID) {
361 case AMDGPU::SGPR_32RegClassID:
362 case AMDGPU::VGPR_32RegClassID:
363 case AMDGPU::VS_32RegClassID:
364 case AMDGPU::SReg_32RegClassID:
365 case AMDGPU::SReg_32_XM0RegClassID:
367 case AMDGPU::SGPR_64RegClassID:
368 case AMDGPU::VS_64RegClassID:
369 case AMDGPU::SReg_64RegClassID:
370 case AMDGPU::VReg_64RegClassID:
372 case AMDGPU::VReg_96RegClassID:
374 case AMDGPU::SGPR_128RegClassID:
375 case AMDGPU::SReg_128RegClassID:
376 case AMDGPU::VReg_128RegClassID:
378 case AMDGPU::SReg_256RegClassID:
379 case AMDGPU::VReg_256RegClassID:
381 case AMDGPU::SReg_512RegClassID:
382 case AMDGPU::VReg_512RegClassID:
385 llvm_unreachable("Unexpected register class");
389 unsigned getRegBitWidth(const MCRegisterClass &RC) {
390 return getRegBitWidth(RC.getID());
393 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
395 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
396 return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
399 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
400 if (Literal >= -16 && Literal <= 64)
403 uint64_t Val = static_cast<uint64_t>(Literal);
404 return (Val == DoubleToBits(0.0)) ||
405 (Val == DoubleToBits(1.0)) ||
406 (Val == DoubleToBits(-1.0)) ||
407 (Val == DoubleToBits(0.5)) ||
408 (Val == DoubleToBits(-0.5)) ||
409 (Val == DoubleToBits(2.0)) ||
410 (Val == DoubleToBits(-2.0)) ||
411 (Val == DoubleToBits(4.0)) ||
412 (Val == DoubleToBits(-4.0)) ||
413 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
416 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
417 if (Literal >= -16 && Literal <= 64)
420 // The actual type of the operand does not seem to matter as long
421 // as the bits match one of the inline immediate values. For example:
423 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
424 // so it is a legal inline immediate.
426 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
427 // floating-point, so it is a legal inline immediate.
429 uint32_t Val = static_cast<uint32_t>(Literal);
430 return (Val == FloatToBits(0.0f)) ||
431 (Val == FloatToBits(1.0f)) ||
432 (Val == FloatToBits(-1.0f)) ||
433 (Val == FloatToBits(0.5f)) ||
434 (Val == FloatToBits(-0.5f)) ||
435 (Val == FloatToBits(2.0f)) ||
436 (Val == FloatToBits(-2.0f)) ||
437 (Val == FloatToBits(4.0f)) ||
438 (Val == FloatToBits(-4.0f)) ||
439 (Val == 0x3e22f983 && HasInv2Pi);
/// \returns True if the 16-bit value \p Literal can be encoded as a SI
/// inline constant: a small integer in [-16, 64] or one of the supported
/// half-precision float bit patterns. f16 operands only exist on subtargets
/// that also have the 1/(2*pi) constant, hence the assert on \p HasInv2Pi.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}
460 } // End namespace AMDGPU
461 } // End namespace llvm