1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "AMDGPUBaseInfo.h"
12 #include "SIDefines.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/CodeGen/MachineMemOperand.h"
16 #include "llvm/IR/Attributes.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Instruction.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCInstrDesc.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSectionELF.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ELF.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
39 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
42 #define GET_INSTRINFO_NAMED_OPS
43 #include "AMDGPUGenInstrInfo.inc"
44 #undef GET_INSTRINFO_NAMED_OPS
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Use an unsigned literal and special-case Width == 32: "1 << 31" overflows
  // a signed int and "1u << 32" shifts by the full bit-width, both undefined
  // behavior.
  return (Width == 32 ? ~0u : ((1u << Width) - 1)) << Shift;
}
/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
// NOTE(review): the trailing "return Dst;" and closing brace are not visible
// in this excerpt -- confirm against the full file.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field.  The extra ~(1 << Shift) term is redundant
  // for any non-zero Width, since the mask already covers bit Shift.
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  // Insert the source value, truncated to the field width.
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
62 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
64 /// \returns Unpacked bits.
65 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
66 return (Src & getBitMask(Shift, Width)) >> Shift;
// Bit layout of the s_waitcnt immediate as used by the pack/unpack helpers
// above: vmcnt low bits [3:0], expcnt bits [6:4], lgkmcnt bits [11:8], and
// (consulted only when the ISA major version is >= 9, see the mask/encode
// functions below) vmcnt high bits [15:14].

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
93 } // end namespace anonymous
/// \returns The ISA version selected by the subtarget feature bits, tested
/// from the oldest (7.0.0) through the newest (9.0.1) known versions.
// NOTE(review): the per-feature return statements, fall-through handling, and
// closing brace are not visible in this excerpt -- verify the exact version
// returned for each feature against the full file.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  if (Features.test(FeatureISAVersion7_0_0))
  if (Features.test(FeatureISAVersion7_0_1))
  if (Features.test(FeatureISAVersion7_0_2))

  if (Features.test(FeatureISAVersion8_0_0))
  if (Features.test(FeatureISAVersion8_0_1))
  if (Features.test(FeatureISAVersion8_0_2))
  if (Features.test(FeatureISAVersion8_0_3))
  if (Features.test(FeatureISAVersion8_0_4))
  if (Features.test(FeatureISAVersion8_1_0))

  if (Features.test(FeatureISAVersion9_0_0))
  if (Features.test(FeatureISAVersion9_0_1))

  // Non-GCN targets and Southern Islands share a common fallback.
  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
// NOTE(review): the return statements of the next three functions are not
// visible in this excerpt -- confirm the selected values upstream.

/// \returns Wavefront size selected by the subtarget features.
unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
  if (Features.test(FeatureWavefrontSize32))

/// \returns Local (LDS) memory size in bytes selected by the features.
unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
  if (Features.test(FeatureLocalMemorySize65536))

/// \returns Number of execution units per compute unit.
unsigned getEUsPerCU(const FeatureBitset &Features) {

/// \returns Maximum number of work groups per CU that can hold a work group
/// of \p FlatWorkGroupSize, capped at 16.
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))

  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);

  return std::min(N, 16u);
167 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
168 return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
171 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
172 unsigned FlatWorkGroupSize) {
173 return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit (body not visible in
/// this excerpt).
unsigned getMinWavesPerEU(const FeatureBitset &Features) {

/// \returns Maximum number of waves per execution unit.
// NOTE(review): the return values are not visible in this excerpt.
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
  // FIXME: Need to take scratch memory into account.
187 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
188 unsigned FlatWorkGroupSize) {
189 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
190 getEUsPerCU(Features)) / getEUsPerCU(Features);
/// \returns Minimum flat work group size (body not visible in this excerpt).
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {

/// \returns Maximum flat work group size (body not visible in this excerpt).
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
201 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
202 unsigned FlatWorkGroupSize) {
203 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
204 getWavefrontSize(Features);
// NOTE(review): several return statements in the SGPR functions below are
// not visible in this excerpt -- confirm the concrete values upstream.

/// \returns SGPR allocation granularity; coarser on ISA major version >= 8.
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns SGPR encoding granularity.
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {

/// \returns Total number of SGPRs in the register file.
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns Number of SGPRs a single wave may actually address.
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  // Hardware with the SGPR-init bug is limited to a fixed SGPR count.
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns Minimum number of SGPRs that still permits \p WavesPerEU waves
/// per EU, clamped to the addressable range.
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))

  // Smallest granule-aligned count that rules out fitting one more wave.
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
/// \returns Maximum number of SGPRs usable at \p WavesPerEU waves per EU.
// NOTE(review): the parameter line declaring "bool Addressable" is not
// visible in this excerpt, but the flag is used below -- confirm the full
// signature upstream.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  // Split the SGPR file evenly across the requested occupancy, rounded down
  // to the allocation granule.
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  // On GFX8+, when the caller does not require the registers to be
  // addressable, the cap is raised to 112.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
/// \returns VGPR allocation granularity (body not visible in this excerpt).
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {

/// \returns VGPR encoding granularity; identical to the allocation granule.
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);

/// \returns Total number of VGPRs (body not visible in this excerpt).
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {

/// \returns Addressable VGPR count; every VGPR in the file is addressable.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
/// \returns Minimum number of VGPRs that still permits \p WavesPerEU waves
/// per EU, clamped to the addressable range.
// NOTE(review): the early return after the occupancy check is not visible in
// this excerpt.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))

  // Smallest granule-aligned count that rules out fitting one more wave.
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
286 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
287 assert(WavesPerEU != 0);
289 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
290 getVGPRAllocGranule(Features));
291 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
292 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
295 } // end namespace IsaInfo
297 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
298 const FeatureBitset &Features) {
299 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
301 memset(&Header, 0, sizeof(Header));
303 Header.amd_kernel_code_version_major = 1;
304 Header.amd_kernel_code_version_minor = 1;
305 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
306 Header.amd_machine_version_major = ISA.Major;
307 Header.amd_machine_version_minor = ISA.Minor;
308 Header.amd_machine_version_stepping = ISA.Stepping;
309 Header.kernel_code_entry_byte_offset = sizeof(Header);
310 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
311 Header.wavefront_size = 6;
313 // If the code object does not support indirect functions, then the value must
315 Header.call_convention = -1;
317 // These alignment values are specified in powers of two, so alignment =
318 // 2^n. The minimum alignment is 2^4 = 16.
319 Header.kernarg_segment_alignment = 4;
320 Header.group_segment_alignment = 4;
321 Header.private_segment_alignment = 4;
/// \returns The ELF section holding HSA code (agent-visible, writable,
/// allocated).
// NOTE(review): a flag line (and the closing brace) appears to be missing
// between SHF_WRITE and SHF_AMDGPU_HSA_AGENT in this excerpt -- confirm the
// full flag set upstream.
MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
332 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
333 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
334 ELF::SHF_ALLOC | ELF::SHF_WRITE |
335 ELF::SHF_AMDGPU_HSA_GLOBAL |
336 ELF::SHF_AMDGPU_HSA_AGENT);
339 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
340 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
341 ELF::SHF_ALLOC | ELF::SHF_WRITE |
342 ELF::SHF_AMDGPU_HSA_GLOBAL);
345 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
346 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
347 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
348 ELF::SHF_AMDGPU_HSA_AGENT);
351 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
352 return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
355 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
356 return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
359 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
360 return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
363 bool shouldEmitConstantsToTextSection(const Triple &TT) {
364 return TT.getOS() != Triple::AMDHSA;
/// \returns The integer value of the string function attribute \p Name, or
/// \p Default when the attribute is absent; emits a context error when the
/// value cannot be parsed.
// NOTE(review): the closing braces and final "return Result;" are not
// visible in this excerpt.
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    // getAsInteger returns true on failure.
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);

/// \returns A pair of integers parsed from the comma-separated string value
/// of function attribute \p Name, or \p Default on failure.  When
/// \p OnlyFirstRequired is set, an absent second value is not an error.
// NOTE(review): the "StringRef Name" parameter line, the early returns, and
// the closing braces are not visible in this excerpt.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
// NOTE(review): early returns and some local declarations in the functions
// below are not visible in this excerpt -- confirm against the full file.

/// \returns Mask of the Vmcnt field value: the low 4 bits, extended by the
/// two high bits on ISA major version >= 9.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;

/// \returns Mask of the Expcnt field value.
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;

/// \returns Mask of the Lgkmcnt field value.
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;

/// \returns Mask of every s_waitcnt field in its encoded position; the high
/// Vmcnt bits are included only on ISA major version >= 9.
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;

/// Extracts the Vmcnt value from an encoded s_waitcnt immediate; on ISA
/// major version >= 9 the high Vmcnt bits are folded in above the low bits.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
    unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)

    unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
448 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
449 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
452 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
453 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
456 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
457 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
458 Vmcnt = decodeVmcnt(Version, Waitcnt);
459 Expcnt = decodeExpcnt(Version, Waitcnt);
460 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
// NOTE(review): the trailing parameter lines of the encode functions below,
// plus early returns and closing braces, are not visible in this excerpt.

/// Packs a Vmcnt value into \p Waitcnt; on ISA major version >= 9 the bits
/// above the low field go into the high Vmcnt field.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());

/// Packs an Expcnt value into \p Waitcnt.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());

/// Packs an Lgkmcnt value into \p Waitcnt.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());

/// \returns A fully-encoded s_waitcnt immediate built from the three counts;
/// the starting value is the full wait mask, so untouched bits remain set.
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
493 unsigned getInitialPSInputAddr(const Function &F) {
494 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
/// \returns True for the AMDGPU graphics-shader calling conventions
/// (VS/GS/PS/CS).
// NOTE(review): the switch header, return statements, and default case are
// not visible in this excerpt.
bool isShader(CallingConv::ID cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:

/// \returns True for any non-shader convention, plus AMDGPU_CS (compute
/// shaders count as compute).
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;

/// \returns True if \p CC is an entry-point calling convention (body not
/// visible in this excerpt).
bool isEntryFunctionCC(CallingConv::ID CC) {
517 bool isSI(const MCSubtargetInfo &STI) {
518 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
521 bool isCI(const MCSubtargetInfo &STI) {
522 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
525 bool isVI(const MCSubtargetInfo &STI) {
526 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
/// Maps a pseudo register to the concrete hardware (MC) register for the
/// current subtarget; FLAT_SCR and its halves are encoded differently on CI
/// versus VI.
// NOTE(review): the switch headers, any asserts, the default/fall-through
// returns, and closing braces are not visible in this excerpt.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {

  case AMDGPU::FLAT_SCR:
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;

/// Inverse of getMCReg: maps a subtarget-specific MC register back to its
/// target-independent pseudo register.
unsigned mc2PseudoReg(unsigned Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;
567 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
568 assert(OpNo < Desc.NumOperands);
569 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
570 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
571 OpType <= AMDGPU::OPERAND_SRC_LAST;
/// \returns True if operand \p OpNo of \p Desc is a floating-point source
/// operand (immediate or inline-constant FP variants).
// NOTE(review): the switch header, return statements, and default case are
// not visible in this excerpt.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
591 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
592 assert(OpNo < Desc.NumOperands);
593 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
594 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
595 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns The register width in bits for register class \p RCID.
// NOTE(review): the switch header and the per-group return statements (the
// actual bit widths) are not visible in this excerpt.
unsigned getRegBitWidth(unsigned RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:

  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:

  case AMDGPU::VReg_96RegClassID:

  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:

  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:

  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:

  // Reaching here with a class not listed above is a programming error.
  llvm_unreachable("Unexpected register class");
/// \returns The register width in bits of register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());

/// \returns The size in bytes of the register operand's register class.
// NOTE(review): the parameter line declaring "unsigned OpNo" and the closing
// braces are not visible in this excerpt.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
/// \returns True if \p Literal is an inlinable 64-bit immediate: a small
/// integer in [-16, 64], or the bit pattern of one of the special doubles
/// (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) when \p HasInv2Pi).
// NOTE(review): the "return true;" after the integer range check is not
// visible in this excerpt.
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi)

/// \returns True if \p Literal is an inlinable 32-bit immediate: a small
/// integer in [-16, 64], or the bit pattern of one of the special floats.
// NOTE(review): the "return true;" after the integer range check is not
// visible in this excerpt.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)
/// \returns True if \p Literal is an inlinable 16-bit immediate: a small
/// integer in [-16, 64], or one of the listed half-precision bit patterns.
// NOTE(review): the lines between the signature and the range check, the
// "return true;", and the closing brace are not visible in this excerpt --
// note in particular that \p HasInv2Pi is not consulted in the visible code.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {

  if (Literal >= -16 && Literal <= 64)

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118; // 1/2pi

/// \returns True if \p Literal packs two identical halves, each of which is
/// an inlinable 16-bit immediate.
// NOTE(review): lines between the signature and the first local are not
// visible in this excerpt.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
/// \returns True if the memory operand's pointer is known wave-uniform:
/// null/undef/argument/constant/global pointers, or instructions tagged with
/// "amdgpu.uniform" metadata.
// NOTE(review): the "return true;" taken by the first check and the closing
// brace are not visible in this excerpt.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");

/// \returns \p ByteOffset converted to the SMRD instruction's offset
/// encoding: SI/CI encode in dword units (bytes / 4).
// NOTE(review): the non-SI/CI return path and closing brace are not visible
// in this excerpt.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;
732 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
733 int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
734 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
735 isUInt<20>(EncodedOffset);
737 } // end namespace AMDGPU
739 } // end namespace llvm
// Out-of-line, namespace-scope definitions for the static members of
// AMDGPUAS declared in the header.  Before C++17, any ODR-used static const
// member needs exactly one such definition or the program fails to link.
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
/// \returns The address-space mapping selected by the triple's environment:
/// "amdgiz"/"amdgizcl" use one private/region numbering, everything else the
/// other.
// NOTE(review): the local AMDGPUAS declaration, the else-branch header, and
// the return statement are not visible in this excerpt.
AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();

  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;

    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;

/// Convenience overload: derive the mapping from a TargetMachine's triple.
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());

/// Convenience overload: derive the mapping from a Module's target triple.
AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
790 } // namespace AMDGPU