1 //===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include <cassert>
#include <cstring>
22 #define GET_SUBTARGETINFO_ENUM
23 #include "AMDGPUGenSubtargetInfo.inc"
24 #undef GET_SUBTARGETINFO_ENUM
26 #define GET_REGINFO_ENUM
27 #include "AMDGPUGenRegisterInfo.inc"
28 #undef GET_REGINFO_ENUM
30 #define GET_INSTRINFO_NAMED_OPS
31 #define GET_INSTRINFO_ENUM
32 #include "AMDGPUGenInstrInfo.inc"
33 #undef GET_INSTRINFO_NAMED_OPS
34 #undef GET_INSTRINFO_ENUM
/// \returns Bit mask covering \p Width bits starting at bit \p Shift.
/// e.g. getBitMask(4, 3) == 0b0111'0000. Behavior is undefined when
/// Shift + Width exceeds the bit width of unsigned.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}
43 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
45 /// \returns Packed \p Dst.
46 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
47 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
48 Dst |= (Src << Shift) & getBitMask(Shift, Width);
52 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
54 /// \returns Unpacked bits.
55 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
56 return (Src & getBitMask(Shift, Width)) >> Shift;
/// \returns Bit position of the Vmcnt field in a s_waitcnt encoding.
unsigned getVmcntBitShift() {
  return 0;
}

/// \returns Width in bits of the Vmcnt field.
unsigned getVmcntBitWidth() {
  return 4;
}

/// \returns Bit position of the Expcnt field in a s_waitcnt encoding.
unsigned getExpcntBitShift() {
  return 4;
}

/// \returns Width in bits of the Expcnt field.
unsigned getExpcntBitWidth() {
  return 3;
}

/// \returns Bit position of the Lgkmcnt field in a s_waitcnt encoding.
unsigned getLgkmcntBitShift() {
  return 8;
}

/// \returns Width in bits of the Lgkmcnt field.
unsigned getLgkmcntBitWidth() {
  return 4;
}
77 } // anonymous namespace
82 IsaVersion getIsaVersion(const FeatureBitset &Features) {
84 if (Features.test(FeatureISAVersion7_0_0))
87 if (Features.test(FeatureISAVersion7_0_1))
90 if (Features.test(FeatureISAVersion7_0_2))
93 if (Features.test(FeatureISAVersion8_0_0))
96 if (Features.test(FeatureISAVersion8_0_1))
99 if (Features.test(FeatureISAVersion8_0_2))
102 if (Features.test(FeatureISAVersion8_0_3))
105 if (Features.test(FeatureISAVersion8_0_4))
108 if (Features.test(FeatureISAVersion8_1_0))
114 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
115 const FeatureBitset &Features) {
117 IsaVersion ISA = getIsaVersion(Features);
119 memset(&Header, 0, sizeof(Header));
121 Header.amd_kernel_code_version_major = 1;
122 Header.amd_kernel_code_version_minor = 0;
123 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
124 Header.amd_machine_version_major = ISA.Major;
125 Header.amd_machine_version_minor = ISA.Minor;
126 Header.amd_machine_version_stepping = ISA.Stepping;
127 Header.kernel_code_entry_byte_offset = sizeof(Header);
128 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
129 Header.wavefront_size = 6;
130 // These alignment values are specified in powers of two, so alignment =
131 // 2^n. The minimum alignment is 2^4 = 16.
132 Header.kernarg_segment_alignment = 4;
133 Header.group_segment_alignment = 4;
134 Header.private_segment_alignment = 4;
137 MCSection *getHSATextSection(MCContext &Ctx) {
138 return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
139 ELF::SHF_ALLOC | ELF::SHF_WRITE |
141 ELF::SHF_AMDGPU_HSA_AGENT |
142 ELF::SHF_AMDGPU_HSA_CODE);
145 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
146 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
147 ELF::SHF_ALLOC | ELF::SHF_WRITE |
148 ELF::SHF_AMDGPU_HSA_GLOBAL |
149 ELF::SHF_AMDGPU_HSA_AGENT);
152 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
153 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
154 ELF::SHF_ALLOC | ELF::SHF_WRITE |
155 ELF::SHF_AMDGPU_HSA_GLOBAL);
158 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
159 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
160 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
161 ELF::SHF_AMDGPU_HSA_AGENT);
164 bool isGroupSegment(const GlobalValue *GV) {
165 return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
168 bool isGlobalSegment(const GlobalValue *GV) {
169 return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
172 bool isReadOnlySegment(const GlobalValue *GV) {
173 return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
176 bool shouldEmitConstantsToTextSection(const Triple &TT) {
177 return TT.getOS() != Triple::AMDHSA;
180 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
181 Attribute A = F.getFnAttribute(Name);
182 int Result = Default;
184 if (A.isStringAttribute()) {
185 StringRef Str = A.getValueAsString();
186 if (Str.getAsInteger(0, Result)) {
187 LLVMContext &Ctx = F.getContext();
188 Ctx.emitError("can't parse integer attribute " + Name);
195 std::pair<int, int> getIntegerPairAttribute(const Function &F,
197 std::pair<int, int> Default,
198 bool OnlyFirstRequired) {
199 Attribute A = F.getFnAttribute(Name);
200 if (!A.isStringAttribute())
203 LLVMContext &Ctx = F.getContext();
204 std::pair<int, int> Ints = Default;
205 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
206 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
207 Ctx.emitError("can't parse first integer attribute " + Name);
210 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
211 if (!OnlyFirstRequired || Strs.second.trim().size()) {
212 Ctx.emitError("can't parse second integer attribute " + Name);
220 unsigned getWaitcntBitMask(IsaVersion Version) {
221 unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
222 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
223 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
224 return Vmcnt | Expcnt | Lgkmcnt;
227 unsigned getVmcntBitMask(IsaVersion Version) {
228 return (1 << getVmcntBitWidth()) - 1;
231 unsigned getExpcntBitMask(IsaVersion Version) {
232 return (1 << getExpcntBitWidth()) - 1;
235 unsigned getLgkmcntBitMask(IsaVersion Version) {
236 return (1 << getLgkmcntBitWidth()) - 1;
239 unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
240 return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
243 unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
244 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
247 unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
248 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
251 void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
252 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
253 Vmcnt = decodeVmcnt(Version, Waitcnt);
254 Expcnt = decodeExpcnt(Version, Waitcnt);
255 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
258 unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
259 return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
262 unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
263 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
266 unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
267 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
270 unsigned encodeWaitcnt(IsaVersion Version,
271 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
272 unsigned Waitcnt = getWaitcntBitMask(Version);
273 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
274 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
275 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
279 unsigned getInitialPSInputAddr(const Function &F) {
280 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
283 bool isShader(CallingConv::ID cc) {
285 case CallingConv::AMDGPU_VS:
286 case CallingConv::AMDGPU_GS:
287 case CallingConv::AMDGPU_PS:
288 case CallingConv::AMDGPU_CS:
295 bool isCompute(CallingConv::ID cc) {
296 return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
299 bool isSI(const MCSubtargetInfo &STI) {
300 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
303 bool isCI(const MCSubtargetInfo &STI) {
304 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
307 bool isVI(const MCSubtargetInfo &STI) {
308 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
311 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
315 case AMDGPU::FLAT_SCR:
317 return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
319 case AMDGPU::FLAT_SCR_LO:
321 return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
323 case AMDGPU::FLAT_SCR_HI:
325 return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
330 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
331 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
332 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
333 OpType <= AMDGPU::OPERAND_SRC_LAST;
336 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
337 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
339 case AMDGPU::OPERAND_REG_IMM_FP32:
340 case AMDGPU::OPERAND_REG_IMM_FP64:
341 case AMDGPU::OPERAND_REG_IMM_FP16:
342 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
343 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
344 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
351 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
352 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
353 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
354 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
357 // Avoid using MCRegisterClass::getSize, since that function will go away
358 // (move from MC* level to Target* level). Return size in bits.
359 unsigned getRegBitWidth(unsigned RCID) {
361 case AMDGPU::SGPR_32RegClassID:
362 case AMDGPU::VGPR_32RegClassID:
363 case AMDGPU::VS_32RegClassID:
364 case AMDGPU::SReg_32RegClassID:
365 case AMDGPU::SReg_32_XM0RegClassID:
367 case AMDGPU::SGPR_64RegClassID:
368 case AMDGPU::VS_64RegClassID:
369 case AMDGPU::SReg_64RegClassID:
370 case AMDGPU::VReg_64RegClassID:
372 case AMDGPU::VReg_96RegClassID:
374 case AMDGPU::SGPR_128RegClassID:
375 case AMDGPU::SReg_128RegClassID:
376 case AMDGPU::VReg_128RegClassID:
378 case AMDGPU::SReg_256RegClassID:
379 case AMDGPU::VReg_256RegClassID:
381 case AMDGPU::SReg_512RegClassID:
382 case AMDGPU::VReg_512RegClassID:
385 llvm_unreachable("Unexpected register class");
389 unsigned getRegBitWidth(const MCRegisterClass &RC) {
390 return getRegBitWidth(RC.getID());
393 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
395 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
396 return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
399 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
400 if (Literal >= -16 && Literal <= 64)
403 uint64_t Val = static_cast<uint64_t>(Literal);
404 return (Val == DoubleToBits(0.0)) ||
405 (Val == DoubleToBits(1.0)) ||
406 (Val == DoubleToBits(-1.0)) ||
407 (Val == DoubleToBits(0.5)) ||
408 (Val == DoubleToBits(-0.5)) ||
409 (Val == DoubleToBits(2.0)) ||
410 (Val == DoubleToBits(-2.0)) ||
411 (Val == DoubleToBits(4.0)) ||
412 (Val == DoubleToBits(-4.0)) ||
413 (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
416 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
417 if (Literal >= -16 && Literal <= 64)
420 // The actual type of the operand does not seem to matter as long
421 // as the bits match one of the inline immediate values. For example:
423 // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
424 // so it is a legal inline immediate.
426 // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
427 // floating-point, so it is a legal inline immediate.
429 uint32_t Val = static_cast<uint32_t>(Literal);
430 return (Val == FloatToBits(0.0f)) ||
431 (Val == FloatToBits(1.0f)) ||
432 (Val == FloatToBits(-1.0f)) ||
433 (Val == FloatToBits(0.5f)) ||
434 (Val == FloatToBits(-0.5f)) ||
435 (Val == FloatToBits(2.0f)) ||
436 (Val == FloatToBits(-2.0f)) ||
437 (Val == FloatToBits(4.0f)) ||
438 (Val == FloatToBits(-4.0f)) ||
439 (Val == 0x3e22f983 && HasInv2Pi);
/// \returns True if the 16-bit value \p Literal can be encoded as a SI
/// inline constant: a small integer in [-16, 64] or one of the supported
/// half-precision float bit patterns. f16 operands only exist on subtargets
/// that also have the 1/(2*pi) constant, hence the assert on \p HasInv2Pi.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}
460 } // End namespace AMDGPU
461 } // End namespace llvm