contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

   1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "AMDGPUBaseInfo.h"
  11 #include "AMDGPU.h"
  12 #include "SIDefines.h"
  13 #include "llvm/ADT/StringRef.h"
  14 #include "llvm/ADT/Triple.h"
  15 #include "llvm/BinaryFormat/ELF.h"
  16 #include "llvm/CodeGen/MachineMemOperand.h"
  17 #include "llvm/IR/Attributes.h"
  18 #include "llvm/IR/Constants.h"
  19 #include "llvm/IR/Function.h"
  20 #include "llvm/IR/GlobalValue.h"
  21 #include "llvm/IR/Instruction.h"
  22 #include "llvm/IR/LLVMContext.h"
  23 #include "llvm/IR/Module.h"
  24 #include "llvm/MC/MCContext.h"
  25 #include "llvm/MC/MCInstrDesc.h"
  26 #include "llvm/MC/MCRegisterInfo.h"
  27 #include "llvm/MC/MCSectionELF.h"
  28 #include "llvm/MC/MCSubtargetInfo.h"
  29 #include "llvm/MC/SubtargetFeature.h"
  30 #include "llvm/Support/Casting.h"
  31 #include "llvm/Support/ErrorHandling.h"
  32 #include "llvm/Support/MathExtras.h"
  33 #include <algorithm>
  34 #include <cassert>
  35 #include <cstdint>
  36 #include <cstring>
  37 #include <utility>
  38
  39 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  40
  41 #define GET_INSTRINFO_NAMED_OPS
  42 #include "AMDGPUGenInstrInfo.inc"
  43 #undef GET_INSTRINFO_NAMED_OPS
  44
  45 namespace {
  46
  47 /// \returns Bit mask for given bit \p Shift and bit \p Width.
  48 unsigned getBitMask(unsigned Shift, unsigned Width) {
  49   return ((1 << Width) - 1) << Shift;
  50 }
  51
  52 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
  53 ///
  54 /// \returns Packed \p Dst.
  55 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  56   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  57   Dst |= (Src << Shift) & getBitMask(Shift, Width);
  58   return Dst;
  59 }
  60
  61 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
  62 ///
  63 /// \returns Unpacked bits.
  64 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  65   return (Src & getBitMask(Shift, Width)) >> Shift;
  66 }
  67
  68 /// \returns Vmcnt bit shift (lower bits).
  69 unsigned getVmcntBitShiftLo() { return 0; }
  70
  71 /// \returns Vmcnt bit width (lower bits).
  72 unsigned getVmcntBitWidthLo() { return 4; }
  73
  74 /// \returns Expcnt bit shift.
  75 unsigned getExpcntBitShift() { return 4; }
  76
  77 /// \returns Expcnt bit width.
  78 unsigned getExpcntBitWidth() { return 3; }
  79
  80 /// \returns Lgkmcnt bit shift.
  81 unsigned getLgkmcntBitShift() { return 8; }
  82
  83 /// \returns Lgkmcnt bit width.
  84 unsigned getLgkmcntBitWidth() { return 4; }
  85
  86 /// \returns Vmcnt bit shift (higher bits).
  87 unsigned getVmcntBitShiftHi() { return 14; }
  88
  89 /// \returns Vmcnt bit width (higher bits).
  90 unsigned getVmcntBitWidthHi() { return 2; }
  91
  92 } // end namespace anonymous
  93
  94 namespace llvm {
  95
/// Command-line switch "enable-packed-inlinable-literals", off by default.
/// While it is off, isInlinableLiteralV216() reports every packed literal as
/// not inlinable.
static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));
 100
 101 namespace AMDGPU {
 102
 103 namespace IsaInfo {
 104
 105 IsaVersion getIsaVersion(const FeatureBitset &Features) {
 106   // SI.
 107   if (Features.test(FeatureISAVersion6_0_0))
 108     return {6, 0, 0};
 109   if (Features.test(FeatureISAVersion6_0_1))
 110     return {6, 0, 1};
 111   // CI.
 112   if (Features.test(FeatureISAVersion7_0_0))
 113     return {7, 0, 0};
 114   if (Features.test(FeatureISAVersion7_0_1))
 115     return {7, 0, 1};
 116   if (Features.test(FeatureISAVersion7_0_2))
 117     return {7, 0, 2};
 118   if (Features.test(FeatureISAVersion7_0_3))
 119     return {7, 0, 3};
 120
 121   // VI.
 122   if (Features.test(FeatureISAVersion8_0_0))
 123     return {8, 0, 0};
 124   if (Features.test(FeatureISAVersion8_0_1))
 125     return {8, 0, 1};
 126   if (Features.test(FeatureISAVersion8_0_2))
 127     return {8, 0, 2};
 128   if (Features.test(FeatureISAVersion8_0_3))
 129     return {8, 0, 3};
 130   if (Features.test(FeatureISAVersion8_0_4))
 131     return {8, 0, 4};
 132   if (Features.test(FeatureISAVersion8_1_0))
 133     return {8, 1, 0};
 134
 135   // GFX9.
 136   if (Features.test(FeatureISAVersion9_0_0))
 137     return {9, 0, 0};
 138   if (Features.test(FeatureISAVersion9_0_1))
 139     return {9, 0, 1};
 140   if (Features.test(FeatureISAVersion9_0_2))
 141     return {9, 0, 2};
 142   if (Features.test(FeatureISAVersion9_0_3))
 143     return {9, 0, 3};
 144
 145   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
 146     return {0, 0, 0};
 147   return {7, 0, 0};
 148 }
 149
 150 unsigned getWavefrontSize(const FeatureBitset &Features) {
 151   if (Features.test(FeatureWavefrontSize16))
 152     return 16;
 153   if (Features.test(FeatureWavefrontSize32))
 154     return 32;
 155
 156   return 64;
 157 }
 158
 159 unsigned getLocalMemorySize(const FeatureBitset &Features) {
 160   if (Features.test(FeatureLocalMemorySize32768))
 161     return 32768;
 162   if (Features.test(FeatureLocalMemorySize65536))
 163     return 65536;
 164
 165   return 0;
 166 }
 167
 168 unsigned getEUsPerCU(const FeatureBitset &Features) {
 169   return 4;
 170 }
 171
 172 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
 173                                unsigned FlatWorkGroupSize) {
 174   if (!Features.test(FeatureGCN))
 175     return 8;
 176   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 177   if (N == 1)
 178     return 40;
 179   N = 40 / N;
 180   return std::min(N, 16u);
 181 }
 182
 183 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
 184   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
 185 }
 186
 187 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
 188                           unsigned FlatWorkGroupSize) {
 189   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 190 }
 191
 192 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
 193   return 1;
 194 }
 195
 196 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
 197   if (!Features.test(FeatureGCN))
 198     return 8;
 199   // FIXME: Need to take scratch memory into account.
 200   return 10;
 201 }
 202
 203 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 204                           unsigned FlatWorkGroupSize) {
 205   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
 206                  getEUsPerCU(Features)) / getEUsPerCU(Features);
 207 }
 208
 209 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
 210   return 1;
 211 }
 212
 213 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
 214   return 2048;
 215 }
 216
 217 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 218                               unsigned FlatWorkGroupSize) {
 219   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
 220                  getWavefrontSize(Features);
 221 }
 222
 223 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
 224   IsaVersion Version = getIsaVersion(Features);
 225   if (Version.Major >= 8)
 226     return 16;
 227   return 8;
 228 }
 229
 230 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
 231   return 8;
 232 }
 233
 234 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
 235   IsaVersion Version = getIsaVersion(Features);
 236   if (Version.Major >= 8)
 237     return 800;
 238   return 512;
 239 }
 240
 241 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
 242   if (Features.test(FeatureSGPRInitBug))
 243     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 244
 245   IsaVersion Version = getIsaVersion(Features);
 246   if (Version.Major >= 8)
 247     return 102;
 248   return 104;
 249 }
 250
 251 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 252   assert(WavesPerEU != 0);
 253
 254   if (WavesPerEU >= getMaxWavesPerEU(Features))
 255     return 0;
 256   unsigned MinNumSGPRs =
 257       alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
 258                 getSGPRAllocGranule(Features)) + 1;
 259   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
 260 }
 261
 262 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 263                         bool Addressable) {
 264   assert(WavesPerEU != 0);
 265
 266   IsaVersion Version = getIsaVersion(Features);
 267   unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
 268                                    getSGPRAllocGranule(Features));
 269   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
 270   if (Version.Major >= 8 && !Addressable)
 271     AddressableNumSGPRs = 112;
 272   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
 273 }
 274
 275 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
 276   return 4;
 277 }
 278
 279 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
 280   return getVGPRAllocGranule(Features);
 281 }
 282
 283 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
 284   return 256;
 285 }
 286
 287 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
 288   return getTotalNumVGPRs(Features);
 289 }
 290
 291 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 292   assert(WavesPerEU != 0);
 293
 294   if (WavesPerEU >= getMaxWavesPerEU(Features))
 295     return 0;
 296   unsigned MinNumVGPRs =
 297       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
 298                 getVGPRAllocGranule(Features)) + 1;
 299   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
 300 }
 301
 302 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 303   assert(WavesPerEU != 0);
 304
 305   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
 306                                    getVGPRAllocGranule(Features));
 307   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
 308   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 309 }
 310
 311 } // end namespace IsaInfo
 312
 313 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 314                                const FeatureBitset &Features) {
 315   IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
 316
 317   memset(&Header, 0, sizeof(Header));
 318
 319   Header.amd_kernel_code_version_major = 1;
 320   Header.amd_kernel_code_version_minor = 1;
 321   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
 322   Header.amd_machine_version_major = ISA.Major;
 323   Header.amd_machine_version_minor = ISA.Minor;
 324   Header.amd_machine_version_stepping = ISA.Stepping;
 325   Header.kernel_code_entry_byte_offset = sizeof(Header);
 326   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
 327   Header.wavefront_size = 6;
 328
 329   // If the code object does not support indirect functions, then the value must
 330   // be 0xffffffff.
 331   Header.call_convention = -1;
 332
 333   // These alignment values are specified in powers of two, so alignment =
 334   // 2^n.  The minimum alignment is 2^4 = 16.
 335   Header.kernarg_segment_alignment = 4;
 336   Header.group_segment_alignment = 4;
 337   Header.private_segment_alignment = 4;
 338 }
 339
 340 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
 341   return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
 342 }
 343
 344 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
 345   return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
 346 }
 347
 348 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
 349   return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
 350 }
 351
 352 bool shouldEmitConstantsToTextSection(const Triple &TT) {
 353   return TT.getOS() != Triple::AMDHSA;
 354 }
 355
 356 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
 357   Attribute A = F.getFnAttribute(Name);
 358   int Result = Default;
 359
 360   if (A.isStringAttribute()) {
 361     StringRef Str = A.getValueAsString();
 362     if (Str.getAsInteger(0, Result)) {
 363       LLVMContext &Ctx = F.getContext();
 364       Ctx.emitError("can't parse integer attribute " + Name);
 365     }
 366   }
 367
 368   return Result;
 369 }
 370
 371 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 372                                             StringRef Name,
 373                                             std::pair<int, int> Default,
 374                                             bool OnlyFirstRequired) {
 375   Attribute A = F.getFnAttribute(Name);
 376   if (!A.isStringAttribute())
 377     return Default;
 378
 379   LLVMContext &Ctx = F.getContext();
 380   std::pair<int, int> Ints = Default;
 381   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
 382   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
 383     Ctx.emitError("can't parse first integer attribute " + Name);
 384     return Default;
 385   }
 386   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
 387     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
 388       Ctx.emitError("can't parse second integer attribute " + Name);
 389       return Default;
 390     }
 391   }
 392
 393   return Ints;
 394 }
 395
 396 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
 397   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
 398   if (Version.Major < 9)
 399     return VmcntLo;
 400
 401   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
 402   return VmcntLo | VmcntHi;
 403 }
 404
 405 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
 406   return (1 << getExpcntBitWidth()) - 1;
 407 }
 408
 409 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
 410   return (1 << getLgkmcntBitWidth()) - 1;
 411 }
 412
 413 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
 414   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
 415   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
 416   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
 417   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
 418   if (Version.Major < 9)
 419     return Waitcnt;
 420
 421   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
 422   return Waitcnt | VmcntHi;
 423 }
 424
 425 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 426   unsigned VmcntLo =
 427       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 428   if (Version.Major < 9)
 429     return VmcntLo;
 430
 431   unsigned VmcntHi =
 432       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 433   VmcntHi <<= getVmcntBitWidthLo();
 434   return VmcntLo | VmcntHi;
 435 }
 436
 437 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 438   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 439 }
 440
 441 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 442   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 443 }
 444
 445 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 446                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
 447   Vmcnt = decodeVmcnt(Version, Waitcnt);
 448   Expcnt = decodeExpcnt(Version, Waitcnt);
 449   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
 450 }
 451
 452 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 453                      unsigned Vmcnt) {
 454   Waitcnt =
 455       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 456   if (Version.Major < 9)
 457     return Waitcnt;
 458
 459   Vmcnt >>= getVmcntBitWidthLo();
 460   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 461 }
 462
 463 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 464                       unsigned Expcnt) {
 465   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 466 }
 467
 468 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 469                        unsigned Lgkmcnt) {
 470   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 471 }
 472
 473 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 474                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
 475   unsigned Waitcnt = getWaitcntBitMask(Version);
 476   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
 477   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
 478   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
 479   return Waitcnt;
 480 }
 481
 482 unsigned getInitialPSInputAddr(const Function &F) {
 483   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 484 }
 485
 486 bool isShader(CallingConv::ID cc) {
 487   switch(cc) {
 488     case CallingConv::AMDGPU_VS:
 489     case CallingConv::AMDGPU_HS:
 490     case CallingConv::AMDGPU_GS:
 491     case CallingConv::AMDGPU_PS:
 492     case CallingConv::AMDGPU_CS:
 493       return true;
 494     default:
 495       return false;
 496   }
 497 }
 498
 499 bool isCompute(CallingConv::ID cc) {
 500   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
 501 }
 502
 503 bool isEntryFunctionCC(CallingConv::ID CC) {
 504   switch (CC) {
 505   case CallingConv::AMDGPU_KERNEL:
 506   case CallingConv::SPIR_KERNEL:
 507   case CallingConv::AMDGPU_VS:
 508   case CallingConv::AMDGPU_GS:
 509   case CallingConv::AMDGPU_PS:
 510   case CallingConv::AMDGPU_CS:
 511   case CallingConv::AMDGPU_HS:
 512     return true;
 513   default:
 514     return false;
 515   }
 516 }
 517
 518 bool isSI(const MCSubtargetInfo &STI) {
 519   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
 520 }
 521
 522 bool isCI(const MCSubtargetInfo &STI) {
 523   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
 524 }
 525
 526 bool isVI(const MCSubtargetInfo &STI) {
 527   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
 528 }
 529
 530 bool isGFX9(const MCSubtargetInfo &STI) {
 531   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 532 }
 533
 534 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
 535   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
 536   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
 537   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
 538     Reg == AMDGPU::SCC;
 539 }
 540
 541 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
 542   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
 543     if (*R == Reg1) return true;
 544   }
 545   return false;
 546 }
 547
 548 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 549
 550   switch(Reg) {
 551   default: break;
 552   case AMDGPU::FLAT_SCR:
 553     assert(!isSI(STI));
 554     return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
 555
 556   case AMDGPU::FLAT_SCR_LO:
 557     assert(!isSI(STI));
 558     return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
 559
 560   case AMDGPU::FLAT_SCR_HI:
 561     assert(!isSI(STI));
 562     return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
 563   }
 564   return Reg;
 565 }
 566
 567 unsigned mc2PseudoReg(unsigned Reg) {
 568   switch (Reg) {
 569   case AMDGPU::FLAT_SCR_ci:
 570   case AMDGPU::FLAT_SCR_vi:
 571     return FLAT_SCR;
 572
 573   case AMDGPU::FLAT_SCR_LO_ci:
 574   case AMDGPU::FLAT_SCR_LO_vi:
 575     return AMDGPU::FLAT_SCR_LO;
 576
 577   case AMDGPU::FLAT_SCR_HI_ci:
 578   case AMDGPU::FLAT_SCR_HI_vi:
 579     return AMDGPU::FLAT_SCR_HI;
 580
 581   default:
 582     return Reg;
 583   }
 584 }
 585
 586 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 587   assert(OpNo < Desc.NumOperands);
 588   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 589   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
 590          OpType <= AMDGPU::OPERAND_SRC_LAST;
 591 }
 592
 593 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 594   assert(OpNo < Desc.NumOperands);
 595   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 596   switch (OpType) {
 597   case AMDGPU::OPERAND_REG_IMM_FP32:
 598   case AMDGPU::OPERAND_REG_IMM_FP64:
 599   case AMDGPU::OPERAND_REG_IMM_FP16:
 600   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 601   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 602   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 603   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 604     return true;
 605   default:
 606     return false;
 607   }
 608 }
 609
 610 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 611   assert(OpNo < Desc.NumOperands);
 612   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 613   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
 614          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
 615 }
 616
 617 // Avoid using MCRegisterClass::getSize, since that function will go away
 618 // (move from MC* level to Target* level). Return size in bits.
 619 unsigned getRegBitWidth(unsigned RCID) {
 620   switch (RCID) {
 621   case AMDGPU::SGPR_32RegClassID:
 622   case AMDGPU::VGPR_32RegClassID:
 623   case AMDGPU::VS_32RegClassID:
 624   case AMDGPU::SReg_32RegClassID:
 625   case AMDGPU::SReg_32_XM0RegClassID:
 626     return 32;
 627   case AMDGPU::SGPR_64RegClassID:
 628   case AMDGPU::VS_64RegClassID:
 629   case AMDGPU::SReg_64RegClassID:
 630   case AMDGPU::VReg_64RegClassID:
 631     return 64;
 632   case AMDGPU::VReg_96RegClassID:
 633     return 96;
 634   case AMDGPU::SGPR_128RegClassID:
 635   case AMDGPU::SReg_128RegClassID:
 636   case AMDGPU::VReg_128RegClassID:
 637     return 128;
 638   case AMDGPU::SReg_256RegClassID:
 639   case AMDGPU::VReg_256RegClassID:
 640     return 256;
 641   case AMDGPU::SReg_512RegClassID:
 642   case AMDGPU::VReg_512RegClassID:
 643     return 512;
 644   default:
 645     llvm_unreachable("Unexpected register class");
 646   }
 647 }
 648
 649 unsigned getRegBitWidth(const MCRegisterClass &RC) {
 650   return getRegBitWidth(RC.getID());
 651 }
 652
 653 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 654                            unsigned OpNo) {
 655   assert(OpNo < Desc.NumOperands);
 656   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
 657   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
 658 }
 659
 660 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 661   if (Literal >= -16 && Literal <= 64)
 662     return true;
 663
 664   uint64_t Val = static_cast<uint64_t>(Literal);
 665   return (Val == DoubleToBits(0.0)) ||
 666          (Val == DoubleToBits(1.0)) ||
 667          (Val == DoubleToBits(-1.0)) ||
 668          (Val == DoubleToBits(0.5)) ||
 669          (Val == DoubleToBits(-0.5)) ||
 670          (Val == DoubleToBits(2.0)) ||
 671          (Val == DoubleToBits(-2.0)) ||
 672          (Val == DoubleToBits(4.0)) ||
 673          (Val == DoubleToBits(-4.0)) ||
 674          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
 675 }
 676
 677 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
 678   if (Literal >= -16 && Literal <= 64)
 679     return true;
 680
 681   // The actual type of the operand does not seem to matter as long
 682   // as the bits match one of the inline immediate values.  For example:
 683   //
 684   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
 685   // so it is a legal inline immediate.
 686   //
 687   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
 688   // floating-point, so it is a legal inline immediate.
 689
 690   uint32_t Val = static_cast<uint32_t>(Literal);
 691   return (Val == FloatToBits(0.0f)) ||
 692          (Val == FloatToBits(1.0f)) ||
 693          (Val == FloatToBits(-1.0f)) ||
 694          (Val == FloatToBits(0.5f)) ||
 695          (Val == FloatToBits(-0.5f)) ||
 696          (Val == FloatToBits(2.0f)) ||
 697          (Val == FloatToBits(-2.0f)) ||
 698          (Val == FloatToBits(4.0f)) ||
 699          (Val == FloatToBits(-4.0f)) ||
 700          (Val == 0x3e22f983 && HasInv2Pi);
 701 }
 702
 703 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 704   if (!HasInv2Pi)
 705     return false;
 706
 707   if (Literal >= -16 && Literal <= 64)
 708     return true;
 709
 710   uint16_t Val = static_cast<uint16_t>(Literal);
 711   return Val == 0x3C00 || // 1.0
 712          Val == 0xBC00 || // -1.0
 713          Val == 0x3800 || // 0.5
 714          Val == 0xB800 || // -0.5
 715          Val == 0x4000 || // 2.0
 716          Val == 0xC000 || // -2.0
 717          Val == 0x4400 || // 4.0
 718          Val == 0xC400 || // -4.0
 719          Val == 0x3118;   // 1/2pi
 720 }
 721
 722 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
 723   assert(HasInv2Pi);
 724
 725   if (!EnablePackedInlinableLiterals)
 726     return false;
 727
 728   int16_t Lo16 = static_cast<int16_t>(Literal);
 729   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
 730   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 731 }
 732
 733 bool isUniformMMO(const MachineMemOperand *MMO) {
 734   const Value *Ptr = MMO->getValue();
 735   // UndefValue means this is a load of a kernel input.  These are uniform.
 736   // Sometimes LDS instructions have constant pointers.
 737   // If Ptr is null, then that means this mem operand contains a
 738   // PseudoSourceValue like GOT.
 739   if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
 740       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
 741     return true;
 742
 743   const Instruction *I = dyn_cast<Instruction>(Ptr);
 744   return I && I->getMetadata("amdgpu.uniform");
 745 }
 746
 747 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 748   if (isSI(ST) || isCI(ST))
 749     return ByteOffset >> 2;
 750
 751   return ByteOffset;
 752 }
 753
 754 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 755   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
 756   return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
 757                                 isUInt<20>(EncodedOffset);
 758 }
 759 } // end namespace AMDGPU
 760
 761 } // end namespace llvm
 762
// Namespace-scope definitions of the AMDGPUAS address space constants.
// NOTE(review): presumably these are static const members initialised inside
// the AMDGPUAS declaration, and these definitions give them storage so they
// can be odr-used; confirm against the header.
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
 785
 786 namespace llvm {
 787 namespace AMDGPU {
 788
 789 AMDGPUAS getAMDGPUAS(Triple T) {
 790   auto Env = T.getEnvironmentName();
 791   AMDGPUAS AS;
 792   if (Env == "amdgiz" || Env == "amdgizcl") {
 793     AS.FLAT_ADDRESS     = 0;
 794     AS.PRIVATE_ADDRESS  = 5;
 795     AS.REGION_ADDRESS   = 4;
 796   }
 797   else {
 798     AS.FLAT_ADDRESS     = 4;
 799     AS.PRIVATE_ADDRESS  = 0;
 800     AS.REGION_ADDRESS   = 5;
 801    }
 802   return AS;
 803 }
 804
 805 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
 806   return getAMDGPUAS(M.getTargetTriple());
 807 }
 808
 809 AMDGPUAS getAMDGPUAS(const Module &M) {
 810   return getAMDGPUAS(Triple(M.getTargetTriple()));
 811 }
 812 } // namespace AMDGPU
 813 } // namespace llvm