1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "AMDGPUBaseInfo.h"
12 #include "SIDefines.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/CodeGen/MachineMemOperand.h"
16 #include "llvm/IR/Attributes.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/Function.h"
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/IR/Instruction.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/MC/MCInstrDesc.h"
25 #include "llvm/MC/MCRegisterInfo.h"
26 #include "llvm/MC/MCSectionELF.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/ELF.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/MathExtras.h"
39 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
42 #define GET_INSTRINFO_NAMED_OPS
43 #include "AMDGPUGenInstrInfo.inc"
44 #undef GET_INSTRINFO_NAMED_OPS
/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  // Use an unsigned literal and special-case Width == 32: "1 << 31" overflows
  // a signed int and "1u << 32" shifts by the full bit-width, both undefined
  // behavior.
  return (Width == 32 ? ~0u : ((1u << Width) - 1)) << Shift;
}
/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
// NOTE(review): the trailing "return Dst;" and closing brace are not visible
// in this excerpt -- confirm against the full file.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  // Clear the destination field.  The extra ~(1 << Shift) term is redundant
  // for any non-zero Width, since the mask already covers bit Shift.
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  // Insert the source value, truncated to the field width.
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
62 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
64 /// \returns Unpacked bits.
65 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
66 return (Src & getBitMask(Shift, Width)) >> Shift;
// Bit layout of the s_waitcnt immediate as used by the pack/unpack helpers
// above: vmcnt low bits [3:0], expcnt bits [6:4], lgkmcnt bits [11:8], and
// (consulted only when the ISA major version is >= 9, see the mask/encode
// functions below) vmcnt high bits [15:14].

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
93 } // end namespace anonymous
/// \returns The ISA version selected by the subtarget feature bits, tested
/// from the oldest (7.0.0) through the newest (9.0.1) known versions.
// NOTE(review): the per-feature return statements, fall-through handling, and
// closing brace are not visible in this excerpt -- verify the exact version
// returned for each feature against the full file.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  if (Features.test(FeatureISAVersion7_0_0))
  if (Features.test(FeatureISAVersion7_0_1))
  if (Features.test(FeatureISAVersion7_0_2))

  if (Features.test(FeatureISAVersion8_0_0))
  if (Features.test(FeatureISAVersion8_0_1))
  if (Features.test(FeatureISAVersion8_0_2))
  if (Features.test(FeatureISAVersion8_0_3))
  if (Features.test(FeatureISAVersion8_0_4))
  if (Features.test(FeatureISAVersion8_1_0))

  if (Features.test(FeatureISAVersion9_0_0))
  if (Features.test(FeatureISAVersion9_0_1))

  // Non-GCN targets and Southern Islands share a common fallback.
  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
// NOTE(review): the return statements of the next three functions are not
// visible in this excerpt -- confirm the selected values upstream.

/// \returns Wavefront size selected by the subtarget features.
unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
  if (Features.test(FeatureWavefrontSize32))

/// \returns Local (LDS) memory size in bytes selected by the features.
unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
  if (Features.test(FeatureLocalMemorySize65536))

/// \returns Number of execution units per compute unit.
unsigned getEUsPerCU(const FeatureBitset &Features) {

/// \returns Maximum number of work groups per CU that can hold a work group
/// of \p FlatWorkGroupSize, capped at 16.
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))

  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);

  return std::min(N, 16u);
167 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
168 return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
171 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
172 unsigned FlatWorkGroupSize) {
173 return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit (body not visible in
/// this excerpt).
unsigned getMinWavesPerEU(const FeatureBitset &Features) {

/// \returns Maximum number of waves per execution unit.
// NOTE(review): the return values are not visible in this excerpt.
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
  // FIXME: Need to take scratch memory into account.
187 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
188 unsigned FlatWorkGroupSize) {
189 return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
190 getEUsPerCU(Features)) / getEUsPerCU(Features);
/// \returns Minimum flat work group size (body not visible in this excerpt).
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {

/// \returns Maximum flat work group size (body not visible in this excerpt).
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
201 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
202 unsigned FlatWorkGroupSize) {
203 return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
204 getWavefrontSize(Features);
// NOTE(review): several return statements in the SGPR functions below are
// not visible in this excerpt -- confirm the concrete values upstream.

/// \returns SGPR allocation granularity; coarser on ISA major version >= 8.
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns SGPR encoding granularity.
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {

/// \returns Total number of SGPRs in the register file.
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns Number of SGPRs a single wave may actually address.
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  // Hardware with the SGPR-init bug is limited to a fixed SGPR count.
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)

/// \returns Minimum number of SGPRs that still permits \p WavesPerEU waves
/// per EU, clamped to the addressable range.
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))

  // Smallest granule-aligned count that rules out fitting one more wave.
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
/// \returns Maximum number of SGPRs usable at \p WavesPerEU waves per EU.
// NOTE(review): the parameter line declaring "bool Addressable" is not
// visible in this excerpt, but the flag is used below -- confirm the full
// signature upstream.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  // Split the SGPR file evenly across the requested occupancy, rounded down
  // to the allocation granule.
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  // On GFX8+, when the caller does not require the registers to be
  // addressable, the cap is raised to 112.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
/// \returns VGPR allocation granularity (body not visible in this excerpt).
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {

/// \returns VGPR encoding granularity; identical to the allocation granule.
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);

/// \returns Total number of VGPRs (body not visible in this excerpt).
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {

/// \returns Addressable VGPR count; every VGPR in the file is addressable.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
/// \returns Minimum number of VGPRs that still permits \p WavesPerEU waves
/// per EU, clamped to the addressable range.
// NOTE(review): the early return after the occupancy check is not visible in
// this excerpt.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))

  // Smallest granule-aligned count that rules out fitting one more wave.
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
286 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
287 assert(WavesPerEU != 0);
289 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
290 getVGPRAllocGranule(Features));
291 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
292 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
295 } // end namespace IsaInfo
297 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
298 const FeatureBitset &Features) {
299 IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
301 memset(&Header, 0, sizeof(Header));
303 Header.amd_kernel_code_version_major = 1;
304 Header.amd_kernel_code_version_minor = 1;
305 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
306 Header.amd_machine_version_major = ISA.Major;
307 Header.amd_machine_version_minor = ISA.Minor;
308 Header.amd_machine_version_stepping = ISA.Stepping;
309 Header.kernel_code_entry_byte_offset = sizeof(Header);
310 // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
311 Header.wavefront_size = 6;
313 // If the code object does not support indirect functions, then the value must
315 Header.call_convention = -1;
317 // These alignment values are specified in powers of two, so alignment =
318 // 2^n. The minimum alignment is 2^4 = 16.
319 Header.kernarg_segment_alignment = 4;
320 Header.group_segment_alignment = 4;
321 Header.private_segment_alignment = 4;
/// \returns The ELF section holding HSA code (agent-visible, writable,
/// allocated).
// NOTE(review): a flag line (and the closing brace) appears to be missing
// between SHF_WRITE and SHF_AMDGPU_HSA_AGENT in this excerpt -- confirm the
// full flag set upstream.
MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
332 MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
333 return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
334 ELF::SHF_ALLOC | ELF::SHF_WRITE |
335 ELF::SHF_AMDGPU_HSA_GLOBAL |
336 ELF::SHF_AMDGPU_HSA_AGENT);
339 MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
340 return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
341 ELF::SHF_ALLOC | ELF::SHF_WRITE |
342 ELF::SHF_AMDGPU_HSA_GLOBAL);
345 MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
346 return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
347 ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
348 ELF::SHF_AMDGPU_HSA_AGENT);
351 bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
352 return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
355 bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
356 return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
359 bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
360 return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
363 bool shouldEmitConstantsToTextSection(const Triple &TT) {
364 return TT.getOS() != Triple::AMDHSA;
/// \returns The integer value of the string function attribute \p Name, or
/// \p Default when the attribute is absent; emits a context error when the
/// value cannot be parsed.
// NOTE(review): the closing braces and final "return Result;" are not
// visible in this excerpt.
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    // getAsInteger returns true on failure.
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);

/// \returns A pair of integers parsed from the comma-separated string value
/// of function attribute \p Name, or \p Default on failure.  When
/// \p OnlyFirstRequired is set, an absent second value is not an error.
// NOTE(review): the "StringRef Name" parameter line, the early returns, and
// the closing braces are not visible in this excerpt.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
// NOTE(review): early returns and some local declarations in the functions
// below are not visible in this excerpt -- confirm against the full file.

/// \returns Mask of the Vmcnt field value: the low 4 bits, extended by the
/// two high bits on ISA major version >= 9.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;

/// \returns Mask of the Expcnt field value.
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;

/// \returns Mask of the Lgkmcnt field value.
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;

/// \returns Mask of every s_waitcnt field in its encoded position; the high
/// Vmcnt bits are included only on ISA major version >= 9.
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;

/// Extracts the Vmcnt value from an encoded s_waitcnt immediate; on ISA
/// major version >= 9 the high Vmcnt bits are folded in above the low bits.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
    unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)

    unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
448 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
449 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
452 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
453 return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
456 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
457 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
458 Vmcnt = decodeVmcnt(Version, Waitcnt);
459 Expcnt = decodeExpcnt(Version, Waitcnt);
460 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
// NOTE(review): the trailing parameter lines of the encode functions below,
// plus early returns and closing braces, are not visible in this excerpt.

/// Packs a Vmcnt value into \p Waitcnt; on ISA major version >= 9 the bits
/// above the low field go into the high Vmcnt field.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
    packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());

/// Packs an Expcnt value into \p Waitcnt.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());

/// Packs an Lgkmcnt value into \p Waitcnt.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());

/// \returns A fully-encoded s_waitcnt immediate built from the three counts;
/// the starting value is the full wait mask, so untouched bits remain set.
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
493 unsigned getInitialPSInputAddr(const Function &F) {
494 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
/// \returns True for the AMDGPU graphics-shader calling conventions
/// (VS/GS/PS/CS).
// NOTE(review): the switch header, return statements, and default case are
// not visible in this excerpt.
bool isShader(CallingConv::ID cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:

/// \returns True for any non-shader convention, plus AMDGPU_CS (compute
/// shaders count as compute).
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;

/// \returns True if \p CC is an entry-point calling convention (body not
/// visible in this excerpt).
bool isEntryFunctionCC(CallingConv::ID CC) {
517 bool isSI(const MCSubtargetInfo &STI) {
518 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
521 bool isCI(const MCSubtargetInfo &STI) {
522 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
525 bool isVI(const MCSubtargetInfo &STI) {
526 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
/// Maps a pseudo register to the concrete hardware (MC) register for the
/// current subtarget; FLAT_SCR and its halves are encoded differently on CI
/// versus VI.
// NOTE(review): the switch headers, any asserts, the default/fall-through
// returns, and closing braces are not visible in this excerpt.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {

  case AMDGPU::FLAT_SCR:
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;

/// Inverse of getMCReg: maps a subtarget-specific MC register back to its
/// target-independent pseudo register.
unsigned mc2PseudoReg(unsigned Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;
567 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
568 assert(OpNo < Desc.NumOperands);
569 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
570 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
571 OpType <= AMDGPU::OPERAND_SRC_LAST;
/// \returns True if operand \p OpNo of \p Desc is a floating-point source
/// operand (immediate or inline-constant FP variants).
// NOTE(review): the switch header, return statements, and default case are
// not visible in this excerpt.
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
591 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
592 assert(OpNo < Desc.NumOperands);
593 unsigned OpType = Desc.OpInfo[OpNo].OperandType;
594 return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
595 OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns The register width in bits for register class \p RCID.
// NOTE(review): the switch header and the per-group return statements (the
// actual bit widths) are not visible in this excerpt.
unsigned getRegBitWidth(unsigned RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:

  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:

  case AMDGPU::VReg_96RegClassID:

  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:

  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:

  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:

  // Reaching here with a class not listed above is a programming error.
  llvm_unreachable("Unexpected register class");
/// \returns The register width in bits of register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());

/// \returns The size in bytes of the register operand's register class.
// NOTE(review): the parameter line declaring "unsigned OpNo" and the closing
// braces are not visible in this excerpt.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
/// \returns True if \p Literal is an inlinable 64-bit immediate: a small
/// integer in [-16, 64], or the bit pattern of one of the special doubles
/// (0.0, +-0.5, +-1.0, +-2.0, +-4.0, and 1/(2*pi) when \p HasInv2Pi).
// NOTE(review): the "return true;" after the integer range check is not
// visible in this excerpt.
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi)

/// \returns True if \p Literal is an inlinable 32-bit immediate: a small
/// integer in [-16, 64], or the bit pattern of one of the special floats.
// NOTE(review): the "return true;" after the integer range check is not
// visible in this excerpt.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)
/// \returns True if \p Literal is an inlinable 16-bit immediate: a small
/// integer in [-16, 64], or one of the listed half-precision bit patterns.
// NOTE(review): the lines between the signature and the range check, the
// "return true;", and the closing brace are not visible in this excerpt --
// note in particular that \p HasInv2Pi is not consulted in the visible code.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {

  if (Literal >= -16 && Literal <= 64)

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118; // 1/2pi

/// \returns True if \p Literal packs two identical halves, each of which is
/// an inlinable 16-bit immediate.
// NOTE(review): lines between the signature and the first local are not
// visible in this excerpt.
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
/// \returns True if the memory operand's pointer is known wave-uniform:
/// null/undef/argument/constant/global pointers, or instructions tagged with
/// "amdgpu.uniform" metadata.
// NOTE(review): the "return true;" taken by the first check and the closing
// brace are not visible in this excerpt.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");

/// \returns \p ByteOffset converted to the SMRD instruction's offset
/// encoding: SI/CI encode in dword units (bytes / 4).
// NOTE(review): the non-SI/CI return path and closing brace are not visible
// in this excerpt.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;
732 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
733 int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
734 return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
735 isUInt<20>(EncodedOffset);
737 } // end namespace AMDGPU
739 } // end namespace llvm
// Out-of-line, namespace-scope definitions for the static members of
// AMDGPUAS declared in the header.  Before C++17, any ODR-used static const
// member needs exactly one such definition or the program fails to link.
const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
/// \returns The address-space mapping selected by the triple's environment:
/// "amdgiz"/"amdgizcl" use one private/region numbering, everything else the
/// other.
// NOTE(review): the local AMDGPUAS declaration, the else-branch header, and
// the return statement are not visible in this excerpt.
AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();

  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;

    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;

/// Convenience overload: derive the mapping from a TargetMachine's triple.
AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());

/// Convenience overload: derive the mapping from a Module's target triple.
AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
790 } // namespace AMDGPU