lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

   1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "AMDGPUBaseInfo.h"
  11 #include "AMDGPU.h"
  12 #include "SIDefines.h"
  13 #include "llvm/ADT/StringRef.h"
  14 #include "llvm/ADT/Triple.h"
  15 #include "llvm/BinaryFormat/ELF.h"
  16 #include "llvm/CodeGen/MachineMemOperand.h"
  17 #include "llvm/IR/Attributes.h"
  18 #include "llvm/IR/Constants.h"
  19 #include "llvm/IR/Function.h"
  20 #include "llvm/IR/GlobalValue.h"
  21 #include "llvm/IR/Instruction.h"
  22 #include "llvm/IR/LLVMContext.h"
  23 #include "llvm/IR/Module.h"
  24 #include "llvm/MC/MCContext.h"
  25 #include "llvm/MC/MCInstrDesc.h"
  26 #include "llvm/MC/MCInstrInfo.h"
  27 #include "llvm/MC/MCRegisterInfo.h"
  28 #include "llvm/MC/MCSectionELF.h"
  29 #include "llvm/MC/MCSubtargetInfo.h"
  30 #include "llvm/MC/SubtargetFeature.h"
  31 #include "llvm/Support/Casting.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include <algorithm>
  35 #include <cassert>
  36 #include <cstdint>
  37 #include <cstring>
  38 #include <utility>
  39
  40 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  41
  42 #define GET_INSTRINFO_NAMED_OPS
  43 #define GET_INSTRMAP_INFO
  44 #include "AMDGPUGenInstrInfo.inc"
  45 #undef GET_INSTRMAP_INFO
  46 #undef GET_INSTRINFO_NAMED_OPS
  47
  48 namespace {
  49
  50 /// \returns Bit mask for given bit \p Shift and bit \p Width.
  51 unsigned getBitMask(unsigned Shift, unsigned Width) {
  52   return ((1 << Width) - 1) << Shift;
  53 }
  54
  55 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
  56 ///
  57 /// \returns Packed \p Dst.
  58 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  59   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  60   Dst |= (Src << Shift) & getBitMask(Shift, Width);
  61   return Dst;
  62 }
  63
  64 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
  65 ///
  66 /// \returns Unpacked bits.
  67 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  68   return (Src & getBitMask(Shift, Width)) >> Shift;
  69 }
  70
  71 /// \returns Vmcnt bit shift (lower bits).
  72 unsigned getVmcntBitShiftLo() { return 0; }
  73
  74 /// \returns Vmcnt bit width (lower bits).
  75 unsigned getVmcntBitWidthLo() { return 4; }
  76
  77 /// \returns Expcnt bit shift.
  78 unsigned getExpcntBitShift() { return 4; }
  79
  80 /// \returns Expcnt bit width.
  81 unsigned getExpcntBitWidth() { return 3; }
  82
  83 /// \returns Lgkmcnt bit shift.
  84 unsigned getLgkmcntBitShift() { return 8; }
  85
  86 /// \returns Lgkmcnt bit width.
  87 unsigned getLgkmcntBitWidth() { return 4; }
  88
  89 /// \returns Vmcnt bit shift (higher bits).
  90 unsigned getVmcntBitShiftHi() { return 14; }
  91
  92 /// \returns Vmcnt bit width (higher bits).
  93 unsigned getVmcntBitWidthHi() { return 2; }
  94
  95 } // end namespace anonymous
  96
  97 namespace llvm {
  98
  99 static cl::opt<bool> EnablePackedInlinableLiterals(
 100     "enable-packed-inlinable-literals",
 101     cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
 102     cl::init(false));
 103
 104 namespace AMDGPU {
 105
 106 LLVM_READNONE
 107 static inline Channels indexToChannel(unsigned Channel) {
 108   switch (Channel) {
 109   case 1:
 110     return AMDGPU::Channels_1;
 111   case 2:
 112     return AMDGPU::Channels_2;
 113   case 3:
 114     return AMDGPU::Channels_3;
 115   case 4:
 116     return AMDGPU::Channels_4;
 117   default:
 118     llvm_unreachable("invalid MIMG channel");
 119   }
 120 }
 121
 122
 123 // FIXME: Need to handle d16 images correctly.
 124 static unsigned rcToChannels(unsigned RCID) {
 125   switch (RCID) {
 126   case AMDGPU::VGPR_32RegClassID:
 127     return 1;
 128   case AMDGPU::VReg_64RegClassID:
 129     return 2;
 130   case AMDGPU::VReg_96RegClassID:
 131     return 3;
 132   case AMDGPU::VReg_128RegClassID:
 133     return 4;
 134   default:
 135     llvm_unreachable("invalid MIMG register class");
 136   }
 137 }
 138
 139 int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
 140   AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
 141   unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
 142   if (NewChannels == OrigChannels)
 143     return Opc;
 144
 145   switch (OrigChannels) {
 146   case 1:
 147     return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
 148   case 2:
 149     return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
 150   case 3:
 151     return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
 152   case 4:
 153     return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
 154   default:
 155     llvm_unreachable("invalid MIMG channel");
 156   }
 157 }
 158
 159 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
 160 // header files, so we need to wrap it in a function that takes unsigned
 161 // instead.
 162 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
 163   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
 164 }
 165
 166 namespace IsaInfo {
 167
 168 IsaVersion getIsaVersion(const FeatureBitset &Features) {
 169   // GCN GFX6 (Southern Islands (SI)).
 170   if (Features.test(FeatureISAVersion6_0_0))
 171     return {6, 0, 0};
 172   if (Features.test(FeatureISAVersion6_0_1))
 173     return {6, 0, 1};
 174
 175   // GCN GFX7 (Sea Islands (CI)).
 176   if (Features.test(FeatureISAVersion7_0_0))
 177     return {7, 0, 0};
 178   if (Features.test(FeatureISAVersion7_0_1))
 179     return {7, 0, 1};
 180   if (Features.test(FeatureISAVersion7_0_2))
 181     return {7, 0, 2};
 182   if (Features.test(FeatureISAVersion7_0_3))
 183     return {7, 0, 3};
 184   if (Features.test(FeatureISAVersion7_0_4))
 185     return {7, 0, 4};
 186
 187   // GCN GFX8 (Volcanic Islands (VI)).
 188   if (Features.test(FeatureISAVersion8_0_0))
 189     return {8, 0, 0};
 190   if (Features.test(FeatureISAVersion8_0_1))
 191     return {8, 0, 1};
 192   if (Features.test(FeatureISAVersion8_0_2))
 193     return {8, 0, 2};
 194   if (Features.test(FeatureISAVersion8_0_3))
 195     return {8, 0, 3};
 196   if (Features.test(FeatureISAVersion8_1_0))
 197     return {8, 1, 0};
 198
 199   // GCN GFX9.
 200   if (Features.test(FeatureISAVersion9_0_0))
 201     return {9, 0, 0};
 202   if (Features.test(FeatureISAVersion9_0_2))
 203     return {9, 0, 2};
 204
 205   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
 206     return {0, 0, 0};
 207   return {7, 0, 0};
 208 }
 209
 210 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
 211   auto TargetTriple = STI->getTargetTriple();
 212   auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
 213
 214   Stream << TargetTriple.getArchName() << '-'
 215          << TargetTriple.getVendorName() << '-'
 216          << TargetTriple.getOSName() << '-'
 217          << TargetTriple.getEnvironmentName() << '-'
 218          << "gfx"
 219          << ISAVersion.Major
 220          << ISAVersion.Minor
 221          << ISAVersion.Stepping;
 222   Stream.flush();
 223 }
 224
 225 bool hasCodeObjectV3(const FeatureBitset &Features) {
 226   return Features.test(FeatureCodeObjectV3);
 227 }
 228
 229 unsigned getWavefrontSize(const FeatureBitset &Features) {
 230   if (Features.test(FeatureWavefrontSize16))
 231     return 16;
 232   if (Features.test(FeatureWavefrontSize32))
 233     return 32;
 234
 235   return 64;
 236 }
 237
 238 unsigned getLocalMemorySize(const FeatureBitset &Features) {
 239   if (Features.test(FeatureLocalMemorySize32768))
 240     return 32768;
 241   if (Features.test(FeatureLocalMemorySize65536))
 242     return 65536;
 243
 244   return 0;
 245 }
 246
 247 unsigned getEUsPerCU(const FeatureBitset &Features) {
 248   return 4;
 249 }
 250
 251 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
 252                                unsigned FlatWorkGroupSize) {
 253   if (!Features.test(FeatureGCN))
 254     return 8;
 255   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 256   if (N == 1)
 257     return 40;
 258   N = 40 / N;
 259   return std::min(N, 16u);
 260 }
 261
 262 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
 263   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
 264 }
 265
 266 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
 267                           unsigned FlatWorkGroupSize) {
 268   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 269 }
 270
 271 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
 272   return 1;
 273 }
 274
 275 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
 276   if (!Features.test(FeatureGCN))
 277     return 8;
 278   // FIXME: Need to take scratch memory into account.
 279   return 10;
 280 }
 281
 282 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 283                           unsigned FlatWorkGroupSize) {
 284   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
 285                  getEUsPerCU(Features)) / getEUsPerCU(Features);
 286 }
 287
 288 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
 289   return 1;
 290 }
 291
 292 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
 293   return 2048;
 294 }
 295
 296 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 297                               unsigned FlatWorkGroupSize) {
 298   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
 299                  getWavefrontSize(Features);
 300 }
 301
 302 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
 303   IsaVersion Version = getIsaVersion(Features);
 304   if (Version.Major >= 8)
 305     return 16;
 306   return 8;
 307 }
 308
 309 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
 310   return 8;
 311 }
 312
 313 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
 314   IsaVersion Version = getIsaVersion(Features);
 315   if (Version.Major >= 8)
 316     return 800;
 317   return 512;
 318 }
 319
 320 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
 321   if (Features.test(FeatureSGPRInitBug))
 322     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 323
 324   IsaVersion Version = getIsaVersion(Features);
 325   if (Version.Major >= 8)
 326     return 102;
 327   return 104;
 328 }
 329
 330 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 331   assert(WavesPerEU != 0);
 332
 333   if (WavesPerEU >= getMaxWavesPerEU(Features))
 334     return 0;
 335   unsigned MinNumSGPRs =
 336       alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
 337                 getSGPRAllocGranule(Features)) + 1;
 338   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
 339 }
 340
 341 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 342                         bool Addressable) {
 343   assert(WavesPerEU != 0);
 344
 345   IsaVersion Version = getIsaVersion(Features);
 346   unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
 347                                    getSGPRAllocGranule(Features));
 348   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
 349   if (Version.Major >= 8 && !Addressable)
 350     AddressableNumSGPRs = 112;
 351   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
 352 }
 353
 354 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
 355   return 4;
 356 }
 357
 358 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
 359   return getVGPRAllocGranule(Features);
 360 }
 361
 362 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
 363   return 256;
 364 }
 365
 366 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
 367   return getTotalNumVGPRs(Features);
 368 }
 369
 370 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 371   assert(WavesPerEU != 0);
 372
 373   if (WavesPerEU >= getMaxWavesPerEU(Features))
 374     return 0;
 375   unsigned MinNumVGPRs =
 376       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
 377                 getVGPRAllocGranule(Features)) + 1;
 378   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
 379 }
 380
 381 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 382   assert(WavesPerEU != 0);
 383
 384   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
 385                                    getVGPRAllocGranule(Features));
 386   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
 387   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 388 }
 389
 390 } // end namespace IsaInfo
 391
 392 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 393                                const FeatureBitset &Features) {
 394   IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
 395
 396   memset(&Header, 0, sizeof(Header));
 397
 398   Header.amd_kernel_code_version_major = 1;
 399   Header.amd_kernel_code_version_minor = 1;
 400   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
 401   Header.amd_machine_version_major = ISA.Major;
 402   Header.amd_machine_version_minor = ISA.Minor;
 403   Header.amd_machine_version_stepping = ISA.Stepping;
 404   Header.kernel_code_entry_byte_offset = sizeof(Header);
 405   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
 406   Header.wavefront_size = 6;
 407
 408   // If the code object does not support indirect functions, then the value must
 409   // be 0xffffffff.
 410   Header.call_convention = -1;
 411
 412   // These alignment values are specified in powers of two, so alignment =
 413   // 2^n.  The minimum alignment is 2^4 = 16.
 414   Header.kernarg_segment_alignment = 4;
 415   Header.group_segment_alignment = 4;
 416   Header.private_segment_alignment = 4;
 417 }
 418
 419 bool isGroupSegment(const GlobalValue *GV) {
 420   return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 421 }
 422
 423 bool isGlobalSegment(const GlobalValue *GV) {
 424   return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 425 }
 426
 427 bool isReadOnlySegment(const GlobalValue *GV) {
 428   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 429 }
 430
 431 bool shouldEmitConstantsToTextSection(const Triple &TT) {
 432   return TT.getOS() != Triple::AMDHSA;
 433 }
 434
 435 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
 436   Attribute A = F.getFnAttribute(Name);
 437   int Result = Default;
 438
 439   if (A.isStringAttribute()) {
 440     StringRef Str = A.getValueAsString();
 441     if (Str.getAsInteger(0, Result)) {
 442       LLVMContext &Ctx = F.getContext();
 443       Ctx.emitError("can't parse integer attribute " + Name);
 444     }
 445   }
 446
 447   return Result;
 448 }
 449
 450 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 451                                             StringRef Name,
 452                                             std::pair<int, int> Default,
 453                                             bool OnlyFirstRequired) {
 454   Attribute A = F.getFnAttribute(Name);
 455   if (!A.isStringAttribute())
 456     return Default;
 457
 458   LLVMContext &Ctx = F.getContext();
 459   std::pair<int, int> Ints = Default;
 460   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
 461   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
 462     Ctx.emitError("can't parse first integer attribute " + Name);
 463     return Default;
 464   }
 465   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
 466     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
 467       Ctx.emitError("can't parse second integer attribute " + Name);
 468       return Default;
 469     }
 470   }
 471
 472   return Ints;
 473 }
 474
 475 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
 476   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
 477   if (Version.Major < 9)
 478     return VmcntLo;
 479
 480   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
 481   return VmcntLo | VmcntHi;
 482 }
 483
 484 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
 485   return (1 << getExpcntBitWidth()) - 1;
 486 }
 487
 488 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
 489   return (1 << getLgkmcntBitWidth()) - 1;
 490 }
 491
 492 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
 493   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
 494   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
 495   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
 496   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
 497   if (Version.Major < 9)
 498     return Waitcnt;
 499
 500   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
 501   return Waitcnt | VmcntHi;
 502 }
 503
 504 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 505   unsigned VmcntLo =
 506       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 507   if (Version.Major < 9)
 508     return VmcntLo;
 509
 510   unsigned VmcntHi =
 511       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 512   VmcntHi <<= getVmcntBitWidthLo();
 513   return VmcntLo | VmcntHi;
 514 }
 515
 516 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 517   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 518 }
 519
 520 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 521   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 522 }
 523
 524 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 525                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
 526   Vmcnt = decodeVmcnt(Version, Waitcnt);
 527   Expcnt = decodeExpcnt(Version, Waitcnt);
 528   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
 529 }
 530
 531 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 532                      unsigned Vmcnt) {
 533   Waitcnt =
 534       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 535   if (Version.Major < 9)
 536     return Waitcnt;
 537
 538   Vmcnt >>= getVmcntBitWidthLo();
 539   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 540 }
 541
 542 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 543                       unsigned Expcnt) {
 544   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 545 }
 546
 547 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 548                        unsigned Lgkmcnt) {
 549   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 550 }
 551
 552 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 553                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
 554   unsigned Waitcnt = getWaitcntBitMask(Version);
 555   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
 556   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
 557   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
 558   return Waitcnt;
 559 }
 560
 561 unsigned getInitialPSInputAddr(const Function &F) {
 562   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 563 }
 564
 565 bool isShader(CallingConv::ID cc) {
 566   switch(cc) {
 567     case CallingConv::AMDGPU_VS:
 568     case CallingConv::AMDGPU_LS:
 569     case CallingConv::AMDGPU_HS:
 570     case CallingConv::AMDGPU_ES:
 571     case CallingConv::AMDGPU_GS:
 572     case CallingConv::AMDGPU_PS:
 573     case CallingConv::AMDGPU_CS:
 574       return true;
 575     default:
 576       return false;
 577   }
 578 }
 579
 580 bool isCompute(CallingConv::ID cc) {
 581   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
 582 }
 583
 584 bool isEntryFunctionCC(CallingConv::ID CC) {
 585   switch (CC) {
 586   case CallingConv::AMDGPU_KERNEL:
 587   case CallingConv::SPIR_KERNEL:
 588   case CallingConv::AMDGPU_VS:
 589   case CallingConv::AMDGPU_GS:
 590   case CallingConv::AMDGPU_PS:
 591   case CallingConv::AMDGPU_CS:
 592   case CallingConv::AMDGPU_ES:
 593   case CallingConv::AMDGPU_HS:
 594   case CallingConv::AMDGPU_LS:
 595     return true;
 596   default:
 597     return false;
 598   }
 599 }
 600
 601 bool isSI(const MCSubtargetInfo &STI) {
 602   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
 603 }
 604
 605 bool isCI(const MCSubtargetInfo &STI) {
 606   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
 607 }
 608
 609 bool isVI(const MCSubtargetInfo &STI) {
 610   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
 611 }
 612
 613 bool isGFX9(const MCSubtargetInfo &STI) {
 614   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 615 }
 616
 617 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
 618   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 619 }
 620
 621 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
 622   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
 623   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
 624   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
 625     Reg == AMDGPU::SCC;
 626 }
 627
 628 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
 629   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
 630     if (*R == Reg1) return true;
 631   }
 632   return false;
 633 }
 634
 635 #define MAP_REG2REG \
 636   using namespace AMDGPU; \
 637   switch(Reg) { \
 638   default: return Reg; \
 639   CASE_CI_VI(FLAT_SCR) \
 640   CASE_CI_VI(FLAT_SCR_LO) \
 641   CASE_CI_VI(FLAT_SCR_HI) \
 642   CASE_VI_GFX9(TTMP0) \
 643   CASE_VI_GFX9(TTMP1) \
 644   CASE_VI_GFX9(TTMP2) \
 645   CASE_VI_GFX9(TTMP3) \
 646   CASE_VI_GFX9(TTMP4) \
 647   CASE_VI_GFX9(TTMP5) \
 648   CASE_VI_GFX9(TTMP6) \
 649   CASE_VI_GFX9(TTMP7) \
 650   CASE_VI_GFX9(TTMP8) \
 651   CASE_VI_GFX9(TTMP9) \
 652   CASE_VI_GFX9(TTMP10) \
 653   CASE_VI_GFX9(TTMP11) \
 654   CASE_VI_GFX9(TTMP12) \
 655   CASE_VI_GFX9(TTMP13) \
 656   CASE_VI_GFX9(TTMP14) \
 657   CASE_VI_GFX9(TTMP15) \
 658   CASE_VI_GFX9(TTMP0_TTMP1) \
 659   CASE_VI_GFX9(TTMP2_TTMP3) \
 660   CASE_VI_GFX9(TTMP4_TTMP5) \
 661   CASE_VI_GFX9(TTMP6_TTMP7) \
 662   CASE_VI_GFX9(TTMP8_TTMP9) \
 663   CASE_VI_GFX9(TTMP10_TTMP11) \
 664   CASE_VI_GFX9(TTMP12_TTMP13) \
 665   CASE_VI_GFX9(TTMP14_TTMP15) \
 666   CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
 667   CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
 668   CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
 669   CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
 670   }
 671
 672 #define CASE_CI_VI(node) \
 673   assert(!isSI(STI)); \
 674   case node: return isCI(STI) ? node##_ci : node##_vi;
 675
 676 #define CASE_VI_GFX9(node) \
 677   case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
 678
 679 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 680   MAP_REG2REG
 681 }
 682
 683 #undef CASE_CI_VI
 684 #undef CASE_VI_GFX9
 685
 686 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
 687 #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
 688
 689 unsigned mc2PseudoReg(unsigned Reg) {
 690   MAP_REG2REG
 691 }
 692
 693 #undef CASE_CI_VI
 694 #undef CASE_VI_GFX9
 695 #undef MAP_REG2REG
 696
 697 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 698   assert(OpNo < Desc.NumOperands);
 699   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 700   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
 701          OpType <= AMDGPU::OPERAND_SRC_LAST;
 702 }
 703
 704 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 705   assert(OpNo < Desc.NumOperands);
 706   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 707   switch (OpType) {
 708   case AMDGPU::OPERAND_REG_IMM_FP32:
 709   case AMDGPU::OPERAND_REG_IMM_FP64:
 710   case AMDGPU::OPERAND_REG_IMM_FP16:
 711   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 712   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 713   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 714   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 715     return true;
 716   default:
 717     return false;
 718   }
 719 }
 720
 721 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 722   assert(OpNo < Desc.NumOperands);
 723   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 724   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
 725          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
 726 }
 727
 728 // Avoid using MCRegisterClass::getSize, since that function will go away
 729 // (move from MC* level to Target* level). Return size in bits.
 730 unsigned getRegBitWidth(unsigned RCID) {
 731   switch (RCID) {
 732   case AMDGPU::SGPR_32RegClassID:
 733   case AMDGPU::VGPR_32RegClassID:
 734   case AMDGPU::VS_32RegClassID:
 735   case AMDGPU::SReg_32RegClassID:
 736   case AMDGPU::SReg_32_XM0RegClassID:
 737     return 32;
 738   case AMDGPU::SGPR_64RegClassID:
 739   case AMDGPU::VS_64RegClassID:
 740   case AMDGPU::SReg_64RegClassID:
 741   case AMDGPU::VReg_64RegClassID:
 742     return 64;
 743   case AMDGPU::VReg_96RegClassID:
 744     return 96;
 745   case AMDGPU::SGPR_128RegClassID:
 746   case AMDGPU::SReg_128RegClassID:
 747   case AMDGPU::VReg_128RegClassID:
 748     return 128;
 749   case AMDGPU::SReg_256RegClassID:
 750   case AMDGPU::VReg_256RegClassID:
 751     return 256;
 752   case AMDGPU::SReg_512RegClassID:
 753   case AMDGPU::VReg_512RegClassID:
 754     return 512;
 755   default:
 756     llvm_unreachable("Unexpected register class");
 757   }
 758 }
 759
 760 unsigned getRegBitWidth(const MCRegisterClass &RC) {
 761   return getRegBitWidth(RC.getID());
 762 }
 763
 764 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 765                            unsigned OpNo) {
 766   assert(OpNo < Desc.NumOperands);
 767   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
 768   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
 769 }
 770
 771 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 772   if (Literal >= -16 && Literal <= 64)
 773     return true;
 774
 775   uint64_t Val = static_cast<uint64_t>(Literal);
 776   return (Val == DoubleToBits(0.0)) ||
 777          (Val == DoubleToBits(1.0)) ||
 778          (Val == DoubleToBits(-1.0)) ||
 779          (Val == DoubleToBits(0.5)) ||
 780          (Val == DoubleToBits(-0.5)) ||
 781          (Val == DoubleToBits(2.0)) ||
 782          (Val == DoubleToBits(-2.0)) ||
 783          (Val == DoubleToBits(4.0)) ||
 784          (Val == DoubleToBits(-4.0)) ||
 785          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
 786 }
 787
 788 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
 789   if (Literal >= -16 && Literal <= 64)
 790     return true;
 791
 792   // The actual type of the operand does not seem to matter as long
 793   // as the bits match one of the inline immediate values.  For example:
 794   //
 795   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
 796   // so it is a legal inline immediate.
 797   //
 798   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
 799   // floating-point, so it is a legal inline immediate.
 800
 801   uint32_t Val = static_cast<uint32_t>(Literal);
 802   return (Val == FloatToBits(0.0f)) ||
 803          (Val == FloatToBits(1.0f)) ||
 804          (Val == FloatToBits(-1.0f)) ||
 805          (Val == FloatToBits(0.5f)) ||
 806          (Val == FloatToBits(-0.5f)) ||
 807          (Val == FloatToBits(2.0f)) ||
 808          (Val == FloatToBits(-2.0f)) ||
 809          (Val == FloatToBits(4.0f)) ||
 810          (Val == FloatToBits(-4.0f)) ||
 811          (Val == 0x3e22f983 && HasInv2Pi);
 812 }
 813
 814 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 815   if (!HasInv2Pi)
 816     return false;
 817
 818   if (Literal >= -16 && Literal <= 64)
 819     return true;
 820
 821   uint16_t Val = static_cast<uint16_t>(Literal);
 822   return Val == 0x3C00 || // 1.0
 823          Val == 0xBC00 || // -1.0
 824          Val == 0x3800 || // 0.5
 825          Val == 0xB800 || // -0.5
 826          Val == 0x4000 || // 2.0
 827          Val == 0xC000 || // -2.0
 828          Val == 0x4400 || // 4.0
 829          Val == 0xC400 || // -4.0
 830          Val == 0x3118;   // 1/2pi
 831 }
 832
 833 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
 834   assert(HasInv2Pi);
 835
 836   if (!EnablePackedInlinableLiterals)
 837     return false;
 838
 839   int16_t Lo16 = static_cast<int16_t>(Literal);
 840   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
 841   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 842 }
 843
 844 bool isArgPassedInSGPR(const Argument *A) {
 845   const Function *F = A->getParent();
 846
 847   // Arguments to compute shaders are never a source of divergence.
 848   CallingConv::ID CC = F->getCallingConv();
 849   switch (CC) {
 850   case CallingConv::AMDGPU_KERNEL:
 851   case CallingConv::SPIR_KERNEL:
 852     return true;
 853   case CallingConv::AMDGPU_VS:
 854   case CallingConv::AMDGPU_LS:
 855   case CallingConv::AMDGPU_HS:
 856   case CallingConv::AMDGPU_ES:
 857   case CallingConv::AMDGPU_GS:
 858   case CallingConv::AMDGPU_PS:
 859   case CallingConv::AMDGPU_CS:
 860     // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
 861     // Everything else is in VGPRs.
 862     return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
 863            F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
 864   default:
 865     // TODO: Should calls support inreg for SGPR inputs?
 866     return false;
 867   }
 868 }
 869
 870 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
 871 bool isUniformMMO(const MachineMemOperand *MMO) {
 872   const Value *Ptr = MMO->getValue();
 873   // UndefValue means this is a load of a kernel input.  These are uniform.
 874   // Sometimes LDS instructions have constant pointers.
 875   // If Ptr is null, then that means this mem operand contains a
 876   // PseudoSourceValue like GOT.
 877   if (!Ptr || isa<UndefValue>(Ptr) ||
 878       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
 879     return true;
 880
 881   if (const Argument *Arg = dyn_cast<Argument>(Ptr))
 882     return isArgPassedInSGPR(Arg);
 883
 884   const Instruction *I = dyn_cast<Instruction>(Ptr);
 885   return I && I->getMetadata("amdgpu.uniform");
 886 }
 887
 888 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 889   if (isGCN3Encoding(ST))
 890     return ByteOffset;
 891   return ByteOffset >> 2;
 892 }
 893
 894 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 895   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
 896   return isGCN3Encoding(ST) ?
 897     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 898 }
 899
 900 } // end namespace AMDGPU
 901
 902 } // end namespace llvm
 903
 904 namespace llvm {
 905 namespace AMDGPU {
 906
 907 AMDGPUAS getAMDGPUAS(Triple T) {
 908   auto Env = T.getEnvironmentName();
 909   AMDGPUAS AS;
 910   if (Env == "amdgiz" || Env == "amdgizcl") {
 911     AS.FLAT_ADDRESS     = 0;
 912     AS.PRIVATE_ADDRESS  = 5;
 913     AS.REGION_ADDRESS   = 4;
 914   }
 915   else {
 916     AS.FLAT_ADDRESS     = 4;
 917     AS.PRIVATE_ADDRESS  = 0;
 918     AS.REGION_ADDRESS   = 5;
 919    }
 920   return AS;
 921 }
 922
 923 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
 924   return getAMDGPUAS(M.getTargetTriple());
 925 }
 926
 927 AMDGPUAS getAMDGPUAS(const Module &M) {
 928   return getAMDGPUAS(Triple(M.getTargetTriple()));
 929 }
 930 } // namespace AMDGPU
 931 } // namespace llvm