contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

   1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "AMDGPUBaseInfo.h"
  11 #include "AMDGPU.h"
  12 #include "SIDefines.h"
  13 #include "llvm/ADT/StringRef.h"
  14 #include "llvm/ADT/Triple.h"
  15 #include "llvm/BinaryFormat/ELF.h"
  16 #include "llvm/CodeGen/MachineMemOperand.h"
  17 #include "llvm/IR/Attributes.h"
  18 #include "llvm/IR/Constants.h"
  19 #include "llvm/IR/Function.h"
  20 #include "llvm/IR/GlobalValue.h"
  21 #include "llvm/IR/Instruction.h"
  22 #include "llvm/IR/LLVMContext.h"
  23 #include "llvm/IR/Module.h"
  24 #include "llvm/MC/MCContext.h"
  25 #include "llvm/MC/MCInstrDesc.h"
  26 #include "llvm/MC/MCInstrInfo.h"
  27 #include "llvm/MC/MCRegisterInfo.h"
  28 #include "llvm/MC/MCSectionELF.h"
  29 #include "llvm/MC/MCSubtargetInfo.h"
  30 #include "llvm/MC/SubtargetFeature.h"
  31 #include "llvm/Support/Casting.h"
  32 #include "llvm/Support/ErrorHandling.h"
  33 #include "llvm/Support/MathExtras.h"
  34 #include <algorithm>
  35 #include <cassert>
  36 #include <cstdint>
  37 #include <cstring>
  38 #include <utility>
  39
  40 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  41
  42 #define GET_INSTRINFO_NAMED_OPS
  43 #define GET_INSTRMAP_INFO
  44 #include "AMDGPUGenInstrInfo.inc"
  45 #undef GET_INSTRMAP_INFO
  46 #undef GET_INSTRINFO_NAMED_OPS
  47
  48 namespace {
  49
  50 /// \returns Bit mask for given bit \p Shift and bit \p Width.
  51 unsigned getBitMask(unsigned Shift, unsigned Width) {
  52   return ((1 << Width) - 1) << Shift;
  53 }
  54
  55 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
  56 ///
  57 /// \returns Packed \p Dst.
  58 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  59   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  60   Dst |= (Src << Shift) & getBitMask(Shift, Width);
  61   return Dst;
  62 }
  63
  64 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
  65 ///
  66 /// \returns Unpacked bits.
  67 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  68   return (Src & getBitMask(Shift, Width)) >> Shift;
  69 }
  70
  71 /// \returns Vmcnt bit shift (lower bits).
  72 unsigned getVmcntBitShiftLo() { return 0; }
  73
  74 /// \returns Vmcnt bit width (lower bits).
  75 unsigned getVmcntBitWidthLo() { return 4; }
  76
  77 /// \returns Expcnt bit shift.
  78 unsigned getExpcntBitShift() { return 4; }
  79
  80 /// \returns Expcnt bit width.
  81 unsigned getExpcntBitWidth() { return 3; }
  82
  83 /// \returns Lgkmcnt bit shift.
  84 unsigned getLgkmcntBitShift() { return 8; }
  85
  86 /// \returns Lgkmcnt bit width.
  87 unsigned getLgkmcntBitWidth() { return 4; }
  88
  89 /// \returns Vmcnt bit shift (higher bits).
  90 unsigned getVmcntBitShiftHi() { return 14; }
  91
  92 /// \returns Vmcnt bit width (higher bits).
  93 unsigned getVmcntBitWidthHi() { return 2; }
  94
  95 } // end namespace anonymous
  96
  97 namespace llvm {
  98
  99 static cl::opt<bool> EnablePackedInlinableLiterals(
 100     "enable-packed-inlinable-literals",
 101     cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
 102     cl::init(false));
 103
 104 namespace AMDGPU {
 105
 106 LLVM_READNONE
 107 static inline Channels indexToChannel(unsigned Channel) {
 108   switch (Channel) {
 109   case 1:
 110     return AMDGPU::Channels_1;
 111   case 2:
 112     return AMDGPU::Channels_2;
 113   case 3:
 114     return AMDGPU::Channels_3;
 115   case 4:
 116     return AMDGPU::Channels_4;
 117   default:
 118     llvm_unreachable("invalid MIMG channel");
 119   }
 120 }
 121
 122
 123 // FIXME: Need to handle d16 images correctly.
 124 static unsigned rcToChannels(unsigned RCID) {
 125   switch (RCID) {
 126   case AMDGPU::VGPR_32RegClassID:
 127     return 1;
 128   case AMDGPU::VReg_64RegClassID:
 129     return 2;
 130   case AMDGPU::VReg_96RegClassID:
 131     return 3;
 132   case AMDGPU::VReg_128RegClassID:
 133     return 4;
 134   default:
 135     llvm_unreachable("invalid MIMG register class");
 136   }
 137 }
 138
 139 int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
 140   AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
 141   unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
 142   if (NewChannels == OrigChannels)
 143     return Opc;
 144
 145   switch (OrigChannels) {
 146   case 1:
 147     return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
 148   case 2:
 149     return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
 150   case 3:
 151     return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
 152   case 4:
 153     return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
 154   default:
 155     llvm_unreachable("invalid MIMG channel");
 156   }
 157 }
 158
 159 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
 160 // header files, so we need to wrap it in a function that takes unsigned
 161 // instead.
 162 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
 163   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
 164 }
 165
 166 namespace IsaInfo {
 167
 168 IsaVersion getIsaVersion(const FeatureBitset &Features) {
 169   // GCN GFX6 (Southern Islands (SI)).
 170   if (Features.test(FeatureISAVersion6_0_0))
 171     return {6, 0, 0};
 172   if (Features.test(FeatureISAVersion6_0_1))
 173     return {6, 0, 1};
 174
 175   // GCN GFX7 (Sea Islands (CI)).
 176   if (Features.test(FeatureISAVersion7_0_0))
 177     return {7, 0, 0};
 178   if (Features.test(FeatureISAVersion7_0_1))
 179     return {7, 0, 1};
 180   if (Features.test(FeatureISAVersion7_0_2))
 181     return {7, 0, 2};
 182   if (Features.test(FeatureISAVersion7_0_3))
 183     return {7, 0, 3};
 184   if (Features.test(FeatureISAVersion7_0_4))
 185     return {7, 0, 4};
 186
 187   // GCN GFX8 (Volcanic Islands (VI)).
 188   if (Features.test(FeatureISAVersion8_0_0))
 189     return {8, 0, 0};
 190   if (Features.test(FeatureISAVersion8_0_1))
 191     return {8, 0, 1};
 192   if (Features.test(FeatureISAVersion8_0_2))
 193     return {8, 0, 2};
 194   if (Features.test(FeatureISAVersion8_0_3))
 195     return {8, 0, 3};
 196   if (Features.test(FeatureISAVersion8_1_0))
 197     return {8, 1, 0};
 198
 199   // GCN GFX9.
 200   if (Features.test(FeatureISAVersion9_0_0))
 201     return {9, 0, 0};
 202   if (Features.test(FeatureISAVersion9_0_2))
 203     return {9, 0, 2};
 204
 205   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
 206     return {0, 0, 0};
 207   return {7, 0, 0};
 208 }
 209
 210 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
 211   auto TargetTriple = STI->getTargetTriple();
 212   auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
 213
 214   Stream << TargetTriple.getArchName() << '-'
 215          << TargetTriple.getVendorName() << '-'
 216          << TargetTriple.getOSName() << '-'
 217          << TargetTriple.getEnvironmentName() << '-'
 218          << "gfx"
 219          << ISAVersion.Major
 220          << ISAVersion.Minor
 221          << ISAVersion.Stepping;
 222   Stream.flush();
 223 }
 224
 225 bool hasCodeObjectV3(const FeatureBitset &Features) {
 226   return Features.test(FeatureCodeObjectV3);
 227 }
 228
 229 unsigned getWavefrontSize(const FeatureBitset &Features) {
 230   if (Features.test(FeatureWavefrontSize16))
 231     return 16;
 232   if (Features.test(FeatureWavefrontSize32))
 233     return 32;
 234
 235   return 64;
 236 }
 237
 238 unsigned getLocalMemorySize(const FeatureBitset &Features) {
 239   if (Features.test(FeatureLocalMemorySize32768))
 240     return 32768;
 241   if (Features.test(FeatureLocalMemorySize65536))
 242     return 65536;
 243
 244   return 0;
 245 }
 246
 247 unsigned getEUsPerCU(const FeatureBitset &Features) {
 248   return 4;
 249 }
 250
 251 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
 252                                unsigned FlatWorkGroupSize) {
 253   if (!Features.test(FeatureGCN))
 254     return 8;
 255   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 256   if (N == 1)
 257     return 40;
 258   N = 40 / N;
 259   return std::min(N, 16u);
 260 }
 261
 262 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
 263   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
 264 }
 265
 266 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
 267                           unsigned FlatWorkGroupSize) {
 268   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 269 }
 270
 271 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
 272   return 1;
 273 }
 274
 275 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
 276   if (!Features.test(FeatureGCN))
 277     return 8;
 278   // FIXME: Need to take scratch memory into account.
 279   return 10;
 280 }
 281
 282 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 283                           unsigned FlatWorkGroupSize) {
 284   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
 285                  getEUsPerCU(Features)) / getEUsPerCU(Features);
 286 }
 287
 288 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
 289   return 1;
 290 }
 291
 292 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
 293   return 2048;
 294 }
 295
 296 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 297                               unsigned FlatWorkGroupSize) {
 298   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
 299                  getWavefrontSize(Features);
 300 }
 301
 302 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
 303   IsaVersion Version = getIsaVersion(Features);
 304   if (Version.Major >= 8)
 305     return 16;
 306   return 8;
 307 }
 308
 309 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
 310   return 8;
 311 }
 312
 313 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
 314   IsaVersion Version = getIsaVersion(Features);
 315   if (Version.Major >= 8)
 316     return 800;
 317   return 512;
 318 }
 319
 320 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
 321   if (Features.test(FeatureSGPRInitBug))
 322     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 323
 324   IsaVersion Version = getIsaVersion(Features);
 325   if (Version.Major >= 8)
 326     return 102;
 327   return 104;
 328 }
 329
 330 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 331   assert(WavesPerEU != 0);
 332
 333   if (WavesPerEU >= getMaxWavesPerEU(Features))
 334     return 0;
 335   unsigned MinNumSGPRs =
 336       alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
 337                 getSGPRAllocGranule(Features)) + 1;
 338   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
 339 }
 340
 341 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 342                         bool Addressable) {
 343   assert(WavesPerEU != 0);
 344
 345   IsaVersion Version = getIsaVersion(Features);
 346   unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
 347                                    getSGPRAllocGranule(Features));
 348   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
 349   if (Version.Major >= 8 && !Addressable)
 350     AddressableNumSGPRs = 112;
 351   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
 352 }
 353
 354 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
 355   return 4;
 356 }
 357
 358 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
 359   return getVGPRAllocGranule(Features);
 360 }
 361
 362 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
 363   return 256;
 364 }
 365
 366 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
 367   return getTotalNumVGPRs(Features);
 368 }
 369
 370 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 371   assert(WavesPerEU != 0);
 372
 373   if (WavesPerEU >= getMaxWavesPerEU(Features))
 374     return 0;
 375   unsigned MinNumVGPRs =
 376       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
 377                 getVGPRAllocGranule(Features)) + 1;
 378   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
 379 }
 380
 381 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 382   assert(WavesPerEU != 0);
 383
 384   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
 385                                    getVGPRAllocGranule(Features));
 386   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
 387   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 388 }
 389
 390 } // end namespace IsaInfo
 391
 392 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 393                                const FeatureBitset &Features) {
 394   IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
 395
 396   memset(&Header, 0, sizeof(Header));
 397
 398   Header.amd_kernel_code_version_major = 1;
 399   Header.amd_kernel_code_version_minor = 1;
 400   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
 401   Header.amd_machine_version_major = ISA.Major;
 402   Header.amd_machine_version_minor = ISA.Minor;
 403   Header.amd_machine_version_stepping = ISA.Stepping;
 404   Header.kernel_code_entry_byte_offset = sizeof(Header);
 405   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
 406   Header.wavefront_size = 6;
 407
 408   // If the code object does not support indirect functions, then the value must
 409   // be 0xffffffff.
 410   Header.call_convention = -1;
 411
 412   // These alignment values are specified in powers of two, so alignment =
 413   // 2^n.  The minimum alignment is 2^4 = 16.
 414   Header.kernarg_segment_alignment = 4;
 415   Header.group_segment_alignment = 4;
 416   Header.private_segment_alignment = 4;
 417 }
 418
 419 bool isGroupSegment(const GlobalValue *GV) {
 420   return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 421 }
 422
 423 bool isGlobalSegment(const GlobalValue *GV) {
 424   return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 425 }
 426
 427 bool isReadOnlySegment(const GlobalValue *GV) {
 428   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
 429 }
 430
 431 bool shouldEmitConstantsToTextSection(const Triple &TT) {
 432   return TT.getOS() != Triple::AMDHSA;
 433 }
 434
 435 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
 436   Attribute A = F.getFnAttribute(Name);
 437   int Result = Default;
 438
 439   if (A.isStringAttribute()) {
 440     StringRef Str = A.getValueAsString();
 441     if (Str.getAsInteger(0, Result)) {
 442       LLVMContext &Ctx = F.getContext();
 443       Ctx.emitError("can't parse integer attribute " + Name);
 444     }
 445   }
 446
 447   return Result;
 448 }
 449
 450 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 451                                             StringRef Name,
 452                                             std::pair<int, int> Default,
 453                                             bool OnlyFirstRequired) {
 454   Attribute A = F.getFnAttribute(Name);
 455   if (!A.isStringAttribute())
 456     return Default;
 457
 458   LLVMContext &Ctx = F.getContext();
 459   std::pair<int, int> Ints = Default;
 460   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
 461   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
 462     Ctx.emitError("can't parse first integer attribute " + Name);
 463     return Default;
 464   }
 465   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
 466     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
 467       Ctx.emitError("can't parse second integer attribute " + Name);
 468       return Default;
 469     }
 470   }
 471
 472   return Ints;
 473 }
 474
 475 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
 476   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
 477   if (Version.Major < 9)
 478     return VmcntLo;
 479
 480   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
 481   return VmcntLo | VmcntHi;
 482 }
 483
 484 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
 485   return (1 << getExpcntBitWidth()) - 1;
 486 }
 487
 488 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
 489   return (1 << getLgkmcntBitWidth()) - 1;
 490 }
 491
 492 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
 493   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
 494   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
 495   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
 496   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
 497   if (Version.Major < 9)
 498     return Waitcnt;
 499
 500   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
 501   return Waitcnt | VmcntHi;
 502 }
 503
 504 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 505   unsigned VmcntLo =
 506       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 507   if (Version.Major < 9)
 508     return VmcntLo;
 509
 510   unsigned VmcntHi =
 511       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 512   VmcntHi <<= getVmcntBitWidthLo();
 513   return VmcntLo | VmcntHi;
 514 }
 515
 516 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 517   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 518 }
 519
 520 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 521   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 522 }
 523
 524 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 525                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
 526   Vmcnt = decodeVmcnt(Version, Waitcnt);
 527   Expcnt = decodeExpcnt(Version, Waitcnt);
 528   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
 529 }
 530
 531 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 532                      unsigned Vmcnt) {
 533   Waitcnt =
 534       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 535   if (Version.Major < 9)
 536     return Waitcnt;
 537
 538   Vmcnt >>= getVmcntBitWidthLo();
 539   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 540 }
 541
 542 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 543                       unsigned Expcnt) {
 544   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 545 }
 546
 547 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 548                        unsigned Lgkmcnt) {
 549   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 550 }
 551
 552 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 553                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
 554   unsigned Waitcnt = getWaitcntBitMask(Version);
 555   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
 556   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
 557   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
 558   return Waitcnt;
 559 }
 560
 561 unsigned getInitialPSInputAddr(const Function &F) {
 562   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 563 }
 564
 565 bool isShader(CallingConv::ID cc) {
 566   switch(cc) {
 567     case CallingConv::AMDGPU_VS:
 568     case CallingConv::AMDGPU_LS:
 569     case CallingConv::AMDGPU_HS:
 570     case CallingConv::AMDGPU_ES:
 571     case CallingConv::AMDGPU_GS:
 572     case CallingConv::AMDGPU_PS:
 573     case CallingConv::AMDGPU_CS:
 574       return true;
 575     default:
 576       return false;
 577   }
 578 }
 579
 580 bool isCompute(CallingConv::ID cc) {
 581   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
 582 }
 583
 584 bool isEntryFunctionCC(CallingConv::ID CC) {
 585   switch (CC) {
 586   case CallingConv::AMDGPU_KERNEL:
 587   case CallingConv::SPIR_KERNEL:
 588   case CallingConv::AMDGPU_VS:
 589   case CallingConv::AMDGPU_GS:
 590   case CallingConv::AMDGPU_PS:
 591   case CallingConv::AMDGPU_CS:
 592   case CallingConv::AMDGPU_ES:
 593   case CallingConv::AMDGPU_HS:
 594   case CallingConv::AMDGPU_LS:
 595     return true;
 596   default:
 597     return false;
 598   }
 599 }
 600
 601 bool isSI(const MCSubtargetInfo &STI) {
 602   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
 603 }
 604
 605 bool isCI(const MCSubtargetInfo &STI) {
 606   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
 607 }
 608
 609 bool isVI(const MCSubtargetInfo &STI) {
 610   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
 611 }
 612
 613 bool isGFX9(const MCSubtargetInfo &STI) {
 614   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 615 }
 616
 617 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
 618   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 619 }
 620
 621 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
 622   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
 623   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
 624   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
 625     Reg == AMDGPU::SCC;
 626 }
 627
 628 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
 629   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
 630     if (*R == Reg1) return true;
 631   }
 632   return false;
 633 }
 634
 635 #define MAP_REG2REG \
 636   using namespace AMDGPU; \
 637   switch(Reg) { \
 638   default: return Reg; \
 639   CASE_CI_VI(FLAT_SCR) \
 640   CASE_CI_VI(FLAT_SCR_LO) \
 641   CASE_CI_VI(FLAT_SCR_HI) \
 642   CASE_VI_GFX9(TTMP0) \
 643   CASE_VI_GFX9(TTMP1) \
 644   CASE_VI_GFX9(TTMP2) \
 645   CASE_VI_GFX9(TTMP3) \
 646   CASE_VI_GFX9(TTMP4) \
 647   CASE_VI_GFX9(TTMP5) \
 648   CASE_VI_GFX9(TTMP6) \
 649   CASE_VI_GFX9(TTMP7) \
 650   CASE_VI_GFX9(TTMP8) \
 651   CASE_VI_GFX9(TTMP9) \
 652   CASE_VI_GFX9(TTMP10) \
 653   CASE_VI_GFX9(TTMP11) \
 654   CASE_VI_GFX9(TTMP12) \
 655   CASE_VI_GFX9(TTMP13) \
 656   CASE_VI_GFX9(TTMP14) \
 657   CASE_VI_GFX9(TTMP15) \
 658   CASE_VI_GFX9(TTMP0_TTMP1) \
 659   CASE_VI_GFX9(TTMP2_TTMP3) \
 660   CASE_VI_GFX9(TTMP4_TTMP5) \
 661   CASE_VI_GFX9(TTMP6_TTMP7) \
 662   CASE_VI_GFX9(TTMP8_TTMP9) \
 663   CASE_VI_GFX9(TTMP10_TTMP11) \
 664   CASE_VI_GFX9(TTMP12_TTMP13) \
 665   CASE_VI_GFX9(TTMP14_TTMP15) \
 666   CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
 667   CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
 668   CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
 669   CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
 670   CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
 671   CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
 672   CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
 673   CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
 674   }
 675
 676 #define CASE_CI_VI(node) \
 677   assert(!isSI(STI)); \
 678   case node: return isCI(STI) ? node##_ci : node##_vi;
 679
 680 #define CASE_VI_GFX9(node) \
 681   case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
 682
 683 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
 684   MAP_REG2REG
 685 }
 686
 687 #undef CASE_CI_VI
 688 #undef CASE_VI_GFX9
 689
 690 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
 691 #define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
 692
 693 unsigned mc2PseudoReg(unsigned Reg) {
 694   MAP_REG2REG
 695 }
 696
 697 #undef CASE_CI_VI
 698 #undef CASE_VI_GFX9
 699 #undef MAP_REG2REG
 700
 701 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 702   assert(OpNo < Desc.NumOperands);
 703   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 704   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
 705          OpType <= AMDGPU::OPERAND_SRC_LAST;
 706 }
 707
 708 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 709   assert(OpNo < Desc.NumOperands);
 710   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 711   switch (OpType) {
 712   case AMDGPU::OPERAND_REG_IMM_FP32:
 713   case AMDGPU::OPERAND_REG_IMM_FP64:
 714   case AMDGPU::OPERAND_REG_IMM_FP16:
 715   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 716   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 717   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 718   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 719     return true;
 720   default:
 721     return false;
 722   }
 723 }
 724
 725 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 726   assert(OpNo < Desc.NumOperands);
 727   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 728   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
 729          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
 730 }
 731
 732 // Avoid using MCRegisterClass::getSize, since that function will go away
 733 // (move from MC* level to Target* level). Return size in bits.
 734 unsigned getRegBitWidth(unsigned RCID) {
 735   switch (RCID) {
 736   case AMDGPU::SGPR_32RegClassID:
 737   case AMDGPU::VGPR_32RegClassID:
 738   case AMDGPU::VS_32RegClassID:
 739   case AMDGPU::SReg_32RegClassID:
 740   case AMDGPU::SReg_32_XM0RegClassID:
 741     return 32;
 742   case AMDGPU::SGPR_64RegClassID:
 743   case AMDGPU::VS_64RegClassID:
 744   case AMDGPU::SReg_64RegClassID:
 745   case AMDGPU::VReg_64RegClassID:
 746     return 64;
 747   case AMDGPU::VReg_96RegClassID:
 748     return 96;
 749   case AMDGPU::SGPR_128RegClassID:
 750   case AMDGPU::SReg_128RegClassID:
 751   case AMDGPU::VReg_128RegClassID:
 752     return 128;
 753   case AMDGPU::SReg_256RegClassID:
 754   case AMDGPU::VReg_256RegClassID:
 755     return 256;
 756   case AMDGPU::SReg_512RegClassID:
 757   case AMDGPU::VReg_512RegClassID:
 758     return 512;
 759   default:
 760     llvm_unreachable("Unexpected register class");
 761   }
 762 }
 763
 764 unsigned getRegBitWidth(const MCRegisterClass &RC) {
 765   return getRegBitWidth(RC.getID());
 766 }
 767
 768 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 769                            unsigned OpNo) {
 770   assert(OpNo < Desc.NumOperands);
 771   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
 772   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
 773 }
 774
 775 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 776   if (Literal >= -16 && Literal <= 64)
 777     return true;
 778
 779   uint64_t Val = static_cast<uint64_t>(Literal);
 780   return (Val == DoubleToBits(0.0)) ||
 781          (Val == DoubleToBits(1.0)) ||
 782          (Val == DoubleToBits(-1.0)) ||
 783          (Val == DoubleToBits(0.5)) ||
 784          (Val == DoubleToBits(-0.5)) ||
 785          (Val == DoubleToBits(2.0)) ||
 786          (Val == DoubleToBits(-2.0)) ||
 787          (Val == DoubleToBits(4.0)) ||
 788          (Val == DoubleToBits(-4.0)) ||
 789          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
 790 }
 791
 792 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
 793   if (Literal >= -16 && Literal <= 64)
 794     return true;
 795
 796   // The actual type of the operand does not seem to matter as long
 797   // as the bits match one of the inline immediate values.  For example:
 798   //
 799   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
 800   // so it is a legal inline immediate.
 801   //
 802   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
 803   // floating-point, so it is a legal inline immediate.
 804
 805   uint32_t Val = static_cast<uint32_t>(Literal);
 806   return (Val == FloatToBits(0.0f)) ||
 807          (Val == FloatToBits(1.0f)) ||
 808          (Val == FloatToBits(-1.0f)) ||
 809          (Val == FloatToBits(0.5f)) ||
 810          (Val == FloatToBits(-0.5f)) ||
 811          (Val == FloatToBits(2.0f)) ||
 812          (Val == FloatToBits(-2.0f)) ||
 813          (Val == FloatToBits(4.0f)) ||
 814          (Val == FloatToBits(-4.0f)) ||
 815          (Val == 0x3e22f983 && HasInv2Pi);
 816 }
 817
 818 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 819   if (!HasInv2Pi)
 820     return false;
 821
 822   if (Literal >= -16 && Literal <= 64)
 823     return true;
 824
 825   uint16_t Val = static_cast<uint16_t>(Literal);
 826   return Val == 0x3C00 || // 1.0
 827          Val == 0xBC00 || // -1.0
 828          Val == 0x3800 || // 0.5
 829          Val == 0xB800 || // -0.5
 830          Val == 0x4000 || // 2.0
 831          Val == 0xC000 || // -2.0
 832          Val == 0x4400 || // 4.0
 833          Val == 0xC400 || // -4.0
 834          Val == 0x3118;   // 1/2pi
 835 }
 836
 837 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
 838   assert(HasInv2Pi);
 839
 840   if (!EnablePackedInlinableLiterals)
 841     return false;
 842
 843   int16_t Lo16 = static_cast<int16_t>(Literal);
 844   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
 845   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 846 }
 847
 848 bool isArgPassedInSGPR(const Argument *A) {
 849   const Function *F = A->getParent();
 850
 851   // Arguments to compute shaders are never a source of divergence.
 852   CallingConv::ID CC = F->getCallingConv();
 853   switch (CC) {
 854   case CallingConv::AMDGPU_KERNEL:
 855   case CallingConv::SPIR_KERNEL:
 856     return true;
 857   case CallingConv::AMDGPU_VS:
 858   case CallingConv::AMDGPU_LS:
 859   case CallingConv::AMDGPU_HS:
 860   case CallingConv::AMDGPU_ES:
 861   case CallingConv::AMDGPU_GS:
 862   case CallingConv::AMDGPU_PS:
 863   case CallingConv::AMDGPU_CS:
 864     // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
 865     // Everything else is in VGPRs.
 866     return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
 867            F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
 868   default:
 869     // TODO: Should calls support inreg for SGPR inputs?
 870     return false;
 871   }
 872 }
 873
 874 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 875   if (isGCN3Encoding(ST))
 876     return ByteOffset;
 877   return ByteOffset >> 2;
 878 }
 879
 880 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 881   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
 882   return isGCN3Encoding(ST) ?
 883     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 884 }
 885
 886 } // end namespace AMDGPU
 887
 888 } // end namespace llvm
 889
 890 namespace llvm {
 891 namespace AMDGPU {
 892
 893 AMDGPUAS getAMDGPUAS(Triple T) {
 894   auto Env = T.getEnvironmentName();
 895   AMDGPUAS AS;
 896   if (Env == "amdgiz" || Env == "amdgizcl") {
 897     AS.FLAT_ADDRESS     = 0;
 898     AS.PRIVATE_ADDRESS  = 5;
 899     AS.REGION_ADDRESS   = 4;
 900   }
 901   else {
 902     AS.FLAT_ADDRESS     = 4;
 903     AS.PRIVATE_ADDRESS  = 0;
 904     AS.REGION_ADDRESS   = 5;
 905    }
 906   return AS;
 907 }
 908
 909 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
 910   return getAMDGPUAS(M.getTargetTriple());
 911 }
 912
 913 AMDGPUAS getAMDGPUAS(const Module &M) {
 914   return getAMDGPUAS(Triple(M.getTargetTriple()));
 915 }
 916 } // namespace AMDGPU
 917 } // namespace llvm