contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

   1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "AMDGPUBaseInfo.h"
  11 #include "AMDGPUTargetTransformInfo.h"
  12 #include "AMDGPU.h"
  13 #include "SIDefines.h"
  14 #include "llvm/ADT/StringRef.h"
  15 #include "llvm/ADT/Triple.h"
  16 #include "llvm/BinaryFormat/ELF.h"
  17 #include "llvm/CodeGen/MachineMemOperand.h"
  18 #include "llvm/IR/Attributes.h"
  19 #include "llvm/IR/Constants.h"
  20 #include "llvm/IR/Function.h"
  21 #include "llvm/IR/GlobalValue.h"
  22 #include "llvm/IR/Instruction.h"
  23 #include "llvm/IR/LLVMContext.h"
  24 #include "llvm/IR/Module.h"
  25 #include "llvm/MC/MCContext.h"
  26 #include "llvm/MC/MCInstrDesc.h"
  27 #include "llvm/MC/MCInstrInfo.h"
  28 #include "llvm/MC/MCRegisterInfo.h"
  29 #include "llvm/MC/MCSectionELF.h"
  30 #include "llvm/MC/MCSubtargetInfo.h"
  31 #include "llvm/MC/SubtargetFeature.h"
  32 #include "llvm/Support/Casting.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/MathExtras.h"
  35 #include <algorithm>
  36 #include <cassert>
  37 #include <cstdint>
  38 #include <cstring>
  39 #include <utility>
  40
  41 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
  42
  43 #define GET_INSTRINFO_NAMED_OPS
  44 #define GET_INSTRMAP_INFO
  45 #include "AMDGPUGenInstrInfo.inc"
  46 #undef GET_INSTRMAP_INFO
  47 #undef GET_INSTRINFO_NAMED_OPS
  48
  49 namespace {
  50
  51 /// \returns Bit mask for given bit \p Shift and bit \p Width.
  52 unsigned getBitMask(unsigned Shift, unsigned Width) {
  53   return ((1 << Width) - 1) << Shift;
  54 }
  55
  56 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
  57 ///
  58 /// \returns Packed \p Dst.
  59 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  60   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  61   Dst |= (Src << Shift) & getBitMask(Shift, Width);
  62   return Dst;
  63 }
  64
  65 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
  66 ///
  67 /// \returns Unpacked bits.
  68 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  69   return (Src & getBitMask(Shift, Width)) >> Shift;
  70 }
  71
  72 /// \returns Vmcnt bit shift (lower bits).
  73 unsigned getVmcntBitShiftLo() { return 0; }
  74
  75 /// \returns Vmcnt bit width (lower bits).
  76 unsigned getVmcntBitWidthLo() { return 4; }
  77
  78 /// \returns Expcnt bit shift.
  79 unsigned getExpcntBitShift() { return 4; }
  80
  81 /// \returns Expcnt bit width.
  82 unsigned getExpcntBitWidth() { return 3; }
  83
  84 /// \returns Lgkmcnt bit shift.
  85 unsigned getLgkmcntBitShift() { return 8; }
  86
  87 /// \returns Lgkmcnt bit width.
  88 unsigned getLgkmcntBitWidth() { return 4; }
  89
  90 /// \returns Vmcnt bit shift (higher bits).
  91 unsigned getVmcntBitShiftHi() { return 14; }
  92
  93 /// \returns Vmcnt bit width (higher bits).
  94 unsigned getVmcntBitWidthHi() { return 2; }
  95
  96 } // end namespace anonymous
  97
  98 namespace llvm {
  99
 100 namespace AMDGPU {
 101
/// One row of the MIMG opcode table that is pulled in through
/// AMDGPUGenSearchableTables.inc below.
///
/// NOTE(review): the member order presumably has to match the generated table
/// initializers -- confirm before reordering.
struct MIMGInfo {
  // Opcode value handed back to callers of getMIMGOpcode()/getMaskedMIMGOp().
  uint16_t Opcode;
  // The remaining members are the keys passed to getMIMGOpcodeHelper().
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};
 109
 110 #define GET_MIMGBaseOpcodesTable_IMPL
 111 #define GET_MIMGDimInfoTable_IMPL
 112 #define GET_MIMGInfoTable_IMPL
 113 #include "AMDGPUGenSearchableTables.inc"
 114
 115 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
 116                   unsigned VDataDwords, unsigned VAddrDwords) {
 117   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
 118                                              VDataDwords, VAddrDwords);
 119   return Info ? Info->Opcode : -1;
 120 }
 121
 122 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
 123   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
 124   const MIMGInfo *NewInfo =
 125       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
 126                           NewChannels, OrigInfo->VAddrDwords);
 127   return NewInfo ? NewInfo->Opcode : -1;
 128 }
 129
 130 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
 131 // header files, so we need to wrap it in a function that takes unsigned
 132 // instead.
 133 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
 134   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
 135 }
 136
 137 namespace IsaInfo {
 138
 139 IsaVersion getIsaVersion(const FeatureBitset &Features) {
 140   // GCN GFX6 (Southern Islands (SI)).
 141   if (Features.test(FeatureISAVersion6_0_0))
 142     return {6, 0, 0};
 143   if (Features.test(FeatureISAVersion6_0_1))
 144     return {6, 0, 1};
 145
 146   // GCN GFX7 (Sea Islands (CI)).
 147   if (Features.test(FeatureISAVersion7_0_0))
 148     return {7, 0, 0};
 149   if (Features.test(FeatureISAVersion7_0_1))
 150     return {7, 0, 1};
 151   if (Features.test(FeatureISAVersion7_0_2))
 152     return {7, 0, 2};
 153   if (Features.test(FeatureISAVersion7_0_3))
 154     return {7, 0, 3};
 155   if (Features.test(FeatureISAVersion7_0_4))
 156     return {7, 0, 4};
 157   if (Features.test(FeatureSeaIslands))
 158     return {7, 0, 0};
 159
 160   // GCN GFX8 (Volcanic Islands (VI)).
 161   if (Features.test(FeatureISAVersion8_0_1))
 162     return {8, 0, 1};
 163   if (Features.test(FeatureISAVersion8_0_2))
 164     return {8, 0, 2};
 165   if (Features.test(FeatureISAVersion8_0_3))
 166     return {8, 0, 3};
 167   if (Features.test(FeatureISAVersion8_1_0))
 168     return {8, 1, 0};
 169   if (Features.test(FeatureVolcanicIslands))
 170     return {8, 0, 0};
 171
 172   // GCN GFX9.
 173   if (Features.test(FeatureISAVersion9_0_0))
 174     return {9, 0, 0};
 175   if (Features.test(FeatureISAVersion9_0_2))
 176     return {9, 0, 2};
 177   if (Features.test(FeatureISAVersion9_0_4))
 178     return {9, 0, 4};
 179   if (Features.test(FeatureISAVersion9_0_6))
 180     return {9, 0, 6};
 181   if (Features.test(FeatureGFX9))
 182     return {9, 0, 0};
 183
 184   if (Features.test(FeatureSouthernIslands))
 185     return {0, 0, 0};
 186   return {7, 0, 0};
 187 }
 188
 189 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
 190   auto TargetTriple = STI->getTargetTriple();
 191   auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
 192
 193   Stream << TargetTriple.getArchName() << '-'
 194          << TargetTriple.getVendorName() << '-'
 195          << TargetTriple.getOSName() << '-'
 196          << TargetTriple.getEnvironmentName() << '-'
 197          << "gfx"
 198          << ISAVersion.Major
 199          << ISAVersion.Minor
 200          << ISAVersion.Stepping;
 201
 202   if (hasXNACK(*STI))
 203     Stream << "+xnack";
 204
 205   Stream.flush();
 206 }
 207
 208 bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
 209   return STI->getFeatureBits().test(FeatureCodeObjectV3);
 210 }
 211
 212 unsigned getWavefrontSize(const FeatureBitset &Features) {
 213   if (Features.test(FeatureWavefrontSize16))
 214     return 16;
 215   if (Features.test(FeatureWavefrontSize32))
 216     return 32;
 217
 218   return 64;
 219 }
 220
 221 unsigned getLocalMemorySize(const FeatureBitset &Features) {
 222   if (Features.test(FeatureLocalMemorySize32768))
 223     return 32768;
 224   if (Features.test(FeatureLocalMemorySize65536))
 225     return 65536;
 226
 227   return 0;
 228 }
 229
 230 unsigned getEUsPerCU(const FeatureBitset &Features) {
 231   return 4;
 232 }
 233
 234 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
 235                                unsigned FlatWorkGroupSize) {
 236   if (!Features.test(FeatureGCN))
 237     return 8;
 238   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 239   if (N == 1)
 240     return 40;
 241   N = 40 / N;
 242   return std::min(N, 16u);
 243 }
 244
 245 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
 246   return getMaxWavesPerEU() * getEUsPerCU(Features);
 247 }
 248
 249 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
 250                           unsigned FlatWorkGroupSize) {
 251   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
 252 }
 253
 254 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
 255   return 1;
 256 }
 257
 258 unsigned getMaxWavesPerEU() {
 259   // FIXME: Need to take scratch memory into account.
 260   return 10;
 261 }
 262
 263 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 264                           unsigned FlatWorkGroupSize) {
 265   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
 266                  getEUsPerCU(Features)) / getEUsPerCU(Features);
 267 }
 268
 269 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
 270   return 1;
 271 }
 272
 273 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
 274   return 2048;
 275 }
 276
 277 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 278                               unsigned FlatWorkGroupSize) {
 279   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
 280                  getWavefrontSize(Features);
 281 }
 282
 283 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
 284   IsaVersion Version = getIsaVersion(Features);
 285   if (Version.Major >= 8)
 286     return 16;
 287   return 8;
 288 }
 289
 290 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
 291   return 8;
 292 }
 293
 294 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
 295   IsaVersion Version = getIsaVersion(Features);
 296   if (Version.Major >= 8)
 297     return 800;
 298   return 512;
 299 }
 300
 301 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
 302   if (Features.test(FeatureSGPRInitBug))
 303     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
 304
 305   IsaVersion Version = getIsaVersion(Features);
 306   if (Version.Major >= 8)
 307     return 102;
 308   return 104;
 309 }
 310
 311 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 312   assert(WavesPerEU != 0);
 313
 314   if (WavesPerEU >= getMaxWavesPerEU())
 315     return 0;
 316
 317   unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
 318   if (Features.test(FeatureTrapHandler))
 319     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
 320   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
 321   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
 322 }
 323
 324 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 325                         bool Addressable) {
 326   assert(WavesPerEU != 0);
 327
 328   IsaVersion Version = getIsaVersion(Features);
 329   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
 330   if (Version.Major >= 8 && !Addressable)
 331     AddressableNumSGPRs = 112;
 332   unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
 333   if (Features.test(FeatureTrapHandler))
 334     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
 335   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
 336   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
 337 }
 338
 339 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 340                           bool FlatScrUsed, bool XNACKUsed) {
 341   unsigned ExtraSGPRs = 0;
 342   if (VCCUsed)
 343     ExtraSGPRs = 2;
 344
 345   IsaVersion Version = getIsaVersion(Features);
 346   if (Version.Major < 8) {
 347     if (FlatScrUsed)
 348       ExtraSGPRs = 4;
 349   } else {
 350     if (XNACKUsed)
 351       ExtraSGPRs = 4;
 352
 353     if (FlatScrUsed)
 354       ExtraSGPRs = 6;
 355   }
 356
 357   return ExtraSGPRs;
 358 }
 359
 360 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 361                           bool FlatScrUsed) {
 362   return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
 363                           Features[AMDGPU::FeatureXNACK]);
 364 }
 365
 366 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
 367   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
 368   // SGPRBlocks is actual number of SGPR blocks minus 1.
 369   return NumSGPRs / getSGPREncodingGranule(Features) - 1;
 370 }
 371
 372 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
 373   return 4;
 374 }
 375
 376 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
 377   return getVGPRAllocGranule(Features);
 378 }
 379
 380 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
 381   return 256;
 382 }
 383
 384 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
 385   return getTotalNumVGPRs(Features);
 386 }
 387
 388 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 389   assert(WavesPerEU != 0);
 390
 391   if (WavesPerEU >= getMaxWavesPerEU())
 392     return 0;
 393   unsigned MinNumVGPRs =
 394       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
 395                 getVGPRAllocGranule(Features)) + 1;
 396   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
 397 }
 398
 399 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
 400   assert(WavesPerEU != 0);
 401
 402   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
 403                                    getVGPRAllocGranule(Features));
 404   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
 405   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
 406 }
 407
 408 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
 409   NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
 410   // VGPRBlocks is actual number of VGPR blocks minus 1.
 411   return NumVGPRs / getVGPREncodingGranule(Features) - 1;
 412 }
 413
 414 } // end namespace IsaInfo
 415
 416 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 417                                const FeatureBitset &Features) {
 418   IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
 419
 420   memset(&Header, 0, sizeof(Header));
 421
 422   Header.amd_kernel_code_version_major = 1;
 423   Header.amd_kernel_code_version_minor = 2;
 424   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
 425   Header.amd_machine_version_major = ISA.Major;
 426   Header.amd_machine_version_minor = ISA.Minor;
 427   Header.amd_machine_version_stepping = ISA.Stepping;
 428   Header.kernel_code_entry_byte_offset = sizeof(Header);
 429   // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
 430   Header.wavefront_size = 6;
 431
 432   // If the code object does not support indirect functions, then the value must
 433   // be 0xffffffff.
 434   Header.call_convention = -1;
 435
 436   // These alignment values are specified in powers of two, so alignment =
 437   // 2^n.  The minimum alignment is 2^4 = 16.
 438   Header.kernarg_segment_alignment = 4;
 439   Header.group_segment_alignment = 4;
 440   Header.private_segment_alignment = 4;
 441 }
 442
 443 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
 444   amdhsa::kernel_descriptor_t KD;
 445   memset(&KD, 0, sizeof(KD));
 446   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
 447                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
 448                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
 449   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
 450                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
 451   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
 452                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
 453   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
 454                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
 455   return KD;
 456 }
 457
 458 bool isGroupSegment(const GlobalValue *GV) {
 459   return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 460 }
 461
 462 bool isGlobalSegment(const GlobalValue *GV) {
 463   return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 464 }
 465
 466 bool isReadOnlySegment(const GlobalValue *GV) {
 467   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
 468          GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
 469 }
 470
 471 bool shouldEmitConstantsToTextSection(const Triple &TT) {
 472   return TT.getOS() != Triple::AMDHSA;
 473 }
 474
 475 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
 476   Attribute A = F.getFnAttribute(Name);
 477   int Result = Default;
 478
 479   if (A.isStringAttribute()) {
 480     StringRef Str = A.getValueAsString();
 481     if (Str.getAsInteger(0, Result)) {
 482       LLVMContext &Ctx = F.getContext();
 483       Ctx.emitError("can't parse integer attribute " + Name);
 484     }
 485   }
 486
 487   return Result;
 488 }
 489
 490 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 491                                             StringRef Name,
 492                                             std::pair<int, int> Default,
 493                                             bool OnlyFirstRequired) {
 494   Attribute A = F.getFnAttribute(Name);
 495   if (!A.isStringAttribute())
 496     return Default;
 497
 498   LLVMContext &Ctx = F.getContext();
 499   std::pair<int, int> Ints = Default;
 500   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
 501   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
 502     Ctx.emitError("can't parse first integer attribute " + Name);
 503     return Default;
 504   }
 505   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
 506     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
 507       Ctx.emitError("can't parse second integer attribute " + Name);
 508       return Default;
 509     }
 510   }
 511
 512   return Ints;
 513 }
 514
 515 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
 516   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
 517   if (Version.Major < 9)
 518     return VmcntLo;
 519
 520   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
 521   return VmcntLo | VmcntHi;
 522 }
 523
 524 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
 525   return (1 << getExpcntBitWidth()) - 1;
 526 }
 527
 528 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
 529   return (1 << getLgkmcntBitWidth()) - 1;
 530 }
 531
 532 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
 533   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
 534   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
 535   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
 536   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
 537   if (Version.Major < 9)
 538     return Waitcnt;
 539
 540   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
 541   return Waitcnt | VmcntHi;
 542 }
 543
 544 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 545   unsigned VmcntLo =
 546       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 547   if (Version.Major < 9)
 548     return VmcntLo;
 549
 550   unsigned VmcntHi =
 551       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 552   VmcntHi <<= getVmcntBitWidthLo();
 553   return VmcntLo | VmcntHi;
 554 }
 555
 556 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 557   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 558 }
 559
 560 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
 561   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 562 }
 563
 564 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 565                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
 566   Vmcnt = decodeVmcnt(Version, Waitcnt);
 567   Expcnt = decodeExpcnt(Version, Waitcnt);
 568   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
 569 }
 570
 571 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 572                      unsigned Vmcnt) {
 573   Waitcnt =
 574       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
 575   if (Version.Major < 9)
 576     return Waitcnt;
 577
 578   Vmcnt >>= getVmcntBitWidthLo();
 579   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
 580 }
 581
 582 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 583                       unsigned Expcnt) {
 584   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 585 }
 586
 587 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 588                        unsigned Lgkmcnt) {
 589   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 590 }
 591
 592 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 593                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
 594   unsigned Waitcnt = getWaitcntBitMask(Version);
 595   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
 596   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
 597   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
 598   return Waitcnt;
 599 }
 600
 601 unsigned getInitialPSInputAddr(const Function &F) {
 602   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 603 }
 604
 605 bool isShader(CallingConv::ID cc) {
 606   switch(cc) {
 607     case CallingConv::AMDGPU_VS:
 608     case CallingConv::AMDGPU_LS:
 609     case CallingConv::AMDGPU_HS:
 610     case CallingConv::AMDGPU_ES:
 611     case CallingConv::AMDGPU_GS:
 612     case CallingConv::AMDGPU_PS:
 613     case CallingConv::AMDGPU_CS:
 614       return true;
 615     default:
 616       return false;
 617   }
 618 }
 619
 620 bool isCompute(CallingConv::ID cc) {
 621   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
 622 }
 623
 624 bool isEntryFunctionCC(CallingConv::ID CC) {
 625   switch (CC) {
 626   case CallingConv::AMDGPU_KERNEL:
 627   case CallingConv::SPIR_KERNEL:
 628   case CallingConv::AMDGPU_VS:
 629   case CallingConv::AMDGPU_GS:
 630   case CallingConv::AMDGPU_PS:
 631   case CallingConv::AMDGPU_CS:
 632   case CallingConv::AMDGPU_ES:
 633   case CallingConv::AMDGPU_HS:
 634   case CallingConv::AMDGPU_LS:
 635     return true;
 636   default:
 637     return false;
 638   }
 639 }
 640
 641 bool hasXNACK(const MCSubtargetInfo &STI) {
 642   return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
 643 }
 644
 645 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
 646   return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
 647 }
 648
 649 bool hasPackedD16(const MCSubtargetInfo &STI) {
 650   return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
 651 }
 652
 653 bool isSI(const MCSubtargetInfo &STI) {
 654   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
 655 }
 656
 657 bool isCI(const MCSubtargetInfo &STI) {
 658   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
 659 }
 660
 661 bool isVI(const MCSubtargetInfo &STI) {
 662   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
 663 }
 664
 665 bool isGFX9(const MCSubtargetInfo &STI) {
 666   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
 667 }
 668
 669 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
 670   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
 671 }
 672
 673 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
 674   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
 675   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
 676   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
 677     Reg == AMDGPU::SCC;
 678 }
 679
 680 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
 681   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
 682     if (*R == Reg1) return true;
 683   }
 684   return false;
 685 }
 686
// MAP_REG2REG expands to a complete switch over a variable named Reg. Its
// default case returns Reg unchanged; every other case is produced by the
// CASE_CI_VI / CASE_VI_GFX9 helper macros, which must be defined before
// MAP_REG2REG is used. The same register list is expanded twice below with two
// different definitions of the helpers: once to map a pseudo register to its
// subtarget specific register (getMCReg) and once for the reverse direction
// (mc2PseudoReg).
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Forward direction: a pseudo register becomes its _ci or _vi variant,
// selected by isCI(STI).
// NOTE(review): inside the expanded switch the assert sits between the
// preceding return statement and the case label, so control never reaches it;
// it documents the intent (not used on SI) but does not check anything.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

// Forward direction: a pseudo register becomes its _gfx9 or _vi variant,
// selected by isGFX9(STI).
#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

/// \returns The subtarget specific register for \p Reg. \p Reg is returned
/// unchanged when the target architecture is r600 or when \p Reg is not one
/// of the registers listed in MAP_REG2REG.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

// Reverse direction: both subtarget specific variants map back to the common
// pseudo register.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

/// \returns The pseudo register that the subtarget specific register \p Reg
/// stands for, or \p Reg itself if it is not one of the variants listed in
/// MAP_REG2REG.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG
 754
 755 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 756   assert(OpNo < Desc.NumOperands);
 757   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 758   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
 759          OpType <= AMDGPU::OPERAND_SRC_LAST;
 760 }
 761
 762 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 763   assert(OpNo < Desc.NumOperands);
 764   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 765   switch (OpType) {
 766   case AMDGPU::OPERAND_REG_IMM_FP32:
 767   case AMDGPU::OPERAND_REG_IMM_FP64:
 768   case AMDGPU::OPERAND_REG_IMM_FP16:
 769   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 770   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 771   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 772   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 773     return true;
 774   default:
 775     return false;
 776   }
 777 }
 778
 779 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
 780   assert(OpNo < Desc.NumOperands);
 781   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
 782   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
 783          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
 784 }
 785
 786 // Avoid using MCRegisterClass::getSize, since that function will go away
 787 // (move from MC* level to Target* level). Return size in bits.
 788 unsigned getRegBitWidth(unsigned RCID) {
 789   switch (RCID) {
 790   case AMDGPU::SGPR_32RegClassID:
 791   case AMDGPU::VGPR_32RegClassID:
 792   case AMDGPU::VS_32RegClassID:
 793   case AMDGPU::SReg_32RegClassID:
 794   case AMDGPU::SReg_32_XM0RegClassID:
 795     return 32;
 796   case AMDGPU::SGPR_64RegClassID:
 797   case AMDGPU::VS_64RegClassID:
 798   case AMDGPU::SReg_64RegClassID:
 799   case AMDGPU::VReg_64RegClassID:
 800     return 64;
 801   case AMDGPU::VReg_96RegClassID:
 802     return 96;
 803   case AMDGPU::SGPR_128RegClassID:
 804   case AMDGPU::SReg_128RegClassID:
 805   case AMDGPU::VReg_128RegClassID:
 806     return 128;
 807   case AMDGPU::SReg_256RegClassID:
 808   case AMDGPU::VReg_256RegClassID:
 809     return 256;
 810   case AMDGPU::SReg_512RegClassID:
 811   case AMDGPU::VReg_512RegClassID:
 812     return 512;
 813   default:
 814     llvm_unreachable("Unexpected register class");
 815   }
 816 }
 817
 818 unsigned getRegBitWidth(const MCRegisterClass &RC) {
 819   return getRegBitWidth(RC.getID());
 820 }
 821
 822 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 823                            unsigned OpNo) {
 824   assert(OpNo < Desc.NumOperands);
 825   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
 826   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
 827 }
 828
 829 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
 830   if (Literal >= -16 && Literal <= 64)
 831     return true;
 832
 833   uint64_t Val = static_cast<uint64_t>(Literal);
 834   return (Val == DoubleToBits(0.0)) ||
 835          (Val == DoubleToBits(1.0)) ||
 836          (Val == DoubleToBits(-1.0)) ||
 837          (Val == DoubleToBits(0.5)) ||
 838          (Val == DoubleToBits(-0.5)) ||
 839          (Val == DoubleToBits(2.0)) ||
 840          (Val == DoubleToBits(-2.0)) ||
 841          (Val == DoubleToBits(4.0)) ||
 842          (Val == DoubleToBits(-4.0)) ||
 843          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
 844 }
 845
 846 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
 847   if (Literal >= -16 && Literal <= 64)
 848     return true;
 849
 850   // The actual type of the operand does not seem to matter as long
 851   // as the bits match one of the inline immediate values.  For example:
 852   //
 853   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
 854   // so it is a legal inline immediate.
 855   //
 856   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
 857   // floating-point, so it is a legal inline immediate.
 858
 859   uint32_t Val = static_cast<uint32_t>(Literal);
 860   return (Val == FloatToBits(0.0f)) ||
 861          (Val == FloatToBits(1.0f)) ||
 862          (Val == FloatToBits(-1.0f)) ||
 863          (Val == FloatToBits(0.5f)) ||
 864          (Val == FloatToBits(-0.5f)) ||
 865          (Val == FloatToBits(2.0f)) ||
 866          (Val == FloatToBits(-2.0f)) ||
 867          (Val == FloatToBits(4.0f)) ||
 868          (Val == FloatToBits(-4.0f)) ||
 869          (Val == 0x3e22f983 && HasInv2Pi);
 870 }
 871
 872 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
 873   if (!HasInv2Pi)
 874     return false;
 875
 876   if (Literal >= -16 && Literal <= 64)
 877     return true;
 878
 879   uint16_t Val = static_cast<uint16_t>(Literal);
 880   return Val == 0x3C00 || // 1.0
 881          Val == 0xBC00 || // -1.0
 882          Val == 0x3800 || // 0.5
 883          Val == 0xB800 || // -0.5
 884          Val == 0x4000 || // 2.0
 885          Val == 0xC000 || // -2.0
 886          Val == 0x4400 || // 4.0
 887          Val == 0xC400 || // -4.0
 888          Val == 0x3118;   // 1/2pi
 889 }
 890
 891 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
 892   assert(HasInv2Pi);
 893
 894   int16_t Lo16 = static_cast<int16_t>(Literal);
 895   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
 896   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
 897 }
 898
 899 bool isArgPassedInSGPR(const Argument *A) {
 900   const Function *F = A->getParent();
 901
 902   // Arguments to compute shaders are never a source of divergence.
 903   CallingConv::ID CC = F->getCallingConv();
 904   switch (CC) {
 905   case CallingConv::AMDGPU_KERNEL:
 906   case CallingConv::SPIR_KERNEL:
 907     return true;
 908   case CallingConv::AMDGPU_VS:
 909   case CallingConv::AMDGPU_LS:
 910   case CallingConv::AMDGPU_HS:
 911   case CallingConv::AMDGPU_ES:
 912   case CallingConv::AMDGPU_GS:
 913   case CallingConv::AMDGPU_PS:
 914   case CallingConv::AMDGPU_CS:
 915     // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
 916     // Everything else is in VGPRs.
 917     return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
 918            F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
 919   default:
 920     // TODO: Should calls support inreg for SGPR inputs?
 921     return false;
 922   }
 923 }
 924
 925 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 926   if (isGCN3Encoding(ST))
 927     return ByteOffset;
 928   return ByteOffset >> 2;
 929 }
 930
 931 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
 932   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
 933   return isGCN3Encoding(ST) ?
 934     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
 935 }
 936
 937 } // end namespace AMDGPU
 938
 939 } // end namespace llvm
 940
 941 namespace llvm {
 942 namespace AMDGPU {
 943
 944 AMDGPUAS getAMDGPUAS(Triple T) {
 945   AMDGPUAS AS;
 946   AS.FLAT_ADDRESS = 0;
 947   AS.PRIVATE_ADDRESS = 5;
 948   AS.REGION_ADDRESS = 2;
 949   return AS;
 950 }
 951
 952 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
 953   return getAMDGPUAS(M.getTargetTriple());
 954 }
 955
 956 AMDGPUAS getAMDGPUAS(const Module &M) {
 957   return getAMDGPUAS(Triple(M.getTargetTriple()));
 958 }
 959
 960 namespace {
 961
/// Row type of the sources-of-divergence table pulled in through
/// AMDGPUGenSearchableTables.inc below.
struct SourceOfDivergence {
  // Intrinsic ID; this is the key lookupSourceOfDivergence() is called with.
  unsigned Intr;
};
// Declared here and presumably defined by the generated include that follows
// (GET_SourcesOfDivergence_IMPL). isIntrinsicSourceOfDivergence() treats a
// non-null result as "found".
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
 966
 967 #define GET_SourcesOfDivergence_IMPL
 968 #include "AMDGPUGenSearchableTables.inc"
 969
 970 } // end anonymous namespace
 971
 972 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
 973   return lookupSourceOfDivergence(IntrID);
 974 }
 975 } // namespace AMDGPU
 976 } // namespace llvm