contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

   1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
  11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
  12
  13 #include "AMDGPU.h"
  14 #include "AMDKernelCodeT.h"
  15 #include "SIDefines.h"
  16 #include "llvm/ADT/StringRef.h"
  17 #include "llvm/IR/CallingConv.h"
  18 #include "llvm/MC/MCInstrDesc.h"
  19 #include "llvm/Support/AMDHSAKernelDescriptor.h"
  20 #include "llvm/Support/Compiler.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include <cstdint>
  23 #include <string>
  24 #include <utility>
  25
  26 namespace llvm {
  27
  28 class Argument;
  29 class FeatureBitset;
  30 class Function;
  31 class GlobalValue;
  32 class MCContext;
  33 class MCRegisterClass;
  34 class MCRegisterInfo;
  35 class MCSection;
  36 class MCSubtargetInfo;
  37 class MachineMemOperand;
  38 class Triple;
  39
  40 namespace AMDGPU {
  41
  42 #define GET_MIMGBaseOpcode_DECL
  43 #define GET_MIMGDim_DECL
  44 #define GET_MIMGEncoding_DECL
  45 #define GET_MIMGLZMapping_DECL
  46 #include "AMDGPUGenSearchableTables.inc"
  47
  48 namespace IsaInfo {
  49
  50 enum {
  51   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  52   // doesn't spill SGPRs as much as when 80 is set.
  53   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  54   TRAP_NUM_SGPRS = 16
  55 };
  56
  57 /// Instruction set architecture version.
  58 struct IsaVersion {
  59   unsigned Major;
  60   unsigned Minor;
  61   unsigned Stepping;
  62 };
  63
  64 /// \returns Isa version for given subtarget \p Features.
  65 IsaVersion getIsaVersion(const FeatureBitset &Features);
  66
  67 /// Streams isa version string for given subtarget \p STI into \p Stream.
  68 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
  69
  70 /// \returns True if given subtarget \p STI supports code object version 3,
  71 /// false otherwise.
  72 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
  73
  74 /// \returns Wavefront size for given subtarget \p Features.
  75 unsigned getWavefrontSize(const FeatureBitset &Features);
  76
  77 /// \returns Local memory size in bytes for given subtarget \p Features.
  78 unsigned getLocalMemorySize(const FeatureBitset &Features);
  79
  80 /// \returns Number of execution units per compute unit for given subtarget \p
  81 /// Features.
  82 unsigned getEUsPerCU(const FeatureBitset &Features);
  83
  84 /// \returns Maximum number of work groups per compute unit for given subtarget
  85 /// \p Features and limited by given \p FlatWorkGroupSize.
  86 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
  87                                unsigned FlatWorkGroupSize);
  88
  89 /// \returns Maximum number of waves per compute unit for given subtarget \p
  90 /// Features without any kind of limitation.
  91 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
  92
  93 /// \returns Maximum number of waves per compute unit for given subtarget \p
  94 /// Features and limited by given \p FlatWorkGroupSize.
  95 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
  96                           unsigned FlatWorkGroupSize);
  97
  98 /// \returns Minimum number of waves per execution unit for given subtarget \p
  99 /// Features.
 100 unsigned getMinWavesPerEU(const FeatureBitset &Features);
 101
/// \returns Maximum number of waves per execution unit without any kind of
/// limitation. This overload takes no subtarget features.
 104 unsigned getMaxWavesPerEU();
 105
 106 /// \returns Maximum number of waves per execution unit for given subtarget \p
 107 /// Features and limited by given \p FlatWorkGroupSize.
 108 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 109                           unsigned FlatWorkGroupSize);
 110
 111 /// \returns Minimum flat work group size for given subtarget \p Features.
 112 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
 113
 114 /// \returns Maximum flat work group size for given subtarget \p Features.
 115 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
 116
 117 /// \returns Number of waves per work group for given subtarget \p Features and
 118 /// limited by given \p FlatWorkGroupSize.
 119 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 120                               unsigned FlatWorkGroupSize);
 121
 122 /// \returns SGPR allocation granularity for given subtarget \p Features.
 123 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
 124
 125 /// \returns SGPR encoding granularity for given subtarget \p Features.
 126 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
 127
 128 /// \returns Total number of SGPRs for given subtarget \p Features.
 129 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
 130
 131 /// \returns Addressable number of SGPRs for given subtarget \p Features.
 132 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
 133
 134 /// \returns Minimum number of SGPRs that meets the given number of waves per
 135 /// execution unit requirement for given subtarget \p Features.
 136 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 137
 138 /// \returns Maximum number of SGPRs that meets the given number of waves per
 139 /// execution unit requirement for given subtarget \p Features.
 140 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 141                         bool Addressable);
 142
 143 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 144 /// Features when the given special registers are used.
 145 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 146                           bool FlatScrUsed, bool XNACKUsed);
 147
 148 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 149 /// Features when the given special registers are used. XNACK is inferred from
 150 /// \p Features.
 151 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 152                           bool FlatScrUsed);
 153
 154 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
 155 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
 156 /// register counts.
 157 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
 158
 159 /// \returns VGPR allocation granularity for given subtarget \p Features.
 160 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
 161
 162 /// \returns VGPR encoding granularity for given subtarget \p Features.
 163 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
 164
 165 /// \returns Total number of VGPRs for given subtarget \p Features.
 166 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
 167
 168 /// \returns Addressable number of VGPRs for given subtarget \p Features.
 169 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
 170
 171 /// \returns Minimum number of VGPRs that meets given number of waves per
 172 /// execution unit requirement for given subtarget \p Features.
 173 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 174
 175 /// \returns Maximum number of VGPRs that meets given number of waves per
 176 /// execution unit requirement for given subtarget \p Features.
 177 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 178
 179 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
 180 /// \p NumVGPRs are used.
 181 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
 182
 183 } // end namespace IsaInfo
 184
 185 LLVM_READONLY
 186 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 187
 188 struct MIMGBaseOpcodeInfo {
 189   MIMGBaseOpcode BaseOpcode;
 190   bool Store;
 191   bool Atomic;
 192   bool AtomicX2;
 193   bool Sampler;
 194
 195   uint8_t NumExtraArgs;
 196   bool Gradients;
 197   bool Coordinates;
 198   bool LodOrClampOrMip;
 199   bool HasD16;
 200 };
 201
 202 LLVM_READONLY
 203 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
 204
 205 struct MIMGDimInfo {
 206   MIMGDim Dim;
 207   uint8_t NumCoords;
 208   uint8_t NumGradients;
 209   bool DA;
 210 };
 211
 212 LLVM_READONLY
 213 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
 214
 215 struct MIMGLZMappingInfo {
 216   MIMGBaseOpcode L;
 217   MIMGBaseOpcode LZ;
 218 };
 219
 220 LLVM_READONLY
 221 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
 222
 223 LLVM_READONLY
 224 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
 225                   unsigned VDataDwords, unsigned VAddrDwords);
 226
 227 LLVM_READONLY
 228 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
 229
 230 LLVM_READONLY
 231 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 232
 233 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 234                                const FeatureBitset &Features);
 235
 236 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
 237
 238 bool isGroupSegment(const GlobalValue *GV);
 239 bool isGlobalSegment(const GlobalValue *GV);
 240 bool isReadOnlySegment(const GlobalValue *GV);
 241
 242 /// \returns True if constants should be emitted to .text section for given
 243 /// target triple \p TT, false otherwise.
 244 bool shouldEmitConstantsToTextSection(const Triple &TT);
 245
 246 /// \returns Integer value requested using \p F's \p Name attribute.
 247 ///
 248 /// \returns \p Default if attribute is not present.
 249 ///
 250 /// \returns \p Default and emits error if requested value cannot be converted
 251 /// to integer.
 252 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
 253
 254 /// \returns A pair of integer values requested using \p F's \p Name attribute
 255 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
 256 /// is false).
 257 ///
 258 /// \returns \p Default if attribute is not present.
 259 ///
 260 /// \returns \p Default and emits error if one of the requested values cannot be
 261 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
 262 /// not present.
 263 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 264                                             StringRef Name,
 265                                             std::pair<int, int> Default,
 266                                             bool OnlyFirstRequired = false);
 267
 268 /// \returns Vmcnt bit mask for given isa \p Version.
 269 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
 270
 271 /// \returns Expcnt bit mask for given isa \p Version.
 272 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
 273
 274 /// \returns Lgkmcnt bit mask for given isa \p Version.
 275 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
 276
 277 /// \returns Waitcnt bit mask for given isa \p Version.
 278 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
 279
 280 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
 281 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 282
 283 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
 284 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 285
 286 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
 287 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 288
 289 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
 290 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
 291 /// \p Lgkmcnt respectively.
 292 ///
 293 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
 294 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
 295 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
 296 ///     \p Expcnt = \p Waitcnt[6:4]
 297 ///     \p Lgkmcnt = \p Waitcnt[11:8]
 298 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 299                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
 300
 301 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
 302 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 303                      unsigned Vmcnt);
 304
 305 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
 306 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 307                       unsigned Expcnt);
 308
 309 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
 310 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 311                        unsigned Lgkmcnt);
 312
 313 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
 314 /// \p Version.
 315 ///
 316 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
 317 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
 318 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
 319 ///     Waitcnt[6:4]   = \p Expcnt
 320 ///     Waitcnt[11:8]  = \p Lgkmcnt
 321 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
 322 ///
 323 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
 324 /// isa \p Version.
 325 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 326                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
 327
 328 unsigned getInitialPSInputAddr(const Function &F);
 329
 330 LLVM_READNONE
 331 bool isShader(CallingConv::ID CC);
 332
 333 LLVM_READNONE
 334 bool isCompute(CallingConv::ID CC);
 335
 336 LLVM_READNONE
 337 bool isEntryFunctionCC(CallingConv::ID CC);
 338
 339 // FIXME: Remove this when calling conventions cleaned up
 340 LLVM_READNONE
 341 inline bool isKernel(CallingConv::ID CC) {
 342   switch (CC) {
 343   case CallingConv::AMDGPU_KERNEL:
 344   case CallingConv::SPIR_KERNEL:
 345     return true;
 346   default:
 347     return false;
 348   }
 349 }
 350
 351 bool hasXNACK(const MCSubtargetInfo &STI);
 352 bool hasMIMG_R128(const MCSubtargetInfo &STI);
 353 bool hasPackedD16(const MCSubtargetInfo &STI);
 354
 355 bool isSI(const MCSubtargetInfo &STI);
 356 bool isCI(const MCSubtargetInfo &STI);
 357 bool isVI(const MCSubtargetInfo &STI);
 358 bool isGFX9(const MCSubtargetInfo &STI);
 359
 360 /// Is Reg - scalar register
 361 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
 362
 363 /// Is there any intersection between registers
 364 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
 365
 366 /// If \p Reg is a pseudo reg, return the correct hardware register given
 367 /// \p STI otherwise return \p Reg.
 368 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
 369
 370 /// Convert hardware register \p Reg to a pseudo register
 371 LLVM_READNONE
 372 unsigned mc2PseudoReg(unsigned Reg);
 373
 374 /// Can this operand also contain immediate values?
 375 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
 376
 377 /// Is this floating-point operand?
 378 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
 379
/// Does this operand support only inlinable literals?
 381 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
 382
/// Get the size in bits of a register from the register class with ID \p RCID.
 384 unsigned getRegBitWidth(unsigned RCID);
 385
 386 /// Get the size in bits of a register from the register class \p RC.
 387 unsigned getRegBitWidth(const MCRegisterClass &RC);
 388
 389 /// Get size of register operand
 390 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 391                            unsigned OpNo);
 392
 393 LLVM_READNONE
 394 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
 395   switch (OpInfo.OperandType) {
 396   case AMDGPU::OPERAND_REG_IMM_INT32:
 397   case AMDGPU::OPERAND_REG_IMM_FP32:
 398   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
 399   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 400     return 4;
 401
 402   case AMDGPU::OPERAND_REG_IMM_INT64:
 403   case AMDGPU::OPERAND_REG_IMM_FP64:
 404   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 405   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 406     return 8;
 407
 408   case AMDGPU::OPERAND_REG_IMM_INT16:
 409   case AMDGPU::OPERAND_REG_IMM_FP16:
 410   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
 411   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 412   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
 413   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 414     return 2;
 415
 416   default:
 417     llvm_unreachable("unhandled operand type");
 418   }
 419 }
 420
 421 LLVM_READNONE
 422 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
 423   return getOperandSize(Desc.OpInfo[OpNo]);
 424 }
 425
 426 /// Is this literal inlinable
 427 LLVM_READNONE
 428 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
 429
 430 LLVM_READNONE
 431 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
 432
 433 LLVM_READNONE
 434 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
 435
 436 LLVM_READNONE
 437 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
 438
 439 bool isArgPassedInSGPR(const Argument *Arg);
 440
 441 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
 442 /// offset field.
 443 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 444
 445 /// \returns true if this offset is small enough to fit in the SMRD
 446 /// offset field.  \p ByteOffset should be the offset in bytes and
 447 /// not the encoded offset.
 448 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 449
 450 /// \returns true if the intrinsic is divergent
 451 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
 452
 453 } // end namespace AMDGPU
 454 } // end namespace llvm
 455
 456 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H