contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

   1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
  11 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
  12
  13 #include "AMDGPU.h"
  14 #include "AMDKernelCodeT.h"
  15 #include "SIDefines.h"
  16 #include "llvm/ADT/StringRef.h"
  17 #include "llvm/IR/CallingConv.h"
  18 #include "llvm/MC/MCInstrDesc.h"
  19 #include "llvm/Support/AMDHSAKernelDescriptor.h"
  20 #include "llvm/Support/Compiler.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include <cstdint>
  23 #include <string>
  24 #include <utility>
  25
  26 namespace llvm {
  27
  28 class Argument;
  29 class FeatureBitset;
  30 class Function;
  31 class GlobalValue;
  32 class MCContext;
  33 class MCRegisterClass;
  34 class MCRegisterInfo;
  35 class MCSection;
  36 class MCSubtargetInfo;
  37 class MachineMemOperand;
  38 class Triple;
  39
  40 namespace AMDGPU {
  41
  42 #define GET_MIMGBaseOpcode_DECL
  43 #define GET_MIMGDim_DECL
  44 #define GET_MIMGEncoding_DECL
  45 #include "AMDGPUGenSearchableTables.inc"
  46
  47 namespace IsaInfo {
  48
  49 enum {
  50   // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  51   // doesn't spill SGPRs as much as when 80 is set.
  52   FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  53   TRAP_NUM_SGPRS = 16 // NOTE(review): presumably SGPRs set aside for the trap handler; confirm.
  54 };
  55
  56 /// Instruction set architecture version, split into its three components.
  57 struct IsaVersion {
  58   unsigned Major;    ///< Major version component.
  59   unsigned Minor;    ///< Minor version component.
  60   unsigned Stepping; ///< Stepping component.
  61 };
  62
  63 /// \returns Isa version for given subtarget \p Features.
  64 IsaVersion getIsaVersion(const FeatureBitset &Features);
  65
  66 /// Streams isa version string for given subtarget \p STI into \p Stream.
  67 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
  68
  69 /// \returns True if given subtarget \p STI supports code object version 3,
  70 /// false otherwise.
  71 bool hasCodeObjectV3(const MCSubtargetInfo *STI);
  72
  73 /// \returns Wavefront size for given subtarget \p Features.
  74 unsigned getWavefrontSize(const FeatureBitset &Features);
  75
  76 /// \returns Local memory size in bytes for given subtarget \p Features.
  77 unsigned getLocalMemorySize(const FeatureBitset &Features);
  78
  79 /// \returns Number of execution units per compute unit for given subtarget \p
  80 /// Features.
  81 unsigned getEUsPerCU(const FeatureBitset &Features);
  82
  83 /// \returns Maximum number of work groups per compute unit for given subtarget
  84 /// \p Features and limited by given \p FlatWorkGroupSize.
  85 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
  86                                unsigned FlatWorkGroupSize);
  87
  88 /// \returns Maximum number of waves per compute unit for given subtarget \p
  89 /// Features without any kind of limitation.
  90 unsigned getMaxWavesPerCU(const FeatureBitset &Features);
  91
  92 /// \returns Maximum number of waves per compute unit for given subtarget \p
  93 /// Features and limited by given \p FlatWorkGroupSize.
  94 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
  95                           unsigned FlatWorkGroupSize);
  96
  97 /// \returns Minimum number of waves per execution unit for given subtarget \p
  98 /// Features.
  99 unsigned getMinWavesPerEU(const FeatureBitset &Features);
 100
 101 /// \returns Maximum number of waves per execution unit without any kind of
 102 /// limitation. Unlike the overload below, this takes no subtarget features.
 103 unsigned getMaxWavesPerEU();
 104
 105 /// \returns Maximum number of waves per execution unit for given subtarget \p
 106 /// Features and limited by given \p FlatWorkGroupSize.
 107 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
 108                           unsigned FlatWorkGroupSize);
 109
 110 /// \returns Minimum flat work group size for given subtarget \p Features.
 111 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
 112
 113 /// \returns Maximum flat work group size for given subtarget \p Features.
 114 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
 115
 116 /// \returns Number of waves per work group for given subtarget \p Features and
 117 /// limited by given \p FlatWorkGroupSize.
 118 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
 119                               unsigned FlatWorkGroupSize);
 120
 121 /// \returns SGPR allocation granularity for given subtarget \p Features.
 122 unsigned getSGPRAllocGranule(const FeatureBitset &Features);
 123
 124 /// \returns SGPR encoding granularity for given subtarget \p Features.
 125 unsigned getSGPREncodingGranule(const FeatureBitset &Features);
 126
 127 /// \returns Total number of SGPRs for given subtarget \p Features.
 128 unsigned getTotalNumSGPRs(const FeatureBitset &Features);
 129
 130 /// \returns Addressable number of SGPRs for given subtarget \p Features.
 131 unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
 132
 133 /// \returns Minimum number of SGPRs that meets the given number of waves per
 134 /// execution unit requirement for given subtarget \p Features.
 135 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 136
 137 /// \returns Maximum number of SGPRs that meets the given number of waves per
 138 /// execution unit requirement for given subtarget \p Features.
 139 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
 140                         bool Addressable);
 141
 142 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 143 /// Features when the given special registers are used.
 144 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 145                           bool FlatScrUsed, bool XNACKUsed);
 146
 147 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
 148 /// Features when the given special registers are used. XNACK is inferred from
 149 /// \p Features.
 150 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
 151                           bool FlatScrUsed);
 152
 153 /// \returns Number of SGPR blocks needed for given subtarget \p Features when
 154 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
 155 /// register counts.
 156 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
 157
 158 /// \returns VGPR allocation granularity for given subtarget \p Features.
 159 unsigned getVGPRAllocGranule(const FeatureBitset &Features);
 160
 161 /// \returns VGPR encoding granularity for given subtarget \p Features.
 162 unsigned getVGPREncodingGranule(const FeatureBitset &Features);
 163
 164 /// \returns Total number of VGPRs for given subtarget \p Features.
 165 unsigned getTotalNumVGPRs(const FeatureBitset &Features);
 166
 167 /// \returns Addressable number of VGPRs for given subtarget \p Features.
 168 unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
 169
 170 /// \returns Minimum number of VGPRs that meets given number of waves per
 171 /// execution unit requirement for given subtarget \p Features.
 172 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 173
 174 /// \returns Maximum number of VGPRs that meets given number of waves per
 175 /// execution unit requirement for given subtarget \p Features.
 176 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
 177
 178 /// \returns Number of VGPR blocks needed for given subtarget \p Features when
 179 /// \p NumVGPRs are used.
 180 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs);
 181
 182 } // end namespace IsaInfo
 183
 184 LLVM_READONLY
 185 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
 186
 187 struct MIMGBaseOpcodeInfo { // Element type returned by getMIMGBaseOpcodeInfo().
 188   MIMGBaseOpcode BaseOpcode; ///< Base opcode this entry describes.
 189   bool Store;
 190   bool Atomic;
 191   bool AtomicX2;
 192   bool Sampler;
 193   // NOTE(review): field order presumably matches the generated table; confirm before reordering.
 194   uint8_t NumExtraArgs;
 195   bool Gradients;
 196   bool Coordinates;
 197   bool LodOrClampOrMip;
 198   bool HasD16;
 199 };
 200
 201 LLVM_READONLY
 202 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
 203
 204 struct MIMGDimInfo { // Element type returned by getMIMGDimInfo().
 205   MIMGDim Dim;          ///< Dimension kind this entry describes.
 206   uint8_t NumCoords;    ///< Number of coordinates.
 207   uint8_t NumGradients; ///< Number of gradients.
 208   bool DA;              ///< NOTE(review): presumably the "declare array" flag; confirm.
 209 };
 210
 211 LLVM_READONLY
 212 const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
 213
 214 LLVM_READONLY
 215 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
 216                   unsigned VDataDwords, unsigned VAddrDwords);
 217
 218 LLVM_READONLY
 219 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
 220
 221 LLVM_READONLY
 222 int getMCOpcode(uint16_t Opcode, unsigned Gen);
 223
 224 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
 225                                const FeatureBitset &Features);
 226
 227 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
 228
 229 bool isGroupSegment(const GlobalValue *GV);
 230 bool isGlobalSegment(const GlobalValue *GV);
 231 bool isReadOnlySegment(const GlobalValue *GV);
 232
 233 /// \returns True if constants should be emitted to .text section for given
 234 /// target triple \p TT, false otherwise.
 235 bool shouldEmitConstantsToTextSection(const Triple &TT);
 236
 237 /// \returns Integer value requested using \p F's \p Name attribute.
 238 ///
 239 /// \returns \p Default if attribute is not present.
 240 ///
 241 /// \returns \p Default and emits error if requested value cannot be converted
 242 /// to integer.
 243 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
 244
 245 /// \returns A pair of integer values requested using \p F's \p Name attribute
 246 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
 247 /// is false).
 248 ///
 249 /// \returns \p Default if attribute is not present.
 250 ///
 251 /// \returns \p Default and emits error if one of the requested values cannot be
 252 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
 253 /// not present.
 254 std::pair<int, int> getIntegerPairAttribute(const Function &F,
 255                                             StringRef Name,
 256                                             std::pair<int, int> Default,
 257                                             bool OnlyFirstRequired = false);
 258
 259 /// \returns Vmcnt bit mask for given isa \p Version.
 260 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
 261
 262 /// \returns Expcnt bit mask for given isa \p Version.
 263 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
 264
 265 /// \returns Lgkmcnt bit mask for given isa \p Version.
 266 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
 267
 268 /// \returns Waitcnt bit mask for given isa \p Version.
 269 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
 270
 271 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
 272 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 273
 274 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
 275 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 276
 277 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
 278 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 279
 280 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
 281 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
 282 /// \p Lgkmcnt respectively.
 283 ///
 284 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
 285 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
 286 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
 287 ///     \p Expcnt = \p Waitcnt[6:4]
 288 ///     \p Lgkmcnt = \p Waitcnt[11:8]
 289 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 290                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
 291
 292 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
 293 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 294                      unsigned Vmcnt);
 295
 296 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
 297 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 298                       unsigned Expcnt);
 299
 300 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
 301 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
 302                        unsigned Lgkmcnt);
 303
 304 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
 305 /// \p Version.
 306 ///
 307 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
 308 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
 309 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
 310 ///     Waitcnt[6:4]   = \p Expcnt
 311 ///     Waitcnt[11:8]  = \p Lgkmcnt
 312 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
 313 ///
 314 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
 315 /// isa \p Version.
 316 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
 317                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
 318
 319 unsigned getInitialPSInputAddr(const Function &F);
 320
 321 LLVM_READNONE
 322 bool isShader(CallingConv::ID CC);
 323
 324 LLVM_READNONE
 325 bool isCompute(CallingConv::ID CC);
 326
 327 LLVM_READNONE
 328 bool isEntryFunctionCC(CallingConv::ID CC);
 329
 330 // FIXME: Remove this when calling conventions cleaned up
 331 /// \returns True if \p CC is one of the two kernel calling conventions
 332 /// (AMDGPU_KERNEL or SPIR_KERNEL), false otherwise.
 333 LLVM_READNONE
 334 inline bool isKernel(CallingConv::ID CC) {
 335   // Compare against each kernel convention explicitly; every other calling
 336   // convention is not a kernel.
 337   const bool IsAMDGPUKernel = CC == CallingConv::AMDGPU_KERNEL;
 338   const bool IsSPIRKernel = CC == CallingConv::SPIR_KERNEL;
 339   return IsAMDGPUKernel || IsSPIRKernel;
 340 }
 341
 342 bool hasXNACK(const MCSubtargetInfo &STI);
 343 bool hasMIMG_R128(const MCSubtargetInfo &STI);
 344 bool hasPackedD16(const MCSubtargetInfo &STI);
 345
 346 bool isSI(const MCSubtargetInfo &STI);
 347 bool isCI(const MCSubtargetInfo &STI);
 348 bool isVI(const MCSubtargetInfo &STI);
 349 bool isGFX9(const MCSubtargetInfo &STI);
 350
 351 /// \returns True if \p Reg is a scalar register.
 352 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
 353
 354 /// \returns True if there is any intersection between \p Reg0 and \p Reg1.
 355 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
 356
 357 /// If \p Reg is a pseudo reg, return the correct hardware register given
 358 /// \p STI otherwise return \p Reg.
 359 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
 360
 361 /// Convert hardware register \p Reg to a pseudo register
 362 LLVM_READNONE
 363 unsigned mc2PseudoReg(unsigned Reg);
 364
 365 /// Can this operand also contain immediate values?
 366 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
 367
 368 /// Is this a floating-point operand?
 369 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
 370
 371 /// Does this operand support only inlinable literals?
 372 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
 373
 374 /// Get the size in bits of a register from the register class with ID \p RCID.
 375 unsigned getRegBitWidth(unsigned RCID);
 376
 377 /// Get the size in bits of a register from the register class \p RC.
 378 unsigned getRegBitWidth(const MCRegisterClass &RC);
 379
 380 /// Get size of register operand
 381 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
 382                            unsigned OpNo);
 383
 384 LLVM_READNONE
 385 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
 386   switch (OpInfo.OperandType) {
 387   case AMDGPU::OPERAND_REG_IMM_INT32:
 388   case AMDGPU::OPERAND_REG_IMM_FP32:
 389   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
 390   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
 391     return 4;
 392
 393   case AMDGPU::OPERAND_REG_IMM_INT64:
 394   case AMDGPU::OPERAND_REG_IMM_FP64:
 395   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
 396   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
 397     return 8;
 398
 399   case AMDGPU::OPERAND_REG_IMM_INT16:
 400   case AMDGPU::OPERAND_REG_IMM_FP16:
 401   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
 402   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
 403   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
 404   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
 405     return 2;
 406
 407   default:
 408     llvm_unreachable("unhandled operand type");
 409   }
 410 }
 411
 412 LLVM_READNONE
 413 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
 414   return getOperandSize(Desc.OpInfo[OpNo]);
 415 }
 416
 417 /// Is this literal inlinable
 418 LLVM_READNONE
 419 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
 420
 421 LLVM_READNONE
 422 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
 423
 424 LLVM_READNONE
 425 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
 426
 427 LLVM_READNONE
 428 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
 429
 430 bool isArgPassedInSGPR(const Argument *Arg);
 431
 432 /// \returns The encoding that will be used for \p ByteOffset in the SMRD
 433 /// offset field.
 434 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 435
 436 /// \returns true if this offset is small enough to fit in the SMRD
 437 /// offset field.  \p ByteOffset should be the offset in bytes and
 438 /// not the encoded offset.
 439 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
 440
 441 /// \returns true if the intrinsic is divergent
 442 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
 443
 444 } // end namespace AMDGPU
 445 } // end namespace llvm
 446
 447 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H