//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file declares a TargetTransformInfo::Concept conforming object
/// specific to the AMDGPU target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//
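//
// Usage sketch (illustrative only, not part of this header): passes reach
// these implementations through the generic TargetTransformInfo wrapper
// rather than instantiating them directly, e.g.:
//
//   TargetTransformInfo TTI = TM->getTargetTransformInfo(F);
//   if (TTI.hasBranchDivergence()) {
//     // Make divergence-aware decisions for AMDGPU code.
//   }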

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

namespace llvm {

class AMDGPUTargetLowering;
class Loop;
class ScalarEvolution;
class Type;
class Value;

class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      TargetTriple(TM->getTargetTriple()) {}

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
};

class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphicsShader;

  const FeatureBitset InlineFeatureIgnoreList = {
    // Codegen control options which don't matter.
    AMDGPU::FeatureEnableLoadStoreOpt,
    AMDGPU::FeatureEnableSIScheduler,
    AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
    AMDGPU::FeatureFlatForGlobal,
    AMDGPU::FeaturePromoteAlloca,
    AMDGPU::FeatureUnalignedBufferAccess,
    AMDGPU::FeatureUnalignedScratchAccess,

    AMDGPU::FeatureAutoWaitcntBeforeBarrier,

    // Property of the kernel/environment which can't actually differ.
    AMDGPU::FeatureSGPRInitBug,
    AMDGPU::FeatureXNACK,
    AMDGPU::FeatureTrapHandler,
    AMDGPU::FeatureCodeObjectV3,

    // The default assumption needs to be that ECC is enabled, but no directly
    // exposed operations depend on it, so it can be safely ignored when
    // checking inline compatibility.
    AMDGPU::FeatureSRAMECC,

    // Perf-tuning features
    AMDGPU::FeatureFastFMAF32,
    AMDGPU::HalfRate64Ops
  };
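
  // Sketch of how this list is meant to be used (illustrative; the actual
  // logic lives in areInlineCompatible() in AMDGPUTargetTransformInfo.cpp):
  // the ignored bits are masked out before comparing caller and callee
  // subtarget features, so mismatches in them do not block inlining:
  //
  //   FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
  //   FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
  //   return (RealCallerBits & RealCalleeBits) == RealCalleeBits;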

  const GCNSubtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost() {
    return 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
  // should be 2 or 4.
  static inline int getQuarterRateInstrCost() {
    return 3 * TargetTransformInfo::TCC_Basic;
  }

  // On some parts, normal fp64 operations are half rate, and others
  // quarter. This also applies to some integer operations.
  inline int get64BitInstrCost() const {
    return ST->hasHalfRate64Ops() ?
      getHalfRateInstrCost() : getQuarterRateInstrCost();
  }
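
  // Worked example (illustrative only): since TCC_Basic == 1 in
  // TargetTransformInfo, a 64-bit operation costs 2 on a subtarget with the
  // HalfRate64Ops feature and 3 otherwise:
  //
  //   int Cost = get64BitInstrCost(); // hasHalfRate64Ops() ? 2 : 3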

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const GCNSubtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F),
      IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}

  bool hasBranchDivergence() { return true; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
                                  unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  int getArithmeticInstrCost(
    unsigned Opcode, Type *Ty,
    TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
    TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
    TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
    TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
    ArrayRef<const Value *> Args = ArrayRef<const Value *>());

  unsigned getCFInstrCost(unsigned Opcode);

  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running the InferAddressSpaces pass on graphics shaders,
    // which don't use flat addressing.
    if (IsGraphicsShader)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }
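
  // Sketch (assumption, not code from this header): a consumer such as the
  // InferAddressSpaces pass treats ~0u as "no flat address space", so the
  // return of -1 above (which wraps to ~0u as an unsigned) effectively
  // disables address-space inference for graphics shaders:
  //
  //   if (TTI.getFlatAddressSpace() == ~0u)
  //     return false; // no flat address space to infer into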

  unsigned getVectorSplitCost() { return 0; }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 7; }

  int getInlinerVectorBonusPercent() { return 0; }

  int getArithmeticReductionCost(unsigned Opcode,
                                 Type *Ty,
                                 bool IsPairwise);
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm,
                             bool IsUnsigned);
};

class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
    : BaseT(TM, F.getParent()->getDataLayout()),
      ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
      TLI(ST->getTargetLowering()),
      CommonTTI(TM, F) {}

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  unsigned getRegisterBitWidth(bool Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  unsigned getCFInstrCost(unsigned Opcode);
  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H