//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
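
  /// Check whether \p Opcode applied to \p Ty with operands \p Args matches a
  /// pattern AArch64 can lower to a widening instruction, where the extension
  /// of the narrower operands folds into the arithmetic (e.g. the
  /// [us]addl/[us]subl family). Note: this summary is inferred from the
  /// declaration; the matching logic itself lives in the .cpp implementation.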
  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
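
  /// Inlining compatibility hook. In the AArch64 implementation (not shown
  /// here) the callee is considered inline-compatible when its subtarget
  /// features are a subset of the caller's, so inlined code never relies on
  /// features the caller lacks; this description of the .cpp body is an
  /// editorial summary, not part of the original header.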
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  int getIntImmCost(int64_t Val);
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind);
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      // 32 vector registers (V0-V31) when NEON is available.
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    // 31 general-purpose registers (X0-X30).
    return 31;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasSVE())
        return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

  unsigned getMinVectorRegisterBitWidth() {
    return ST->getMinVectorRegisterBitWidth();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index);

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);

  int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
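
  // Masked loads and stores are only formed for SVE predication: the check
  // below requires a scalable vector whose element type is one the SVE
  // load/store instructions handle directly (8/16/32/64-bit integers, or
  // bf16/f16/f32/f64).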
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
      return false;

    Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
    if (Ty->isBFloatTy() || Ty->isHalfTy() ||
        Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
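
  // Worked example for the check above: a <4 x i32> nontemporal store has
  // NumElements = 4 (a power of 2 > 1) and EltSize = 32, so each <2 x i32>
  // half fits in a 64-bit register and the store lowers to a single STNP of
  // two D registers. A <3 x i32> store fails the power-of-2 test and falls
  // back to the default handling.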

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);
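
  /// Return true when a vector reduction intrinsic should be expanded into a
  /// shuffle-based sequence in IR (by the generic ExpandReductions pass)
  /// instead of being handed directly to the backend.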
  bool shouldExpandReduction(const IntrinsicInst *II) const {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul:
      // We don't have legalization support for ordered FP reductions.
      return !II->getFastMathFlags().allowReassoc();

    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      // Lowering asserts that there are no NaNs.
      return !II->getFastMathFlags().noNaNs();

    default:
      // Don't expand anything else, let legalization deal with it.
      return false;
    }
  }
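
  // Cost (to GlobalISel) of rematerializing a global's address relative to
  // reloading it from the stack. The value 2 below plausibly reflects the
  // usual two-instruction ADRP + ADD materialization sequence on AArch64;
  // that rationale is an editorial assumption, not taken from the original
  // source.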
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  int getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);

  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H