contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

   1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This file defines a TargetTransformInfo::Concept conforming object specific
  10 /// to the AArch64 target machine. It uses the target's detailed information to
  11 /// provide more precise answers to certain TTI queries, while letting the
  12 /// target independent and default TTI implementations handle the rest.
  13 ///
  14 //===----------------------------------------------------------------------===//
  15
  16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
  17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
  18
  19 #include "AArch64.h"
  20 #include "AArch64Subtarget.h"
  21 #include "AArch64TargetMachine.h"
  22 #include "llvm/ADT/ArrayRef.h"
  23 #include "llvm/Analysis/TargetTransformInfo.h"
  24 #include "llvm/CodeGen/BasicTTIImpl.h"
  25 #include "llvm/IR/Function.h"
  26 #include "llvm/IR/Intrinsics.h"
  27 #include <cstdint>
  28
  29 namespace llvm {
  30
  31 class APInt;
  32 class Instruction;
  33 class IntrinsicInst;
  34 class Loop;
  35 class SCEV;
  36 class ScalarEvolution;
  37 class Type;
  38 class Value;
  39 class VectorType;
  40
  41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  42   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  43   using TTI = TargetTransformInfo;
  44
  45   friend BaseT;
  46
  47   const AArch64Subtarget *ST;
  48   const AArch64TargetLowering *TLI;
  49
  50   const AArch64Subtarget *getST() const { return ST; }
  51   const AArch64TargetLowering *getTLI() const { return TLI; }
  52
  53   enum MemIntrinsicType {
  54     VECTOR_LDST_TWO_ELEMENTS,
  55     VECTOR_LDST_THREE_ELEMENTS,
  56     VECTOR_LDST_FOUR_ELEMENTS
  57   };
  58
  59   bool isWideningInstruction(Type *Ty, unsigned Opcode,
  60                              ArrayRef<const Value *> Args);
  61
  62 public:
  63   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
  64       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
  65         TLI(ST->getTargetLowering()) {}
  66
  67   bool areInlineCompatible(const Function *Caller,
  68                            const Function *Callee) const;
  69
  70   /// \name Scalar TTI Implementations
  71   /// @{
  72
  73   using BaseT::getIntImmCost;
  74   InstructionCost getIntImmCost(int64_t Val);
  75   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  76                                 TTI::TargetCostKind CostKind);
  77   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
  78                                     const APInt &Imm, Type *Ty,
  79                                     TTI::TargetCostKind CostKind,
  80                                     Instruction *Inst = nullptr);
  81   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
  82                                       const APInt &Imm, Type *Ty,
  83                                       TTI::TargetCostKind CostKind);
  84   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
  85
  86   /// @}
  87
  88   /// \name Vector TTI Implementations
  89   /// @{
  90
  91   bool enableInterleavedAccessVectorization() { return true; }
  92
  93   unsigned getNumberOfRegisters(unsigned ClassID) const {
  94     bool Vector = (ClassID == 1);
  95     if (Vector) {
  96       if (ST->hasNEON())
  97         return 32;
  98       return 0;
  99     }
 100     return 31;
 101   }
 102
 103   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 104                                         TTI::TargetCostKind CostKind);
 105
 106   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
 107                                                IntrinsicInst &II) const;
 108
 109   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
 110     switch (K) {
 111     case TargetTransformInfo::RGK_Scalar:
 112       return TypeSize::getFixed(64);
 113     case TargetTransformInfo::RGK_FixedWidthVector:
 114       if (ST->hasSVE())
 115         return TypeSize::getFixed(
 116             std::max(ST->getMinSVEVectorSizeInBits(), 128u));
 117       return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
 118     case TargetTransformInfo::RGK_ScalableVector:
 119       return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
 120     }
 121     llvm_unreachable("Unsupported register kind");
 122   }
 123
 124   unsigned getMinVectorRegisterBitWidth() const {
 125     return ST->getMinVectorRegisterBitWidth();
 126   }
 127
 128   Optional<unsigned> getMaxVScale() const {
 129     if (ST->hasSVE())
 130       return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
 131     return BaseT::getMaxVScale();
 132   }
 133
 134   /// Try to return an estimate cost factor that can be used as a multiplier
 135   /// when scalarizing an operation for a vector with ElementCount \p VF.
 136   /// For scalable vectors this currently takes the most pessimistic view based
 137   /// upon the maximum possible value for vscale.
 138   unsigned getMaxNumElements(ElementCount VF) const {
 139     if (!VF.isScalable())
 140       return VF.getFixedValue();
 141     Optional<unsigned> MaxNumVScale = getMaxVScale();
 142     assert(MaxNumVScale && "Expected valid max vscale value");
 143     return *MaxNumVScale * VF.getKnownMinValue();
 144   }
 145
 146   unsigned getMaxInterleaveFactor(unsigned VF);
 147
 148   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 149                                         Align Alignment, unsigned AddressSpace,
 150                                         TTI::TargetCostKind CostKind);
 151
 152   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 153                                          const Value *Ptr, bool VariableMask,
 154                                          Align Alignment,
 155                                          TTI::TargetCostKind CostKind,
 156                                          const Instruction *I = nullptr);
 157
 158   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 159                                    TTI::CastContextHint CCH,
 160                                    TTI::TargetCostKind CostKind,
 161                                    const Instruction *I = nullptr);
 162
 163   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
 164                                            VectorType *VecTy, unsigned Index);
 165
 166   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
 167                                  const Instruction *I = nullptr);
 168
 169   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
 170                                      unsigned Index);
 171
 172   InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
 173                                          bool IsUnsigned,
 174                                          TTI::TargetCostKind CostKind);
 175
 176   InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
 177                                                 VectorType *ValTy,
 178                                                 TTI::TargetCostKind CostKind);
 179
 180   InstructionCost getSpliceCost(VectorType *Tp, int Index);
 181
 182   InstructionCost getArithmeticInstrCost(
 183       unsigned Opcode, Type *Ty,
 184       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
 185       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
 186       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
 187       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
 188       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
 189       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
 190       const Instruction *CxtI = nullptr);
 191
 192   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 193                                             const SCEV *Ptr);
 194
 195   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
 196                                      CmpInst::Predicate VecPred,
 197                                      TTI::TargetCostKind CostKind,
 198                                      const Instruction *I = nullptr);
 199
 200   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
 201                                                     bool IsZeroCmp) const;
 202   bool useNeonVector(const Type *Ty) const;
 203
 204   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
 205                                   MaybeAlign Alignment, unsigned AddressSpace,
 206                                   TTI::TargetCostKind CostKind,
 207                                   const Instruction *I = nullptr);
 208
 209   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
 210
 211   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 212                                TTI::UnrollingPreferences &UP);
 213
 214   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
 215                              TTI::PeelingPreferences &PP);
 216
 217   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
 218                                            Type *ExpectedType);
 219
 220   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
 221
 222   bool isElementTypeLegalForScalableVector(Type *Ty) const {
 223     if (Ty->isPointerTy())
 224       return true;
 225
 226     if (Ty->isBFloatTy() && ST->hasBF16())
 227       return true;
 228
 229     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
 230       return true;
 231
 232     if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
 233         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
 234       return true;
 235
 236     return false;
 237   }
 238
 239   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
 240     if (!ST->hasSVE())
 241       return false;
 242
 243     // For fixed vectors, avoid scalarization if using SVE for them.
 244     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
 245       return false; // Fall back to scalarization of masked operations.
 246
 247     return !DataType->getScalarType()->isIntegerTy(1) &&
 248            isElementTypeLegalForScalableVector(DataType->getScalarType());
 249   }
 250
 251   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
 252     return isLegalMaskedLoadStore(DataType, Alignment);
 253   }
 254
 255   bool isLegalMaskedStore(Type *DataType, Align Alignment) {
 256     return isLegalMaskedLoadStore(DataType, Alignment);
 257   }
 258
 259   bool isLegalMaskedGatherScatter(Type *DataType) const {
 260     if (!ST->hasSVE())
 261       return false;
 262
 263     // For fixed vectors, scalarize if not using SVE for them.
 264     auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
 265     if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
 266                          DataTypeFVTy->getNumElements() < 2))
 267       return false;
 268
 269     return !DataType->getScalarType()->isIntegerTy(1) &&
 270            isElementTypeLegalForScalableVector(DataType->getScalarType());
 271   }
 272
 273   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
 274     return isLegalMaskedGatherScatter(DataType);
 275   }
 276   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
 277     return isLegalMaskedGatherScatter(DataType);
 278   }
 279
 280   bool isLegalNTStore(Type *DataType, Align Alignment) {
 281     // NOTE: The logic below is mostly geared towards LV, which calls it with
 282     //       vectors with 2 elements. We might want to improve that, if other
 283     //       users show up.
 284     // Nontemporal vector stores can be directly lowered to STNP, if the vector
 285     // can be halved so that each half fits into a register. That's the case if
 286     // the element type fits into a register and the number of elements is a
 287     // power of 2 > 1.
 288     if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
 289       unsigned NumElements =
 290           cast<FixedVectorType>(DataTypeVTy)->getNumElements();
 291       unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
 292       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
 293              EltSize <= 128 && isPowerOf2_64(EltSize);
 294     }
 295     return BaseT::isLegalNTStore(DataType, Alignment);
 296   }
 297
 298   InstructionCost getInterleavedMemoryOpCost(
 299       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
 300       Align Alignment, unsigned AddressSpace,
 301       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
 302       bool UseMaskForCond = false, bool UseMaskForGaps = false);
 303
 304   bool
 305   shouldConsiderAddressTypePromotion(const Instruction &I,
 306                                      bool &AllowPromotionWithoutCommonHeader);
 307
 308   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
 309
 310   unsigned getGISelRematGlobalCost() const {
 311     return 2;
 312   }
 313
 314   bool supportsScalableVectors() const { return ST->hasSVE(); }
 315
 316   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
 317                                    ElementCount VF) const;
 318
 319   InstructionCost getArithmeticReductionCost(
 320       unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
 321       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
 322
 323   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
 324                                  ArrayRef<int> Mask, int Index,
 325                                  VectorType *SubTp);
 326   /// @}
 327 };
 328
 329 } // end namespace llvm
 330
 331 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H