llvm/lib/Target/ARM/ARMTargetTransformInfo.h

   1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This file defines a TargetTransformInfo::Concept conforming object specific
  11 /// to the ARM target machine. It uses the target's detailed information to
  12 /// provide more precise answers to certain TTI queries, while letting the
  13 /// target independent and default TTI implementations handle the rest.
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
  19
  20 #include "ARM.h"
  21 #include "ARMSubtarget.h"
  22 #include "ARMTargetMachine.h"
  23 #include "llvm/ADT/ArrayRef.h"
  24 #include "llvm/Analysis/TargetTransformInfo.h"
  25 #include "llvm/CodeGen/BasicTTIImpl.h"
  26 #include "llvm/IR/Constant.h"
  27 #include "llvm/IR/Function.h"
  28 #include "llvm/MC/SubtargetFeature.h"
  29
  30 namespace llvm {
  31
  32 class APInt;
  33 class ARMTargetLowering;
  34 class Instruction;
  35 class Loop;
  36 class SCEV;
  37 class ScalarEvolution;
  38 class Type;
  39 class Value;
  40
  41 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
       // ARM implementation of the TTI hooks. The base class is instantiated
       // with this class as its template argument (CRTP), and this class
       // supplies ARM-specific answers for the queries it declares below.
       // Hooks that have no body here are only declared in this header; their
       // definitions are not part of this file.
  42   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  43   using TTI = TargetTransformInfo;
  44
       // Grants the base class access to the private members of this class
       // (presumably for getST()/getTLI() -- confirm against BasicTTIImplBase).
  45   friend BaseT;
  46
       // Subtarget chosen for the function given to the constructor, and the
       // target lowering object obtained from that subtarget. Both are set once
       // in the constructor's initializer list.
  47   const ARMSubtarget *ST;
  48   const ARMTargetLowering *TLI;
  49
  50   // Currently the following features are excluded from InlineFeatureWhitelist.
  51   // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  52   // Depending on whether they are set or unset, different
  53   // instructions/registers are available. For example, inlining a callee with
  54   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
  55   // fail if the callee uses ARM only instructions, e.g. in inline asm.
       //
       // NOTE(review): this is a non-static data member, so each ARMTTIImpl
       // object holds its own copy of the set. It is presumably consumed by
       // areInlineCompatible(), whose definition is not in this header -- confirm.
  56   const FeatureBitset InlineFeatureWhitelist = {
  57       ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
  58       ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
  59       ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
  60       ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
  61       ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
  62       ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
  63       ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
  64       ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
  65       ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
  66       ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
  67       ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
  68       ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
  69       ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
  70       ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
  71       ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
  72       ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
  73       ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
  74       ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
  75       ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
  76       ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
  77       ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
  78       ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
  79       ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
  80       ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  81   };
  82
       // Private accessors for the cached subtarget and target lowering
       // pointers; they simply return the members set in the constructor.
  83   const ARMSubtarget *getST() const { return ST; }
  84   const ARMTargetLowering *getTLI() const { return TLI; }
  85
  86 public:
       /// Builds the ARM TTI object for function \p F. The base class receives
       /// the target machine and the data layout of F's parent module; the
       /// subtarget is looked up from \p TM for \p F, and the target lowering
       /// is taken from that subtarget.
  87   explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
  88       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
  89         TLI(ST->getTargetLowering()) {}
  90
       /// Inlining compatibility query for a caller/callee pair. Only declared
       /// here; the definition is not part of this header.
  91   bool areInlineCompatible(const Function *Caller,
  92                            const Function *Callee) const;
  93
       /// Unconditionally reports that interleaved access vectorization is
       /// enabled.
  94   bool enableInterleavedAccessVectorization() { return true; }
  95
       /// Returns false when the function containing the header of \p L is
       /// marked for size optimization. Otherwise returns true only if the
       /// subtarget is M-class, in Thumb2 mode, and \p L has exactly one block.
  96   bool shouldFavorBackedgeIndex(const Loop *L) const {
  97     if (L->getHeader()->getParent()->hasOptSize())
  98       return false;
  99     return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
 100   }
 101
 102   /// Floating-point computation using ARMv8 AArch32 Advanced
 103   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
 104   /// and Arm MVE are IEEE-754 compliant.
       ///
       /// Returns true unless the target is Darwin or the subtarget has MVE
       /// floating-point operations.
 105   bool isFPVectorizationPotentiallyUnsafe() {
 106     return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
 107   }
 108
 109   /// \name Scalar TTI Implementations
 110   /// @{
 111
       // Integer-immediate cost hooks. All three are only declared here; their
       // definitions are not part of this header.
 112   int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
 113                             Type *Ty);
 114
       // Keep the base class overloads of getIntImmCost visible next to the
       // overload declared below, which would otherwise hide them.
 115   using BaseT::getIntImmCost;
 116   int getIntImmCost(const APInt &Imm, Type *Ty);
 117
 118   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
 119
 120   /// @}
 121
 122   /// \name Vector TTI Implementations
 123   /// @{
 124
       /// Number of registers reported for register class \p ClassID.
       /// A ClassID of 1 is treated as the vector class: 16 with NEON, else 8
       /// with MVE integer operations, else 0. Any other ClassID is treated as
       /// scalar: 8 on Thumb1-only subtargets, 13 otherwise.
 125   unsigned getNumberOfRegisters(unsigned ClassID) const {
         // NOTE(review): the literal 1 presumably matches the generic TTI
         // vector register class ID -- confirm against the base implementation.
 126     bool Vector = (ClassID == 1);
 127     if (Vector) {
           // NEON is tested first, so it takes precedence over MVE.
 128       if (ST->hasNEON())
 129         return 16;
 130       if (ST->hasMVEIntegerOps())
 131         return 8;
 132       return 0;
 133     }
 134
 135     if (ST->isThumb1Only())
 136       return 8;
 137     return 13;
 138   }
 139
       /// Register width in bits. Vector registers are reported as 128 bits
       /// wide when either NEON or MVE integer operations are available and as
       /// 0 otherwise; scalar registers are always reported as 32 bits.
 140   unsigned getRegisterBitWidth(bool Vector) const {
 141     if (Vector) {
 142       if (ST->hasNEON())
 143         return 128;
 144       if (ST->hasMVEIntegerOps())
 145         return 128;
 146       return 0;
 147     }
 148
 149     return 32;
 150   }
 151
       /// Forwards to the subtarget's maximum interleave factor; \p VF is not
       /// consulted.
 152   unsigned getMaxInterleaveFactor(unsigned VF) {
 153     return ST->getMaxInterleaveFactor();
 154   }
 155
       /// Masked load legality query. Only declared here; the definition is
       /// not part of this header.
 156   bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);
 157
       /// Masked stores are legal exactly when a masked load of the same type
       /// and alignment is legal.
 158   bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
 159     return isLegalMaskedLoad(DataTy, Alignment);
 160   }
 161
       /// Masked gather legality query. Only declared here; the definition is
       /// not part of this header.
 162   bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment);
 163
       /// Masked scatters are never reported as legal; both parameters are
       /// ignored.
 164   bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; }
 165
       // Cost and reduction hooks that are only declared here; their
       // definitions are not part of this header.
 166   int getMemcpyCost(const Instruction *I);
 167
 168   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
 169
 170   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
 171                              TTI::ReductionFlags Flags) const;
 172
       /// Decides whether the reduction intrinsic \p II should be expanded
       /// rather than left for legalization.
       ///  - fadd/fmul reductions: expanded when reassociation is not allowed,
       ///    when soft float is in use, or when the subtarget has no FP
       ///    registers.
       ///  - fmin/fmax reductions: expanded when soft float is in use, when the
       ///    subtarget has no FP registers, or when the no-NaNs flag is absent.
       ///  - every other intrinsic: never expanded here.
 173   bool shouldExpandReduction(const IntrinsicInst *II) const {
 174     switch (II->getIntrinsicID()) {
 175     case Intrinsic::experimental_vector_reduce_v2_fadd:
 176     case Intrinsic::experimental_vector_reduce_v2_fmul:
 177       // We don't have legalization support for ordered FP reductions.
 178       if (!II->getFastMathFlags().allowReassoc())
 179         return true;
 180       // Can't legalize reductions with soft floats.
 181       return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
 182
 183     case Intrinsic::experimental_vector_reduce_fmin:
 184     case Intrinsic::experimental_vector_reduce_fmax:
 185       // Can't legalize reductions with soft floats, and NoNan will create
 186       // fminimum which we do not know how to lower.
           // NOTE(review): the expression below requests expansion when the
           // no-NaNs flag is *absent*, so "NoNan" above presumably means "the
           // lack of NoNaN" -- confirm and reword.
 187       return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
 188              !II->getFastMathFlags().noNaNs();
 189
 190     default:
 191       // Don't expand anything else, let legalization deal with it.
 192       return false;
 193     }
 194   }
 195
       // Instruction cost hooks. All are only declared here; their definitions
       // are not part of this header.
 196   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 197                        const Instruction *I = nullptr);
 198
 199   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
 200                          const Instruction *I = nullptr);
 201
 202   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 203
 204   int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
 205                                 const SCEV *Ptr);
 206
 207   int getArithmeticInstrCost(
 208       unsigned Opcode, Type *Ty,
 209       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
 210       TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
 211       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
 212       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
 213       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
 214       const Instruction *CxtI = nullptr);
 215
 216   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
 217                       unsigned AddressSpace, const Instruction *I = nullptr);
 218
 219   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
 220                                  ArrayRef<unsigned> Indices, unsigned Alignment,
 221                                  unsigned AddressSpace,
 222                                  bool UseMaskForCond = false,
 223                                  bool UseMaskForGaps = false);
 224
       // Call-lowering and loop-related hooks. All are only declared here;
       // their definitions are not part of this header.
       // Note that the TLI parameter of preferPredicateOverEpilogue is a
       // TargetLibraryInfo pointer and shadows the TLI member within that
       // function's scope.
 225   bool isLoweredToCall(const Function *F);
 226   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
 227                                 AssumptionCache &AC,
 228                                 TargetLibraryInfo *LibInfo,
 229                                 HardwareLoopInfo &HWLoopInfo);
 230   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
 231                                    ScalarEvolution &SE,
 232                                    AssumptionCache &AC,
 233                                    TargetLibraryInfo *TLI,
 234                                    DominatorTree *DT,
 235                                    const LoopAccessInfo *LAI);
 236   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 237                                TTI::UnrollingPreferences &UP);
 238
       /// Returns whether constant \p C may be placed in a switch lookup
       /// table. Under ROPI or RWPI this is true only when \p C needs no
       /// relocation; otherwise it is always true.
 239   bool shouldBuildLookupTablesForConstant(Constant *C) const {
 240     // In the ROPI and RWPI relocation models we can't have pointers to global
 241     // variables or functions in constant data, so don't convert switches to
 242     // lookup tables if any of the values would need relocation.
 243     if (ST->isROPI() || ST->isRWPI())
 244       return !C->needsRelocation();
 245
 246     return true;
 247   }
 248   /// @}
 249 };
 250
 251 } // end namespace llvm
 252
 253 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H