//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
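
  /// Check whether \p Opcode applied to \p Ty with operands \p Args matches a
  /// pattern AArch64 can lower to a widening instruction, where the extension
  /// of the narrower operands folds into the arithmetic (e.g. the
  /// [us]addl/[us]subl family). Note: this summary is inferred from the
  /// declaration; the matching logic itself lives in the .cpp implementation.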
  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
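
  /// Inlining compatibility hook. In the AArch64 implementation (not shown
  /// here) the callee is considered inline-compatible when its subtarget
  /// features are a subset of the caller's, so inlined code never relies on
  /// features the caller lacks; this description of the .cpp body is an
  /// editorial summary, not part of the original header.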
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  int getIntImmCost(int64_t Val);
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind);
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      // 32 vector registers (V0-V31) when NEON is available.
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    // 31 general-purpose registers (X0-X30).
    return 31;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasSVE())
        return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

  unsigned getMinVectorRegisterBitWidth() {
    return ST->getMinVectorRegisterBitWidth();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index);

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);

  int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
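
  // Masked loads and stores are only formed for SVE predication: the check
  // below requires a scalable vector whose element type is one the SVE
  // load/store instructions handle directly (8/16/32/64-bit integers, or
  // bf16/f16/f32/f64).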
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
      return false;

    Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
    if (Ty->isBFloatTy() || Ty->isHalfTy() ||
        Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
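
  // Worked example for the check above: a <4 x i32> nontemporal store has
  // NumElements = 4 (a power of 2 > 1) and EltSize = 32, so each <2 x i32>
  // half fits in a 64-bit register and the store lowers to a single STNP of
  // two D registers. A <3 x i32> store fails the power-of-2 test and falls
  // back to the default handling.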

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);
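
  /// Return true when a vector reduction intrinsic should be expanded into a
  /// shuffle-based sequence in IR (by the generic ExpandReductions pass)
  /// instead of being handed directly to the backend.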
  bool shouldExpandReduction(const IntrinsicInst *II) const {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul:
      // We don't have legalization support for ordered FP reductions.
      return !II->getFastMathFlags().allowReassoc();

    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      // Lowering asserts that there are no NaNs.
      return !II->getFastMathFlags().noNaNs();

    default:
      // Don't expand anything else, let legalization deal with it.
      return false;
    }
  }
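
  // Cost (to GlobalISel) of rematerializing a global's address relative to
  // reloading it from the stack. The value 2 below plausibly reflects the
  // usual two-instruction ADRP + ADD materialization sequence on AArch64;
  // that rationale is an editorial assumption, not taken from the original
  // source.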
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  int getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);

  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H