//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
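
  // Note (descriptive comment, added for clarity): the enumerators below
  // describe the two-, three- and four-element structured memory operations,
  // i.e. the NEON ld2/st2, ld3/st3 and ld4/st4 intrinsic families that
  // getTgtMemIntrinsic() recognizes.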
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(64);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->useSVEForFixedLengthVectors())
        return TypeSize::getFixed(
            std::max(ST->getMinSVEVectorSizeInBits(), 128u));
      return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
    }
    llvm_unreachable("Unsupported register kind");
  }
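  // Illustrative example: a plain NEON target reports 64-bit scalar registers
  // and 128-bit fixed-width vectors, while a target configured with, say, a
  // 512-bit minimum SVE vector length reports 512-bit fixed-width vectors
  // once SVE is used for fixed-length vectorization.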

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  Optional<unsigned> getMaxVScale() const {
    if (ST->hasSVE())
      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
    return BaseT::getMaxVScale();
  }
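  // Worked example: with the architectural limits SVEMaxBitsPerVector = 2048
  // and SVEBitsPerBlock = 128, the maximum vscale reported here is
  // 2048 / 128 = 16.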

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();
    Optional<unsigned> MaxNumVScale = getMaxVScale();
    assert(MaxNumVScale && "Expected valid max vscale value");
    return *MaxNumVScale * VF.getKnownMinValue();
  }
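  // Worked example: for VF = <vscale x 4 x i32> and a maximum vscale of 16,
  // this returns 16 * 4 = 64, the pessimistic upper bound on the number of
  // lanes scalarization would have to cover.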

  unsigned getMaxInterleaveFactor(unsigned VF);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return !DataType->getScalarType()->isIntegerTy(1) &&
           isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
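  // Illustrative example: a masked load of <vscale x 4 x float> is legal on
  // an SVE target, an i1-element vector is always rejected, and a fixed-width
  // type such as <4 x i32> is only accepted when SVE is used for fixed-length
  // vectors.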

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return !DataType->getScalarType()->isIntegerTy(1) &&
           isElementTypeLegalForScalableVector(DataType->getScalarType());
  }
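  // Illustrative example: a gather of <vscale x 2 x double> is legal with
  // SVE, while a single-element fixed vector such as <1 x i64> is rejected
  // (there is nothing to gather).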

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
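  // For example: a nontemporal store of <4 x i32> (128 bits) can be lowered
  // to an STNP of two 64-bit halves, while <3 x i32> is rejected because its
  // element count is not a power of 2.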

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H