1 //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;
30 #define DEBUG_TYPE "tti"
32 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
34 cl::desc("Recognize reduction patterns."));
37 /// No-op implementation of the TTI interface using the utility base
40 /// This is used when no target specific information is available.
41 struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
42 explicit NoTTIImpl(const DataLayout &DL)
43 : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
47 bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
48 // If the loop has irreducible control flow, it can not be converted to
50 LoopBlocksRPO RPOT(L);
52 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
57 IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
58 II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {
60 FunctionType *FTy = I.getCalledFunction()->getFunctionType();
61 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
62 Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());
63 if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
64 FMF = FPMO->getFastMathFlags();
67 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
69 II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
71 if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
72 FMF = FPMO->getFastMathFlags();
75 CI.getCalledFunction()->getFunctionType();
76 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
79 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
82 RetTy(CI.getType()), IID(Id), VF(Factor) {
84 if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
85 FMF = FPMO->getFastMathFlags();
87 Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
89 CI.getCalledFunction()->getFunctionType();
90 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
93 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
96 unsigned ScalarCost) :
97 RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
99 if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
100 FMF = FPMO->getFastMathFlags();
102 Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
104 CI.getCalledFunction()->getFunctionType();
105 ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
108 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
109 ArrayRef<Type *> Tys,
110 FastMathFlags Flags) :
111 RetTy(RTy), IID(Id), FMF(Flags) {
112 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
115 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
116 ArrayRef<Type *> Tys,
118 unsigned ScalarCost) :
119 RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
120 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
123 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
124 ArrayRef<Type *> Tys,
127 const IntrinsicInst *I) :
128 II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
129 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
132 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
133 ArrayRef<Type *> Tys) :
134 RetTy(RTy), IID(Id) {
135 ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
138 IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
139 ArrayRef<const Value *> Args)
140 : RetTy(Ty), IID(Id) {
142 Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
143 ParamTys.reserve(Arguments.size());
144 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
145 ParamTys.push_back(Arguments[Idx]->getType());
148 bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
149 LoopInfo &LI, DominatorTree &DT,
150 bool ForceNestedLoop,
151 bool ForceHardwareLoopPHI) {
152 SmallVector<BasicBlock *, 4> ExitingBlocks;
153 L->getExitingBlocks(ExitingBlocks);
155 for (BasicBlock *BB : ExitingBlocks) {
156 // If we pass the updated counter back through a phi, we need to know
157 // which latch the updated value will be coming from.
158 if (!L->isLoopLatch(BB)) {
159 if (ForceHardwareLoopPHI || CounterInReg)
163 const SCEV *EC = SE.getExitCount(L, BB);
164 if (isa<SCEVCouldNotCompute>(EC))
166 if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
167 if (ConstEC->getValue()->isZero())
169 } else if (!SE.isLoopInvariant(EC, L))
172 if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
175 // If this exiting block is contained in a nested loop, it is not eligible
176 // for insertion of the branch-and-decrement since the inner loop would
177 // end up messing up the value in the CTR.
178 if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
181 // We now have a loop-invariant count of loop iterations (which is not the
182 // constant zero) for which we know that this loop will not exit via this
185 // We need to make sure that this block will run on every loop iteration.
186 // For this to be true, we must dominate all blocks with backedges. Such
187 // blocks are in-loop predecessors to the header block.
188 bool NotAlways = false;
189 for (BasicBlock *Pred : predecessors(L->getHeader())) {
190 if (!L->contains(Pred))
193 if (!DT.dominates(BB, Pred)) {
202 // Make sure this blocks ends with a conditional branch.
203 Instruction *TI = BB->getTerminator();
207 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
208 if (!BI->isConditional())
215 // Note that this block may not be the loop latch block, even if the loop
216 // has a latch block.
227 TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
228 : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
230 TargetTransformInfo::~TargetTransformInfo() {}
232 TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
233 : TTIImpl(std::move(Arg.TTIImpl)) {}
235 TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
236 TTIImpl = std::move(RHS.TTIImpl);
240 unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
241 return TTIImpl->getInliningThresholdMultiplier();
244 int TargetTransformInfo::getInlinerVectorBonusPercent() const {
245 return TTIImpl->getInlinerVectorBonusPercent();
248 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
249 ArrayRef<const Value *> Operands,
250 TTI::TargetCostKind CostKind) const {
251 return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
254 unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
255 const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
256 BlockFrequencyInfo *BFI) const {
257 return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
260 int TargetTransformInfo::getUserCost(const User *U,
261 ArrayRef<const Value *> Operands,
262 enum TargetCostKind CostKind) const {
263 int Cost = TTIImpl->getUserCost(U, Operands, CostKind);
264 assert((CostKind == TTI::TCK_RecipThroughput || Cost >= 0) &&
265 "TTI should not produce negative costs!");
269 bool TargetTransformInfo::hasBranchDivergence() const {
270 return TTIImpl->hasBranchDivergence();
273 bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
274 return TTIImpl->useGPUDivergenceAnalysis();
277 bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
278 return TTIImpl->isSourceOfDivergence(V);
281 bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
282 return TTIImpl->isAlwaysUniform(V);
285 unsigned TargetTransformInfo::getFlatAddressSpace() const {
286 return TTIImpl->getFlatAddressSpace();
289 bool TargetTransformInfo::collectFlatAddressOperands(
290 SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
291 return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
294 bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
295 unsigned ToAS) const {
296 return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
299 Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
300 IntrinsicInst *II, Value *OldV, Value *NewV) const {
301 return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
304 bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
305 return TTIImpl->isLoweredToCall(F);
308 bool TargetTransformInfo::isHardwareLoopProfitable(
309 Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
310 TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
311 return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
314 bool TargetTransformInfo::preferPredicateOverEpilogue(
315 Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
316 TargetLibraryInfo *TLI, DominatorTree *DT,
317 const LoopAccessInfo *LAI) const {
318 return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
321 bool TargetTransformInfo::emitGetActiveLaneMask() const {
322 return TTIImpl->emitGetActiveLaneMask();
325 void TargetTransformInfo::getUnrollingPreferences(
326 Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
327 return TTIImpl->getUnrollingPreferences(L, SE, UP);
330 void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
331 PeelingPreferences &PP) const {
332 return TTIImpl->getPeelingPreferences(L, SE, PP);
335 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
336 return TTIImpl->isLegalAddImmediate(Imm);
339 bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
340 return TTIImpl->isLegalICmpImmediate(Imm);
343 bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
345 bool HasBaseReg, int64_t Scale,
347 Instruction *I) const {
348 return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
349 Scale, AddrSpace, I);
352 bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
353 return TTIImpl->isLSRCostLess(C1, C2);
356 bool TargetTransformInfo::isProfitableLSRChainElement(Instruction *I) const {
357 return TTIImpl->isProfitableLSRChainElement(I);
360 bool TargetTransformInfo::canMacroFuseCmp() const {
361 return TTIImpl->canMacroFuseCmp();
364 bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
365 ScalarEvolution *SE, LoopInfo *LI,
366 DominatorTree *DT, AssumptionCache *AC,
367 TargetLibraryInfo *LibInfo) const {
368 return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
371 bool TargetTransformInfo::shouldFavorPostInc() const {
372 return TTIImpl->shouldFavorPostInc();
375 bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
376 return TTIImpl->shouldFavorBackedgeIndex(L);
379 bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
380 Align Alignment) const {
381 return TTIImpl->isLegalMaskedStore(DataType, Alignment);
384 bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
385 Align Alignment) const {
386 return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
389 bool TargetTransformInfo::isLegalNTStore(Type *DataType,
390 Align Alignment) const {
391 return TTIImpl->isLegalNTStore(DataType, Alignment);
394 bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
395 return TTIImpl->isLegalNTLoad(DataType, Alignment);
398 bool TargetTransformInfo::isLegalMaskedGather(Type *DataType,
399 Align Alignment) const {
400 return TTIImpl->isLegalMaskedGather(DataType, Alignment);
403 bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType,
404 Align Alignment) const {
405 return TTIImpl->isLegalMaskedScatter(DataType, Alignment);
408 bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
409 return TTIImpl->isLegalMaskedCompressStore(DataType);
412 bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
413 return TTIImpl->isLegalMaskedExpandLoad(DataType);
416 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
417 return TTIImpl->hasDivRemOp(DataType, IsSigned);
420 bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
421 unsigned AddrSpace) const {
422 return TTIImpl->hasVolatileVariant(I, AddrSpace);
425 bool TargetTransformInfo::prefersVectorizedAddressing() const {
426 return TTIImpl->prefersVectorizedAddressing();
429 int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
431 bool HasBaseReg, int64_t Scale,
432 unsigned AddrSpace) const {
433 int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
435 assert(Cost >= 0 && "TTI should not produce negative costs!");
439 bool TargetTransformInfo::LSRWithInstrQueries() const {
440 return TTIImpl->LSRWithInstrQueries();
443 bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
444 return TTIImpl->isTruncateFree(Ty1, Ty2);
447 bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
448 return TTIImpl->isProfitableToHoist(I);
451 bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
453 bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
454 return TTIImpl->isTypeLegal(Ty);
457 bool TargetTransformInfo::shouldBuildLookupTables() const {
458 return TTIImpl->shouldBuildLookupTables();
460 bool TargetTransformInfo::shouldBuildLookupTablesForConstant(
462 return TTIImpl->shouldBuildLookupTablesForConstant(C);
465 bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
466 return TTIImpl->useColdCCForColdCall(F);
470 TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
471 const APInt &DemandedElts,
472 bool Insert, bool Extract) const {
473 return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
476 unsigned TargetTransformInfo::getOperandsScalarizationOverhead(
477 ArrayRef<const Value *> Args, unsigned VF) const {
478 return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
481 bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
482 return TTIImpl->supportsEfficientVectorElementLoadStore();
485 bool TargetTransformInfo::enableAggressiveInterleaving(
486 bool LoopHasReductions) const {
487 return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
490 TargetTransformInfo::MemCmpExpansionOptions
491 TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
492 return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
495 bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
496 return TTIImpl->enableInterleavedAccessVectorization();
499 bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
500 return TTIImpl->enableMaskedInterleavedAccessVectorization();
503 bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
504 return TTIImpl->isFPVectorizationPotentiallyUnsafe();
507 bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
509 unsigned AddressSpace,
512 return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
513 AddressSpace, Alignment, Fast);
516 TargetTransformInfo::PopcntSupportKind
517 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
518 return TTIImpl->getPopcntSupport(IntTyWidthInBit);
521 bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
522 return TTIImpl->haveFastSqrt(Ty);
525 bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
526 return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
529 int TargetTransformInfo::getFPOpCost(Type *Ty) const {
530 int Cost = TTIImpl->getFPOpCost(Ty);
531 assert(Cost >= 0 && "TTI should not produce negative costs!");
535 int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
538 int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
539 assert(Cost >= 0 && "TTI should not produce negative costs!");
543 int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
544 TTI::TargetCostKind CostKind) const {
545 int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
546 assert(Cost >= 0 && "TTI should not produce negative costs!");
551 TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
552 const APInt &Imm, Type *Ty,
553 TTI::TargetCostKind CostKind) const {
554 int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
555 assert(Cost >= 0 && "TTI should not produce negative costs!");
560 TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
561 const APInt &Imm, Type *Ty,
562 TTI::TargetCostKind CostKind) const {
563 int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
564 assert(Cost >= 0 && "TTI should not produce negative costs!");
568 unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
569 return TTIImpl->getNumberOfRegisters(ClassID);
572 unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
574 return TTIImpl->getRegisterClassForType(Vector, Ty);
577 const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
578 return TTIImpl->getRegisterClassName(ClassID);
581 unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
582 return TTIImpl->getRegisterBitWidth(Vector);
585 unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
586 return TTIImpl->getMinVectorRegisterBitWidth();
589 bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
590 return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
593 unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
594 return TTIImpl->getMinimumVF(ElemWidth);
597 bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
598 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
599 return TTIImpl->shouldConsiderAddressTypePromotion(
600 I, AllowPromotionWithoutCommonHeader);
603 unsigned TargetTransformInfo::getCacheLineSize() const {
604 return TTIImpl->getCacheLineSize();
607 llvm::Optional<unsigned>
608 TargetTransformInfo::getCacheSize(CacheLevel Level) const {
609 return TTIImpl->getCacheSize(Level);
612 llvm::Optional<unsigned>
613 TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
614 return TTIImpl->getCacheAssociativity(Level);
617 unsigned TargetTransformInfo::getPrefetchDistance() const {
618 return TTIImpl->getPrefetchDistance();
621 unsigned TargetTransformInfo::getMinPrefetchStride(
622 unsigned NumMemAccesses, unsigned NumStridedMemAccesses,
623 unsigned NumPrefetches, bool HasCall) const {
624 return TTIImpl->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
625 NumPrefetches, HasCall);
628 unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
629 return TTIImpl->getMaxPrefetchIterationsAhead();
632 bool TargetTransformInfo::enableWritePrefetching() const {
633 return TTIImpl->enableWritePrefetching();
636 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
637 return TTIImpl->getMaxInterleaveFactor(VF);
640 TargetTransformInfo::OperandValueKind
641 TargetTransformInfo::getOperandInfo(const Value *V,
642 OperandValueProperties &OpProps) {
643 OperandValueKind OpInfo = OK_AnyValue;
646 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
647 if (CI->getValue().isPowerOf2())
648 OpProps = OP_PowerOf2;
649 return OK_UniformConstantValue;
652 // A broadcast shuffle creates a uniform value.
653 // TODO: Add support for non-zero index broadcasts.
654 // TODO: Add support for different source vector width.
655 if (const auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
656 if (ShuffleInst->isZeroEltSplat())
657 OpInfo = OK_UniformValue;
659 const Value *Splat = getSplatValue(V);
661 // Check for a splat of a constant or for a non uniform vector of constants
662 // and check if the constant(s) are all powers of two.
663 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
664 OpInfo = OK_NonUniformConstantValue;
666 OpInfo = OK_UniformConstantValue;
667 if (auto *CI = dyn_cast<ConstantInt>(Splat))
668 if (CI->getValue().isPowerOf2())
669 OpProps = OP_PowerOf2;
670 } else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
671 OpProps = OP_PowerOf2;
672 for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
673 if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
674 if (CI->getValue().isPowerOf2())
682 // Check for a splat of a uniform value. This is not loop aware, so return
683 // true only for the obviously uniform cases (argument, globalvalue)
684 if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
685 OpInfo = OK_UniformValue;
690 int TargetTransformInfo::getArithmeticInstrCost(
691 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
692 OperandValueKind Opd1Info,
693 OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
694 OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
695 const Instruction *CxtI) const {
696 int Cost = TTIImpl->getArithmeticInstrCost(
697 Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo,
699 assert(Cost >= 0 && "TTI should not produce negative costs!");
703 int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
704 int Index, VectorType *SubTp) const {
705 int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
706 assert(Cost >= 0 && "TTI should not produce negative costs!");
710 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
711 TTI::TargetCostKind CostKind,
712 const Instruction *I) const {
713 assert((I == nullptr || I->getOpcode() == Opcode) &&
714 "Opcode should reflect passed instruction.");
715 int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
716 assert(Cost >= 0 && "TTI should not produce negative costs!");
720 int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
722 unsigned Index) const {
723 int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
724 assert(Cost >= 0 && "TTI should not produce negative costs!");
728 int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
729 TTI::TargetCostKind CostKind) const {
730 int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
731 assert(Cost >= 0 && "TTI should not produce negative costs!");
735 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
737 TTI::TargetCostKind CostKind,
738 const Instruction *I) const {
739 assert((I == nullptr || I->getOpcode() == Opcode) &&
740 "Opcode should reflect passed instruction.");
741 int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
742 assert(Cost >= 0 && "TTI should not produce negative costs!");
746 int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
747 unsigned Index) const {
748 int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
749 assert(Cost >= 0 && "TTI should not produce negative costs!");
753 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
754 Align Alignment, unsigned AddressSpace,
755 TTI::TargetCostKind CostKind,
756 const Instruction *I) const {
757 assert((I == nullptr || I->getOpcode() == Opcode) &&
758 "Opcode should reflect passed instruction.");
759 int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
761 assert(Cost >= 0 && "TTI should not produce negative costs!");
765 int TargetTransformInfo::getMaskedMemoryOpCost(
766 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
767 TTI::TargetCostKind CostKind) const {
769 TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
771 assert(Cost >= 0 && "TTI should not produce negative costs!");
775 int TargetTransformInfo::getGatherScatterOpCost(
776 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
777 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
778 int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
779 Alignment, CostKind, I);
780 assert(Cost >= 0 && "TTI should not produce negative costs!");
784 int TargetTransformInfo::getInterleavedMemoryOpCost(
785 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
786 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
787 bool UseMaskForCond, bool UseMaskForGaps) const {
788 int Cost = TTIImpl->getInterleavedMemoryOpCost(
789 Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
790 UseMaskForCond, UseMaskForGaps);
791 assert(Cost >= 0 && "TTI should not produce negative costs!");
796 TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
797 TTI::TargetCostKind CostKind) const {
798 int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
799 assert(Cost >= 0 && "TTI should not produce negative costs!");
803 int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
804 ArrayRef<Type *> Tys,
805 TTI::TargetCostKind CostKind) const {
806 int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
807 assert(Cost >= 0 && "TTI should not produce negative costs!");
811 unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
812 return TTIImpl->getNumberOfParts(Tp);
815 int TargetTransformInfo::getAddressComputationCost(Type *Tp,
817 const SCEV *Ptr) const {
818 int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
819 assert(Cost >= 0 && "TTI should not produce negative costs!");
823 int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
824 int Cost = TTIImpl->getMemcpyCost(I);
825 assert(Cost >= 0 && "TTI should not produce negative costs!");
829 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
832 TTI::TargetCostKind CostKind) const {
833 int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
835 assert(Cost >= 0 && "TTI should not produce negative costs!");
839 int TargetTransformInfo::getMinMaxReductionCost(
840 VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
841 TTI::TargetCostKind CostKind) const {
843 TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
845 assert(Cost >= 0 && "TTI should not produce negative costs!");
850 TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
851 return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
854 bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
855 MemIntrinsicInfo &Info) const {
856 return TTIImpl->getTgtMemIntrinsic(Inst, Info);
859 unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
860 return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
863 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
864 IntrinsicInst *Inst, Type *ExpectedType) const {
865 return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
868 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
869 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
870 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign) const {
871 return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
872 DestAddrSpace, SrcAlign, DestAlign);
875 void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
876 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
877 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
878 unsigned SrcAlign, unsigned DestAlign) const {
879 TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
880 SrcAddrSpace, DestAddrSpace,
881 SrcAlign, DestAlign);
884 bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
885 const Function *Callee) const {
886 return TTIImpl->areInlineCompatible(Caller, Callee);
889 bool TargetTransformInfo::areFunctionArgsABICompatible(
890 const Function *Caller, const Function *Callee,
891 SmallPtrSetImpl<Argument *> &Args) const {
892 return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
895 bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
897 return TTIImpl->isIndexedLoadLegal(Mode, Ty);
900 bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
902 return TTIImpl->isIndexedStoreLegal(Mode, Ty);
905 unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
906 return TTIImpl->getLoadStoreVecRegBitWidth(AS);
909 bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
910 return TTIImpl->isLegalToVectorizeLoad(LI);
913 bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
914 return TTIImpl->isLegalToVectorizeStore(SI);
917 bool TargetTransformInfo::isLegalToVectorizeLoadChain(
918 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
919 return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
923 bool TargetTransformInfo::isLegalToVectorizeStoreChain(
924 unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const {
925 return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
929 unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
931 unsigned ChainSizeInBytes,
932 VectorType *VecTy) const {
933 return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
936 unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
938 unsigned ChainSizeInBytes,
939 VectorType *VecTy) const {
940 return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
943 bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, Type *Ty,
944 ReductionFlags Flags) const {
945 return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
948 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
949 return TTIImpl->shouldExpandReduction(II);
952 unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
953 return TTIImpl->getGISelRematGlobalCost();
956 int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
957 return TTIImpl->getInstructionLatency(I);
960 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
962 // We don't need a shuffle if we just want to have element 0 in position 0 of
964 if (!SI && Level == 0 && IsLeft)
969 SmallVector<int, 32> Mask(SI->getType()->getNumElements(), -1);
971 // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
972 // we look at the left or right side.
973 for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
976 ArrayRef<int> ActualMask = SI->getShuffleMask();
977 return Mask == ActualMask;
980 static Optional<TTI::ReductionData> getReductionData(Instruction *I) {
982 if (m_BinOp(m_Value(L), m_Value(R)).match(I))
983 return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R);
984 if (auto *SI = dyn_cast<SelectInst>(I)) {
985 if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
986 m_SMax(m_Value(L), m_Value(R)).match(SI) ||
987 m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
988 m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
989 m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
990 m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
991 auto *CI = cast<CmpInst>(SI->getCondition());
992 return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R);
994 if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
995 m_UMax(m_Value(L), m_Value(R)).match(SI)) {
996 auto *CI = cast<CmpInst>(SI->getCondition());
997 return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R);
1003 static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
1005 unsigned NumLevels) {
1006 // Match one level of pairwise operations.
1007 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
1008 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
1009 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
1010 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1011 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
1013 return TTI::RK_None;
1015 assert(I->getType()->isVectorTy() && "Expecting a vector type");
1017 Optional<TTI::ReductionData> RD = getReductionData(I);
1019 return TTI::RK_None;
1021 ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
1023 return TTI::RK_None;
1024 ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
1026 return TTI::RK_None;
1028 // On level 0 we can omit one shufflevector instruction.
1029 if (!Level && !RS && !LS)
1030 return TTI::RK_None;
1032 // Shuffle inputs must match.
1033 Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
1034 Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
1035 Value *NextLevelOp = nullptr;
1036 if (NextLevelOpR && NextLevelOpL) {
1037 // If we have two shuffles their operands must match.
1038 if (NextLevelOpL != NextLevelOpR)
1039 return TTI::RK_None;
1041 NextLevelOp = NextLevelOpL;
1042 } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
1043 // On the first level we can omit the shufflevector <0, undef,...>. So the
1044 // input to the other shufflevector <1, undef> must match with one of the
1045 // inputs to the current binary operation.
1047 // %NextLevelOpL = shufflevector %R, <1, undef ...>
1048 // %BinOp = fadd %NextLevelOpL, %R
1049 if (NextLevelOpL && NextLevelOpL != RD->RHS)
1050 return TTI::RK_None;
1051 else if (NextLevelOpR && NextLevelOpR != RD->LHS)
1052 return TTI::RK_None;
1054 NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
1056 return TTI::RK_None;
1058 // Check that the next levels binary operation exists and matches with the
1060 if (Level + 1 != NumLevels) {
1061 if (!isa<Instruction>(NextLevelOp))
1062 return TTI::RK_None;
1063 Optional<TTI::ReductionData> NextLevelRD =
1064 getReductionData(cast<Instruction>(NextLevelOp));
1065 if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
1066 return TTI::RK_None;
1069 // Shuffle mask for pairwise operation must match.
1070 if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
1071 if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
1072 return TTI::RK_None;
1073 } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
1074 if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
1075 return TTI::RK_None;
1077 return TTI::RK_None;
1080 if (++Level == NumLevels)
1083 // Match next level.
1084 return matchPairwiseReductionAtLevel(dyn_cast<Instruction>(NextLevelOp), Level,
1088 TTI::ReductionKind TTI::matchPairwiseReduction(
1089 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) {
1090 if (!EnableReduxCost)
1091 return TTI::RK_None;
1093 // Need to extract the first element.
1094 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
1097 Idx = CI->getZExtValue();
1099 return TTI::RK_None;
1101 auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
1103 return TTI::RK_None;
1104 Optional<TTI::ReductionData> RD = getReductionData(RdxStart);
1106 return TTI::RK_None;
1108 auto *VecTy = cast<VectorType>(RdxStart->getType());
1109 unsigned NumVecElems = VecTy->getNumElements();
1110 if (!isPowerOf2_32(NumVecElems))
1111 return TTI::RK_None;
1113 // We look for a sequence of shuffle,shuffle,add triples like the following
1114 // that builds a pairwise reduction tree.
1117 // (X0 + X1, X2 + X3, undef, undef)
1118 // ((X0 + X1) + (X2 + X3), undef, undef, undef)
1120 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
1121 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
1122 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
1123 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
1124 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
1125 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
1126 // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
1127 // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
1128 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1129 // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
1130 // %r = extractelement <4 x float> %bin.rdx8, i32 0
1131 if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
1133 return TTI::RK_None;
1135 Opcode = RD->Opcode;
1141 static std::pair<Value *, ShuffleVectorInst *>
1142 getShuffleAndOtherOprd(Value *L, Value *R) {
1143 ShuffleVectorInst *S = nullptr;
1145 if ((S = dyn_cast<ShuffleVectorInst>(L)))
1146 return std::make_pair(R, S);
1148 S = dyn_cast<ShuffleVectorInst>(R);
1149 return std::make_pair(L, S);
1152 TTI::ReductionKind TTI::matchVectorSplittingReduction(
1153 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) {
1155 if (!EnableReduxCost)
1156 return TTI::RK_None;
1158 // Need to extract the first element.
1159 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
1162 Idx = CI->getZExtValue();
1164 return TTI::RK_None;
1166 auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
1168 return TTI::RK_None;
1169 Optional<TTI::ReductionData> RD = getReductionData(RdxStart);
1171 return TTI::RK_None;
1173 auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType());
1174 unsigned NumVecElems = VecTy->getNumElements();
1175 if (!isPowerOf2_32(NumVecElems))
1176 return TTI::RK_None;
1178 // We look for a sequence of shuffles and adds like the following matching one
1179 // fadd, shuffle vector pair at a time.
1181 // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
1182 // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
1183 // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
1184 // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
1185 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1186 // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
1187 // %r = extractelement <4 x float> %bin.rdx8, i32 0
1189 unsigned MaskStart = 1;
1190 Instruction *RdxOp = RdxStart;
1191 SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
1192 unsigned NumVecElemsRemain = NumVecElems;
1193 while (NumVecElemsRemain - 1) {
1194 // Check for the right reduction operation.
1196 return TTI::RK_None;
1197 Optional<TTI::ReductionData> RDLevel = getReductionData(RdxOp);
1198 if (!RDLevel || !RDLevel->hasSameData(*RD))
1199 return TTI::RK_None;
1202 ShuffleVectorInst *Shuffle;
1203 std::tie(NextRdxOp, Shuffle) =
1204 getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
1206 // Check the current reduction operation and the shuffle use the same value.
1207 if (Shuffle == nullptr)
1208 return TTI::RK_None;
1209 if (Shuffle->getOperand(0) != NextRdxOp)
1210 return TTI::RK_None;
1212 // Check that shuffle masks matches.
1213 for (unsigned j = 0; j != MaskStart; ++j)
1214 ShuffleMask[j] = MaskStart + j;
1215 // Fill the rest of the mask with -1 for undef.
1216 std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
1218 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1219 if (ShuffleMask != Mask)
1220 return TTI::RK_None;
1222 RdxOp = dyn_cast<Instruction>(NextRdxOp);
1223 NumVecElemsRemain /= 2;
1227 Opcode = RD->Opcode;
1232 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
1233 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1235 switch (I->getOpcode()) {
1236 case Instruction::GetElementPtr:
1237 case Instruction::Ret:
1238 case Instruction::PHI:
1239 case Instruction::Br:
1240 case Instruction::Add:
1241 case Instruction::FAdd:
1242 case Instruction::Sub:
1243 case Instruction::FSub:
1244 case Instruction::Mul:
1245 case Instruction::FMul:
1246 case Instruction::UDiv:
1247 case Instruction::SDiv:
1248 case Instruction::FDiv:
1249 case Instruction::URem:
1250 case Instruction::SRem:
1251 case Instruction::FRem:
1252 case Instruction::Shl:
1253 case Instruction::LShr:
1254 case Instruction::AShr:
1255 case Instruction::And:
1256 case Instruction::Or:
1257 case Instruction::Xor:
1258 case Instruction::FNeg:
1259 case Instruction::Select:
1260 case Instruction::ICmp:
1261 case Instruction::FCmp:
1262 case Instruction::Store:
1263 case Instruction::Load:
1264 case Instruction::ZExt:
1265 case Instruction::SExt:
1266 case Instruction::FPToUI:
1267 case Instruction::FPToSI:
1268 case Instruction::FPExt:
1269 case Instruction::PtrToInt:
1270 case Instruction::IntToPtr:
1271 case Instruction::SIToFP:
1272 case Instruction::UIToFP:
1273 case Instruction::Trunc:
1274 case Instruction::FPTrunc:
1275 case Instruction::BitCast:
1276 case Instruction::AddrSpaceCast:
1277 case Instruction::ExtractElement:
1278 case Instruction::InsertElement:
1279 case Instruction::ExtractValue:
1280 case Instruction::ShuffleVector:
1281 case Instruction::Call:
1282 return getUserCost(I, CostKind);
1284 // We don't have any information on this instruction.
1289 TargetTransformInfo::Concept::~Concept() {}
1291 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1293 TargetIRAnalysis::TargetIRAnalysis(
1294 std::function<Result(const Function &)> TTICallback)
1295 : TTICallback(std::move(TTICallback)) {}
1297 TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
1298 FunctionAnalysisManager &) {
1299 return TTICallback(F);
1302 AnalysisKey TargetIRAnalysis::Key;
1304 TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
1305 return Result(F.getParent()->getDataLayout());
1308 // Register the basic pass.
1309 INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
1310 "Target Transform Information", false, true)
1311 char TargetTransformInfoWrapperPass::ID = 0;
1313 void TargetTransformInfoWrapperPass::anchor() {}
1315 TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
1316 : ImmutablePass(ID) {
1317 initializeTargetTransformInfoWrapperPassPass(
1318 *PassRegistry::getPassRegistry());
1321 TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
1322 TargetIRAnalysis TIRA)
1323 : ImmutablePass(ID), TIRA(std::move(TIRA)) {
1324 initializeTargetTransformInfoWrapperPassPass(
1325 *PassRegistry::getPassRegistry());
1328 TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
1329 FunctionAnalysisManager DummyFAM;
1330 TTI = TIRA.run(F, DummyFAM);
1335 llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
1336 return new TargetTransformInfoWrapperPass(std::move(TIRA));