contrib/llvm-project/llvm/include/llvm/Analysis/TargetTransformInfo.h

   1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 /// \file
   9 /// This pass exposes codegen information to IR-level passes. Every
  10 /// transformation that uses codegen information is broken into three parts:
  11 /// 1. The IR-level analysis pass.
  12 /// 2. The IR-level transformation interface which provides the needed
  13 ///    information.
  14 /// 3. Codegen-level implementation which uses target-specific hooks.
  15 ///
  16 /// This file defines #2, which is the interface that IR-level transformations
  17 /// use for querying the codegen.
  18 ///
  19 //===----------------------------------------------------------------------===//
  20
  21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  23
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/IR/Operator.h"
  26 #include "llvm/IR/PassManager.h"
  27 #include "llvm/Pass.h"
  28 #include "llvm/Support/AtomicOrdering.h"
  29 #include "llvm/Support/DataTypes.h"
  30 #include "llvm/Analysis/LoopInfo.h"
  31 #include "llvm/Analysis/ScalarEvolution.h"
  32 #include "llvm/IR/Dominators.h"
  33 #include "llvm/Analysis/AssumptionCache.h"
  34 #include <functional>
  35
  36 namespace llvm {
  37
  38 namespace Intrinsic {
  39 typedef unsigned ID;
  40 }
  41
  42 class AssumptionCache;
  43 class BlockFrequencyInfo;
  44 class BranchInst;
  45 class Function;
  46 class GlobalValue;
  47 class IntrinsicInst;
  48 class LoadInst;
  49 class LoopAccessInfo;
  50 class Loop;
  51 class ProfileSummaryInfo;
  52 class SCEV;
  53 class ScalarEvolution;
  54 class StoreInst;
  55 class SwitchInst;
  56 class TargetLibraryInfo;
  57 class Type;
  58 class User;
  59 class Value;
  60
  61 /// Information about a load/store intrinsic defined by the target.
  62 struct MemIntrinsicInfo {
  63   /// This is the pointer that the intrinsic is loading from or storing to.
  64   /// If this is non-null, then analysis/optimization passes can assume that
  65   /// this intrinsic is functionally equivalent to a load/store from this
  66   /// pointer.
  67   Value *PtrVal = nullptr;
  68
  69   // Ordering for atomic operations.
  70   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  71
  72   // Same Id is set by the target for corresponding load/store intrinsics.
  73   unsigned short MatchingId = 0;
  74
  75   bool ReadMem = false;
  76   bool WriteMem = false;
  77   bool IsVolatile = false;
  78
  79   bool isUnordered() const {
  80     return (Ordering == AtomicOrdering::NotAtomic ||
  81             Ordering == AtomicOrdering::Unordered) && !IsVolatile;
  82   }
  83 };
  84
  85 /// Attributes of a target dependent hardware loop.
  86 struct HardwareLoopInfo {
  87   HardwareLoopInfo() = delete;
  88   HardwareLoopInfo(Loop *L) : L(L) {}
  89   Loop *L = nullptr;
  90   BasicBlock *ExitBlock = nullptr;
  91   BranchInst *ExitBranch = nullptr;
  92   const SCEV *ExitCount = nullptr;
  93   IntegerType *CountType = nullptr;
  94   Value *LoopDecrement = nullptr; // Decrement the loop counter by this
  95                                   // value in every iteration.
  96   bool IsNestingLegal = false;    // Can a hardware loop be a parent to
  97                                   // another hardware loop?
  98   bool CounterInReg = false;      // Should loop counter be updated in
  99                                   // the loop via a phi?
 100   bool PerformEntryTest = false;  // Generate the intrinsic which also performs
 101                                   // icmp ne zero on the loop counter value and
 102                                   // produces an i1 to guard the loop entry.
 103   bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
 104                                DominatorTree &DT, bool ForceNestedLoop = false,
 105                                bool ForceHardwareLoopPHI = false);
 106   bool canAnalyze(LoopInfo &LI);
 107 };
 108
 109 /// This pass provides access to the codegen interfaces that are needed
 110 /// for IR-level transformations.
 111 class TargetTransformInfo {
 112 public:
 113   /// Construct a TTI object using a type implementing the \c Concept
 114   /// API below.
 115   ///
 116   /// This is used by targets to construct a TTI wrapping their target-specific
 117   /// implementation that encodes appropriate costs for their target.
 118   template <typename T> TargetTransformInfo(T Impl);
 119
 120   /// Construct a baseline TTI object using a minimal implementation of
 121   /// the \c Concept API below.
 122   ///
 123   /// The TTI implementation will reflect the information in the DataLayout
 124   /// provided if non-null.
 125   explicit TargetTransformInfo(const DataLayout &DL);
 126
 127   // Provide move semantics.
 128   TargetTransformInfo(TargetTransformInfo &&Arg);
 129   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
 130
 131   // We need to define the destructor out-of-line to define our sub-classes
 132   // out-of-line.
 133   ~TargetTransformInfo();
 134
 135   /// Handle the invalidation of this information.
 136   ///
 137   /// When used as a result of \c TargetIRAnalysis this method will be called
 138   /// when the function this was computed for changes. When it returns false,
 139   /// the information is preserved across those changes.
 140   bool invalidate(Function &, const PreservedAnalyses &,
 141                   FunctionAnalysisManager::Invalidator &) {
 142     // FIXME: We should probably in some way ensure that the subtarget
 143     // information for a function hasn't changed.
 144     return false;
 145   }
 146
 147   /// \name Generic Target Information
 148   /// @{
 149
 150   /// The kind of cost model.
 151   ///
 152   /// There are several different cost models that can be customized by the
 153   /// target. The normalization of each cost model may be target specific.
 154   enum TargetCostKind {
 155     TCK_RecipThroughput, ///< Reciprocal throughput.
 156     TCK_Latency,         ///< The latency of instruction.
 157     TCK_CodeSize         ///< Instruction code size.
 158   };
 159
 160   /// Query the cost of a specified instruction.
 161   ///
 162   /// Clients should use this interface to query the cost of an existing
 163   /// instruction. The instruction must have a valid parent (basic block).
 164   ///
 165   /// Note, this method does not cache the cost calculation and it
 166   /// can be expensive in some cases.
 167   int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
 168     switch (kind){
 169     case TCK_RecipThroughput:
 170       return getInstructionThroughput(I);
 171
 172     case TCK_Latency:
 173       return getInstructionLatency(I);
 174
 175     case TCK_CodeSize:
 176       return getUserCost(I);
 177     }
 178     llvm_unreachable("Unknown instruction cost kind");
 179   }
 180
 181   /// Underlying constants for 'cost' values in this interface.
 182   ///
 183   /// Many APIs in this interface return a cost. This enum defines the
 184   /// fundamental values that should be used to interpret (and produce) those
 185   /// costs. The costs are returned as an int rather than a member of this
 186   /// enumeration because it is expected that the cost of one IR instruction
 187   /// may have a multiplicative factor to it or otherwise won't fit directly
 188   /// into the enum. Moreover, it is common to sum or average costs which works
 189   /// better as simple integral values. Thus this enum only provides constants.
 190   /// Also note that the returned costs are signed integers to make it natural
 191   /// to add, subtract, and test with zero (a common boundary condition). It is
 192   /// not expected that 2^32 is a realistic cost to be modeling at any point.
 193   ///
 194   /// Note that these costs should usually reflect the intersection of code-size
 195   /// cost and execution cost. A free instruction is typically one that folds
 196   /// into another instruction. For example, reg-to-reg moves can often be
 197   /// skipped by renaming the registers in the CPU, but they still are encoded
 198   /// and thus wouldn't be considered 'free' here.
 199   enum TargetCostConstants {
 200     TCC_Free = 0,     ///< Expected to fold away in lowering.
 201     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
 202     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
 203   };
 204
 205   /// Estimate the cost of a specific operation when lowered.
 206   ///
 207   /// Note that this is designed to work on an arbitrary synthetic opcode, and
 208   /// thus work for hypothetical queries before an instruction has even been
 209   /// formed. However, this does *not* work for GEPs, and must not be called
 210   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
 211   /// analyzing a GEP's cost required more information.
 212   ///
 213   /// Typically only the result type is required, and the operand type can be
 214   /// omitted. However, if the opcode is one of the cast instructions, the
 215   /// operand type is required.
 216   ///
 217   /// The returned cost is defined in terms of \c TargetCostConstants, see its
 218   /// comments for a detailed explanation of the cost values.
 219   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
 220
 221   /// Estimate the cost of a GEP operation when lowered.
 222   ///
 223   /// The contract for this function is the same as \c getOperationCost except
 224   /// that it supports an interface that provides extra information specific to
 225   /// the GEP operation.
 226   int getGEPCost(Type *PointeeType, const Value *Ptr,
 227                  ArrayRef<const Value *> Operands) const;
 228
 229   /// Estimate the cost of a EXT operation when lowered.
 230   ///
 231   /// The contract for this function is the same as \c getOperationCost except
 232   /// that it supports an interface that provides extra information specific to
 233   /// the EXT operation.
 234   int getExtCost(const Instruction *I, const Value *Src) const;
 235
 236   /// Estimate the cost of a function call when lowered.
 237   ///
 238   /// The contract for this is the same as \c getOperationCost except that it
 239   /// supports an interface that provides extra information specific to call
 240   /// instructions.
 241   ///
 242   /// This is the most basic query for estimating call cost: it only knows the
 243   /// function type and (potentially) the number of arguments at the call site.
 244   /// The latter is only interesting for varargs function types.
 245   int getCallCost(FunctionType *FTy, int NumArgs = -1,
 246                   const User *U = nullptr) const;
 247
 248   /// Estimate the cost of calling a specific function when lowered.
 249   ///
 250   /// This overload adds the ability to reason about the particular function
 251   /// being called in the event it is a library call with special lowering.
 252   int getCallCost(const Function *F, int NumArgs = -1,
 253                   const User *U = nullptr) const;
 254
 255   /// Estimate the cost of calling a specific function when lowered.
 256   ///
 257   /// This overload allows specifying a set of candidate argument values.
 258   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
 259                   const User *U = nullptr) const;
 260
 261   /// \returns A value by which our inlining threshold should be multiplied.
 262   /// This is primarily used to bump up the inlining threshold wholesale on
 263   /// targets where calls are unusually expensive.
 264   ///
 265   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
 266   /// individual classes of instructions would be better.
 267   unsigned getInliningThresholdMultiplier() const;
 268
 269   /// \returns Vector bonus in percent.
 270   ///
 271   /// Vector bonuses: We want to more aggressively inline vector-dense kernels
 272   /// and apply this bonus based on the percentage of vector instructions. A
 273   /// bonus is applied if the vector instructions exceed 50% and half that amount
 274   /// is applied if it exceeds 10%. Note that these bonuses are some what
 275   /// arbitrary and evolved over time by accident as much as because they are
 276   /// principled bonuses.
 277   /// FIXME: It would be nice to base the bonus values on something more
 278   /// scientific. A target may has no bonus on vector instructions.
 279   int getInlinerVectorBonusPercent() const;
 280
 281   /// Estimate the cost of an intrinsic when lowered.
 282   ///
 283   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
 284   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 285                        ArrayRef<Type *> ParamTys,
 286                        const User *U = nullptr) const;
 287
 288   /// Estimate the cost of an intrinsic when lowered.
 289   ///
 290   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
 291   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 292                        ArrayRef<const Value *> Arguments,
 293                        const User *U = nullptr) const;
 294
 295   /// \return the expected cost of a memcpy, which could e.g. depend on the
 296   /// source/destination type and alignment and the number of bytes copied.
 297   int getMemcpyCost(const Instruction *I) const;
 298
 299   /// \return The estimated number of case clusters when lowering \p 'SI'.
 300   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
 301   /// table.
 302   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
 303                                             unsigned &JTSize,
 304                                             ProfileSummaryInfo *PSI,
 305                                             BlockFrequencyInfo *BFI) const;
 306
 307   /// Estimate the cost of a given IR user when lowered.
 308   ///
 309   /// This can estimate the cost of either a ConstantExpr or Instruction when
 310   /// lowered. It has two primary advantages over the \c getOperationCost and
 311   /// \c getGEPCost above, and one significant disadvantage: it can only be
 312   /// used when the IR construct has already been formed.
 313   ///
 314   /// The advantages are that it can inspect the SSA use graph to reason more
 315   /// accurately about the cost. For example, all-constant-GEPs can often be
 316   /// folded into a load or other instruction, but if they are used in some
 317   /// other context they may not be folded. This routine can distinguish such
 318   /// cases.
 319   ///
 320   /// \p Operands is a list of operands which can be a result of transformations
 321   /// of the current operands. The number of the operands on the list must equal
 322   /// to the number of the current operands the IR user has. Their order on the
 323   /// list must be the same as the order of the current operands the IR user
 324   /// has.
 325   ///
 326   /// The returned cost is defined in terms of \c TargetCostConstants, see its
 327   /// comments for a detailed explanation of the cost values.
 328   int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
 329
 330   /// This is a helper function which calls the two-argument getUserCost
 331   /// with \p Operands which are the current operands U has.
 332   int getUserCost(const User *U) const {
 333     SmallVector<const Value *, 4> Operands(U->value_op_begin(),
 334                                            U->value_op_end());
 335     return getUserCost(U, Operands);
 336   }
 337
 338   /// Return true if branch divergence exists.
 339   ///
 340   /// Branch divergence has a significantly negative impact on GPU performance
 341   /// when threads in the same wavefront take different paths due to conditional
 342   /// branches.
 343   bool hasBranchDivergence() const;
 344
 345   /// Returns whether V is a source of divergence.
 346   ///
 347   /// This function provides the target-dependent information for
 348   /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
 349   /// builds the dependency graph, and then runs the reachability algorithm
 350   /// starting with the sources of divergence.
 351   bool isSourceOfDivergence(const Value *V) const;
 352
 353   // Returns true for the target specific
 354   // set of operations which produce uniform result
 355   // even taking non-uniform arguments
 356   bool isAlwaysUniform(const Value *V) const;
 357
 358   /// Returns the address space ID for a target's 'flat' address space. Note
 359   /// this is not necessarily the same as addrspace(0), which LLVM sometimes
 360   /// refers to as the generic address space. The flat address space is a
 361   /// generic address space that can be used access multiple segments of memory
 362   /// with different address spaces. Access of a memory location through a
 363   /// pointer with this address space is expected to be legal but slower
 364   /// compared to the same memory location accessed through a pointer with a
 365   /// different address space.
 366   //
 367   /// This is for targets with different pointer representations which can
 368   /// be converted with the addrspacecast instruction. If a pointer is converted
 369   /// to this address space, optimizations should attempt to replace the access
 370   /// with the source address space.
 371   ///
 372   /// \returns ~0u if the target does not have such a flat address space to
 373   /// optimize away.
 374   unsigned getFlatAddressSpace() const;
 375
 376   /// Return any intrinsic address operand indexes which may be rewritten if
 377   /// they use a flat address space pointer.
 378   ///
 379   /// \returns true if the intrinsic was handled.
 380   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
 381                                   Intrinsic::ID IID) const;
 382
 383   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
 384   /// NewV, which has a different address space. This should happen for every
 385   /// operand index that collectFlatAddressOperands returned for the intrinsic.
 386   /// \returns true if the intrinsic /// was handled.
 387   bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
 388                                         Value *OldV, Value *NewV) const;
 389
 390   /// Test whether calls to a function lower to actual program function
 391   /// calls.
 392   ///
 393   /// The idea is to test whether the program is likely to require a 'call'
 394   /// instruction or equivalent in order to call the given function.
 395   ///
 396   /// FIXME: It's not clear that this is a good or useful query API. Client's
 397   /// should probably move to simpler cost metrics using the above.
 398   /// Alternatively, we could split the cost interface into distinct code-size
 399   /// and execution-speed costs. This would allow modelling the core of this
 400   /// query more accurately as a call is a single small instruction, but
 401   /// incurs significant execution cost.
 402   bool isLoweredToCall(const Function *F) const;
 403
 404   struct LSRCost {
 405     /// TODO: Some of these could be merged. Also, a lexical ordering
 406     /// isn't always optimal.
 407     unsigned Insns;
 408     unsigned NumRegs;
 409     unsigned AddRecCost;
 410     unsigned NumIVMuls;
 411     unsigned NumBaseAdds;
 412     unsigned ImmCost;
 413     unsigned SetupCost;
 414     unsigned ScaleCost;
 415   };
 416
 417   /// Parameters that control the generic loop unrolling transformation.
 418   struct UnrollingPreferences {
 419     /// The cost threshold for the unrolled loop. Should be relative to the
 420     /// getUserCost values returned by this API, and the expectation is that
 421     /// the unrolled loop's instructions when run through that interface should
 422     /// not exceed this cost. However, this is only an estimate. Also, specific
 423     /// loops may be unrolled even with a cost above this threshold if deemed
 424     /// profitable. Set this to UINT_MAX to disable the loop body cost
 425     /// restriction.
 426     unsigned Threshold;
 427     /// If complete unrolling will reduce the cost of the loop, we will boost
 428     /// the Threshold by a certain percent to allow more aggressive complete
 429     /// unrolling. This value provides the maximum boost percentage that we
 430     /// can apply to Threshold (The value should be no less than 100).
 431     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
 432     ///                                    MaxPercentThresholdBoost / 100)
 433     /// E.g. if complete unrolling reduces the loop execution time by 50%
 434     /// then we boost the threshold by the factor of 2x. If unrolling is not
 435     /// expected to reduce the running time, then we do not increase the
 436     /// threshold.
 437     unsigned MaxPercentThresholdBoost;
 438     /// The cost threshold for the unrolled loop when optimizing for size (set
 439     /// to UINT_MAX to disable).
 440     unsigned OptSizeThreshold;
 441     /// The cost threshold for the unrolled loop, like Threshold, but used
 442     /// for partial/runtime unrolling (set to UINT_MAX to disable).
 443     unsigned PartialThreshold;
 444     /// The cost threshold for the unrolled loop when optimizing for size, like
 445     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
 446     /// UINT_MAX to disable).
 447     unsigned PartialOptSizeThreshold;
 448     /// A forced unrolling factor (the number of concatenated bodies of the
 449     /// original loop in the unrolled loop body). When set to 0, the unrolling
 450     /// transformation will select an unrolling factor based on the current cost
 451     /// threshold and other factors.
 452     unsigned Count;
 453     /// A forced peeling factor (the number of bodied of the original loop
 454     /// that should be peeled off before the loop body). When set to 0, the
 455     /// unrolling transformation will select a peeling factor based on profile
 456     /// information and other factors.
 457     unsigned PeelCount;
 458     /// Default unroll count for loops with run-time trip count.
 459     unsigned DefaultUnrollRuntimeCount;
 460     // Set the maximum unrolling factor. The unrolling factor may be selected
 461     // using the appropriate cost threshold, but may not exceed this number
 462     // (set to UINT_MAX to disable). This does not apply in cases where the
 463     // loop is being fully unrolled.
 464     unsigned MaxCount;
 465     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
 466     /// applies even if full unrolling is selected. This allows a target to fall
 467     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
 468     unsigned FullUnrollMaxCount;
 469     // Represents number of instructions optimized when "back edge"
 470     // becomes "fall through" in unrolled loop.
 471     // For now we count a conditional branch on a backedge and a comparison
 472     // feeding it.
 473     unsigned BEInsns;
 474     /// Allow partial unrolling (unrolling of loops to expand the size of the
 475     /// loop body, not only to eliminate small constant-trip-count loops).
 476     bool Partial;
 477     /// Allow runtime unrolling (unrolling of loops to expand the size of the
 478     /// loop body even when the number of loop iterations is not known at
 479     /// compile time).
 480     bool Runtime;
 481     /// Allow generation of a loop remainder (extra iterations after unroll).
 482     bool AllowRemainder;
 483     /// Allow emitting expensive instructions (such as divisions) when computing
 484     /// the trip count of a loop for runtime unrolling.
 485     bool AllowExpensiveTripCount;
 486     /// Apply loop unroll on any kind of loop
 487     /// (mainly to loops that fail runtime unrolling).
 488     bool Force;
 489     /// Allow using trip count upper bound to unroll loops.
 490     bool UpperBound;
 491     /// Allow peeling off loop iterations.
 492     bool AllowPeeling;
 493     /// Allow unrolling of all the iterations of the runtime loop remainder.
 494     bool UnrollRemainder;
 495     /// Allow unroll and jam. Used to enable unroll and jam for the target.
 496     bool UnrollAndJam;
 497     /// Allow peeling basing on profile. Uses to enable peeling off all
 498     /// iterations basing on provided profile.
 499     /// If the value is true the peeling cost model can decide to peel only
 500     /// some iterations and in this case it will set this to false.
 501     bool PeelProfiledIterations;
 502     /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
 503     /// value above is used during unroll and jam for the outer loop size.
 504     /// This value is used in the same manner to limit the size of the inner
 505     /// loop.
 506     unsigned UnrollAndJamInnerLoopThreshold;
 507   };
 508
 509   /// Get target-customized preferences for the generic loop unrolling
 510   /// transformation. The caller will initialize UP with the current
 511   /// target-independent defaults.
 512   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
 513                                UnrollingPreferences &UP) const;
 514
 515   /// Query the target whether it would be profitable to convert the given loop
 516   /// into a hardware loop.
 517   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
 518                                 AssumptionCache &AC,
 519                                 TargetLibraryInfo *LibInfo,
 520                                 HardwareLoopInfo &HWLoopInfo) const;
 521
 522   /// Query the target whether it would be prefered to create a predicated vector
 523   /// loop, which can avoid the need to emit a scalar epilogue loop.
 524   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
 525                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
 526                                    DominatorTree *DT,
 527                                    const LoopAccessInfo *LAI) const;
 528
 529   /// @}
 530
 531   /// \name Scalar Target Information
 532   /// @{
 533
 534   /// Flags indicating the kind of support for population count.
 535   ///
 536   /// Compared to the SW implementation, HW support is supposed to
 537   /// significantly boost the performance when the population is dense, and it
 538   /// may or may not degrade performance if the population is sparse. A HW
 539   /// support is considered as "Fast" if it can outperform, or is on a par
 540   /// with, SW implementation when the population is sparse; otherwise, it is
 541   /// considered as "Slow".
 542   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
 543
 544   /// Return true if the specified immediate is legal add immediate, that
 545   /// is the target has add instructions which can add a register with the
 546   /// immediate without having to materialize the immediate into a register.
 547   bool isLegalAddImmediate(int64_t Imm) const;
 548
 549   /// Return true if the specified immediate is legal icmp immediate,
 550   /// that is the target has icmp instructions which can compare a register
 551   /// against the immediate without having to materialize the immediate into a
 552   /// register.
 553   bool isLegalICmpImmediate(int64_t Imm) const;
 554
 555   /// Return true if the addressing mode represented by AM is legal for
 556   /// this target, for a load/store of the specified type.
 557   /// The type may be VoidTy, in which case only return true if the addressing
 558   /// mode is legal for a load/store of any legal type.
 559   /// If target returns true in LSRWithInstrQueries(), I may be valid.
 560   /// TODO: Handle pre/postinc as well.
 561   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 562                              bool HasBaseReg, int64_t Scale,
 563                              unsigned AddrSpace = 0,
 564                              Instruction *I = nullptr) const;
 565
 566   /// Return true if LSR cost of C1 is lower than C1.
 567   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
 568                      TargetTransformInfo::LSRCost &C2) const;
 569
 570   /// Return true if the target can fuse a compare and branch.
 571   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
 572   /// calculation for the instructions in a loop.
 573   bool canMacroFuseCmp() const;
 574
 575   /// Return true if the target can save a compare for loop count, for example
 576   /// hardware loop saves a compare.
 577   bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
 578                   DominatorTree *DT, AssumptionCache *AC,
 579                   TargetLibraryInfo *LibInfo) const;
 580
 581   /// \return True is LSR should make efforts to create/preserve post-inc
 582   /// addressing mode expressions.
 583   bool shouldFavorPostInc() const;
 584
 585   /// Return true if LSR should make efforts to generate indexed addressing
 586   /// modes that operate across loop iterations.
 587   bool shouldFavorBackedgeIndex(const Loop *L) const;
 588
 589   /// Return true if the target supports masked store.
 590   bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
 591   /// Return true if the target supports masked load.
 592   bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;
 593
 594   /// Return true if the target supports nontemporal store.
 595   bool isLegalNTStore(Type *DataType, Align Alignment) const;
 596   /// Return true if the target supports nontemporal load.
 597   bool isLegalNTLoad(Type *DataType, Align Alignment) const;
 598
 599   /// Return true if the target supports masked scatter.
 600   bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) const;
 601   /// Return true if the target supports masked gather.
 602   bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) const;
 603
 604   /// Return true if the target supports masked compress store.
 605   bool isLegalMaskedCompressStore(Type *DataType) const;
 606   /// Return true if the target supports masked expand load.
 607   bool isLegalMaskedExpandLoad(Type *DataType) const;
 608
 609   /// Return true if the target has a unified operation to calculate division
 610   /// and remainder. If so, the additional implicit multiplication and
 611   /// subtraction required to calculate a remainder from division are free. This
 612   /// can enable more aggressive transformations for division and remainder than
 613   /// would typically be allowed using throughput or size cost models.
 614   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
 615
 616   /// Return true if the given instruction (assumed to be a memory access
 617   /// instruction) has a volatile variant. If that's the case then we can avoid
 618   /// addrspacecast to generic AS for volatile loads/stores. Default
 619   /// implementation returns false, which prevents address space inference for
 620   /// volatile loads/stores.
 621   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
 622
 623   /// Return true if target doesn't mind addresses in vectors.
 624   bool prefersVectorizedAddressing() const;
 625
 626   /// Return the cost of the scaling factor used in the addressing
 627   /// mode represented by AM for this target, for a load/store
 628   /// of the specified type.
 629   /// If the AM is supported, the return value must be >= 0.
 630   /// If the AM is not supported, it returns a negative value.
 631   /// TODO: Handle pre/postinc as well.
 632   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 633                            bool HasBaseReg, int64_t Scale,
 634                            unsigned AddrSpace = 0) const;
 635
 636   /// Return true if the loop strength reduce pass should make
 637   /// Instruction* based TTI queries to isLegalAddressingMode(). This is
 638   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
 639   /// immediate offset and no index register.
 640   bool LSRWithInstrQueries() const;
 641
 642   /// Return true if it's free to truncate a value of type Ty1 to type
 643   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
 644   /// by referencing its sub-register AX.
 645   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
 646
 647   /// Return true if it is profitable to hoist instruction in the
 648   /// then/else to before if.
 649   bool isProfitableToHoist(Instruction *I) const;
 650
 651   bool useAA() const;
 652
 653   /// Return true if this type is legal.
 654   bool isTypeLegal(Type *Ty) const;
 655
 656   /// Return true if switches should be turned into lookup tables for the
 657   /// target.
 658   bool shouldBuildLookupTables() const;
 659
 660   /// Return true if switches should be turned into lookup tables
 661   /// containing this constant value for the target.
 662   bool shouldBuildLookupTablesForConstant(Constant *C) const;
 663
 664   /// Return true if the input function which is cold at all call sites,
 665   ///  should use coldcc calling convention.
 666   bool useColdCCForColdCall(Function &F) const;
 667
 668   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
 669
 670   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
 671                                             unsigned VF) const;
 672
 673   /// If target has efficient vector element load/store instructions, it can
 674   /// return true here so that insertion/extraction costs are not added to
 675   /// the scalarization cost of a load/store.
 676   bool supportsEfficientVectorElementLoadStore() const;
 677
 678   /// Don't restrict interleaved unrolling to small loops.
 679   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 680
 681   /// Returns options for expansion of memcmp. IsZeroCmp is
 682   // true if this is the expansion of memcmp(p1, p2, s) == 0.
 683   struct MemCmpExpansionOptions {
 684     // Return true if memcmp expansion is enabled.
 685     operator bool() const { return MaxNumLoads > 0; }
 686
 687     // Maximum number of load operations.
 688     unsigned MaxNumLoads = 0;
 689
 690     // The list of available load sizes (in bytes), sorted in decreasing order.
 691     SmallVector<unsigned, 8> LoadSizes;
 692
 693     // For memcmp expansion when the memcmp result is only compared equal or
 694     // not-equal to 0, allow up to this number of load pairs per block. As an
 695     // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
 696     //   a0 = load2bytes &a[0]
 697     //   b0 = load2bytes &b[0]
 698     //   a2 = load1byte  &a[2]
 699     //   b2 = load1byte  &b[2]
 700     //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
 701     unsigned NumLoadsPerBlock = 1;
 702
 703     // Set to true to allow overlapping loads. For example, 7-byte compares can
 704     // be done with two 4-byte compares instead of 4+2+1-byte compares. This
 705     // requires all loads in LoadSizes to be doable in an unaligned way.
 706     bool AllowOverlappingLoads = false;
 707   };
 708   MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
 709                                                bool IsZeroCmp) const;
 710
 711   /// Enable matching of interleaved access groups.
 712   bool enableInterleavedAccessVectorization() const;
 713
 714   /// Enable matching of interleaved access groups that contain predicated
 715   /// accesses or gaps and therefore vectorized using masked
 716   /// vector loads/stores.
 717   bool enableMaskedInterleavedAccessVectorization() const;
 718
 719   /// Indicate that it is potentially unsafe to automatically vectorize
 720   /// floating-point operations because the semantics of vector and scalar
 721   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
 722   /// does not support IEEE-754 denormal numbers, while depending on the
 723   /// platform, scalar floating-point math does.
 724   /// This applies to floating-point math operations and calls, not memory
 725   /// operations, shuffles, or casts.
 726   bool isFPVectorizationPotentiallyUnsafe() const;
 727
 728   /// Determine if the target supports unaligned memory accesses.
 729   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
 730                                       unsigned BitWidth, unsigned AddressSpace = 0,
 731                                       unsigned Alignment = 1,
 732                                       bool *Fast = nullptr) const;
 733
 734   /// Return hardware support for population count.
 735   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 736
 737   /// Return true if the hardware has a fast square-root instruction.
 738   bool haveFastSqrt(Type *Ty) const;
 739
 740   /// Return true if it is faster to check if a floating-point value is NaN
 741   /// (or not-NaN) versus a comparison against a constant FP zero value.
 742   /// Targets should override this if materializing a 0.0 for comparison is
 743   /// generally as cheap as checking for ordered/unordered.
 744   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
 745
 746   /// Return the expected cost of supporting the floating point operation
 747   /// of the specified type.
 748   int getFPOpCost(Type *Ty) const;
 749
 750   /// Return the expected cost of materializing for the given integer
 751   /// immediate of the specified type.
 752   int getIntImmCost(const APInt &Imm, Type *Ty) const;
 753
 754   /// Return the expected cost of materialization for the given integer
 755   /// immediate of the specified type for a given instruction. The cost can be
 756   /// zero if the immediate can be folded into the specified instruction.
 757   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
 758                         Type *Ty) const;
 759   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
 760                           Type *Ty) const;
 761
 762   /// Return the expected cost for the given integer when optimising
 763   /// for size. This is different than the other integer immediate cost
 764   /// functions in that it is subtarget agnostic. This is useful when you e.g.
 765   /// target one ISA such as Aarch32 but smaller encodings could be possible
 766   /// with another such as Thumb. This return value is used as a penalty when
 767   /// the total costs for a constant is calculated (the bigger the cost, the
 768   /// more beneficial constant hoisting is).
 769   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 770                             Type *Ty) const;
 771   /// @}
 772
 773   /// \name Vector Target Information
 774   /// @{
 775
 776   /// The various kinds of shuffle patterns for vector queries.
 777   enum ShuffleKind {
 778     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
 779     SK_Reverse,         ///< Reverse the order of the vector.
 780     SK_Select,          ///< Selects elements from the corresponding lane of
 781                         ///< either source operand. This is equivalent to a
 782                         ///< vector select with a constant condition operand.
 783     SK_Transpose,       ///< Transpose two vectors.
 784     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
 785     SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
 786     SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
 787                         ///< with any shuffle mask.
 788     SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
 789                         ///< shuffle mask.
 790   };
 791
 792   /// Additional information about an operand's possible values.
 793   enum OperandValueKind {
 794     OK_AnyValue,               // Operand can have any value.
 795     OK_UniformValue,           // Operand is uniform (splat of a value).
 796     OK_UniformConstantValue,   // Operand is uniform constant.
 797     OK_NonUniformConstantValue // Operand is a non uniform constant value.
 798   };
 799
 800   /// Additional properties of an operand's values.
 801   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
 802
 803   /// \return the number of registers in the target-provided register class.
 804   unsigned getNumberOfRegisters(unsigned ClassID) const;
 805
 806   /// \return the target-provided register class ID for the provided type,
 807   /// accounting for type promotion and other type-legalization techniques that the target might apply.
 808   /// However, it specifically does not account for the scalarization or splitting of vector types.
 809   /// Should a vector type require scalarization or splitting into multiple underlying vector registers,
 810   /// that type should be mapped to a register class containing no registers.
 811   /// Specifically, this is designed to provide a simple, high-level view of the register allocation
 812   /// later performed by the backend. These register classes don't necessarily map onto the
 813   /// register classes used by the backend.
 814   /// FIXME: It's not currently possible to determine how many registers
 815   /// are used by the provided type.
 816   unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
 817
 818   /// \return the target-provided register class name
 819   const char* getRegisterClassName(unsigned ClassID) const;
 820
 821   /// \return The width of the largest scalar or vector register type.
 822   unsigned getRegisterBitWidth(bool Vector) const;
 823
 824   /// \return The width of the smallest vector register type.
 825   unsigned getMinVectorRegisterBitWidth() const;
 826
 827   /// \return True if the vectorization factor should be chosen to
 828   /// make the vector of the smallest element type match the size of a
 829   /// vector register. For wider element types, this could result in
 830   /// creating vectors that span multiple vector registers.
 831   /// If false, the vectorization factor will be chosen based on the
 832   /// size of the widest element type.
 833   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 834
 835   /// \return The minimum vectorization factor for types of given element
 836   /// bit width, or 0 if there is no minimum VF. The returned value only
 837   /// applies when shouldMaximizeVectorBandwidth returns true.
 838   unsigned getMinimumVF(unsigned ElemWidth) const;
 839
 840   /// \return True if it should be considered for address type promotion.
 841   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
 842   /// profitable without finding other extensions fed by the same input.
 843   bool shouldConsiderAddressTypePromotion(
 844       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
 845
 846   /// \return The size of a cache line in bytes.
 847   unsigned getCacheLineSize() const;
 848
 849   /// The possible cache levels
 850   enum class CacheLevel {
 851     L1D,   // The L1 data cache
 852     L2D,   // The L2 data cache
 853
 854     // We currently do not model L3 caches, as their sizes differ widely between
 855     // microarchitectures. Also, we currently do not have a use for L3 cache
 856     // size modeling yet.
 857   };
 858
 859   /// \return The size of the cache level in bytes, if available.
 860   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
 861
 862   /// \return The associativity of the cache level, if available.
 863   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
 864
 865   /// \return How much before a load we should place the prefetch
 866   /// instruction.  This is currently measured in number of
 867   /// instructions.
 868   unsigned getPrefetchDistance() const;
 869
 870   /// \return Some HW prefetchers can handle accesses up to a certain
 871   /// constant stride.  This is the minimum stride in bytes where it
 872   /// makes sense to start adding SW prefetches.  The default is 1,
 873   /// i.e. prefetch with any stride.
 874   unsigned getMinPrefetchStride() const;
 875
 876   /// \return The maximum number of iterations to prefetch ahead.  If
 877   /// the required number of iterations is more than this number, no
 878   /// prefetching is performed.
 879   unsigned getMaxPrefetchIterationsAhead() const;
 880
 881   /// \return The maximum interleave factor that any transform should try to
 882   /// perform for this target. This number depends on the level of parallelism
 883   /// and the number of execution units in the CPU.
 884   unsigned getMaxInterleaveFactor(unsigned VF) const;
 885
 886   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
 887   static OperandValueKind getOperandInfo(Value *V,
 888                                          OperandValueProperties &OpProps);
 889
 890   /// This is an approximation of reciprocal throughput of a math/logic op.
 891   /// A higher cost indicates less expected throughput.
 892   /// From Agner Fog's guides, reciprocal throughput is "the average number of
 893   /// clock cycles per instruction when the instructions are not part of a
 894   /// limiting dependency chain."
 895   /// Therefore, costs should be scaled to account for multiple execution units
 896   /// on the target that can process this type of instruction. For example, if
 897   /// there are 5 scalar integer units and 2 vector integer units that can
 898   /// calculate an 'add' in a single cycle, this model should indicate that the
 899   /// cost of the vector add instruction is 2.5 times the cost of the scalar
 900   /// add instruction.
 901   /// \p Args is an optional argument which holds the instruction operands
 902   /// values so the TTI can analyze those values searching for special
 903   /// cases or optimizations based on those values.
 904   /// \p CxtI is the optional original context instruction, if one exists, to
 905   /// provide even more information.
 906   int getArithmeticInstrCost(
 907       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
 908       OperandValueKind Opd2Info = OK_AnyValue,
 909       OperandValueProperties Opd1PropInfo = OP_None,
 910       OperandValueProperties Opd2PropInfo = OP_None,
 911       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
 912       const Instruction *CxtI = nullptr) const;
 913
 914   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
 915   /// The index and subtype parameters are used by the subvector insertion and
 916   /// extraction shuffle kinds to show the insert/extract point and the type of
 917   /// the subvector being inserted/extracted.
 918   /// NOTE: For subvector extractions Tp represents the source type.
 919   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
 920                      Type *SubTp = nullptr) const;
 921
 922   /// \return The expected cost of cast instructions, such as bitcast, trunc,
 923   /// zext, etc. If there is an existing instruction that holds Opcode, it
 924   /// may be passed in the 'I' parameter.
 925   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 926                        const Instruction *I = nullptr) const;
 927
 928   /// \return The expected cost of a sign- or zero-extended vector extract. Use
 929   /// -1 to indicate that there is no information about the index value.
 930   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
 931                                unsigned Index = -1) const;
 932
 933   /// \return The expected cost of control-flow related instructions such as
 934   /// Phi, Ret, Br.
 935   int getCFInstrCost(unsigned Opcode) const;
 936
 937   /// \returns The expected cost of compare and select instructions. If there
 938   /// is an existing instruction that holds Opcode, it may be passed in the
 939   /// 'I' parameter.
 940   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 941                  Type *CondTy = nullptr, const Instruction *I = nullptr) const;
 942
 943   /// \return The expected cost of vector Insert and Extract.
 944   /// Use -1 to indicate that there is no information on the index value.
 945   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
 946
 947   /// \return The cost of Load and Store instructions.
 948   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
 949                       unsigned AddressSpace,
 950                       const Instruction *I = nullptr) const;
 951
 952   /// \return The cost of masked Load and Store instructions.
 953   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 954                             unsigned AddressSpace) const;
 955
 956   /// \return The cost of Gather or Scatter operation
 957   /// \p Opcode - is a type of memory access Load or Store
 958   /// \p DataTy - a vector type of the data to be loaded or stored
 959   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
 960   /// \p VariableMask - true when the memory access is predicated with a mask
 961   ///                   that is not a compile-time constant
 962   /// \p Alignment - alignment of single element
 963   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
 964                              bool VariableMask, unsigned Alignment) const;
 965
 966   /// \return The cost of the interleaved memory operation.
 967   /// \p Opcode is the memory operation code
 968   /// \p VecTy is the vector type of the interleaved access.
 969   /// \p Factor is the interleave factor
 970   /// \p Indices is the indices for interleaved load members (as interleaved
 971   ///    load allows gaps)
 972   /// \p Alignment is the alignment of the memory operation
 973   /// \p AddressSpace is address space of the pointer.
 974   /// \p UseMaskForCond indicates if the memory access is predicated.
 975   /// \p UseMaskForGaps indicates if gaps should be masked.
 976   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
 977                                  ArrayRef<unsigned> Indices, unsigned Alignment,
 978                                  unsigned AddressSpace,
 979                                  bool UseMaskForCond = false,
 980                                  bool UseMaskForGaps = false) const;
 981
 982   /// Calculate the cost of performing a vector reduction.
 983   ///
 984   /// This is the cost of reducing the vector value of type \p Ty to a scalar
 985   /// value using the operation denoted by \p Opcode. The form of the reduction
 986   /// can either be a pairwise reduction or a reduction that splits the vector
 987   /// at every reduction level.
 988   ///
 989   /// Pairwise:
 990   ///  (v0, v1, v2, v3)
 991   ///  ((v0+v1), (v2+v3), undef, undef)
 992   /// Split:
 993   ///  (v0, v1, v2, v3)
 994   ///  ((v0+v2), (v1+v3), undef, undef)
 995   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
 996                                  bool IsPairwiseForm) const;
 997   int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
 998                              bool IsUnsigned) const;
 999
1000   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1001   /// Three cases are handled: 1. scalar instruction 2. vector instruction
1002   /// 3. scalar instruction which is to be vectorized with VF.
1003   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1004                             ArrayRef<Value *> Args, FastMathFlags FMF,
1005                             unsigned VF = 1) const;
1006
1007   /// \returns The cost of Intrinsic instructions. Types analysis only.
1008   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
1009   /// arguments and the return value will be computed based on types.
1010   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1011                             ArrayRef<Type *> Tys, FastMathFlags FMF,
1012                             unsigned ScalarizationCostPassed = UINT_MAX) const;
1013
1014   /// \returns The cost of Call instructions.
1015   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
1016
1017   /// \returns The number of pieces into which the provided type must be
1018   /// split during legalization. Zero is returned when the answer is unknown.
1019   unsigned getNumberOfParts(Type *Tp) const;
1020
1021   /// \returns The cost of the address computation. For most targets this can be
1022   /// merged into the instruction indexing mode. Some targets might want to
1023   /// distinguish between address computation for memory operations on vector
1024   /// types and scalar types. Such targets should override this function.
1025   /// The 'SE' parameter holds pointer for the scalar evolution object which
1026   /// is used in order to get the Ptr step value in case of constant stride.
1027   /// The 'Ptr' parameter holds SCEV of the access pointer.
1028   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
1029                                 const SCEV *Ptr = nullptr) const;
1030
1031   /// \returns The cost, if any, of keeping values of the given types alive
1032   /// over a callsite.
1033   ///
1034   /// Some types may require the use of register classes that do not have
1035   /// any callee-saved registers, so would require a spill and fill.
1036   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1037
1038   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
1039   /// will contain additional information - whether the intrinsic may write
1040   /// or read to memory, volatility and the pointer.  Info is undefined
1041   /// if false is returned.
1042   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1043
1044   /// \returns The maximum element size, in bytes, for an element
1045   /// unordered-atomic memory intrinsic.
1046   unsigned getAtomicMemIntrinsicMaxElementSize() const;
1047
1048   /// \returns A value which is the result of the given memory intrinsic.  New
1049   /// instructions may be created to extract the result from the given intrinsic
1050   /// memory operation.  Returns nullptr if the target cannot create a result
1051   /// from the given intrinsic.
1052   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1053                                            Type *ExpectedType) const;
1054
1055   /// \returns The type to use in a loop expansion of a memcpy call.
1056   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1057                                   unsigned SrcAlign, unsigned DestAlign) const;
1058
1059   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1060   /// \param RemainingBytes The number of bytes to copy.
1061   ///
1062   /// Calculates the operand types to use when copying \p RemainingBytes of
1063   /// memory, where source and destination alignments are \p SrcAlign and
1064   /// \p DestAlign respectively.
1065   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1066                                          LLVMContext &Context,
1067                                          unsigned RemainingBytes,
1068                                          unsigned SrcAlign,
1069                                          unsigned DestAlign) const;
1070
1071   /// \returns True if the two functions have compatible attributes for inlining
1072   /// purposes.
1073   bool areInlineCompatible(const Function *Caller,
1074                            const Function *Callee) const;
1075
1076   /// \returns True if the caller and callee agree on how \p Args will be passed
1077   /// to the callee.
1078   /// \param[out] Args The list of compatible arguments.  The implementation may
1079   /// filter out any incompatible args from this list.
1080   bool areFunctionArgsABICompatible(const Function *Caller,
1081                                     const Function *Callee,
1082                                     SmallPtrSetImpl<Argument *> &Args) const;
1083
1084   /// The type of load/store indexing.
1085   enum MemIndexedMode {
1086     MIM_Unindexed,  ///< No indexing.
1087     MIM_PreInc,     ///< Pre-incrementing.
1088     MIM_PreDec,     ///< Pre-decrementing.
1089     MIM_PostInc,    ///< Post-incrementing.
1090     MIM_PostDec     ///< Post-decrementing.
1091   };
1092
1093   /// \returns True if the specified indexed load for the given type is legal.
1094   bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1095
1096   /// \returns True if the specified indexed store for the given type is legal.
1097   bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1098
1099   /// \returns The bitwidth of the largest vector type that should be used to
1100   /// load/store in the given address space.
1101   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1102
1103   /// \returns True if the load instruction is legal to vectorize.
1104   bool isLegalToVectorizeLoad(LoadInst *LI) const;
1105
1106   /// \returns True if the store instruction is legal to vectorize.
1107   bool isLegalToVectorizeStore(StoreInst *SI) const;
1108
1109   /// \returns True if it is legal to vectorize the given load chain.
1110   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1111                                    unsigned Alignment,
1112                                    unsigned AddrSpace) const;
1113
1114   /// \returns True if it is legal to vectorize the given store chain.
1115   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1116                                     unsigned Alignment,
1117                                     unsigned AddrSpace) const;
1118
1119   /// \returns The new vector factor value if the target doesn't support \p
1120   /// SizeInBytes loads or has a better vector factor.
1121   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1122                                unsigned ChainSizeInBytes,
1123                                VectorType *VecTy) const;
1124
1125   /// \returns The new vector factor value if the target doesn't support \p
1126   /// SizeInBytes stores or has a better vector factor.
1127   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1128                                 unsigned ChainSizeInBytes,
1129                                 VectorType *VecTy) const;
1130
1131   /// Flags describing the kind of vector reduction.
1132   struct ReductionFlags {
1133     ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1134     bool IsMaxOp;  ///< If the op a min/max kind, true if it's a max operation.
1135     bool IsSigned; ///< Whether the operation is a signed int reduction.
1136     bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
1137   };
1138
1139   /// \returns True if the target wants to handle the given reduction idiom in
1140   /// the intrinsics form instead of the shuffle form.
1141   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1142                              ReductionFlags Flags) const;
1143
1144   /// \returns True if the target wants to expand the given reduction intrinsic
1145   /// into a shuffle sequence.
1146   bool shouldExpandReduction(const IntrinsicInst *II) const;
1147
1148   /// \returns the size cost of rematerializing a GlobalValue address relative
1149   /// to a stack reload.
1150   unsigned getGISelRematGlobalCost() const;
1151
1152   /// @}
1153
1154 private:
1155   /// Estimate the latency of specified instruction.
1156   /// Returns 1 as the default value.
1157   int getInstructionLatency(const Instruction *I) const;
1158
1159   /// Returns the expected throughput cost of the instruction.
1160   /// Returns -1 if the cost is unknown.
1161   int getInstructionThroughput(const Instruction *I) const;
1162
1163   /// The abstract base class used to type erase specific TTI
1164   /// implementations.
1165   class Concept;
1166
1167   /// The template model for the base class which wraps a concrete
1168   /// implementation in a type erased interface.
1169   template <typename T> class Model;
1170
1171   std::unique_ptr<Concept> TTIImpl;
1172 };
1173
1174 class TargetTransformInfo::Concept {
1175 public:
1176   virtual ~Concept() = 0;
1177   virtual const DataLayout &getDataLayout() const = 0;
1178   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
1179   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1180                          ArrayRef<const Value *> Operands) = 0;
1181   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
1182   virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
1183   virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
1184   virtual int getCallCost(const Function *F,
1185                           ArrayRef<const Value *> Arguments, const User *U) = 0;
1186   virtual unsigned getInliningThresholdMultiplier() = 0;
1187   virtual int getInlinerVectorBonusPercent() = 0;
1188   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1189                                ArrayRef<Type *> ParamTys, const User *U) = 0;
1190   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1191                                ArrayRef<const Value *> Arguments,
1192                                const User *U) = 0;
1193   virtual int getMemcpyCost(const Instruction *I) = 0;
1194   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1195                                                     unsigned &JTSize,
1196                                                     ProfileSummaryInfo *PSI,
1197                                                     BlockFrequencyInfo *BFI) = 0;
1198   virtual int
1199   getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1200   virtual bool hasBranchDivergence() = 0;
1201   virtual bool isSourceOfDivergence(const Value *V) = 0;
1202   virtual bool isAlwaysUniform(const Value *V) = 0;
1203   virtual unsigned getFlatAddressSpace() = 0;
1204   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1205                                           Intrinsic::ID IID) const = 0;
1206   virtual bool rewriteIntrinsicWithAddressSpace(
1207     IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
1208   virtual bool isLoweredToCall(const Function *F) = 0;
1209   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1210                                        UnrollingPreferences &UP) = 0;
1211   virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1212                                         AssumptionCache &AC,
1213                                         TargetLibraryInfo *LibInfo,
1214                                         HardwareLoopInfo &HWLoopInfo) = 0;
1215   virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
1216                                            ScalarEvolution &SE,
1217                                            AssumptionCache &AC,
1218                                            TargetLibraryInfo *TLI,
1219                                            DominatorTree *DT,
1220                                            const LoopAccessInfo *LAI) = 0;
1221   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1222   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1223   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1224                                      int64_t BaseOffset, bool HasBaseReg,
1225                                      int64_t Scale,
1226                                      unsigned AddrSpace,
1227                                      Instruction *I) = 0;
1228   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1229                              TargetTransformInfo::LSRCost &C2) = 0;
1230   virtual bool canMacroFuseCmp() = 0;
1231   virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1232                           LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1233                           TargetLibraryInfo *LibInfo) = 0;
1234   virtual bool shouldFavorPostInc() const = 0;
1235   virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1236   virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
1237   virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
1238   virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1239   virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1240   virtual bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) = 0;
1241   virtual bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) = 0;
1242   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1243   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1244   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1245   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1246   virtual bool prefersVectorizedAddressing() = 0;
1247   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1248                                    int64_t BaseOffset, bool HasBaseReg,
1249                                    int64_t Scale, unsigned AddrSpace) = 0;
1250   virtual bool LSRWithInstrQueries() = 0;
1251   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1252   virtual bool isProfitableToHoist(Instruction *I) = 0;
1253   virtual bool useAA() = 0;
1254   virtual bool isTypeLegal(Type *Ty) = 0;
1255   virtual bool shouldBuildLookupTables() = 0;
1256   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1257   virtual bool useColdCCForColdCall(Function &F) = 0;
1258   virtual unsigned
1259   getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1260   virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1261                                                     unsigned VF) = 0;
1262   virtual bool supportsEfficientVectorElementLoadStore() = 0;
1263   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1264   virtual MemCmpExpansionOptions
1265   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1266   virtual bool enableInterleavedAccessVectorization() = 0;
1267   virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1268   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1269   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1270                                               unsigned BitWidth,
1271                                               unsigned AddressSpace,
1272                                               unsigned Alignment,
1273                                               bool *Fast) = 0;
1274   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1275   virtual bool haveFastSqrt(Type *Ty) = 0;
1276   virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1277   virtual int getFPOpCost(Type *Ty) = 0;
1278   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1279                                     Type *Ty) = 0;
1280   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1281   virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
1282                                 Type *Ty) = 0;
1283   virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1284                                   const APInt &Imm, Type *Ty) = 0;
1285   virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1286   virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
1287   virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
1288   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1289   virtual unsigned getMinVectorRegisterBitWidth() = 0;
1290   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1291   virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1292   virtual bool shouldConsiderAddressTypePromotion(
1293       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1294   virtual unsigned getCacheLineSize() const = 0;
1295   virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1296   virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1297
1298   /// \return How much before a load we should place the prefetch
1299   /// instruction.  This is currently measured in number of
1300   /// instructions.
1301   virtual unsigned getPrefetchDistance() const = 0;
1302
1303   /// \return Some HW prefetchers can handle accesses up to a certain
1304   /// constant stride.  This is the minimum stride in bytes where it
1305   /// makes sense to start adding SW prefetches.  The default is 1,
1306   /// i.e. prefetch with any stride.
1307   virtual unsigned getMinPrefetchStride() const = 0;
1308
1309   /// \return The maximum number of iterations to prefetch ahead.  If
1310   /// the required number of iterations is more than this number, no
1311   /// prefetching is performed.
1312   virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1313
1314   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1315   virtual unsigned getArithmeticInstrCost(
1316       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1317       OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
1318       OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
1319       const Instruction *CxtI = nullptr) = 0;
1320   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1321                              Type *SubTp) = 0;
1322   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1323                                const Instruction *I) = 0;
1324   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1325                                        VectorType *VecTy, unsigned Index) = 0;
1326   virtual int getCFInstrCost(unsigned Opcode) = 0;
1327   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1328                                 Type *CondTy, const Instruction *I) = 0;
1329   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1330                                  unsigned Index) = 0;
1331   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
1332                               unsigned AddressSpace, const Instruction *I) = 0;
1333   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1334                                     unsigned Alignment,
1335                                     unsigned AddressSpace) = 0;
1336   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1337                                      Value *Ptr, bool VariableMask,
1338                                      unsigned Alignment) = 0;
1339   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1340                                          unsigned Factor,
1341                                          ArrayRef<unsigned> Indices,
1342                                          unsigned Alignment,
1343                                          unsigned AddressSpace,
1344                                          bool UseMaskForCond = false,
1345                                          bool UseMaskForGaps = false) = 0;
1346   virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1347                                          bool IsPairwiseForm) = 0;
1348   virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1349                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
1350   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1351                       ArrayRef<Type *> Tys, FastMathFlags FMF,
1352                       unsigned ScalarizationCostPassed) = 0;
1353   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1354          ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1355   virtual int getCallInstrCost(Function *F, Type *RetTy,
1356                                ArrayRef<Type *> Tys) = 0;
1357   virtual unsigned getNumberOfParts(Type *Tp) = 0;
1358   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1359                                         const SCEV *Ptr) = 0;
1360   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1361   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1362                                   MemIntrinsicInfo &Info) = 0;
1363   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1364   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1365                                                    Type *ExpectedType) = 0;
1366   virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1367                                           unsigned SrcAlign,
1368                                           unsigned DestAlign) const = 0;
1369   virtual void getMemcpyLoopResidualLoweringType(
1370       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1371       unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1372   virtual bool areInlineCompatible(const Function *Caller,
1373                                    const Function *Callee) const = 0;
1374   virtual bool
1375   areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1376                                SmallPtrSetImpl<Argument *> &Args) const = 0;
1377   virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1378   virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
1379   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1380   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1381   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1382   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1383                                            unsigned Alignment,
1384                                            unsigned AddrSpace) const = 0;
1385   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1386                                             unsigned Alignment,
1387                                             unsigned AddrSpace) const = 0;
1388   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1389                                        unsigned ChainSizeInBytes,
1390                                        VectorType *VecTy) const = 0;
1391   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1392                                         unsigned ChainSizeInBytes,
1393                                         VectorType *VecTy) const = 0;
1394   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1395                                      ReductionFlags) const = 0;
1396   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1397   virtual unsigned getGISelRematGlobalCost() const = 0;
1398   virtual int getInstructionLatency(const Instruction *I) = 0;
1399 };
1400
1401 template <typename T>
1402 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1403   T Impl;
1404
1405 public:
1406   Model(T Impl) : Impl(std::move(Impl)) {}
1407   ~Model() override {}
1408
1409   const DataLayout &getDataLayout() const override {
1410     return Impl.getDataLayout();
1411   }
1412
1413   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
1414     return Impl.getOperationCost(Opcode, Ty, OpTy);
1415   }
1416   int getGEPCost(Type *PointeeType, const Value *Ptr,
1417                  ArrayRef<const Value *> Operands) override {
1418     return Impl.getGEPCost(PointeeType, Ptr, Operands);
1419   }
1420   int getExtCost(const Instruction *I, const Value *Src) override {
1421     return Impl.getExtCost(I, Src);
1422   }
1423   int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
1424     return Impl.getCallCost(FTy, NumArgs, U);
1425   }
1426   int getCallCost(const Function *F, int NumArgs, const User *U) override {
1427     return Impl.getCallCost(F, NumArgs, U);
1428   }
1429   int getCallCost(const Function *F,
1430                   ArrayRef<const Value *> Arguments, const User *U) override {
1431     return Impl.getCallCost(F, Arguments, U);
1432   }
1433   unsigned getInliningThresholdMultiplier() override {
1434     return Impl.getInliningThresholdMultiplier();
1435   }
1436   int getInlinerVectorBonusPercent() override {
1437     return Impl.getInlinerVectorBonusPercent();
1438   }
1439   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1440                        ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
1441     return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
1442   }
1443   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1444                        ArrayRef<const Value *> Arguments,
1445                        const User *U = nullptr) override {
1446     return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
1447   }
1448   int getMemcpyCost(const Instruction *I) override {
1449     return Impl.getMemcpyCost(I);
1450   }
1451   int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
1452     return Impl.getUserCost(U, Operands);
1453   }
1454   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1455   bool isSourceOfDivergence(const Value *V) override {
1456     return Impl.isSourceOfDivergence(V);
1457   }
1458
1459   bool isAlwaysUniform(const Value *V) override {
1460     return Impl.isAlwaysUniform(V);
1461   }
1462
1463   unsigned getFlatAddressSpace() override {
1464     return Impl.getFlatAddressSpace();
1465   }
1466
1467   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1468                                   Intrinsic::ID IID) const override {
1469     return Impl.collectFlatAddressOperands(OpIndexes, IID);
1470   }
1471
1472   bool rewriteIntrinsicWithAddressSpace(
1473     IntrinsicInst *II, Value *OldV, Value *NewV) const override {
1474     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1475   }
1476
1477   bool isLoweredToCall(const Function *F) override {
1478     return Impl.isLoweredToCall(F);
1479   }
1480   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1481                                UnrollingPreferences &UP) override {
1482     return Impl.getUnrollingPreferences(L, SE, UP);
1483   }
1484   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1485                                 AssumptionCache &AC,
1486                                 TargetLibraryInfo *LibInfo,
1487                                 HardwareLoopInfo &HWLoopInfo) override {
1488     return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1489   }
1490   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1491                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
1492                                    DominatorTree *DT,
1493                                    const LoopAccessInfo *LAI) override {
1494     return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1495   }
1496   bool isLegalAddImmediate(int64_t Imm) override {
1497     return Impl.isLegalAddImmediate(Imm);
1498   }
1499   bool isLegalICmpImmediate(int64_t Imm) override {
1500     return Impl.isLegalICmpImmediate(Imm);
1501   }
1502   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1503                              bool HasBaseReg, int64_t Scale,
1504                              unsigned AddrSpace,
1505                              Instruction *I) override {
1506     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
1507                                       Scale, AddrSpace, I);
1508   }
1509   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1510                      TargetTransformInfo::LSRCost &C2) override {
1511     return Impl.isLSRCostLess(C1, C2);
1512   }
1513   bool canMacroFuseCmp() override {
1514     return Impl.canMacroFuseCmp();
1515   }
1516   bool canSaveCmp(Loop *L, BranchInst **BI,
1517                         ScalarEvolution *SE,
1518                         LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1519                         TargetLibraryInfo *LibInfo) override {
1520     return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1521   }
1522   bool shouldFavorPostInc() const override {
1523     return Impl.shouldFavorPostInc();
1524   }
1525   bool shouldFavorBackedgeIndex(const Loop *L) const override {
1526     return Impl.shouldFavorBackedgeIndex(L);
1527   }
1528   bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
1529     return Impl.isLegalMaskedStore(DataType, Alignment);
1530   }
1531   bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
1532     return Impl.isLegalMaskedLoad(DataType, Alignment);
1533   }
1534   bool isLegalNTStore(Type *DataType, Align Alignment) override {
1535     return Impl.isLegalNTStore(DataType, Alignment);
1536   }
1537   bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1538     return Impl.isLegalNTLoad(DataType, Alignment);
1539   }
1540   bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) override {
1541     return Impl.isLegalMaskedScatter(DataType, Alignment);
1542   }
1543   bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) override {
1544     return Impl.isLegalMaskedGather(DataType, Alignment);
1545   }
1546   bool isLegalMaskedCompressStore(Type *DataType) override {
1547     return Impl.isLegalMaskedCompressStore(DataType);
1548   }
1549   bool isLegalMaskedExpandLoad(Type *DataType) override {
1550     return Impl.isLegalMaskedExpandLoad(DataType);
1551   }
1552   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1553     return Impl.hasDivRemOp(DataType, IsSigned);
1554   }
1555   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1556     return Impl.hasVolatileVariant(I, AddrSpace);
1557   }
1558   bool prefersVectorizedAddressing() override {
1559     return Impl.prefersVectorizedAddressing();
1560   }
1561   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1562                            bool HasBaseReg, int64_t Scale,
1563                            unsigned AddrSpace) override {
1564     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
1565                                      Scale, AddrSpace);
1566   }
1567   bool LSRWithInstrQueries() override {
1568     return Impl.LSRWithInstrQueries();
1569   }
1570   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1571     return Impl.isTruncateFree(Ty1, Ty2);
1572   }
1573   bool isProfitableToHoist(Instruction *I) override {
1574     return Impl.isProfitableToHoist(I);
1575   }
1576   bool useAA() override { return Impl.useAA(); }
1577   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
1578   bool shouldBuildLookupTables() override {
1579     return Impl.shouldBuildLookupTables();
1580   }
1581   bool shouldBuildLookupTablesForConstant(Constant *C) override {
1582     return Impl.shouldBuildLookupTablesForConstant(C);
1583   }
1584   bool useColdCCForColdCall(Function &F) override {
1585     return Impl.useColdCCForColdCall(F);
1586   }
1587
1588   unsigned getScalarizationOverhead(Type *Ty, bool Insert,
1589                                     bool Extract) override {
1590     return Impl.getScalarizationOverhead(Ty, Insert, Extract);
1591   }
1592   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1593                                             unsigned VF) override {
1594     return Impl.getOperandsScalarizationOverhead(Args, VF);
1595   }
1596
1597   bool supportsEfficientVectorElementLoadStore() override {
1598     return Impl.supportsEfficientVectorElementLoadStore();
1599   }
1600
1601   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1602     return Impl.enableAggressiveInterleaving(LoopHasReductions);
1603   }
1604   MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1605                                                bool IsZeroCmp) const override {
1606     return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1607   }
1608   bool enableInterleavedAccessVectorization() override {
1609     return Impl.enableInterleavedAccessVectorization();
1610   }
1611   bool enableMaskedInterleavedAccessVectorization() override {
1612     return Impl.enableMaskedInterleavedAccessVectorization();
1613   }
1614   bool isFPVectorizationPotentiallyUnsafe() override {
1615     return Impl.isFPVectorizationPotentiallyUnsafe();
1616   }
1617   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1618                                       unsigned BitWidth, unsigned AddressSpace,
1619                                       unsigned Alignment, bool *Fast) override {
1620     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1621                                                Alignment, Fast);
1622   }
1623   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1624     return Impl.getPopcntSupport(IntTyWidthInBit);
1625   }
1626   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1627
1628   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1629     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1630   }
1631
1632   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1633
1634   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1635                             Type *Ty) override {
1636     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1637   }
1638   int getIntImmCost(const APInt &Imm, Type *Ty) override {
1639     return Impl.getIntImmCost(Imm, Ty);
1640   }
1641   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
1642                         Type *Ty) override {
1643     return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
1644   }
1645   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1646                           Type *Ty) override {
1647     return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
1648   }
1649   unsigned getNumberOfRegisters(unsigned ClassID) const override {
1650     return Impl.getNumberOfRegisters(ClassID);
1651   }
1652   unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
1653     return Impl.getRegisterClassForType(Vector, Ty);
1654   }
1655   const char* getRegisterClassName(unsigned ClassID) const override {
1656     return Impl.getRegisterClassName(ClassID);
1657   }
1658   unsigned getRegisterBitWidth(bool Vector) const override {
1659     return Impl.getRegisterBitWidth(Vector);
1660   }
1661   unsigned getMinVectorRegisterBitWidth() override {
1662     return Impl.getMinVectorRegisterBitWidth();
1663   }
1664   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1665     return Impl.shouldMaximizeVectorBandwidth(OptSize);
1666   }
1667   unsigned getMinimumVF(unsigned ElemWidth) const override {
1668     return Impl.getMinimumVF(ElemWidth);
1669   }
1670   bool shouldConsiderAddressTypePromotion(
1671       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1672     return Impl.shouldConsiderAddressTypePromotion(
1673         I, AllowPromotionWithoutCommonHeader);
1674   }
1675   unsigned getCacheLineSize() const override {
1676     return Impl.getCacheLineSize();
1677   }
1678   llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
1679     return Impl.getCacheSize(Level);
1680   }
1681   llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
1682     return Impl.getCacheAssociativity(Level);
1683   }
1684
1685   /// Return the preferred prefetch distance in terms of instructions.
1686   ///
1687   unsigned getPrefetchDistance() const override {
1688     return Impl.getPrefetchDistance();
1689   }
1690
1691   /// Return the minimum stride necessary to trigger software
1692   /// prefetching.
1693   ///
1694   unsigned getMinPrefetchStride() const override {
1695     return Impl.getMinPrefetchStride();
1696   }
1697
1698   /// Return the maximum prefetch distance in terms of loop
1699   /// iterations.
1700   ///
1701   unsigned getMaxPrefetchIterationsAhead() const override {
1702     return Impl.getMaxPrefetchIterationsAhead();
1703   }
1704
1705   unsigned getMaxInterleaveFactor(unsigned VF) override {
1706     return Impl.getMaxInterleaveFactor(VF);
1707   }
1708   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1709                                             unsigned &JTSize,
1710                                             ProfileSummaryInfo *PSI,
1711                                             BlockFrequencyInfo *BFI) override {
1712     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
1713   }
1714   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
1715                                   OperandValueKind Opd1Info,
1716                                   OperandValueKind Opd2Info,
1717                                   OperandValueProperties Opd1PropInfo,
1718                                   OperandValueProperties Opd2PropInfo,
1719                                   ArrayRef<const Value *> Args,
1720                                   const Instruction *CxtI = nullptr) override {
1721     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1722                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
1723   }
1724   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1725                      Type *SubTp) override {
1726     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1727   }
1728   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1729                        const Instruction *I) override {
1730     return Impl.getCastInstrCost(Opcode, Dst, Src, I);
1731   }
1732   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1733                                unsigned Index) override {
1734     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1735   }
1736   int getCFInstrCost(unsigned Opcode) override {
1737     return Impl.getCFInstrCost(Opcode);
1738   }
1739   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1740                          const Instruction *I) override {
1741     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
1742   }
1743   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1744     return Impl.getVectorInstrCost(Opcode, Val, Index);
1745   }
1746   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
1747                       unsigned AddressSpace, const Instruction *I) override {
1748     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
1749   }
1750   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1751                             unsigned AddressSpace) override {
1752     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1753   }
1754   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1755                              Value *Ptr, bool VariableMask,
1756                              unsigned Alignment) override {
1757     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1758                                        Alignment);
1759   }
1760   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1761                                  ArrayRef<unsigned> Indices, unsigned Alignment,
1762                                  unsigned AddressSpace, bool UseMaskForCond,
1763                                  bool UseMaskForGaps) override {
1764     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1765                                            Alignment, AddressSpace,
1766                                            UseMaskForCond, UseMaskForGaps);
1767   }
1768   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1769                                  bool IsPairwiseForm) override {
1770     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
1771   }
1772   int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1773                              bool IsPairwiseForm, bool IsUnsigned) override {
1774     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
1775    }
1776   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1777                FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
1778     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
1779                                       ScalarizationCostPassed);
1780   }
1781   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1782        ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
1783     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
1784   }
1785   int getCallInstrCost(Function *F, Type *RetTy,
1786                        ArrayRef<Type *> Tys) override {
1787     return Impl.getCallInstrCost(F, RetTy, Tys);
1788   }
1789   unsigned getNumberOfParts(Type *Tp) override {
1790     return Impl.getNumberOfParts(Tp);
1791   }
1792   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1793                                 const SCEV *Ptr) override {
1794     return Impl.getAddressComputationCost(Ty, SE, Ptr);
1795   }
1796   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1797     return Impl.getCostOfKeepingLiveOverCall(Tys);
1798   }
1799   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1800                           MemIntrinsicInfo &Info) override {
1801     return Impl.getTgtMemIntrinsic(Inst, Info);
1802   }
1803   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
1804     return Impl.getAtomicMemIntrinsicMaxElementSize();
1805   }
1806   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1807                                            Type *ExpectedType) override {
1808     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1809   }
1810   Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1811                                   unsigned SrcAlign,
1812                                   unsigned DestAlign) const override {
1813     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
1814   }
1815   void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
1816                                          LLVMContext &Context,
1817                                          unsigned RemainingBytes,
1818                                          unsigned SrcAlign,
1819                                          unsigned DestAlign) const override {
1820     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
1821                                            SrcAlign, DestAlign);
1822   }
1823   bool areInlineCompatible(const Function *Caller,
1824                            const Function *Callee) const override {
1825     return Impl.areInlineCompatible(Caller, Callee);
1826   }
1827   bool areFunctionArgsABICompatible(
1828       const Function *Caller, const Function *Callee,
1829       SmallPtrSetImpl<Argument *> &Args) const override {
1830     return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1831   }
1832   bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
1833     return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
1834   }
1835   bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
1836     return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
1837   }
1838   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1839     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1840   }
1841   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1842     return Impl.isLegalToVectorizeLoad(LI);
1843   }
1844   bool isLegalToVectorizeStore(StoreInst *SI) const override {
1845     return Impl.isLegalToVectorizeStore(SI);
1846   }
1847   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1848                                    unsigned Alignment,
1849                                    unsigned AddrSpace) const override {
1850     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1851                                             AddrSpace);
1852   }
1853   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1854                                     unsigned Alignment,
1855                                     unsigned AddrSpace) const override {
1856     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1857                                              AddrSpace);
1858   }
1859   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1860                                unsigned ChainSizeInBytes,
1861                                VectorType *VecTy) const override {
1862     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1863   }
1864   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1865                                 unsigned ChainSizeInBytes,
1866                                 VectorType *VecTy) const override {
1867     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1868   }
1869   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1870                              ReductionFlags Flags) const override {
1871     return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
1872   }
1873   bool shouldExpandReduction(const IntrinsicInst *II) const override {
1874     return Impl.shouldExpandReduction(II);
1875   }
1876
1877   unsigned getGISelRematGlobalCost() const override {
1878     return Impl.getGISelRematGlobalCost();
1879   }
1880
1881   int getInstructionLatency(const Instruction *I) override {
1882     return Impl.getInstructionLatency(I);
1883   }
1884 };
1885
1886 template <typename T>
1887 TargetTransformInfo::TargetTransformInfo(T Impl)
1888     : TTIImpl(new Model<T>(Impl)) {}
1889
1890 /// Analysis pass providing the \c TargetTransformInfo.
1891 ///
1892 /// The core idea of the TargetIRAnalysis is to expose an interface through
1893 /// which LLVM targets can analyze and provide information about the middle
1894 /// end's target-independent IR. This supports use cases such as target-aware
1895 /// cost modeling of IR constructs.
1896 ///
1897 /// This is a function analysis because much of the cost modeling for targets
1898 /// is done in a subtarget specific way and LLVM supports compiling different
1899 /// functions targeting different subtargets in order to support runtime
1900 /// dispatch according to the observed subtarget.
1901 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1902 public:
1903   typedef TargetTransformInfo Result;
1904
1905   /// Default construct a target IR analysis.
1906   ///
1907   /// This will use the module's datalayout to construct a baseline
1908   /// conservative TTI result.
1909   TargetIRAnalysis();
1910
1911   /// Construct an IR analysis pass around a target-provide callback.
1912   ///
1913   /// The callback will be called with a particular function for which the TTI
1914   /// is needed and must return a TTI object for that function.
1915   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1916
1917   // Value semantics. We spell out the constructors for MSVC.
1918   TargetIRAnalysis(const TargetIRAnalysis &Arg)
1919       : TTICallback(Arg.TTICallback) {}
1920   TargetIRAnalysis(TargetIRAnalysis &&Arg)
1921       : TTICallback(std::move(Arg.TTICallback)) {}
1922   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1923     TTICallback = RHS.TTICallback;
1924     return *this;
1925   }
1926   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1927     TTICallback = std::move(RHS.TTICallback);
1928     return *this;
1929   }
1930
1931   Result run(const Function &F, FunctionAnalysisManager &);
1932
1933 private:
1934   friend AnalysisInfoMixin<TargetIRAnalysis>;
1935   static AnalysisKey Key;
1936
1937   /// The callback used to produce a result.
1938   ///
1939   /// We use a completely opaque callback so that targets can provide whatever
1940   /// mechanism they desire for constructing the TTI for a given function.
1941   ///
1942   /// FIXME: Should we really use std::function? It's relatively inefficient.
1943   /// It might be possible to arrange for even stateful callbacks to outlive
1944   /// the analysis and thus use a function_ref which would be lighter weight.
1945   /// This may also be less error prone as the callback is likely to reference
1946   /// the external TargetMachine, and that reference needs to never dangle.
1947   std::function<Result(const Function &)> TTICallback;
1948
1949   /// Helper function used as the callback in the default constructor.
1950   static Result getDefaultTTI(const Function &F);
1951 };
1952
1953 /// Wrapper pass for TargetTransformInfo.
1954 ///
1955 /// This pass can be constructed from a TTI object which it stores internally
1956 /// and is queried by passes.
1957 class TargetTransformInfoWrapperPass : public ImmutablePass {
1958   TargetIRAnalysis TIRA;
1959   Optional<TargetTransformInfo> TTI;
1960
1961   virtual void anchor();
1962
1963 public:
1964   static char ID;
1965
1966   /// We must provide a default constructor for the pass but it should
1967   /// never be used.
1968   ///
1969   /// Use the constructor below or call one of the creation routines.
1970   TargetTransformInfoWrapperPass();
1971
1972   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1973
1974   TargetTransformInfo &getTTI(const Function &F);
1975 };
1976
1977 /// Create an analysis pass wrapper around a TTI object.
1978 ///
1979 /// This analysis pass just holds the TTI instance and makes it available to
1980 /// clients.
1981 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1982
1983 } // End llvm namespace
1984
1985 #endif