contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h

   1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 /// \file
  10 /// This pass exposes codegen information to IR-level passes. Every
  11 /// transformation that uses codegen information is broken into three parts:
  12 /// 1. The IR-level analysis pass.
  13 /// 2. The IR-level transformation interface which provides the needed
  14 ///    information.
  15 /// 3. Codegen-level implementation which uses target-specific hooks.
  16 ///
  17 /// This file defines #2, which is the interface that IR-level transformations
  18 /// use for querying the codegen.
  19 ///
  20 //===----------------------------------------------------------------------===//
  21
  22 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  23 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  24
  25 #include "llvm/ADT/Optional.h"
  26 #include "llvm/IR/IntrinsicInst.h"
  27 #include "llvm/IR/Intrinsics.h"
  28 #include "llvm/IR/Operator.h"
  29 #include "llvm/IR/PassManager.h"
  30 #include "llvm/Pass.h"
  31 #include "llvm/Support/DataTypes.h"
  32 #include <functional>
  33
  34 namespace llvm {
  35
  36 class Function;
  37 class GlobalValue;
  38 class Loop;
  39 class ScalarEvolution;
  40 class SCEV;
  41 class Type;
  42 class User;
  43 class Value;
  44
  45 /// \brief Information about a load/store intrinsic defined by the target.
  46 struct MemIntrinsicInfo {
  47   MemIntrinsicInfo()
  48       : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
  49         NumMemRefs(0), PtrVal(nullptr) {} // Everything starts cleared.
  50   bool ReadMem;  ///< Set when the intrinsic may read memory.
  51   bool WriteMem; ///< Set when the intrinsic may write memory.
  52   /// True only if this memory operation is non-volatile, non-atomic, and
  53   /// unordered.  (See LoadInst/StoreInst for details on each.)
  54   bool IsSimple;
  55   /// Same Id is set by the target for corresponding load/store intrinsics.
  56   unsigned short MatchingId;
  57   int NumMemRefs; ///< Presumably a memory-reference count; TODO confirm.
  58
  59   /// This is the pointer that the intrinsic is loading from or storing to.
  60   /// If this is non-null, then analysis/optimization passes can assume that
  61   /// this intrinsic is functionally equivalent to a load/store from this
  62   /// pointer.
  63   Value *PtrVal;
  64 };
  65
  66 /// \brief This pass provides access to the codegen interfaces that are needed
  67 /// for IR-level transformations.
  68 class TargetTransformInfo {
  69 public:
  70   /// \brief Construct a TTI object using a type implementing the \c Concept
  71   /// API below.
  72   ///
  73   /// This is used by targets to construct a TTI wrapping their target-specific
  74   /// implementation that encodes appropriate costs for their target.
  75   template <typename T> TargetTransformInfo(T Impl);
  76
  77   /// \brief Construct a baseline TTI object using a minimal implementation of
  78   /// the \c Concept API below.
  79   ///
  80   /// The TTI implementation will reflect the information in the DataLayout
  81   /// provided.
  82   explicit TargetTransformInfo(const DataLayout &DL);
  83
  84   // Provide move semantics.
  85   TargetTransformInfo(TargetTransformInfo &&Arg);
  86   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
  87
  88   // We need to define the destructor out-of-line to define our sub-classes
  89   // out-of-line.
  90   ~TargetTransformInfo();
  91
  92   /// \brief Handle the invalidation of this information.
  93   ///
  94   /// When used as a result of \c TargetIRAnalysis this method will be called
  95   /// when the function this was computed for changes. When it returns false,
  96   /// the information is preserved across those changes.
  97   bool invalidate(Function &, const PreservedAnalyses &,
  98                   FunctionAnalysisManager::Invalidator &) {
  99     // FIXME: We should probably in some way ensure that the subtarget
 100     // information for a function hasn't changed.
 101     return false; // Never invalidated: all three arguments are ignored.
 102   }
 103
 104   /// \name Generic Target Information
 105   /// @{
 106
 107   /// \brief Underlying constants for 'cost' values in this interface.
 108   ///
 109   /// Many APIs in this interface return a cost. This enum defines the
 110   /// fundamental values that should be used to interpret (and produce) those
 111   /// costs. The costs are returned as an int rather than a member of this
 112   /// enumeration because it is expected that the cost of one IR instruction
 113   /// may have a multiplicative factor to it or otherwise won't fit directly
 114   /// into the enum. Moreover, it is common to sum or average costs which works
 115   /// better as simple integral values. Thus this enum only provides constants.
 116   /// Also note that the returned costs are signed integers to make it natural
 117   /// to add, subtract, and test with zero (a common boundary condition). It is
 118   /// not expected that 2^32 is a realistic cost to be modeling at any point.
 119   ///
 120   /// Note that these costs should usually reflect the intersection of code-size
 121   /// cost and execution cost. A free instruction is typically one that folds
 122   /// into another instruction. For example, reg-to-reg moves can often be
 123   /// skipped by renaming the registers in the CPU, but they still are encoded
 124   /// and thus wouldn't be considered 'free' here.
 125   enum TargetCostConstants {
 126     TCC_Free = 0,     ///< Expected to fold away in lowering.
 127     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
 128     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
 129   };
 130
 131   /// \brief Estimate the cost of a specific operation when lowered.
 132   ///
 133   /// Note that this is designed to work on an arbitrary synthetic opcode, and
 134   /// thus work for hypothetical queries before an instruction has even been
 135   /// formed. However, this does *not* work for GEPs, and must not be called
 136   /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
 137   /// analyzing a GEP's cost requires more information.
 138   ///
 139   /// Typically only the result type is required, and the operand type can be
 140   /// omitted. However, if the opcode is one of the cast instructions, the
 141   /// operand type is required.
 142   ///
 143   /// The returned cost is defined in terms of \c TargetCostConstants, see its
 144   /// comments for a detailed explanation of the cost values.
 145   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
 146
 147   /// \brief Estimate the cost of a GEP operation when lowered.
 148   ///
 149   /// The contract for this function is the same as \c getOperationCost except
 150   /// that it supports an interface that provides extra information specific to
 151   /// the GEP operation.
 152   int getGEPCost(Type *PointeeType, const Value *Ptr,
 153                  ArrayRef<const Value *> Operands) const;
 154
 155   /// \brief Estimate the cost of a function call when lowered.
 156   ///
 157   /// The contract for this is the same as \c getOperationCost except that it
 158   /// supports an interface that provides extra information specific to call
 159   /// instructions.
 160   ///
 161   /// This is the most basic query for estimating call cost: it only knows the
 162   /// function type and (potentially) the number of arguments at the call site.
 163   /// The latter is only interesting for varargs function types.
 164   int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
 165
 166   /// \brief Estimate the cost of calling a specific function when lowered.
 167   ///
 168   /// This overload adds the ability to reason about the particular function
 169   /// being called in the event it is a library call with special lowering.
 170   int getCallCost(const Function *F, int NumArgs = -1) const;
 171
 172   /// \brief Estimate the cost of calling a specific function when lowered.
 173   ///
 174   /// This overload allows specifying a set of candidate argument values.
 175   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
 176
 177   /// \returns A value by which our inlining threshold should be multiplied.
 178   /// This is primarily used to bump up the inlining threshold wholesale on
 179   /// targets where calls are unusually expensive.
 180   ///
 181   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
 182   /// individual classes of instructions would be better.
 183   unsigned getInliningThresholdMultiplier() const;
 184
 185   /// \brief Estimate the cost of an intrinsic when lowered.
 186   ///
 187   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
 188   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 189                        ArrayRef<Type *> ParamTys) const;
 190
 191   /// \brief Estimate the cost of an intrinsic when lowered.
 192   ///
 193   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
 194   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 195                        ArrayRef<const Value *> Arguments) const;
 196
 197   /// \brief Estimate the cost of a given IR user when lowered.
 198   ///
 199   /// This can estimate the cost of either a ConstantExpr or Instruction when
 200   /// lowered. It has two primary advantages over the \c getOperationCost and
 201   /// \c getGEPCost above, and one significant disadvantage: it can only be
 202   /// used when the IR construct has already been formed.
 203   ///
 204   /// The advantages are that it can inspect the SSA use graph to reason more
 205   /// accurately about the cost. For example, all-constant-GEPs can often be
 206   /// folded into a load or other instruction, but if they are used in some
 207   /// other context they may not be folded. This routine can distinguish such
 208   /// cases.
 209   ///
 210   /// The returned cost is defined in terms of \c TargetCostConstants, see its
 211   /// comments for a detailed explanation of the cost values.
 212   int getUserCost(const User *U) const;
 213
 214   /// \brief Return true if branch divergence exists.
 215   ///
 216   /// Branch divergence has a significantly negative impact on GPU performance
 217   /// when threads in the same wavefront take different paths due to conditional
 218   /// branches.
 219   bool hasBranchDivergence() const;
 220
 221   /// \brief Returns whether V is a source of divergence.
 222   ///
 223   /// This function provides the target-dependent information for
 224   /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
 225   /// builds the dependency graph, and then runs the reachability algorithm
 226   /// starting with the sources of divergence.
 227   bool isSourceOfDivergence(const Value *V) const;
 228
 229   /// \brief Test whether calls to a function lower to actual program function
 230   /// calls.
 231   ///
 232   /// The idea is to test whether the program is likely to require a 'call'
 233   /// instruction or equivalent in order to call the given function.
 234   ///
 235   /// FIXME: It's not clear that this is a good or useful query API. Clients
 236   /// should probably move to simpler cost metrics using the above.
 237   /// Alternatively, we could split the cost interface into distinct code-size
 238   /// and execution-speed costs. This would allow modelling the core of this
 239   /// query more accurately as a call is a single small instruction, but
 240   /// incurs significant execution cost.
 241   bool isLoweredToCall(const Function *F) const;
 242
 243   /// Parameters that control the generic loop unrolling transformation.
 244   struct UnrollingPreferences {
 245     /// The cost threshold for the unrolled loop. Should be relative to the
 246     /// getUserCost values returned by this API, and the expectation is that
 247     /// the unrolled loop's instructions when run through that interface should
 248     /// not exceed this cost. However, this is only an estimate. Also, specific
 249     /// loops may be unrolled even with a cost above this threshold if deemed
 250     /// profitable. Set this to UINT_MAX to disable the loop body cost
 251     /// restriction.
 252     unsigned Threshold;
 253     /// If complete unrolling will reduce the cost of the loop, we will boost
 254     /// the Threshold by a certain percent to allow more aggressive complete
 255     /// unrolling. This value provides the maximum boost percentage that we
 256     /// can apply to Threshold (The value should be no less than 100).
 257     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
 258     ///                                    MaxPercentThresholdBoost / 100)
 259     /// E.g. if complete unrolling reduces the loop execution time by 50%
 260     /// then we boost the threshold by the factor of 2x. If unrolling is not
 261     /// expected to reduce the running time, then we do not increase the
 262     /// threshold.
 263     unsigned MaxPercentThresholdBoost;
 264     /// The cost threshold for the unrolled loop when optimizing for size (set
 265     /// to UINT_MAX to disable).
 266     unsigned OptSizeThreshold;
 267     /// The cost threshold for the unrolled loop, like Threshold, but used
 268     /// for partial/runtime unrolling (set to UINT_MAX to disable).
 269     unsigned PartialThreshold;
 270     /// The cost threshold for the unrolled loop when optimizing for size, like
 271     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
 272     /// UINT_MAX to disable).
 273     unsigned PartialOptSizeThreshold;
 274     /// A forced unrolling factor (the number of concatenated bodies of the
 275     /// original loop in the unrolled loop body). When set to 0, the unrolling
 276     /// transformation will select an unrolling factor based on the current cost
 277     /// threshold and other factors.
 278     unsigned Count;
 279     /// A forced peeling factor (the number of bodies of the original loop
 280     /// that should be peeled off before the loop body). When set to 0, the
 281     /// unrolling transformation will select a peeling factor based on profile
 282     /// information and other factors.
 283     unsigned PeelCount;
 284     /// Default unroll count for loops with run-time trip count.
 285     unsigned DefaultUnrollRuntimeCount;
 286     /// Set the maximum unrolling factor. The unrolling factor may be selected
 287     /// using the appropriate cost threshold, but may not exceed this number
 288     /// (set to UINT_MAX to disable). This does not apply in cases where the
 289     /// loop is being fully unrolled.
 290     unsigned MaxCount;
 291     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
 292     /// applies even if full unrolling is selected. This allows a target to fall
 293     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
 294     unsigned FullUnrollMaxCount;
 295     /// Represents number of instructions optimized when "back edge"
 296     /// becomes "fall through" in unrolled loop.
 297     /// For now we count a conditional branch on a backedge and a comparison
 298     /// feeding it.
 299     unsigned BEInsns;
 300     /// Allow partial unrolling (unrolling of loops to expand the size of the
 301     /// loop body, not only to eliminate small constant-trip-count loops).
 302     bool Partial;
 303     /// Allow runtime unrolling (unrolling of loops to expand the size of the
 304     /// loop body even when the number of loop iterations is not known at
 305     /// compile time).
 306     bool Runtime;
 307     /// Allow generation of a loop remainder (extra iterations after unroll).
 308     bool AllowRemainder;
 309     /// Allow emitting expensive instructions (such as divisions) when computing
 310     /// the trip count of a loop for runtime unrolling.
 311     bool AllowExpensiveTripCount;
 312     /// Apply loop unroll on any kind of loop
 313     /// (mainly to loops that fail runtime unrolling).
 314     bool Force;
 315     /// Allow using trip count upper bound to unroll loops.
 316     bool UpperBound;
 317     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
 318     bool AllowPeeling;
 319   };
 320
 321   /// \brief Get target-customized preferences for the generic loop unrolling
 322   /// transformation. The caller will initialize UP with the current
 323   /// target-independent defaults.
 324   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
 325
 326   /// @}
 327
 328   /// \name Scalar Target Information
 329   /// @{
 330
 331   /// \brief Flags indicating the kind of support for population count.
 332   ///
 333   /// Compared to the SW implementation, HW support is supposed to
 334   /// significantly boost the performance when the population is dense, and it
 335   /// may or may not degrade performance if the population is sparse. A HW
 336   /// support is considered as "Fast" if it can outperform, or is on a par
 337   /// with, SW implementation when the population is sparse; otherwise, it is
 338   /// considered as "Slow".
 339   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
 340
 341   /// \brief Return true if the specified immediate is legal add immediate, that
 342   /// is the target has add instructions which can add a register with the
 343   /// immediate without having to materialize the immediate into a register.
 344   bool isLegalAddImmediate(int64_t Imm) const;
 345
 346   /// \brief Return true if the specified immediate is legal icmp immediate,
 347   /// that is the target has icmp instructions which can compare a register
 348   /// against the immediate without having to materialize the immediate into a
 349   /// register.
 350   bool isLegalICmpImmediate(int64_t Imm) const;
 351
 352   /// \brief Return true if the addressing mode represented by AM is legal for
 353   /// this target, for a load/store of the specified type.
 354   /// The type may be VoidTy, in which case only return true if the addressing
 355   /// mode is legal for a load/store of any legal type.
 356   /// TODO: Handle pre/postinc as well.
 357   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 358                              bool HasBaseReg, int64_t Scale,
 359                              unsigned AddrSpace = 0) const;
 360
 361   /// \brief Return true if the target supports masked load/store.
 362   /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
 363   bool isLegalMaskedStore(Type *DataType) const;
 364   bool isLegalMaskedLoad(Type *DataType) const;
 365
 366   /// \brief Return true if the target supports masked gather/scatter.
 367   /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
 368   /// bits scalar type.
 369   bool isLegalMaskedScatter(Type *DataType) const;
 370   bool isLegalMaskedGather(Type *DataType) const;
 371
 372   /// \brief Return the cost of the scaling factor used in the addressing
 373   /// mode represented by AM for this target, for a load/store
 374   /// of the specified type.
 375   /// If the AM is supported, the return value must be >= 0.
 376   /// If the AM is not supported, it returns a negative value.
 377   /// TODO: Handle pre/postinc as well.
 378   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 379                            bool HasBaseReg, int64_t Scale,
 380                            unsigned AddrSpace = 0) const;
 381
 382   /// \brief Return true if target supports the load / store
 383   /// instruction with the given Offset of the form reg + Offset. It
 384   /// may be that Offset is too big for a certain type (register
 385   /// class).
 386   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const;
 387
 388   /// \brief Return true if it's free to truncate a value of type Ty1 to type
 389   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
 390   /// by referencing its sub-register AX.
 391   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
 392
 393   /// \brief Return true if it is profitable to hoist instruction in the
 394   /// then/else to before if.
 395   bool isProfitableToHoist(Instruction *I) const;
 396
 397   /// \brief Return true if this type is legal.
 398   bool isTypeLegal(Type *Ty) const;
 399
 400   /// \brief Returns the target's jmp_buf alignment in bytes.
 401   unsigned getJumpBufAlignment() const;
 402
 403   /// \brief Returns the target's jmp_buf size in bytes.
 404   unsigned getJumpBufSize() const;
 405
 406   /// \brief Return true if switches should be turned into lookup tables for the
 407   /// target.
 408   bool shouldBuildLookupTables() const;
 409
 410   /// \brief Return true if switches should be turned into lookup tables
 411   /// containing this constant value for the target.
 412   bool shouldBuildLookupTablesForConstant(Constant *C) const;
 413
 414   /// \brief Don't restrict interleaved unrolling to small loops.
 415   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
 416
 417   /// \brief Enable matching of interleaved access groups.
 418   bool enableInterleavedAccessVectorization() const;
 419
 420   /// \brief Indicate that it is potentially unsafe to automatically vectorize
 421   /// floating-point operations because the semantics of vector and scalar
 422   /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
 423   /// does not support IEEE-754 denormal numbers, while depending on the
 424   /// platform, scalar floating-point math does.
 425   /// This applies to floating-point math operations and calls, not memory
 426   /// operations, shuffles, or casts.
 427   bool isFPVectorizationPotentiallyUnsafe() const;
 428
 429   /// \brief Determine if the target supports unaligned memory accesses.
 430   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
 431                                       unsigned BitWidth, unsigned AddressSpace = 0,
 432                                       unsigned Alignment = 1,
 433                                       bool *Fast = nullptr) const;
 434
 435   /// \brief Return hardware support for population count.
 436   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 437
 438   /// \brief Return true if the hardware has a fast square-root instruction.
 439   bool haveFastSqrt(Type *Ty) const;
 440
 441   /// \brief Return the expected cost of supporting the floating point operation
 442   /// of the specified type.
 443   int getFPOpCost(Type *Ty) const;
 444
 445   /// \brief Return the expected cost of materialization for the given integer
 446   /// immediate of the specified type.
 447   int getIntImmCost(const APInt &Imm, Type *Ty) const;
 448
 449   /// \brief Return the expected cost of materialization for the given integer
 450   /// immediate of the specified type for a given instruction. The cost can be
 451   /// zero if the immediate can be folded into the specified instruction.
 452   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 453                     Type *Ty) const;
 454   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
 455                     Type *Ty) const;
 456
 457   /// \brief Return the expected cost for the given integer when optimising
 458   /// for size. This is different than the other integer immediate cost
 459   /// functions in that it is subtarget agnostic. This is useful when you e.g.
 460   /// target one ISA such as Aarch32 but smaller encodings could be possible
 461   /// with another such as Thumb. This return value is used as a penalty when
 462   /// the total costs for a constant is calculated (the bigger the cost, the
 463   /// more beneficial constant hoisting is).
 464   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 465                             Type *Ty) const;
 466   /// @}
 467
 468   /// \name Vector Target Information
 469   /// @{
 470
 471   /// \brief The various kinds of shuffle patterns for vector queries.
 472   enum ShuffleKind {
 473     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
 474     SK_Reverse,         ///< Reverse the order of the vector.
 475     SK_Alternate,       ///< Choose alternate elements from vector.
 476     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
 477     SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
 478     SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
 479                         ///< with any shuffle mask.
 480     SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
 481                         ///< shuffle mask.
 482   };
 483
 484   /// \brief Additional information about an operand's possible values.
 485   enum OperandValueKind {
 486     OK_AnyValue,               ///< Operand can have any value.
 487     OK_UniformValue,           ///< Operand is uniform (splat of a value).
 488     OK_UniformConstantValue,   ///< Operand is uniform constant.
 489     OK_NonUniformConstantValue ///< Operand is a non uniform constant value.
 490   };
 491
 492   /// \brief Additional properties of an operand's values.
 493   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
 494
 495   /// \return The number of scalar or vector registers that the target has.
 496   /// If 'Vector' is true, it returns the number of vector registers. If it is
 497   /// set to false, it returns the number of scalar registers.
 498   unsigned getNumberOfRegisters(bool Vector) const;
 499
 500   /// \return The width of the largest scalar or vector register type.
 501   unsigned getRegisterBitWidth(bool Vector) const;
 502
 503   /// \return The size of a cache line in bytes.
 504   unsigned getCacheLineSize() const;
 505
 506   /// \return How much before a load we should place the prefetch instruction.
 507   /// This is currently measured in number of instructions.
 508   unsigned getPrefetchDistance() const;
 509
 510   /// \return Some HW prefetchers can handle accesses up to a certain constant
 511   /// stride.  This is the minimum stride in bytes where it makes sense to start
 512   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
 513   unsigned getMinPrefetchStride() const;
 514
 515   /// \return The maximum number of iterations to prefetch ahead.  If the
 516   /// required number of iterations is more than this number, no prefetching is
 517   /// performed.
 518   unsigned getMaxPrefetchIterationsAhead() const;
 519
 520   /// \return The maximum interleave factor that any transform should try to
 521   /// perform for this target. This number depends on the level of parallelism
 522   /// and the number of execution units in the CPU.
 523   unsigned getMaxInterleaveFactor(unsigned VF) const;
 524
 525   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
 526   /// \p Args is an optional argument which holds the instruction operands
 527   /// values so the TTI can analyze those values searching for special
 528   /// cases or optimizations based on those values.
 529   int getArithmeticInstrCost(
 530       unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
 531       OperandValueKind Opd2Info = OK_AnyValue,
 532       OperandValueProperties Opd1PropInfo = OP_None,
 533       OperandValueProperties Opd2PropInfo = OP_None,
 534       ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
 535
 536   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
 537   /// The index and subtype parameters are used by the subvector insertion and
 538   /// extraction shuffle kinds.
 539   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
 540                      Type *SubTp = nullptr) const;
 541
 542   /// \return The expected cost of cast instructions, such as bitcast, trunc,
 543   /// zext, etc.
 544   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
 545
 546   /// \return The expected cost of a sign- or zero-extended vector extract. Use
 547   /// -1 to indicate that there is no information about the index value.
 548   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
 549                                unsigned Index = -1) const;
 550
 551   /// \return The expected cost of control-flow related instructions such as
 552   /// Phi, Ret, Br.
 553   int getCFInstrCost(unsigned Opcode) const;
 554
 555   /// \returns The expected cost of compare and select instructions.
 556   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 557                          Type *CondTy = nullptr) const;
 558
 559   /// \return The expected cost of vector Insert and Extract.
 560   /// Use -1 to indicate that there is no information on the index value.
 561   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
 562
 563   /// \return The cost of Load and Store instructions.
 564   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 565                       unsigned AddressSpace) const;
 566
 567   /// \return The cost of masked Load and Store instructions.
 568   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 569                             unsigned AddressSpace) const;
 570
 571   /// \return The cost of Gather or Scatter operation
 572   /// \p Opcode - is a type of memory access Load or Store
 573   /// \p DataTy - a vector type of the data to be loaded or stored
 574   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
 575   /// \p VariableMask - true when the memory access is predicated with a mask
 576   ///                   that is not a compile-time constant
 577   /// \p Alignment - alignment of single element
 578   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
 579                              bool VariableMask, unsigned Alignment) const;
 580
 581   /// \return The cost of the interleaved memory operation.
 582   /// \p Opcode is the memory operation code
 583   /// \p VecTy is the vector type of the interleaved access.
 584   /// \p Factor is the interleave factor
 585   /// \p Indices is the indices for interleaved load members (as interleaved
 586   ///    load allows gaps)
 587   /// \p Alignment is the alignment of the memory operation
 588   /// \p AddressSpace is address space of the pointer.
 589   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
 590                                  ArrayRef<unsigned> Indices, unsigned Alignment,
 591                                  unsigned AddressSpace) const;
 592
 593   /// \brief Calculate the cost of performing a vector reduction.
 594   ///
 595   /// This is the cost of reducing the vector value of type \p Ty to a scalar
 596   /// value using the operation denoted by \p Opcode. The form of the reduction
 597   /// can either be a pairwise reduction or a reduction that splits the vector
 598   /// at every reduction level.
 599   ///
 600   /// Pairwise:
 601   ///  (v0, v1, v2, v3)
 602   ///  ((v0+v1), (v2+v3), undef, undef)
 603   /// Split:
 604   ///  (v0, v1, v2, v3)
 605   ///  ((v0+v2), (v1+v3), undef, undef)
 606   int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
 607
 608   /// \returns The cost of Intrinsic instructions. Types analysis only.
 609   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 610                             ArrayRef<Type *> Tys, FastMathFlags FMF) const;
 611
 612   /// \returns The cost of an intrinsic instruction, analysing its actual
       /// arguments \p Args rather than only their types.
 613   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 614                             ArrayRef<Value *> Args, FastMathFlags FMF) const;
 615
 616   /// \returns The cost of a Call instruction.
       /// NOTE(review): presumably \p F is the callee, \p RetTy the call's return
       /// type and \p Tys its argument types -- confirm against the implementation.
 617   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
 618
 619   /// \returns The number of pieces into which the type \p Tp must be split
 620   /// during legalization. Zero is returned when the answer is unknown.
 621   unsigned getNumberOfParts(Type *Tp) const;
 622
 623   /// \returns The cost of the address computation. For most targets this can be
 624   /// merged into the instruction indexing mode. Some targets might want to
 625   /// distinguish between address computation for memory operations on vector
 626   /// types and scalar types. Such targets should override this function.
 627   /// \p SE points to the scalar evolution object, which is used to get the
 628   /// step value of the pointer in case of a constant stride. \p Ptr is the SCEV
 629   /// of the access pointer. Both are optional and default to nullptr.
 630   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
 631                                 const SCEV *Ptr = nullptr) const;
 632
 633   /// \returns The cost, if any, of keeping values of the types \p Tys alive
 634   /// over a call site.
 635   ///
 636   /// Some types may require the use of register classes that do not have
 637   /// any callee-saved registers, and so would require a spill and fill.
 638   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
 639
 640   /// \returns True if \p Inst is a supported memory intrinsic. \p Info will
 641   /// then contain additional information: whether the intrinsic may read or
 642   /// write memory, its volatility, and the pointer. \p Info is undefined if
 643   /// false is returned.
 644   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
 645
 646   /// \returns A value which is the result of the memory intrinsic \p Inst. New
 647   /// instructions may be created to extract the result from the given intrinsic
 648   /// memory operation. Returns nullptr if the target cannot create a result
 649   /// from the given intrinsic.
 650   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
 651                                            Type *ExpectedType) const;
 652
 653   /// \returns True if \p Caller and \p Callee have compatible attributes for
 654   /// inlining purposes.
 655   bool areInlineCompatible(const Function *Caller,
 656                            const Function *Callee) const;
 657
 658   /// \returns The bitwidth of the largest vector type that should be used to
 659   /// load/store in the address space \p AddrSpace.
 660   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
 661
 662   /// \returns True if the load instruction \p LI is legal to vectorize.
 663   bool isLegalToVectorizeLoad(LoadInst *LI) const;
 664
 665   /// \returns True if the store instruction \p SI is legal to vectorize.
 666   bool isLegalToVectorizeStore(StoreInst *SI) const;
 667
 668   /// \returns True if it is legal to vectorize the load chain described by
       /// \p ChainSizeInBytes, \p Alignment and \p AddrSpace.
 669   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
 670                                    unsigned Alignment,
 671                                    unsigned AddrSpace) const;
 672
 673   /// \returns True if it is legal to vectorize the store chain described by
       /// \p ChainSizeInBytes, \p Alignment and \p AddrSpace.
 674   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
 675                                     unsigned Alignment,
 676                                     unsigned AddrSpace) const;
 677
 678   /// \returns The new vector factor value if the target doesn't support loads
 679   /// of the given size or has a better vector factor.
       /// NOTE(review): the size meant here is presumably \p ChainSizeInBytes (the
       /// comment previously said \p SizeInBytes, which is not a parameter) --
       /// confirm.
 680   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
 681                                unsigned ChainSizeInBytes,
 682                                VectorType *VecTy) const;
 683
 684   /// \returns The new vector factor value if the target doesn't support stores
 685   /// of the given size or has a better vector factor.
       /// NOTE(review): the size meant here is presumably \p ChainSizeInBytes (the
       /// comment previously said \p SizeInBytes, which is not a parameter) --
       /// confirm.
 686   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
 687                                 unsigned ChainSizeInBytes,
 688                                 VectorType *VecTy) const;
 689
 690   /// @}
 691
 692 private:
 693   /// \brief The abstract base class used to type-erase specific TTI
 694   /// implementations. Only declared here; it is defined after this class.
 695   class Concept;
 696
 697   /// \brief The class template, derived from Concept, which wraps a concrete
 698   /// implementation of type \c T in the type-erased interface.
 699   template <typename T> class Model;
 700
       /// \brief The type-erased implementation, owned by this object. The
       /// templated constructor initializes it with a heap-allocated Model<T>.
 701   std::unique_ptr<Concept> TTIImpl;
 702 };
 703
 704 class TargetTransformInfo::Concept {
 705 public:
       // Every member of this class is pure virtual: the class only declares the
       // type-erased interface and holds no data members.
       //
       // The destructor is pure virtual too. C++ still requires a definition for
       // a pure virtual destructor; none is visible in this part of the file.
 706   virtual ~Concept() = 0;
 707   virtual const DataLayout &getDataLayout() const = 0;
       // Apart from getDataLayout() above, the hooks from here down to
       // getOrCreateResultFromMemIntrinsic() are not const-qualified.
 708   virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
 709   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
 710                          ArrayRef<const Value *> Operands) = 0;
 711   virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
 712   virtual int getCallCost(const Function *F, int NumArgs) = 0;
 713   virtual int getCallCost(const Function *F,
 714                           ArrayRef<const Value *> Arguments) = 0;
 715   virtual unsigned getInliningThresholdMultiplier() = 0;
 716   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 717                                ArrayRef<Type *> ParamTys) = 0;
 718   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 719                                ArrayRef<const Value *> Arguments) = 0;
 720   virtual int getUserCost(const User *U) = 0;
 721   virtual bool hasBranchDivergence() = 0;
 722   virtual bool isSourceOfDivergence(const Value *V) = 0;
 723   virtual bool isLoweredToCall(const Function *F) = 0;
 724   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
 725   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
 726   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
 727   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
 728                                      int64_t BaseOffset, bool HasBaseReg,
 729                                      int64_t Scale,
 730                                      unsigned AddrSpace) = 0;
 731   virtual bool isLegalMaskedStore(Type *DataType) = 0;
 732   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
 733   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
 734   virtual bool isLegalMaskedGather(Type *DataType) = 0;
 735   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
 736                                    int64_t BaseOffset, bool HasBaseReg,
 737                                    int64_t Scale, unsigned AddrSpace) = 0;
 738   virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
 739   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
 740   virtual bool isProfitableToHoist(Instruction *I) = 0;
 741   virtual bool isTypeLegal(Type *Ty) = 0;
 742   virtual unsigned getJumpBufAlignment() = 0;
 743   virtual unsigned getJumpBufSize() = 0;
 744   virtual bool shouldBuildLookupTables() = 0;
 745   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
 746   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
 747   virtual bool enableInterleavedAccessVectorization() = 0;
 748   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
 749   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
 750                                               unsigned BitWidth,
 751                                               unsigned AddressSpace,
 752                                               unsigned Alignment,
 753                                               bool *Fast) = 0;
 754   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
 755   virtual bool haveFastSqrt(Type *Ty) = 0;
 756   virtual int getFPOpCost(Type *Ty) = 0;
 757   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 758                                     Type *Ty) = 0;
 759   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
 760   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 761                             Type *Ty) = 0;
 762   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
 763                             Type *Ty) = 0;
 764   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
 765   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
 766   virtual unsigned getCacheLineSize() = 0;
 767   virtual unsigned getPrefetchDistance() = 0;
 768   virtual unsigned getMinPrefetchStride() = 0;
 769   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
 770   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
       // NOTE(review): this hook returns unsigned, while the other *InstrCost
       // hooks declared in this class return int -- confirm this is intended.
 771   virtual unsigned
 772   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
 773                          OperandValueKind Opd2Info,
 774                          OperandValueProperties Opd1PropInfo,
 775                          OperandValueProperties Opd2PropInfo,
 776                          ArrayRef<const Value *> Args) = 0;
 777   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
 778                              Type *SubTp) = 0;
 779   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
 780   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
 781                                        VectorType *VecTy, unsigned Index) = 0;
 782   virtual int getCFInstrCost(unsigned Opcode) = 0;
 783   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 784                                  Type *CondTy) = 0;
 785   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
 786                                  unsigned Index) = 0;
 787   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
 788                               unsigned AddressSpace) = 0;
 789   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
 790                                     unsigned Alignment,
 791                                     unsigned AddressSpace) = 0;
 792   virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
 793                                      Value *Ptr, bool VariableMask,
 794                                      unsigned Alignment) = 0;
 795   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
 796                                          unsigned Factor,
 797                                          ArrayRef<unsigned> Indices,
 798                                          unsigned Alignment,
 799                                          unsigned AddressSpace) = 0;
 800   virtual int getReductionCost(unsigned Opcode, Type *Ty,
 801                                bool IsPairwiseForm) = 0;
 802   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 803                                     ArrayRef<Type *> Tys,
 804                                     FastMathFlags FMF) = 0;
 805   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
 806                                     ArrayRef<Value *> Args,
 807                                     FastMathFlags FMF) = 0;
 808   virtual int getCallInstrCost(Function *F, Type *RetTy,
 809                                ArrayRef<Type *> Tys) = 0;
 810   virtual unsigned getNumberOfParts(Type *Tp) = 0;
 811   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
 812                                         const SCEV *Ptr) = 0;
 813   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
 814   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
 815                                   MemIntrinsicInfo &Info) = 0;
 816   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
 817                                                    Type *ExpectedType) = 0;
       // The remaining hooks, unlike most of the ones above, are const-qualified.
 818   virtual bool areInlineCompatible(const Function *Caller,
 819                                    const Function *Callee) const = 0;
 820   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
 821   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
 822   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
 823   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
 824                                            unsigned Alignment,
 825                                            unsigned AddrSpace) const = 0;
 826   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
 827                                             unsigned Alignment,
 828                                             unsigned AddrSpace) const = 0;
 829   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
 830                                        unsigned ChainSizeInBytes,
 831                                        VectorType *VecTy) const = 0;
 832   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
 833                                         unsigned ChainSizeInBytes,
 834                                         VectorType *VecTy) const = 0;
 835 };
 836
 837 template <typename T>
 838 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
       // The concrete implementation being wrapped. It is stored by value, so
       // every Model holds its own T.
 839   T Impl;
 840
 841 public:
       // Takes the implementation by value and moves it into the member.
 842   Model(T Impl) : Impl(std::move(Impl)) {}
 843   ~Model() override {}
 844
       // Every override below forwards its arguments unchanged to the member
       // function of Impl with the same name and returns whatever that call
       // returns. T therefore has to provide all of these member functions.
 845   const DataLayout &getDataLayout() const override {
 846     return Impl.getDataLayout();
 847   }
 848
 849   int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
 850     return Impl.getOperationCost(Opcode, Ty, OpTy);
 851   }
 852   int getGEPCost(Type *PointeeType, const Value *Ptr,
 853                  ArrayRef<const Value *> Operands) override {
 854     return Impl.getGEPCost(PointeeType, Ptr, Operands);
 855   }
 856   int getCallCost(FunctionType *FTy, int NumArgs) override {
 857     return Impl.getCallCost(FTy, NumArgs);
 858   }
 859   int getCallCost(const Function *F, int NumArgs) override {
 860     return Impl.getCallCost(F, NumArgs);
 861   }
 862   int getCallCost(const Function *F,
 863                   ArrayRef<const Value *> Arguments) override {
 864     return Impl.getCallCost(F, Arguments);
 865   }
 866   unsigned getInliningThresholdMultiplier() override {
 867     return Impl.getInliningThresholdMultiplier();
 868   }
 869   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 870                        ArrayRef<Type *> ParamTys) override {
 871     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
 872   }
 873   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
 874                        ArrayRef<const Value *> Arguments) override {
 875     return Impl.getIntrinsicCost(IID, RetTy, Arguments);
 876   }
 877   int getUserCost(const User *U) override { return Impl.getUserCost(U); }
 878   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
 879   bool isSourceOfDivergence(const Value *V) override {
 880     return Impl.isSourceOfDivergence(V);
 881   }
 882   bool isLoweredToCall(const Function *F) override {
 883     return Impl.isLoweredToCall(F);
 884   }
 885   void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) override {
 886     return Impl.getUnrollingPreferences(L, UP);
 887   }
 888   bool isLegalAddImmediate(int64_t Imm) override {
 889     return Impl.isLegalAddImmediate(Imm);
 890   }
 891   bool isLegalICmpImmediate(int64_t Imm) override {
 892     return Impl.isLegalICmpImmediate(Imm);
 893   }
 894   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 895                              bool HasBaseReg, int64_t Scale,
 896                              unsigned AddrSpace) override {
 897     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
 898                                       Scale, AddrSpace);
 899   }
 900   bool isLegalMaskedStore(Type *DataType) override {
 901     return Impl.isLegalMaskedStore(DataType);
 902   }
 903   bool isLegalMaskedLoad(Type *DataType) override {
 904     return Impl.isLegalMaskedLoad(DataType);
 905   }
 906   bool isLegalMaskedScatter(Type *DataType) override {
 907     return Impl.isLegalMaskedScatter(DataType);
 908   }
 909   bool isLegalMaskedGather(Type *DataType) override {
 910     return Impl.isLegalMaskedGather(DataType);
 911   }
 912   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
 913                            bool HasBaseReg, int64_t Scale,
 914                            unsigned AddrSpace) override {
 915     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
 916                                      Scale, AddrSpace);
 917   }
 918   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
 919     return Impl.isFoldableMemAccessOffset(I, Offset);
 920   }
 921   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
 922     return Impl.isTruncateFree(Ty1, Ty2);
 923   }
 924   bool isProfitableToHoist(Instruction *I) override {
 925     return Impl.isProfitableToHoist(I);
 926   }
 927   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
 928   unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
 929   unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
 930   bool shouldBuildLookupTables() override {
 931     return Impl.shouldBuildLookupTables();
 932   }
 933   bool shouldBuildLookupTablesForConstant(Constant *C) override {
 934     return Impl.shouldBuildLookupTablesForConstant(C);
 935   }
 936   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
 937     return Impl.enableAggressiveInterleaving(LoopHasReductions);
 938   }
 939   bool enableInterleavedAccessVectorization() override {
 940     return Impl.enableInterleavedAccessVectorization();
 941   }
 942   bool isFPVectorizationPotentiallyUnsafe() override {
 943     return Impl.isFPVectorizationPotentiallyUnsafe();
 944   }
 945   bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
 946                                       unsigned BitWidth, unsigned AddressSpace,
 947                                       unsigned Alignment, bool *Fast) override {
 948     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
 949                                                Alignment, Fast);
 950   }
 951   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
 952     return Impl.getPopcntSupport(IntTyWidthInBit);
 953   }
 954   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
 955
 956   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
 957
 958   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 959                             Type *Ty) override {
 960     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
 961   }
 962   int getIntImmCost(const APInt &Imm, Type *Ty) override {
 963     return Impl.getIntImmCost(Imm, Ty);
 964   }
 965   int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
 966                     Type *Ty) override {
 967     return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
 968   }
 969   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
 970                     Type *Ty) override {
 971     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
 972   }
 973   unsigned getNumberOfRegisters(bool Vector) override {
 974     return Impl.getNumberOfRegisters(Vector);
 975   }
 976   unsigned getRegisterBitWidth(bool Vector) override {
 977     return Impl.getRegisterBitWidth(Vector);
 978   }
 979
 980   unsigned getCacheLineSize() override {
 981     return Impl.getCacheLineSize();
 982   }
 983   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
 984   unsigned getMinPrefetchStride() override {
 985     return Impl.getMinPrefetchStride();
 986   }
 987   unsigned getMaxPrefetchIterationsAhead() override {
 988     return Impl.getMaxPrefetchIterationsAhead();
 989   }
 990   unsigned getMaxInterleaveFactor(unsigned VF) override {
 991     return Impl.getMaxInterleaveFactor(VF);
 992   }
       // Returns unsigned, matching the declaration in Concept; whatever Impl
       // returns is converted to unsigned here.
 993   unsigned
 994   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
 995                          OperandValueKind Opd2Info,
 996                          OperandValueProperties Opd1PropInfo,
 997                          OperandValueProperties Opd2PropInfo,
 998                          ArrayRef<const Value *> Args) override {
 999     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
1000                                        Opd1PropInfo, Opd2PropInfo, Args);
1001   }
1002   int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1003                      Type *SubTp) override {
1004     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
1005   }
1006   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
1007     return Impl.getCastInstrCost(Opcode, Dst, Src);
1008   }
1009   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1010                                unsigned Index) override {
1011     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
1012   }
1013   int getCFInstrCost(unsigned Opcode) override {
1014     return Impl.getCFInstrCost(Opcode);
1015   }
1016   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
1017     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
1018   }
1019   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
1020     return Impl.getVectorInstrCost(Opcode, Val, Index);
1021   }
1022   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1023                       unsigned AddressSpace) override {
1024     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1025   }
1026   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1027                             unsigned AddressSpace) override {
1028     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
1029   }
1030   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1031                              Value *Ptr, bool VariableMask,
1032                              unsigned Alignment) override {
1033     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
1034                                        Alignment);
1035   }
1036   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
1037                                  ArrayRef<unsigned> Indices, unsigned Alignment,
1038                                  unsigned AddressSpace) override {
1039     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
1040                                            Alignment, AddressSpace);
1041   }
1042   int getReductionCost(unsigned Opcode, Type *Ty,
1043                        bool IsPairwiseForm) override {
1044     return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
1045   }
1046   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
1047                             FastMathFlags FMF) override {
1048     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
1049   }
1050   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1051                             ArrayRef<Value *> Args,
1052                             FastMathFlags FMF) override {
1053     return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
1054   }
1055   int getCallInstrCost(Function *F, Type *RetTy,
1056                        ArrayRef<Type *> Tys) override {
1057     return Impl.getCallInstrCost(F, RetTy, Tys);
1058   }
1059   unsigned getNumberOfParts(Type *Tp) override {
1060     return Impl.getNumberOfParts(Tp);
1061   }
1062   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1063                                 const SCEV *Ptr) override {
1064     return Impl.getAddressComputationCost(Ty, SE, Ptr);
1065   }
1066   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
1067     return Impl.getCostOfKeepingLiveOverCall(Tys);
1068   }
1069   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1070                           MemIntrinsicInfo &Info) override {
1071     return Impl.getTgtMemIntrinsic(Inst, Info);
1072   }
1073   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1074                                            Type *ExpectedType) override {
1075     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
1076   }
       // The overrides from here on are const-qualified, so they call Impl as a
       // const object: T must declare the corresponding members const as well.
1077   bool areInlineCompatible(const Function *Caller,
1078                            const Function *Callee) const override {
1079     return Impl.areInlineCompatible(Caller, Callee);
1080   }
1081   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
1082     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
1083   }
1084   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
1085     return Impl.isLegalToVectorizeLoad(LI);
1086   }
1087   bool isLegalToVectorizeStore(StoreInst *SI) const override {
1088     return Impl.isLegalToVectorizeStore(SI);
1089   }
1090   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1091                                    unsigned Alignment,
1092                                    unsigned AddrSpace) const override {
1093     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
1094                                             AddrSpace);
1095   }
1096   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1097                                     unsigned Alignment,
1098                                     unsigned AddrSpace) const override {
1099     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
1100                                              AddrSpace);
1101   }
1102   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1103                                unsigned ChainSizeInBytes,
1104                                VectorType *VecTy) const override {
1105     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
1106   }
1107   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1108                                 unsigned ChainSizeInBytes,
1109                                 VectorType *VecTy) const override {
1110     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
1111   }
1112 };
1113
1114 template <typename T>
     // Builds a TargetTransformInfo that owns \p Impl behind the type-erased
     // Concept interface, by wrapping it in a heap-allocated Model<T>.
1115 TargetTransformInfo::TargetTransformInfo(T Impl)
     // Impl is taken by value, so it is already this constructor's own copy:
     // move it into the Model instead of copying it a second time. For a T
     // without a move constructor this still falls back to a copy.
1116     : TTIImpl(new Model<T>(std::move(Impl))) {}
1117
1118 /// \brief Analysis pass providing the \c TargetTransformInfo.
1119 ///
1120 /// The core idea of the TargetIRAnalysis is to expose an interface through
1121 /// which LLVM targets can analyze and provide information about the middle
1122 /// end's target-independent IR. This supports use cases such as target-aware
1123 /// cost modeling of IR constructs.
1124 ///
1125 /// This is a function analysis because much of the cost modeling for targets
1126 /// is done in a subtarget specific way and LLVM supports compiling different
1127 /// functions targeting different subtargets in order to support runtime
1128 /// dispatch according to the observed subtarget.
1129 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
1130 public:
1131   typedef TargetTransformInfo Result;
1132
1133   /// \brief Default construct a target IR analysis.
1134   ///
1135   /// This will use the module's datalayout to construct a baseline
1136   /// conservative TTI result.
1137   TargetIRAnalysis();
1138
1139   /// \brief Construct an IR analysis pass around a target-provide callback.
1140   ///
1141   /// The callback will be called with a particular function for which the TTI
1142   /// is needed and must return a TTI object for that function.
1143   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
1144
1145   // Value semantics. We spell out the constructors for MSVC.
1146   TargetIRAnalysis(const TargetIRAnalysis &Arg)
1147       : TTICallback(Arg.TTICallback) {}
1148   TargetIRAnalysis(TargetIRAnalysis &&Arg)
1149       : TTICallback(std::move(Arg.TTICallback)) {}
1150   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
1151     TTICallback = RHS.TTICallback;
1152     return *this;
1153   }
1154   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
1155     TTICallback = std::move(RHS.TTICallback);
1156     return *this;
1157   }
1158
1159   Result run(const Function &F, FunctionAnalysisManager &);
1160
1161 private:
1162   friend AnalysisInfoMixin<TargetIRAnalysis>;
1163   static AnalysisKey Key;
1164
1165   /// \brief The callback used to produce a result.
1166   ///
1167   /// We use a completely opaque callback so that targets can provide whatever
1168   /// mechanism they desire for constructing the TTI for a given function.
1169   ///
1170   /// FIXME: Should we really use std::function? It's relatively inefficient.
1171   /// It might be possible to arrange for even stateful callbacks to outlive
1172   /// the analysis and thus use a function_ref which would be lighter weight.
1173   /// This may also be less error prone as the callback is likely to reference
1174   /// the external TargetMachine, and that reference needs to never dangle.
1175   std::function<Result(const Function &)> TTICallback;
1176
1177   /// \brief Helper function used as the callback in the default constructor.
1178   static Result getDefaultTTI(const Function &F);
1179 };
1180
1181 /// \brief Wrapper pass for TargetTransformInfo.
1182 ///
1183 /// This pass is constructed from a TargetIRAnalysis and hands out
1184 /// TargetTransformInfo objects to the passes that query it via getTTI().
1185 class TargetTransformInfoWrapperPass : public ImmutablePass {
       // The analysis held by this pass; presumably set from the constructor
       // argument of the same name -- confirm in the implementation file.
1186   TargetIRAnalysis TIRA;
       // Wrapped in Optional, so it can be empty. NOTE(review): presumably
       // filled in by getTTI() -- confirm.
1187   Optional<TargetTransformInfo> TTI;
1188
       // Declared only; the definition is not in this header.
1189   virtual void anchor();
1190
1191 public:
1192   static char ID;
1193
1194   /// \brief We must provide a default constructor for the pass, but it should
1195   /// never be used.
1196   ///
1197   /// Use the constructor below or call one of the creation routines instead.
1198   TargetTransformInfoWrapperPass();
1199
       /// \brief Construct the pass from the analysis \p TIRA.
1200   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1201
       /// \returns A reference to a TargetTransformInfo for the function \p F.
       /// NOTE(review): how long the returned reference stays valid is not
       /// visible from this declaration -- confirm before caching it.
1202   TargetTransformInfo &getTTI(const Function &F);
1203 };
1204
1205 /// \brief Create the wrapper pass that makes TargetTransformInfo available.
1206 ///
1207 /// The pass is created from the analysis \p TIRA. It just holds the TTI
1208 /// instance and makes it available to clients.
     /// NOTE(review): who owns the returned pass is not visible here -- confirm.
1209 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
1210
1211 } // End llvm namespace
1212
1213 #endif