contrib/llvm/lib/Analysis/InlineCost.cpp

   1 //===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements inline cost analysis.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/Analysis/InlineCost.h"
  15 #include "llvm/ADT/STLExtras.h"
  16 #include "llvm/ADT/SetVector.h"
  17 #include "llvm/ADT/SmallPtrSet.h"
  18 #include "llvm/ADT/SmallVector.h"
  19 #include "llvm/ADT/Statistic.h"
  20 #include "llvm/Analysis/AssumptionCache.h"
  21 #include "llvm/Analysis/BlockFrequencyInfo.h"
  22 #include "llvm/Analysis/CodeMetrics.h"
  23 #include "llvm/Analysis/ConstantFolding.h"
  24 #include "llvm/Analysis/InstructionSimplify.h"
  25 #include "llvm/Analysis/ProfileSummaryInfo.h"
  26 #include "llvm/Analysis/TargetTransformInfo.h"
  27 #include "llvm/IR/CallSite.h"
  28 #include "llvm/IR/CallingConv.h"
  29 #include "llvm/IR/DataLayout.h"
  30 #include "llvm/IR/GetElementPtrTypeIterator.h"
  31 #include "llvm/IR/GlobalAlias.h"
  32 #include "llvm/IR/InstVisitor.h"
  33 #include "llvm/IR/IntrinsicInst.h"
  34 #include "llvm/IR/Operator.h"
  35 #include "llvm/Support/Debug.h"
  36 #include "llvm/Support/raw_ostream.h"
  37
  38 using namespace llvm;
  39
  40 #define DEBUG_TYPE "inline-cost"
  41
  42 STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
  43
  44 static cl::opt<int> InlineThreshold(
  45     "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
  46     cl::desc("Control the amount of inlining to perform (default = 225)"));
  47
  48 static cl::opt<int> HintThreshold(
  49     "inlinehint-threshold", cl::Hidden, cl::init(325),
  50     cl::desc("Threshold for inlining functions with inline hint"));
  51
  52 static cl::opt<int>
  53     ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
  54                           cl::init(45),
  55                           cl::desc("Threshold for inlining cold callsites"));
  56
  57 // We introduce this threshold to help performance of instrumentation based
  58 // PGO before we actually hook up inliner with analysis passes such as BPI and
  59 // BFI.
  60 static cl::opt<int> ColdThreshold(
  61     "inlinecold-threshold", cl::Hidden, cl::init(45),
  62     cl::desc("Threshold for inlining functions with cold attribute"));
  63
  64 static cl::opt<int>
  65     HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
  66                          cl::ZeroOrMore,
  67                          cl::desc("Threshold for hot callsites "));
  68
  69 static cl::opt<int> ColdCallSiteRelFreq(
  70     "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
  71     cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
  72              "entry frequency, for a callsite to be cold in the absence of "
  73              "profile information."));
  74
  75 namespace {
  76
  77 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  78   typedef InstVisitor<CallAnalyzer, bool> Base;
  79   friend class InstVisitor<CallAnalyzer, bool>;
  80
  81   /// The TargetTransformInfo available for this compilation.
  82   const TargetTransformInfo &TTI;
  83
  84   /// Getter for the cache of @llvm.assume intrinsics.
  85   std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
  86
  87   /// Getter for BlockFrequencyInfo
  88   Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
  89
  90   /// Profile summary information.
  91   ProfileSummaryInfo *PSI;
  92
  93   /// The called function.
  94   Function &F;
  95
  96   // Cache the DataLayout since we use it a lot.
  97   const DataLayout &DL;
  98
  99   /// The candidate callsite being analyzed. Please do not use this to do
 100   /// analysis in the caller function; we want the inline cost query to be
 101   /// easily cacheable. Instead, use the cover function paramHasAttr.
 102   CallSite CandidateCS;
 103
 104   /// Tunable parameters that control the analysis.
 105   const InlineParams &Params;
 106
 107   int Threshold;
 108   int Cost;
 109
 110   bool IsCallerRecursive;
 111   bool IsRecursiveCall;
 112   bool ExposesReturnsTwice;
 113   bool HasDynamicAlloca;
 114   bool ContainsNoDuplicateCall;
 115   bool HasReturn;
 116   bool HasIndirectBr;
 117   bool HasFrameEscape;
 118
 119   /// Number of bytes allocated statically by the callee.
 120   uint64_t AllocatedSize;
 121   unsigned NumInstructions, NumVectorInstructions;
 122   int FiftyPercentVectorBonus, TenPercentVectorBonus;
 123   int VectorBonus;
 124
 125   /// While we walk the potentially-inlined instructions, we build up and
 126   /// maintain a mapping of simplified values specific to this callsite. The
 127   /// idea is to propagate any special information we have about arguments to
 128   /// this call through the inlinable section of the function, and account for
 129   /// likely simplifications post-inlining. The most important aspect we track
 130   /// is CFG altering simplifications -- when we prove a basic block dead, that
 131   /// can cause dramatic shifts in the cost of inlining a function.
 132   DenseMap<Value *, Constant *> SimplifiedValues;
 133
 134   /// Keep track of the values which map back (through function arguments) to
 135   /// allocas on the caller stack which could be simplified through SROA.
 136   DenseMap<Value *, Value *> SROAArgValues;
 137
 138   /// The mapping of caller Alloca values to their accumulated cost savings. If
 139   /// we have to disable SROA for one of the allocas, this tells us how much
 140   /// cost must be added.
 141   DenseMap<Value *, int> SROAArgCosts;
 142
 143   /// Keep track of values which map to a pointer base and constant offset.
 144   DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
 145
 146   // Custom simplification helper routines.
 147   bool isAllocaDerivedArg(Value *V);
 148   bool lookupSROAArgAndCost(Value *V, Value *&Arg,
 149                             DenseMap<Value *, int>::iterator &CostIt);
 150   void disableSROA(DenseMap<Value *, int>::iterator CostIt);
 151   void disableSROA(Value *V);
 152   void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
 153                           int InstructionCost);
 154   bool isGEPFree(GetElementPtrInst &GEP);
 155   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
 156   bool simplifyCallSite(Function *F, CallSite CS);
 157   template <typename Callable>
 158   bool simplifyInstruction(Instruction &I, Callable Evaluate);
 159   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 160
 161   /// Return true if the given argument to the function being considered for
 162   /// inlining has the given attribute set either at the call site or the
 163   /// function declaration.  Primarily used to inspect call site specific
 164   /// attributes since these can be more precise than the ones on the callee
 165   /// itself.
 166   bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);
 167
 168   /// Return true if the given value is known non null within the callee if
 169   /// inlined through this particular callsite.
 170   bool isKnownNonNullInCallee(Value *V);
 171
 172   /// Update Threshold based on callsite properties such as callee
 173   /// attributes and callee hotness for PGO builds. The Callee is explicitly
 174   /// passed to support analyzing indirect calls whose target is inferred by
 175   /// analysis.
 176   void updateThreshold(CallSite CS, Function &Callee);
 177
 178   /// Return true if size growth is allowed when inlining the callee at CS.
 179   bool allowSizeGrowth(CallSite CS);
 180
 181   /// Return true if \p CS is a cold callsite.
 182   bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
 183
 184   // Custom analysis routines.
 185   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
 186
 187   // Disable several entry points to the visitor so we don't accidentally use
 188   // them by declaring but not defining them here.
 189   void visit(Module *);
 190   void visit(Module &);
 191   void visit(Function *);
 192   void visit(Function &);
 193   void visit(BasicBlock *);
 194   void visit(BasicBlock &);
 195
 196   // Provide base case for our instruction visit.
 197   bool visitInstruction(Instruction &I);
 198
 199   // Our visit overrides.
 200   bool visitAlloca(AllocaInst &I);
 201   bool visitPHI(PHINode &I);
 202   bool visitGetElementPtr(GetElementPtrInst &I);
 203   bool visitBitCast(BitCastInst &I);
 204   bool visitPtrToInt(PtrToIntInst &I);
 205   bool visitIntToPtr(IntToPtrInst &I);
 206   bool visitCastInst(CastInst &I);
 207   bool visitUnaryInstruction(UnaryInstruction &I);
 208   bool visitCmpInst(CmpInst &I);
 209   bool visitSub(BinaryOperator &I);
 210   bool visitBinaryOperator(BinaryOperator &I);
 211   bool visitLoad(LoadInst &I);
 212   bool visitStore(StoreInst &I);
 213   bool visitExtractValue(ExtractValueInst &I);
 214   bool visitInsertValue(InsertValueInst &I);
 215   bool visitCallSite(CallSite CS);
 216   bool visitReturnInst(ReturnInst &RI);
 217   bool visitBranchInst(BranchInst &BI);
 218   bool visitSwitchInst(SwitchInst &SI);
 219   bool visitIndirectBrInst(IndirectBrInst &IBI);
 220   bool visitResumeInst(ResumeInst &RI);
 221   bool visitCleanupReturnInst(CleanupReturnInst &RI);
 222   bool visitCatchReturnInst(CatchReturnInst &RI);
 223   bool visitUnreachableInst(UnreachableInst &I);
 224
 225 public:
 226   CallAnalyzer(const TargetTransformInfo &TTI,
 227                std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
 228                Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
 229                ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
 230                const InlineParams &Params)
 231       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
 232         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
 233         CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
 234         Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
 235         ExposesReturnsTwice(false), HasDynamicAlloca(false),
 236         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
 237         HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
 238         NumVectorInstructions(0), FiftyPercentVectorBonus(0),
 239         TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
 240         NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
 241         NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
 242         SROACostSavings(0), SROACostSavingsLost(0) {}
 243
 244   bool analyzeCall(CallSite CS);
 245
 246   int getThreshold() { return Threshold; }
 247   int getCost() { return Cost; }
 248
 249   // Keep a bunch of stats about the cost savings found so we can print them
 250   // out when debugging.
 251   unsigned NumConstantArgs;
 252   unsigned NumConstantOffsetPtrArgs;
 253   unsigned NumAllocaArgs;
 254   unsigned NumConstantPtrCmps;
 255   unsigned NumConstantPtrDiffs;
 256   unsigned NumInstructionsSimplified;
 257   unsigned SROACostSavings;
 258   unsigned SROACostSavingsLost;
 259
 260   void dump();
 261 };
 262
 263 } // namespace
 264
 265 /// \brief Test whether the given value is an Alloca-derived function argument.
 266 bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
 267   return SROAArgValues.count(V);
 268 }
 269
 270 /// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
 271 /// Returns false if V does not map to a SROA-candidate.
 272 bool CallAnalyzer::lookupSROAArgAndCost(
 273     Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
 274   if (SROAArgValues.empty() || SROAArgCosts.empty())
 275     return false;
 276
 277   DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
 278   if (ArgIt == SROAArgValues.end())
 279     return false;
 280
 281   Arg = ArgIt->second;
 282   CostIt = SROAArgCosts.find(Arg);
 283   return CostIt != SROAArgCosts.end();
 284 }
 285
 286 /// \brief Disable SROA for the candidate marked by this cost iterator.
 287 ///
 288 /// This marks the candidate as no longer viable for SROA, and adds the cost
 289 /// savings associated with it back into the inline cost measurement.
 290 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
 291   // If we're no longer able to perform SROA we need to undo its cost savings
 292   // and prevent subsequent analysis.
 293   Cost += CostIt->second;
 294   SROACostSavings -= CostIt->second;
 295   SROACostSavingsLost += CostIt->second;
 296   SROAArgCosts.erase(CostIt);
 297 }
 298
 299 /// \brief If 'V' maps to a SROA candidate, disable SROA for it.
 300 void CallAnalyzer::disableSROA(Value *V) {
 301   Value *SROAArg;
 302   DenseMap<Value *, int>::iterator CostIt;
 303   if (lookupSROAArgAndCost(V, SROAArg, CostIt))
 304     disableSROA(CostIt);
 305 }
 306
 307 /// \brief Accumulate the given cost for a particular SROA candidate.
 308 void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
 309                                       int InstructionCost) {
 310   CostIt->second += InstructionCost;
 311   SROACostSavings += InstructionCost;
 312 }
 313
 314 /// \brief Accumulate a constant GEP offset into an APInt if possible.
 315 ///
 316 /// Returns false if unable to compute the offset for any reason. Respects any
 317 /// simplified values known during the analysis of this callsite.
 318 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
 319   unsigned IntPtrWidth = DL.getPointerSizeInBits();
 320   assert(IntPtrWidth == Offset.getBitWidth());
 321
 322   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
 323        GTI != GTE; ++GTI) {
 324     ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
 325     if (!OpC)
 326       if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
 327         OpC = dyn_cast<ConstantInt>(SimpleOp);
 328     if (!OpC)
 329       return false;
 330     if (OpC->isZero())
 331       continue;
 332
 333     // Handle a struct index, which adds its field offset to the pointer.
 334     if (StructType *STy = GTI.getStructTypeOrNull()) {
 335       unsigned ElementIdx = OpC->getZExtValue();
 336       const StructLayout *SL = DL.getStructLayout(STy);
 337       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
 338       continue;
 339     }
 340
 341     APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
 342     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
 343   }
 344   return true;
 345 }
 346
 347 /// \brief Use TTI to check whether a GEP is free.
 348 ///
 349 /// Respects any simplified values known during the analysis of this callsite.
 350 bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
 351   SmallVector<Value *, 4> Indices;
 352   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
 353     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
 354        Indices.push_back(SimpleOp);
 355      else
 356        Indices.push_back(*I);
 357   return TargetTransformInfo::TCC_Free ==
 358          TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
 359                         Indices);
 360 }
 361
 362 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
 363   // Check whether inlining will turn a dynamic alloca into a static
 364   // alloca and handle that case.
 365   if (I.isArrayAllocation()) {
 366     Constant *Size = SimplifiedValues.lookup(I.getArraySize());
 367     if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
 368       Type *Ty = I.getAllocatedType();
 369       AllocatedSize = SaturatingMultiplyAdd(
 370           AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
 371       return Base::visitAlloca(I);
 372     }
 373   }
 374
 375   // Accumulate the allocated size.
 376   if (I.isStaticAlloca()) {
 377     Type *Ty = I.getAllocatedType();
 378     AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
 379   }
 380
 381   // We will happily inline static alloca instructions.
 382   if (I.isStaticAlloca())
 383     return Base::visitAlloca(I);
 384
 385   // FIXME: This is overly conservative. Dynamic allocas are inefficient for
 386   // a variety of reasons, and so we would like to not inline them into
 387   // functions which don't currently have a dynamic alloca. This simply
 388   // disables inlining altogether in the presence of a dynamic alloca.
 389   HasDynamicAlloca = true;
 390   return false;
 391 }
 392
 393 bool CallAnalyzer::visitPHI(PHINode &I) {
 394   // FIXME: We should potentially be tracking values through phi nodes,
 395   // especially when they collapse to a single value due to deleted CFG edges
 396   // during inlining.
 397
 398   // FIXME: We need to propagate SROA *disabling* through phi nodes, even
 399   // though we don't want to propagate it's bonuses. The idea is to disable
 400   // SROA if it *might* be used in an inappropriate manner.
 401
 402   // Phi nodes are always zero-cost.
 403   return true;
 404 }
 405
 406 bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
 407   Value *SROAArg;
 408   DenseMap<Value *, int>::iterator CostIt;
 409   bool SROACandidate =
 410       lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt);
 411
 412   // Try to fold GEPs of constant-offset call site argument pointers. This
 413   // requires target data and inbounds GEPs.
 414   if (I.isInBounds()) {
 415     // Check if we have a base + offset for the pointer.
 416     Value *Ptr = I.getPointerOperand();
 417     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
 418     if (BaseAndOffset.first) {
 419       // Check if the offset of this GEP is constant, and if so accumulate it
 420       // into Offset.
 421       if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
 422         // Non-constant GEPs aren't folded, and disable SROA.
 423         if (SROACandidate)
 424           disableSROA(CostIt);
 425         return isGEPFree(I);
 426       }
 427
 428       // Add the result as a new mapping to Base + Offset.
 429       ConstantOffsetPtrs[&I] = BaseAndOffset;
 430
 431       // Also handle SROA candidates here, we already know that the GEP is
 432       // all-constant indexed.
 433       if (SROACandidate)
 434         SROAArgValues[&I] = SROAArg;
 435
 436       return true;
 437     }
 438   }
 439
 440   // Lambda to check whether a GEP's indices are all constant.
 441   auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
 442     for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
 443       if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
 444         return false;
 445     return true;
 446   };
 447
 448   if (IsGEPOffsetConstant(I)) {
 449     if (SROACandidate)
 450       SROAArgValues[&I] = SROAArg;
 451
 452     // Constant GEPs are modeled as free.
 453     return true;
 454   }
 455
 456   // Variable GEPs will require math and will disable SROA.
 457   if (SROACandidate)
 458     disableSROA(CostIt);
 459   return isGEPFree(I);
 460 }
 461
 462 /// Simplify \p I if its operands are constants and update SimplifiedValues.
 463 /// \p Evaluate is a callable specific to instruction type that evaluates the
 464 /// instruction when all the operands are constants.
 465 template <typename Callable>
 466 bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
 467   SmallVector<Constant *, 2> COps;
 468   for (Value *Op : I.operands()) {
 469     Constant *COp = dyn_cast<Constant>(Op);
 470     if (!COp)
 471       COp = SimplifiedValues.lookup(Op);
 472     if (!COp)
 473       return false;
 474     COps.push_back(COp);
 475   }
 476   auto *C = Evaluate(COps);
 477   if (!C)
 478     return false;
 479   SimplifiedValues[&I] = C;
 480   return true;
 481 }
 482
 483 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
 484   // Propagate constants through bitcasts.
 485   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 486         return ConstantExpr::getBitCast(COps[0], I.getType());
 487       }))
 488     return true;
 489
 490   // Track base/offsets through casts
 491   std::pair<Value *, APInt> BaseAndOffset =
 492       ConstantOffsetPtrs.lookup(I.getOperand(0));
 493   // Casts don't change the offset, just wrap it up.
 494   if (BaseAndOffset.first)
 495     ConstantOffsetPtrs[&I] = BaseAndOffset;
 496
 497   // Also look for SROA candidates here.
 498   Value *SROAArg;
 499   DenseMap<Value *, int>::iterator CostIt;
 500   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
 501     SROAArgValues[&I] = SROAArg;
 502
 503   // Bitcasts are always zero cost.
 504   return true;
 505 }
 506
 507 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
 508   // Propagate constants through ptrtoint.
 509   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 510         return ConstantExpr::getPtrToInt(COps[0], I.getType());
 511       }))
 512     return true;
 513
 514   // Track base/offset pairs when converted to a plain integer provided the
 515   // integer is large enough to represent the pointer.
 516   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
 517   if (IntegerSize >= DL.getPointerSizeInBits()) {
 518     std::pair<Value *, APInt> BaseAndOffset =
 519         ConstantOffsetPtrs.lookup(I.getOperand(0));
 520     if (BaseAndOffset.first)
 521       ConstantOffsetPtrs[&I] = BaseAndOffset;
 522   }
 523
 524   // This is really weird. Technically, ptrtoint will disable SROA. However,
 525   // unless that ptrtoint is *used* somewhere in the live basic blocks after
 526   // inlining, it will be nuked, and SROA should proceed. All of the uses which
 527   // would block SROA would also block SROA if applied directly to a pointer,
 528   // and so we can just add the integer in here. The only places where SROA is
 529   // preserved either cannot fire on an integer, or won't in-and-of themselves
 530   // disable SROA (ext) w/o some later use that we would see and disable.
 531   Value *SROAArg;
 532   DenseMap<Value *, int>::iterator CostIt;
 533   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
 534     SROAArgValues[&I] = SROAArg;
 535
 536   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 537 }
 538
 539 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
 540   // Propagate constants through ptrtoint.
 541   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 542         return ConstantExpr::getIntToPtr(COps[0], I.getType());
 543       }))
 544     return true;
 545
 546   // Track base/offset pairs when round-tripped through a pointer without
 547   // modifications provided the integer is not too large.
 548   Value *Op = I.getOperand(0);
 549   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
 550   if (IntegerSize <= DL.getPointerSizeInBits()) {
 551     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
 552     if (BaseAndOffset.first)
 553       ConstantOffsetPtrs[&I] = BaseAndOffset;
 554   }
 555
 556   // "Propagate" SROA here in the same manner as we do for ptrtoint above.
 557   Value *SROAArg;
 558   DenseMap<Value *, int>::iterator CostIt;
 559   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
 560     SROAArgValues[&I] = SROAArg;
 561
 562   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 563 }
 564
 565 bool CallAnalyzer::visitCastInst(CastInst &I) {
 566   // Propagate constants through ptrtoint.
 567   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 568         return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
 569       }))
 570     return true;
 571
 572   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
 573   disableSROA(I.getOperand(0));
 574
 575   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 576 }
 577
 578 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
 579   Value *Operand = I.getOperand(0);
 580   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 581         return ConstantFoldInstOperands(&I, COps[0], DL);
 582       }))
 583     return true;
 584
 585   // Disable any SROA on the argument to arbitrary unary operators.
 586   disableSROA(Operand);
 587
 588   return false;
 589 }
 590
 591 bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
 592   return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
 593 }
 594
 595 bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
 596   // Does the *call site* have the NonNull attribute set on an argument?  We
 597   // use the attribute on the call site to memoize any analysis done in the
 598   // caller. This will also trip if the callee function has a non-null
 599   // parameter attribute, but that's a less interesting case because hopefully
 600   // the callee would already have been simplified based on that.
 601   if (Argument *A = dyn_cast<Argument>(V))
 602     if (paramHasAttr(A, Attribute::NonNull))
 603       return true;
 604
 605   // Is this an alloca in the caller?  This is distinct from the attribute case
 606   // above because attributes aren't updated within the inliner itself and we
 607   // always want to catch the alloca derived case.
 608   if (isAllocaDerivedArg(V))
 609     // We can actually predict the result of comparisons between an
 610     // alloca-derived value and null. Note that this fires regardless of
 611     // SROA firing.
 612     return true;
 613
 614   return false;
 615 }
 616
 617 bool CallAnalyzer::allowSizeGrowth(CallSite CS) {
 618   // If the normal destination of the invoke or the parent block of the call
 619   // site is unreachable-terminated, there is little point in inlining this
 620   // unless there is literally zero cost.
 621   // FIXME: Note that it is possible that an unreachable-terminated block has a
 622   // hot entry. For example, in below scenario inlining hot_call_X() may be
 623   // beneficial :
 624   // main() {
 625   //   hot_call_1();
 626   //   ...
 627   //   hot_call_N()
 628   //   exit(0);
 629   // }
 630   // For now, we are not handling this corner case here as it is rare in real
 631   // code. In future, we should elaborate this based on BPI and BFI in more
 632   // general threshold adjusting heuristics in updateThreshold().
 633   Instruction *Instr = CS.getInstruction();
 634   if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
 635     if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
 636       return false;
 637   } else if (isa<UnreachableInst>(Instr->getParent()->getTerminator()))
 638     return false;
 639
 640   return true;
 641 }
 642
 643 bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
 644   // If global profile summary is available, then callsite's coldness is
 645   // determined based on that.
 646   if (PSI->hasProfileSummary())
 647     return PSI->isColdCallSite(CS, CallerBFI);
 648   if (!CallerBFI)
 649     return false;
 650
 651   // In the absence of global profile summary, determine if the callsite is cold
 652   // relative to caller's entry. We could potentially cache the computation of
 653   // scaled entry frequency, but the added complexity is not worth it unless
 654   // this scaling shows up high in the profiles.
 655   const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
 656   auto CallSiteBB = CS.getInstruction()->getParent();
 657   auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
 658   auto CallerEntryFreq =
 659       CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
 660   return CallSiteFreq < CallerEntryFreq * ColdProb;
 661 }
 662
 663 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
 664   // If no size growth is allowed for this inlining, set Threshold to 0.
 665   if (!allowSizeGrowth(CS)) {
 666     Threshold = 0;
 667     return;
 668   }
 669
 670   Function *Caller = CS.getCaller();
 671
 672   // return min(A, B) if B is valid.
 673   auto MinIfValid = [](int A, Optional<int> B) {
 674     return B ? std::min(A, B.getValue()) : A;
 675   };
 676
 677   // return max(A, B) if B is valid.
 678   auto MaxIfValid = [](int A, Optional<int> B) {
 679     return B ? std::max(A, B.getValue()) : A;
 680   };
 681
 682   // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
 683   // and reduce the threshold if the caller has the necessary attribute.
 684   if (Caller->optForMinSize())
 685     Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
 686   else if (Caller->optForSize())
 687     Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
 688
 689   // Adjust the threshold based on inlinehint attribute and profile based
 690   // hotness information if the caller does not have MinSize attribute.
 691   if (!Caller->optForMinSize()) {
 692     if (Callee.hasFnAttribute(Attribute::InlineHint))
 693       Threshold = MaxIfValid(Threshold, Params.HintThreshold);
 694     if (PSI) {
 695       BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
 696       // FIXME: After switching to the new passmanager, simplify the logic below
 697       // by checking only the callsite hotness/coldness. The check for CallerBFI
 698       // exists only because we do not have BFI available with the old PM.
 699       //
 700       // Use callee's hotness information only if we have no way of determining
 701       // callsite's hotness information. Callsite hotness can be determined if
 702       // sample profile is used (which adds hotness metadata to calls) or if
 703       // caller's BlockFrequencyInfo is available.
 704       if (CallerBFI || PSI->hasSampleProfile()) {
 705         if (PSI->isHotCallSite(CS, CallerBFI)) {
 706           DEBUG(dbgs() << "Hot callsite.\n");
 707           Threshold = Params.HotCallSiteThreshold.getValue();
 708         } else if (isColdCallSite(CS, CallerBFI)) {
 709           DEBUG(dbgs() << "Cold callsite.\n");
 710           Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
 711         }
 712       } else {
 713         if (PSI->isFunctionEntryHot(&Callee)) {
 714           DEBUG(dbgs() << "Hot callee.\n");
 715           // If callsite hotness can not be determined, we may still know
 716           // that the callee is hot and treat it as a weaker hint for threshold
 717           // increase.
 718           Threshold = MaxIfValid(Threshold, Params.HintThreshold);
 719         } else if (PSI->isFunctionEntryCold(&Callee)) {
 720           DEBUG(dbgs() << "Cold callee.\n");
 721           Threshold = MinIfValid(Threshold, Params.ColdThreshold);
 722         }
 723       }
 724     }
 725   }
 726
 727   // Finally, take the target-specific inlining threshold multiplier into
 728   // account.
 729   Threshold *= TTI.getInliningThresholdMultiplier();
 730 }
 731
 732 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
 733   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 734   // First try to handle simplified comparisons.
 735   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 736         return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
 737       }))
 738     return true;
 739
 740   if (I.getOpcode() == Instruction::FCmp)
 741     return false;
 742
 743   // Otherwise look for a comparison between constant offset pointers with
 744   // a common base.
 745   Value *LHSBase, *RHSBase;
 746   APInt LHSOffset, RHSOffset;
 747   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
 748   if (LHSBase) {
 749     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
 750     if (RHSBase && LHSBase == RHSBase) {
 751       // We have common bases, fold the icmp to a constant based on the
 752       // offsets.
 753       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
 754       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
 755       if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
 756         SimplifiedValues[&I] = C;
 757         ++NumConstantPtrCmps;
 758         return true;
 759       }
 760     }
 761   }
 762
 763   // If the comparison is an equality comparison with null, we can simplify it
 764   // if we know the value (argument) can't be null
 765   if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
 766       isKnownNonNullInCallee(I.getOperand(0))) {
 767     bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
 768     SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
 769                                       : ConstantInt::getFalse(I.getType());
 770     return true;
 771   }
 772   // Finally check for SROA candidates in comparisons.
 773   Value *SROAArg;
 774   DenseMap<Value *, int>::iterator CostIt;
 775   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
 776     if (isa<ConstantPointerNull>(I.getOperand(1))) {
 777       accumulateSROACost(CostIt, InlineConstants::InstrCost);
 778       return true;
 779     }
 780
 781     disableSROA(CostIt);
 782   }
 783
 784   return false;
 785 }
 786
 787 bool CallAnalyzer::visitSub(BinaryOperator &I) {
 788   // Try to handle a special case: we can fold computing the difference of two
 789   // constant-related pointers.
 790   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 791   Value *LHSBase, *RHSBase;
 792   APInt LHSOffset, RHSOffset;
 793   std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
 794   if (LHSBase) {
 795     std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
 796     if (RHSBase && LHSBase == RHSBase) {
 797       // We have common bases, fold the subtract to a constant based on the
 798       // offsets.
 799       Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
 800       Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
 801       if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
 802         SimplifiedValues[&I] = C;
 803         ++NumConstantPtrDiffs;
 804         return true;
 805       }
 806     }
 807   }
 808
 809   // Otherwise, fall back to the generic logic for simplifying and handling
 810   // instructions.
 811   return Base::visitSub(I);
 812 }
 813
 814 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
 815   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 816   auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
 817     Value *SimpleV = nullptr;
 818     if (auto FI = dyn_cast<FPMathOperator>(&I))
 819       SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
 820                                 FI->getFastMathFlags(), DL);
 821     else
 822       SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
 823     return dyn_cast_or_null<Constant>(SimpleV);
 824   };
 825
 826   if (simplifyInstruction(I, Evaluate))
 827     return true;
 828
 829   // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
 830   disableSROA(LHS);
 831   disableSROA(RHS);
 832
 833   return false;
 834 }
 835
 836 bool CallAnalyzer::visitLoad(LoadInst &I) {
 837   Value *SROAArg;
 838   DenseMap<Value *, int>::iterator CostIt;
 839   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
 840     if (I.isSimple()) {
 841       accumulateSROACost(CostIt, InlineConstants::InstrCost);
 842       return true;
 843     }
 844
 845     disableSROA(CostIt);
 846   }
 847
 848   return false;
 849 }
 850
 851 bool CallAnalyzer::visitStore(StoreInst &I) {
 852   Value *SROAArg;
 853   DenseMap<Value *, int>::iterator CostIt;
 854   if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
 855     if (I.isSimple()) {
 856       accumulateSROACost(CostIt, InlineConstants::InstrCost);
 857       return true;
 858     }
 859
 860     disableSROA(CostIt);
 861   }
 862
 863   return false;
 864 }
 865
 866 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
 867   // Constant folding for extract value is trivial.
 868   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 869         return ConstantExpr::getExtractValue(COps[0], I.getIndices());
 870       }))
 871     return true;
 872
 873   // SROA can look through these but give them a cost.
 874   return false;
 875 }
 876
 877 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
 878   // Constant folding for insert value is trivial.
 879   if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
 880         return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
 881                                             /*InsertedValueOperand*/ COps[1],
 882                                             I.getIndices());
 883       }))
 884     return true;
 885
 886   // SROA can look through these but give them a cost.
 887   return false;
 888 }
 889
 890 /// \brief Try to simplify a call site.
 891 ///
 892 /// Takes a concrete function and callsite and tries to actually simplify it by
 893 /// analyzing the arguments and call itself with instsimplify. Returns true if
 894 /// it has simplified the callsite to some other entity (a constant), making it
 895 /// free.
 896 bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
 897   // FIXME: Using the instsimplify logic directly for this is inefficient
 898   // because we have to continually rebuild the argument list even when no
 899   // simplifications can be performed. Until that is fixed with remapping
 900   // inside of instsimplify, directly constant fold calls here.
 901   if (!canConstantFoldCallTo(CS, F))
 902     return false;
 903
 904   // Try to re-map the arguments to constants.
 905   SmallVector<Constant *, 4> ConstantArgs;
 906   ConstantArgs.reserve(CS.arg_size());
 907   for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E;
 908        ++I) {
 909     Constant *C = dyn_cast<Constant>(*I);
 910     if (!C)
 911       C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
 912     if (!C)
 913       return false; // This argument doesn't map to a constant.
 914
 915     ConstantArgs.push_back(C);
 916   }
 917   if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) {
 918     SimplifiedValues[CS.getInstruction()] = C;
 919     return true;
 920   }
 921
 922   return false;
 923 }
 924
 925 bool CallAnalyzer::visitCallSite(CallSite CS) {
 926   if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
 927       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
 928     // This aborts the entire analysis.
 929     ExposesReturnsTwice = true;
 930     return false;
 931   }
 932   if (CS.isCall() && cast<CallInst>(CS.getInstruction())->cannotDuplicate())
 933     ContainsNoDuplicateCall = true;
 934
 935   if (Function *F = CS.getCalledFunction()) {
 936     // When we have a concrete function, first try to simplify it directly.
 937     if (simplifyCallSite(F, CS))
 938       return true;
 939
 940     // Next check if it is an intrinsic we know about.
 941     // FIXME: Lift this into part of the InstVisitor.
 942     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
 943       switch (II->getIntrinsicID()) {
 944       default:
 945         return Base::visitCallSite(CS);
 946
 947       case Intrinsic::load_relative:
 948         // This is normally lowered to 4 LLVM instructions.
 949         Cost += 3 * InlineConstants::InstrCost;
 950         return false;
 951
 952       case Intrinsic::memset:
 953       case Intrinsic::memcpy:
 954       case Intrinsic::memmove:
 955         // SROA can usually chew through these intrinsics, but they aren't free.
 956         return false;
 957       case Intrinsic::localescape:
 958         HasFrameEscape = true;
 959         return false;
 960       }
 961     }
 962
 963     if (F == CS.getInstruction()->getParent()->getParent()) {
 964       // This flag will fully abort the analysis, so don't bother with anything
 965       // else.
 966       IsRecursiveCall = true;
 967       return false;
 968     }
 969
 970     if (TTI.isLoweredToCall(F)) {
 971       // We account for the average 1 instruction per call argument setup
 972       // here.
 973       Cost += CS.arg_size() * InlineConstants::InstrCost;
 974
 975       // Everything other than inline ASM will also have a significant cost
 976       // merely from making the call.
 977       if (!isa<InlineAsm>(CS.getCalledValue()))
 978         Cost += InlineConstants::CallPenalty;
 979     }
 980
 981     return Base::visitCallSite(CS);
 982   }
 983
 984   // Otherwise we're in a very special case -- an indirect function call. See
 985   // if we can be particularly clever about this.
 986   Value *Callee = CS.getCalledValue();
 987
 988   // First, pay the price of the argument setup. We account for the average
 989   // 1 instruction per call argument setup here.
 990   Cost += CS.arg_size() * InlineConstants::InstrCost;
 991
 992   // Next, check if this happens to be an indirect function call to a known
 993   // function in this inline context. If not, we've done all we can.
 994   Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
 995   if (!F)
 996     return Base::visitCallSite(CS);
 997
 998   // If we have a constant that we are calling as a function, we can peer
 999   // through it and see the function target. This happens not infrequently
1000   // during devirtualization and so we want to give it a hefty bonus for
1001   // inlining, but cap that bonus in the event that inlining wouldn't pan
1002   // out. Pretend to inline the function, with a custom threshold.
1003   auto IndirectCallParams = Params;
1004   IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
1005   CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
1006                   IndirectCallParams);
1007   if (CA.analyzeCall(CS)) {
1008     // We were able to inline the indirect call! Subtract the cost from the
1009     // threshold to get the bonus we want to apply, but don't go below zero.
1010     Cost -= std::max(0, CA.getThreshold() - CA.getCost());
1011   }
1012
1013   return Base::visitCallSite(CS);
1014 }
1015
1016 bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
1017   // At least one return instruction will be free after inlining.
1018   bool Free = !HasReturn;
1019   HasReturn = true;
1020   return Free;
1021 }
1022
1023 bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
1024   // We model unconditional branches as essentially free -- they really
1025   // shouldn't exist at all, but handling them makes the behavior of the
1026   // inliner more regular and predictable. Interestingly, conditional branches
1027   // which will fold away are also free.
1028   return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
1029          dyn_cast_or_null<ConstantInt>(
1030              SimplifiedValues.lookup(BI.getCondition()));
1031 }
1032
1033 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
1034   // We model unconditional switches as free, see the comments on handling
1035   // branches.
1036   if (isa<ConstantInt>(SI.getCondition()))
1037     return true;
1038   if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
1039     if (isa<ConstantInt>(V))
1040       return true;
1041
1042   // Assume the most general case where the switch is lowered into
1043   // either a jump table, bit test, or a balanced binary tree consisting of
1044   // case clusters without merging adjacent clusters with the same
1045   // destination. We do not consider the switches that are lowered with a mix
1046   // of jump table/bit test/binary search tree. The cost of the switch is
1047   // proportional to the size of the tree or the size of jump table range.
1048   //
1049   // NB: We convert large switches which are just used to initialize large phi
1050   // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
1051   // inlining those. It will prevent inlining in cases where the optimization
1052   // does not (yet) fire.
1053
1054   // Maximum valid cost increased in this function.
1055   int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
1056
1057   // Exit early for a large switch, assuming one case needs at least one
1058   // instruction.
1059   // FIXME: This is not true for a bit test, but ignore such case for now to
1060   // save compile-time.
1061   int64_t CostLowerBound =
1062       std::min((int64_t)CostUpperBound,
1063                (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
1064
1065   if (CostLowerBound > Threshold) {
1066     Cost = CostLowerBound;
1067     return false;
1068   }
1069
1070   unsigned JumpTableSize = 0;
1071   unsigned NumCaseCluster =
1072       TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
1073
1074   // If suitable for a jump table, consider the cost for the table size and
1075   // branch to destination.
1076   if (JumpTableSize) {
1077     int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
1078                      4 * InlineConstants::InstrCost;
1079
1080     Cost = std::min((int64_t)CostUpperBound, JTCost + Cost);
1081     return false;
1082   }
1083
1084   // Considering forming a binary search, we should find the number of nodes
1085   // which is same as the number of comparisons when lowered. For a given
1086   // number of clusters, n, we can define a recursive function, f(n), to find
1087   // the number of nodes in the tree. The recursion is :
1088   // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
1089   // and f(n) = n, when n <= 3.
1090   // This will lead a binary tree where the leaf should be either f(2) or f(3)
1091   // when n > 3.  So, the number of comparisons from leaves should be n, while
1092   // the number of non-leaf should be :
1093   //   2^(log2(n) - 1) - 1
1094   //   = 2^log2(n) * 2^-1 - 1
1095   //   = n / 2 - 1.
1096   // Considering comparisons from leaf and non-leaf nodes, we can estimate the
1097   // number of comparisons in a simple closed form :
1098   //   n + n / 2 - 1 = n * 3 / 2 - 1
1099   if (NumCaseCluster <= 3) {
1100     // Suppose a comparison includes one compare and one conditional branch.
1101     Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
1102     return false;
1103   }
1104
1105   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
1106   int64_t SwitchCost =
1107       ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
1108
1109   Cost = std::min((int64_t)CostUpperBound, SwitchCost + Cost);
1110   return false;
1111 }
1112
1113 bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
1114   // We never want to inline functions that contain an indirectbr.  This is
1115   // incorrect because all the blockaddress's (in static global initializers
1116   // for example) would be referring to the original function, and this
1117   // indirect jump would jump from the inlined copy of the function into the
1118   // original function which is extremely undefined behavior.
1119   // FIXME: This logic isn't really right; we can safely inline functions with
1120   // indirectbr's as long as no other function or global references the
1121   // blockaddress of a block within the current function.
1122   HasIndirectBr = true;
1123   return false;
1124 }
1125
1126 bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
1127   // FIXME: It's not clear that a single instruction is an accurate model for
1128   // the inline cost of a resume instruction.
1129   return false;
1130 }
1131
1132 bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) {
1133   // FIXME: It's not clear that a single instruction is an accurate model for
1134   // the inline cost of a cleanupret instruction.
1135   return false;
1136 }
1137
1138 bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) {
1139   // FIXME: It's not clear that a single instruction is an accurate model for
1140   // the inline cost of a catchret instruction.
1141   return false;
1142 }
1143
1144 bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
1145   // FIXME: It might be reasonably to discount the cost of instructions leading
1146   // to unreachable as they have the lowest possible impact on both runtime and
1147   // code size.
1148   return true; // No actual code is needed for unreachable.
1149 }
1150
1151 bool CallAnalyzer::visitInstruction(Instruction &I) {
1152   // Some instructions are free. All of the free intrinsics can also be
1153   // handled by SROA, etc.
1154   if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I))
1155     return true;
1156
1157   // We found something we don't understand or can't handle. Mark any SROA-able
1158   // values in the operand list as no longer viable.
1159   for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
1160     disableSROA(*OI);
1161
1162   return false;
1163 }
1164
1165 /// \brief Analyze a basic block for its contribution to the inline cost.
1166 ///
1167 /// This method walks the analyzer over every instruction in the given basic
1168 /// block and accounts for their cost during inlining at this callsite. It
1169 /// aborts early if the threshold has been exceeded or an impossible to inline
1170 /// construct has been detected. It returns false if inlining is no longer
1171 /// viable, and true if inlining remains viable.
1172 bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
1173                                 SmallPtrSetImpl<const Value *> &EphValues) {
1174   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
1175     // FIXME: Currently, the number of instructions in a function regardless of
1176     // our ability to simplify them during inline to constants or dead code,
1177     // are actually used by the vector bonus heuristic. As long as that's true,
1178     // we have to special case debug intrinsics here to prevent differences in
1179     // inlining due to debug symbols. Eventually, the number of unsimplified
1180     // instructions shouldn't factor into the cost computation, but until then,
1181     // hack around it here.
1182     if (isa<DbgInfoIntrinsic>(I))
1183       continue;
1184
1185     // Skip ephemeral values.
1186     if (EphValues.count(&*I))
1187       continue;
1188
1189     ++NumInstructions;
1190     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
1191       ++NumVectorInstructions;
1192
1193     // If the instruction is floating point, and the target says this operation
1194     // is expensive or the function has the "use-soft-float" attribute, this may
1195     // eventually become a library call. Treat the cost as such.
1196     if (I->getType()->isFloatingPointTy()) {
1197       // If the function has the "use-soft-float" attribute, mark it as
1198       // expensive.
1199       if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
1200           (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
1201         Cost += InlineConstants::CallPenalty;
1202     }
1203
1204     // If the instruction simplified to a constant, there is no cost to this
1205     // instruction. Visit the instructions using our InstVisitor to account for
1206     // all of the per-instruction logic. The visit tree returns true if we
1207     // consumed the instruction in any way, and false if the instruction's base
1208     // cost should count against inlining.
1209     if (Base::visit(&*I))
1210       ++NumInstructionsSimplified;
1211     else
1212       Cost += InlineConstants::InstrCost;
1213
1214     // If the visit this instruction detected an uninlinable pattern, abort.
1215     if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
1216         HasIndirectBr || HasFrameEscape)
1217       return false;
1218
1219     // If the caller is a recursive function then we don't want to inline
1220     // functions which allocate a lot of stack space because it would increase
1221     // the caller stack usage dramatically.
1222     if (IsCallerRecursive &&
1223         AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
1224       return false;
1225
1226     // Check if we've past the maximum possible threshold so we don't spin in
1227     // huge basic blocks that will never inline.
1228     if (Cost > Threshold)
1229       return false;
1230   }
1231
1232   return true;
1233 }
1234
1235 /// \brief Compute the base pointer and cumulative constant offsets for V.
1236 ///
1237 /// This strips all constant offsets off of V, leaving it the base pointer, and
1238 /// accumulates the total constant offset applied in the returned constant. It
1239 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
1240 /// no constant offsets applied.
1241 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
1242   if (!V->getType()->isPointerTy())
1243     return nullptr;
1244
1245   unsigned IntPtrWidth = DL.getPointerSizeInBits();
1246   APInt Offset = APInt::getNullValue(IntPtrWidth);
1247
1248   // Even though we don't look through PHI nodes, we could be called on an
1249   // instruction in an unreachable block, which may be on a cycle.
1250   SmallPtrSet<Value *, 4> Visited;
1251   Visited.insert(V);
1252   do {
1253     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
1254       if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
1255         return nullptr;
1256       V = GEP->getPointerOperand();
1257     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
1258       V = cast<Operator>(V)->getOperand(0);
1259     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1260       if (GA->isInterposable())
1261         break;
1262       V = GA->getAliasee();
1263     } else {
1264       break;
1265     }
1266     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
1267   } while (Visited.insert(V).second);
1268
1269   Type *IntPtrTy = DL.getIntPtrType(V->getContext());
1270   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
1271 }
1272
1273 /// \brief Analyze a call site for potential inlining.
1274 ///
1275 /// Returns true if inlining this call is viable, and false if it is not
1276 /// viable. It computes the cost and adjusts the threshold based on numerous
1277 /// factors and heuristics. If this method returns false but the computed cost
1278 /// is below the computed threshold, then inlining was forcibly disabled by
1279 /// some artifact of the routine.
1280 bool CallAnalyzer::analyzeCall(CallSite CS) {
1281   ++NumCallsAnalyzed;
1282
1283   // Perform some tweaks to the cost and threshold based on the direct
1284   // callsite information.
1285
1286   // We want to more aggressively inline vector-dense kernels, so up the
1287   // threshold, and we'll lower it if the % of vector instructions gets too
1288   // low. Note that these bonuses are some what arbitrary and evolved over time
1289   // by accident as much as because they are principled bonuses.
1290   //
1291   // FIXME: It would be nice to remove all such bonuses. At least it would be
1292   // nice to base the bonus values on something more scientific.
1293   assert(NumInstructions == 0);
1294   assert(NumVectorInstructions == 0);
1295
1296   // Update the threshold based on callsite properties
1297   updateThreshold(CS, F);
1298
1299   FiftyPercentVectorBonus = 3 * Threshold / 2;
1300   TenPercentVectorBonus = 3 * Threshold / 4;
1301
1302   // Track whether the post-inlining function would have more than one basic
1303   // block. A single basic block is often intended for inlining. Balloon the
1304   // threshold by 50% until we pass the single-BB phase.
1305   bool SingleBB = true;
1306   int SingleBBBonus = Threshold / 2;
1307
1308   // Speculatively apply all possible bonuses to Threshold. If cost exceeds
1309   // this Threshold any time, and cost cannot decrease, we can stop processing
1310   // the rest of the function body.
1311   Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
1312
1313   // Give out bonuses for the callsite, as the instructions setting them up
1314   // will be gone after inlining.
1315   Cost -= getCallsiteCost(CS, DL);
1316
1317   // If there is only one call of the function, and it has internal linkage,
1318   // the cost of inlining it drops dramatically.
1319   bool OnlyOneCallAndLocalLinkage =
1320       F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
1321   if (OnlyOneCallAndLocalLinkage)
1322     Cost -= InlineConstants::LastCallToStaticBonus;
1323
1324   // If this function uses the coldcc calling convention, prefer not to inline
1325   // it.
1326   if (F.getCallingConv() == CallingConv::Cold)
1327     Cost += InlineConstants::ColdccPenalty;
1328
1329   // Check if we're done. This can happen due to bonuses and penalties.
1330   if (Cost > Threshold)
1331     return false;
1332
1333   if (F.empty())
1334     return true;
1335
1336   Function *Caller = CS.getInstruction()->getParent()->getParent();
1337   // Check if the caller function is recursive itself.
1338   for (User *U : Caller->users()) {
1339     CallSite Site(U);
1340     if (!Site)
1341       continue;
1342     Instruction *I = Site.getInstruction();
1343     if (I->getParent()->getParent() == Caller) {
1344       IsCallerRecursive = true;
1345       break;
1346     }
1347   }
1348
1349   // Populate our simplified values by mapping from function arguments to call
1350   // arguments with known important simplifications.
1351   CallSite::arg_iterator CAI = CS.arg_begin();
1352   for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
1353        FAI != FAE; ++FAI, ++CAI) {
1354     assert(CAI != CS.arg_end());
1355     if (Constant *C = dyn_cast<Constant>(CAI))
1356       SimplifiedValues[&*FAI] = C;
1357
1358     Value *PtrArg = *CAI;
1359     if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
1360       ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());
1361
1362       // We can SROA any pointer arguments derived from alloca instructions.
1363       if (isa<AllocaInst>(PtrArg)) {
1364         SROAArgValues[&*FAI] = PtrArg;
1365         SROAArgCosts[PtrArg] = 0;
1366       }
1367     }
1368   }
1369   NumConstantArgs = SimplifiedValues.size();
1370   NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
1371   NumAllocaArgs = SROAArgValues.size();
1372
1373   // FIXME: If a caller has multiple calls to a callee, we end up recomputing
1374   // the ephemeral values multiple times (and they're completely determined by
1375   // the callee, so this is purely duplicate work).
1376   SmallPtrSet<const Value *, 32> EphValues;
1377   CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
1378
1379   // The worklist of live basic blocks in the callee *after* inlining. We avoid
1380   // adding basic blocks of the callee which can be proven to be dead for this
1381   // particular call site in order to get more accurate cost estimates. This
1382   // requires a somewhat heavyweight iteration pattern: we need to walk the
1383   // basic blocks in a breadth-first order as we insert live successors. To
1384   // accomplish this, prioritizing for small iterations because we exit after
1385   // crossing our threshold, we use a small-size optimized SetVector.
1386   typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
1387                     SmallPtrSet<BasicBlock *, 16>>
1388       BBSetVector;
1389   BBSetVector BBWorklist;
1390   BBWorklist.insert(&F.getEntryBlock());
1391   // Note that we *must not* cache the size, this loop grows the worklist.
1392   for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
1393     // Bail out the moment we cross the threshold. This means we'll under-count
1394     // the cost, but only when undercounting doesn't matter.
1395     if (Cost > Threshold)
1396       break;
1397
1398     BasicBlock *BB = BBWorklist[Idx];
1399     if (BB->empty())
1400       continue;
1401
1402     // Disallow inlining a blockaddress. A blockaddress only has defined
1403     // behavior for an indirect branch in the same function, and we do not
1404     // currently support inlining indirect branches. But, the inliner may not
1405     // see an indirect branch that ends up being dead code at a particular call
1406     // site. If the blockaddress escapes the function, e.g., via a global
1407     // variable, inlining may lead to an invalid cross-function reference.
1408     if (BB->hasAddressTaken())
1409       return false;
1410
1411     // Analyze the cost of this block. If we blow through the threshold, this
1412     // returns false, and we can bail on out.
1413     if (!analyzeBlock(BB, EphValues))
1414       return false;
1415
1416     TerminatorInst *TI = BB->getTerminator();
1417
1418     // Add in the live successors by first checking whether we have terminator
1419     // that may be simplified based on the values simplified by this call.
1420     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1421       if (BI->isConditional()) {
1422         Value *Cond = BI->getCondition();
1423         if (ConstantInt *SimpleCond =
1424                 dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1425           BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
1426           continue;
1427         }
1428       }
1429     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
1430       Value *Cond = SI->getCondition();
1431       if (ConstantInt *SimpleCond =
1432               dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
1433         BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
1434         continue;
1435       }
1436     }
1437
1438     // If we're unable to select a particular successor, just count all of
1439     // them.
1440     for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
1441          ++TIdx)
1442       BBWorklist.insert(TI->getSuccessor(TIdx));
1443
1444     // If we had any successors at this point, than post-inlining is likely to
1445     // have them as well. Note that we assume any basic blocks which existed
1446     // due to branches or switches which folded above will also fold after
1447     // inlining.
1448     if (SingleBB && TI->getNumSuccessors() > 1) {
1449       // Take off the bonus we applied to the threshold.
1450       Threshold -= SingleBBBonus;
1451       SingleBB = false;
1452     }
1453   }
1454
1455   // If this is a noduplicate call, we can still inline as long as
1456   // inlining this would cause the removal of the caller (so the instruction
1457   // is not actually duplicated, just moved).
1458   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
1459     return false;
1460
1461   // We applied the maximum possible vector bonus at the beginning. Now,
1462   // subtract the excess bonus, if any, from the Threshold before
1463   // comparing against Cost.
1464   if (NumVectorInstructions <= NumInstructions / 10)
1465     Threshold -= FiftyPercentVectorBonus;
1466   else if (NumVectorInstructions <= NumInstructions / 2)
1467     Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
1468
1469   return Cost < std::max(1, Threshold);
1470 }
1471
1472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1473 /// \brief Dump stats about this call's analysis.
1474 LLVM_DUMP_METHOD void CallAnalyzer::dump() {
1475 #define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
1476   DEBUG_PRINT_STAT(NumConstantArgs);
1477   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
1478   DEBUG_PRINT_STAT(NumAllocaArgs);
1479   DEBUG_PRINT_STAT(NumConstantPtrCmps);
1480   DEBUG_PRINT_STAT(NumConstantPtrDiffs);
1481   DEBUG_PRINT_STAT(NumInstructionsSimplified);
1482   DEBUG_PRINT_STAT(NumInstructions);
1483   DEBUG_PRINT_STAT(SROACostSavings);
1484   DEBUG_PRINT_STAT(SROACostSavingsLost);
1485   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
1486   DEBUG_PRINT_STAT(Cost);
1487   DEBUG_PRINT_STAT(Threshold);
1488 #undef DEBUG_PRINT_STAT
1489 }
1490 #endif
1491
1492 /// \brief Test that there are no attribute conflicts between Caller and Callee
1493 ///        that prevent inlining.
1494 static bool functionsHaveCompatibleAttributes(Function *Caller,
1495                                               Function *Callee,
1496                                               TargetTransformInfo &TTI) {
1497   return TTI.areInlineCompatible(Caller, Callee) &&
1498          AttributeFuncs::areInlineCompatible(*Caller, *Callee);
1499 }
1500
1501 int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) {
1502   int Cost = 0;
1503   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
1504     if (CS.isByValArgument(I)) {
1505       // We approximate the number of loads and stores needed by dividing the
1506       // size of the byval type by the target's pointer size.
1507       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
1508       unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
1509       unsigned PointerSize = DL.getPointerSizeInBits();
1510       // Ceiling division.
1511       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
1512
1513       // If it generates more than 8 stores it is likely to be expanded as an
1514       // inline memcpy so we take that as an upper bound. Otherwise we assume
1515       // one load and one store per word copied.
1516       // FIXME: The maxStoresPerMemcpy setting from the target should be used
1517       // here instead of a magic number of 8, but it's not available via
1518       // DataLayout.
1519       NumStores = std::min(NumStores, 8U);
1520
1521       Cost += 2 * NumStores * InlineConstants::InstrCost;
1522     } else {
1523       // For non-byval arguments subtract off one instruction per call
1524       // argument.
1525       Cost += InlineConstants::InstrCost;
1526     }
1527   }
1528   // The call instruction also disappears after inlining.
1529   Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty;
1530   return Cost;
1531 }
1532
1533 InlineCost llvm::getInlineCost(
1534     CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
1535     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1536     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1537     ProfileSummaryInfo *PSI) {
1538   return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
1539                        GetAssumptionCache, GetBFI, PSI);
1540 }
1541
1542 InlineCost llvm::getInlineCost(
1543     CallSite CS, Function *Callee, const InlineParams &Params,
1544     TargetTransformInfo &CalleeTTI,
1545     std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
1546     Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
1547     ProfileSummaryInfo *PSI) {
1548
1549   // Cannot inline indirect calls.
1550   if (!Callee)
1551     return llvm::InlineCost::getNever();
1552
1553   // Calls to functions with always-inline attributes should be inlined
1554   // whenever possible.
1555   if (CS.hasFnAttr(Attribute::AlwaysInline)) {
1556     if (isInlineViable(*Callee))
1557       return llvm::InlineCost::getAlways();
1558     return llvm::InlineCost::getNever();
1559   }
1560
1561   // Never inline functions with conflicting attributes (unless callee has
1562   // always-inline attribute).
1563   if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))
1564     return llvm::InlineCost::getNever();
1565
1566   // Don't inline this call if the caller has the optnone attribute.
1567   if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
1568     return llvm::InlineCost::getNever();
1569
1570   // Don't inline functions which can be interposed at link-time.  Don't inline
1571   // functions marked noinline or call sites marked noinline.
1572   // Note: inlining non-exact non-interposable functions is fine, since we know
1573   // we have *a* correct implementation of the source level function.
1574   if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
1575       CS.isNoInline())
1576     return llvm::InlineCost::getNever();
1577
1578   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
1579                      << "...\n");
1580
1581   CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
1582                   Params);
1583   bool ShouldInline = CA.analyzeCall(CS);
1584
1585   DEBUG(CA.dump());
1586
1587   // Check if there was a reason to force inlining or no inlining.
1588   if (!ShouldInline && CA.getCost() < CA.getThreshold())
1589     return InlineCost::getNever();
1590   if (ShouldInline && CA.getCost() >= CA.getThreshold())
1591     return InlineCost::getAlways();
1592
1593   return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
1594 }
1595
1596 bool llvm::isInlineViable(Function &F) {
1597   bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
1598   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
1599     // Disallow inlining of functions which contain indirect branches or
1600     // blockaddresses.
1601     if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
1602       return false;
1603
1604     for (auto &II : *BI) {
1605       CallSite CS(&II);
1606       if (!CS)
1607         continue;
1608
1609       // Disallow recursive calls.
1610       if (&F == CS.getCalledFunction())
1611         return false;
1612
1613       // Disallow calls which expose returns-twice to a function not previously
1614       // attributed as such.
1615       if (!ReturnsTwice && CS.isCall() &&
1616           cast<CallInst>(CS.getInstruction())->canReturnTwice())
1617         return false;
1618
1619       // Disallow inlining functions that call @llvm.localescape. Doing this
1620       // correctly would require major changes to the inliner.
1621       if (CS.getCalledFunction() &&
1622           CS.getCalledFunction()->getIntrinsicID() ==
1623               llvm::Intrinsic::localescape)
1624         return false;
1625     }
1626   }
1627
1628   return true;
1629 }
1630
1631 // APIs to create InlineParams based on command line flags and/or other
1632 // parameters.
1633
1634 InlineParams llvm::getInlineParams(int Threshold) {
1635   InlineParams Params;
1636
1637   // This field is the threshold to use for a callee by default. This is
1638   // derived from one or more of:
1639   //  * optimization or size-optimization levels,
1640   //  * a value passed to createFunctionInliningPass function, or
1641   //  * the -inline-threshold flag.
1642   //  If the -inline-threshold flag is explicitly specified, that is used
1643   //  irrespective of anything else.
1644   if (InlineThreshold.getNumOccurrences() > 0)
1645     Params.DefaultThreshold = InlineThreshold;
1646   else
1647     Params.DefaultThreshold = Threshold;
1648
1649   // Set the HintThreshold knob from the -inlinehint-threshold.
1650   Params.HintThreshold = HintThreshold;
1651
1652   // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
1653   Params.HotCallSiteThreshold = HotCallSiteThreshold;
1654
1655   // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
1656   Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
1657
1658   // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
1659   // -inlinehint-threshold commandline option is not explicitly given. If that
1660   // option is present, then its value applies even for callees with size and
1661   // minsize attributes.
1662   // If the -inline-threshold is not specified, set the ColdThreshold from the
1663   // -inlinecold-threshold even if it is not explicitly passed. If
1664   // -inline-threshold is specified, then -inlinecold-threshold needs to be
1665   // explicitly specified to set the ColdThreshold knob
1666   if (InlineThreshold.getNumOccurrences() == 0) {
1667     Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
1668     Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
1669     Params.ColdThreshold = ColdThreshold;
1670   } else if (ColdThreshold.getNumOccurrences() > 0) {
1671     Params.ColdThreshold = ColdThreshold;
1672   }
1673   return Params;
1674 }
1675
1676 InlineParams llvm::getInlineParams() {
1677   return getInlineParams(InlineThreshold);
1678 }
1679
1680 // Compute the default threshold for inlining based on the opt level and the
1681 // size opt level.
1682 static int computeThresholdFromOptLevels(unsigned OptLevel,
1683                                          unsigned SizeOptLevel) {
1684   if (OptLevel > 2)
1685     return InlineConstants::OptAggressiveThreshold;
1686   if (SizeOptLevel == 1) // -Os
1687     return InlineConstants::OptSizeThreshold;
1688   if (SizeOptLevel == 2) // -Oz
1689     return InlineConstants::OptMinSizeThreshold;
1690   return InlineThreshold;
1691 }
1692
1693 InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
1694   return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
1695 }