1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/IPO/PartialInlining.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BlockFrequencyInfo.h"
18 #include "llvm/Analysis/BranchProbabilityInfo.h"
19 #include "llvm/Analysis/CodeMetrics.h"
20 #include "llvm/Analysis/InlineCost.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
23 #include "llvm/Analysis/ProfileSummaryInfo.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Pass.h"
32 #include "llvm/Transforms/IPO.h"
33 #include "llvm/Transforms/Utils/Cloning.h"
34 #include "llvm/Transforms/Utils/CodeExtractor.h"
37 #define DEBUG_TYPE "partial-inlining"
// Counts call sites into which a callee was partially inlined; shown by
// -stats.
39 STATISTIC(NumPartialInlined,
40 "Number of callsites functions partially inlined into.");
42 // Command line option to disable partial-inlining. The default is false:
44 DisablePartialInlining("disable-partial-inlining", cl::init(false),
45 cl::Hidden, cl::desc("Disable partial ininling"));
46 // This is an option used by testing:
// When set, shouldPartialInline/tryPartialInline bypass their cost checks.
47 static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
48 cl::init(false), cl::ZeroOrMore,
50 cl::desc("Skip Cost Analysis"))
// Upper bound on the number of blocks (guarding entries plus one return
// block) that may be duplicated into callers; compared against
// FunctionOutliningInfo::GetNumInlinedBlocks().
52 static cl::opt<unsigned> MaxNumInlineBlocks(
53 "max-num-inline-blocks", cl::init(5), cl::Hidden,
54 cl::desc("Max Number of Blocks To be Partially Inlined"));
56 // Command line option to set the maximum number of partial inlining allowed
57 // for the module. The default value of -1 means no limit.
58 static cl::opt<int> MaxNumPartialInlining(
59 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
60 cl::desc("Max number of partial inlining. The default is unlimited"));
62 // Used only when PGO or user annotated branch data is absent. It is
63 // the least value that is used to weigh the outline region. If BFI
64 // produces larger value, the BFI value will be used.
66 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
67 cl::Hidden, cl::ZeroOrMore,
68 cl::desc("Relative frequency of outline region to "
// Describes a function whose shape permits partial inlining: a chain of
// guarding entry blocks, each branching either to a common return block
// (kept inline) or toward the single cold region to be outlined.
73 struct FunctionOutliningInfo {
74 FunctionOutliningInfo()
75 : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
77 // Returns the number of blocks to be inlined including all blocks
78 // in Entries and one return block.
79 unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
81 // A set of blocks including the function entry that guard
82 // the region to be outlined.
83 SmallVector<BasicBlock *, 4> Entries;
84 // The return block that is not included in the outlined region.
85 BasicBlock *ReturnBlock;
86 // The dominating block of the region to be outlined.
87 BasicBlock *NonReturnBlock;
88 // The set of blocks in Entries that are predecessors to ReturnBlock
89 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
// Pass-manager-agnostic implementation of partial inlining. Analysis
// accessors are injected via std::function / function_ref so both the
// legacy and the new pass manager can drive it.
92 struct PartialInlinerImpl {
94 std::function<AssumptionCache &(Function &)> *GetAC,
95 std::function<TargetTransformInfo &(Function &)> *GTTI,
96 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
97 ProfileSummaryInfo *ProfSI)
98 : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
// Attempt to outline F's cold region and inline the remaining hot part
// into F's callers; returns the newly created outlined function when
// partial inlining succeeded.
100 Function *unswitchFunction(Function *F);
// Running count of partial inlinings performed, checked against the
// -max-partial-inlining budget in IsLimitReached().
103 int NumPartialInlining = 0;
104 std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
105 std::function<TargetTransformInfo &(Function &)> *GetTTI;
// GetBFI is only available under the new pass manager; the legacy path
// passes None and BFI is computed manually when needed.
106 Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
107 ProfileSummaryInfo *PSI;
109 // Return the frequency of the OutliningCallBB relative to F's entry point.
110 // The result is no larger than 1 and is represented using BP.
111 // (Note that the outlined region's 'head' block can only have incoming
112 // edges from the guarding entry blocks).
113 BranchProbability getOutliningCallBBRelativeFreq(Function *F,
114 FunctionOutliningInfo *OI,
115 Function *DuplicateFunction,
116 BlockFrequencyInfo *BFI,
117 BasicBlock *OutliningCallBB);
119 // Return true if the callee of CS should be partially inlined with
121 bool shouldPartialInline(CallSite CS, Function *F, FunctionOutliningInfo *OI,
122 BlockFrequencyInfo *CalleeBFI,
123 BasicBlock *OutliningCallBB,
124 int OutliningCallOverhead,
125 OptimizationRemarkEmitter &ORE);
127 // Try to inline DuplicateFunction (cloned from F with call to
128 // the OutlinedFunction into its callers. Return true
129 // if there is any successful inlining.
130 bool tryPartialInline(Function *DuplicateFunction,
131 Function *F, /*original function */
132 FunctionOutliningInfo *OI, Function *OutlinedFunction,
133 BlockFrequencyInfo *CalleeBFI);
135 // Compute the mapping from use site of DuplicationFunction to the enclosing
136 // BB's profile count.
137 void computeCallsiteToProfCountMap(Function *DuplicateFunction,
138 DenseMap<User *, uint64_t> &SiteCountMap);
// Returns true once the -max-partial-inlining budget (if any) is spent.
140 bool IsLimitReached() {
141 return (MaxNumPartialInlining != -1 &&
142 NumPartialInlining >= MaxNumPartialInlining);
// Wrap a function user into a CallSite; every user is expected to be a
// direct CallInst or InvokeInst (enforced by llvm_unreachable below).
145 CallSite getCallSite(User *U) {
147 if (CallInst *CI = dyn_cast<CallInst>(U))
149 else if (InvokeInst *II = dyn_cast<InvokeInst>(U))
152 llvm_unreachable("All uses must be calls");
// Returns the call site of F's first user (caller must ensure F has users).
156 CallSite getOneCallSiteTo(Function *F) {
157 User *User = *F->user_begin();
158 return getCallSite(User);
// Returns the debug location and enclosing block of one call site to F;
// used as the anchor for optimization remarks.
161 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function *F) {
162 CallSite CS = getOneCallSiteTo(F);
163 DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
164 BasicBlock *Block = CS.getParent();
165 return std::make_tuple(DLoc, Block);
168 // Returns the costs associated with function outlining:
169 // - The first value is the non-weighted runtime cost for making the call
170 // to the outlined function 'OutlinedFunction', including the additional
171 // setup cost in the outlined function itself;
172 // - The second value is the estimated size of the new call sequence in
173 // basic block 'OutliningCallBB';
174 // - The third value is the estimated size of the original code from
175 // function 'F' that is extracted into the outlined function.
176 std::tuple<int, int, int>
177 computeOutliningCosts(Function *F, const FunctionOutliningInfo *OutliningInfo,
178 Function *OutlinedFunction,
179 BasicBlock *OutliningCallBB);
180 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
181 // approximate both the size and runtime cost (Note that in the current
182 // inline cost analysis, there is no clear distinction there either).
183 int computeBBInlineCost(BasicBlock *BB);
// Recognize whether F has the guard-then-return shape this pass handles;
// returns null when F is not a candidate.
185 std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);
// Legacy pass manager wrapper: gathers the analyses PartialInlinerImpl
// needs and forwards to its run() method.
189 struct PartialInlinerLegacyPass : public ModulePass {
190 static char ID; // Pass identification, replacement for typeid
191 PartialInlinerLegacyPass() : ModulePass(ID) {
192 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
195 void getAnalysisUsage(AnalysisUsage &AU) const override {
196 AU.addRequired<AssumptionCacheTracker>();
197 AU.addRequired<ProfileSummaryInfoWrapperPass>();
198 AU.addRequired<TargetTransformInfoWrapperPass>();
200 bool runOnModule(Module &M) override {
204 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
205 TargetTransformInfoWrapperPass *TTIWP =
206 &getAnalysis<TargetTransformInfoWrapperPass>();
207 ProfileSummaryInfo *PSI =
208 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Adapt the legacy analyses to the callback interface the impl expects.
210 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
211 [&ACT](Function &F) -> AssumptionCache & {
212 return ACT->getAssumptionCache(F);
215 std::function<TargetTransformInfo &(Function &)> GetTTI =
216 [&TTIWP](Function &F) -> TargetTransformInfo & {
217 return TTIWP->getTTI(F);
// No BFI getter under the legacy PM (None); BFI is computed on demand
// inside the impl when profile counts are required.
220 return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
// Recognize the candidate shape: a chain of two-successor entry blocks each
// branching to a common return block or onward, ending at the single
// dominating block of the region to outline. Returns null (empty
// unique_ptr) when F does not match.
225 std::unique_ptr<FunctionOutliningInfo>
226 PartialInlinerImpl::computeOutliningInfo(Function *F) {
227 BasicBlock *EntryBlock = &F->front();
228 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
// Only a conditional branch in the entry can guard an outlinable region.
229 if (!BR || BR->isUnconditional())
230 return std::unique_ptr<FunctionOutliningInfo>();
232 // Returns true if Succ is BB's successor
233 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
234 return is_contained(successors(BB), Succ);
237 auto SuccSize = [](BasicBlock *BB) {
238 return std::distance(succ_begin(BB), succ_end(BB));
241 auto IsReturnBlock = [](BasicBlock *BB) {
242 TerminatorInst *TI = BB->getTerminator();
243 return isa<ReturnInst>(TI);
// Orders a successor pair as (return block, other); (null, null) if
// neither successor returns.
246 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
247 if (IsReturnBlock(Succ1))
248 return std::make_tuple(Succ1, Succ2);
249 if (IsReturnBlock(Succ2))
250 return std::make_tuple(Succ2, Succ1);
252 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
255 // Detect a triangular shape:
256 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
257 if (IsSuccessor(Succ1, Succ2))
258 return std::make_tuple(Succ1, Succ2);
259 if (IsSuccessor(Succ2, Succ1))
260 return std::make_tuple(Succ2, Succ1);
262 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
265 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
266 llvm::make_unique<FunctionOutliningInfo>();
// Walk the chain of guarding entry blocks starting at the function entry.
268 BasicBlock *CurrEntry = EntryBlock;
269 bool CandidateFound = false;
271 // The number of blocks to be inlined has already reached
272 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
273 // disables partial inlining for the function.
274 if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
277 if (SuccSize(CurrEntry) != 2)
280 BasicBlock *Succ1 = *succ_begin(CurrEntry);
281 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1)
283 BasicBlock *ReturnBlock, *NonReturnBlock;
284 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
// Found the diamond/triangle tip: record the guard and both targets.
287 OutliningInfo->Entries.push_back(CurrEntry);
288 OutliningInfo->ReturnBlock = ReturnBlock;
289 OutliningInfo->NonReturnBlock = NonReturnBlock;
290 CandidateFound = true;
// Otherwise keep walking through a triangular guard toward OtherSucc.
294 BasicBlock *CommSucc;
295 BasicBlock *OtherSucc;
296 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
301 OutliningInfo->Entries.push_back(CurrEntry);
302 CurrEntry = OtherSucc;
307 return std::unique_ptr<FunctionOutliningInfo>();
309 // Do sanity check of the entries: there should not
310 // be any successors (not in the entry set) other than
311 // {ReturnBlock, NonReturnBlock}
312 assert(OutliningInfo->Entries[0] == &F->front() &&
313 "Function Entry must be the first in Entries vector");
314 DenseSet<BasicBlock *> Entries;
315 for (BasicBlock *E : OutliningInfo->Entries)
318 // Returns true if BB has a predecessor which is not
320 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
321 for (auto Pred : predecessors(BB)) {
322 if (!Entries.count(Pred))
// Verify the entry chain is closed (no stray edges in or out) and record
// which entries feed the return block.
327 auto CheckAndNormalizeCandidate =
328 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
329 for (BasicBlock *E : OutliningInfo->Entries) {
330 for (auto Succ : successors(E)) {
331 if (Entries.count(Succ))
333 if (Succ == OutliningInfo->ReturnBlock)
334 OutliningInfo->ReturnBlockPreds.push_back(E);
335 else if (Succ != OutliningInfo->NonReturnBlock)
338 // There should not be any outside incoming edges either:
339 if (HasNonEntryPred(E))
345 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
346 return std::unique_ptr<FunctionOutliningInfo>();
348 // Now further growing the candidate's inlining region by
349 // peeling off dominating blocks from the outlining region:
350 while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
351 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
352 if (SuccSize(Cand) != 2)
// The peeled block must be reachable only from the existing entries.
355 if (HasNonEntryPred(Cand))
358 BasicBlock *Succ1 = *succ_begin(Cand);
359 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
361 BasicBlock *ReturnBlock, *NonReturnBlock;
362 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
363 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
366 if (NonReturnBlock->getSinglePredecessor() != Cand)
369 // Now grow and update OutliningInfo:
370 OutliningInfo->Entries.push_back(Cand);
371 OutliningInfo->NonReturnBlock = NonReturnBlock;
372 OutliningInfo->ReturnBlockPreds.push_back(Cand);
373 Entries.insert(Cand);
376 return OutliningInfo;
379 // Check if there is PGO data or user annotated branch data:
380 static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) {
// An entry count implies PGO instrumentation or sample profile data.
381 if (F->getEntryCount())
383 // Now check if any of the entry block has MD_prof data:
384 for (auto *E : OI->Entries) {
385 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
386 if (!BR || BR->isUnconditional())
// User-annotated (__builtin_expect) branches carry MD_prof weights.
389 if (BR->extractProfMetadata(T, F))
// Computes how often the block containing the outlined call executes
// relative to the entry of DuplicateFunction, expressed as a
// BranchProbability (clamped to <= 1 by construction).
395 BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
396 Function *F, FunctionOutliningInfo *OI, Function *DuplicateFunction,
397 BlockFrequencyInfo *BFI, BasicBlock *OutliningCallBB) {
400 BFI->getBlockFreq(&DuplicateFunction->getEntryBlock());
401 auto OutliningCallFreq = BFI->getBlockFreq(OutliningCallBB);
403 auto OutlineRegionRelFreq =
404 BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(),
405 EntryFreq.getFrequency());
// With real profile data (PGO or user annotation) the BFI ratio is
// trustworthy; use it as-is.
407 if (hasProfileData(F, OI))
408 return OutlineRegionRelFreq;
410 // When profile data is not available, we need to be very
411 // conservative in estimating the overall savings. We need to make sure
412 // the outline region relative frequency is not below the threshold
413 // specified by the option.
414 OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
416 return OutlineRegionRelFreq;
// Decide whether the callee of CS is profitable to partially inline at this
// call site, emitting an optimization remark explaining each outcome.
419 bool PartialInlinerImpl::shouldPartialInline(
420 CallSite CS, Function *F /* Original Callee */, FunctionOutliningInfo *OI,
421 BlockFrequencyInfo *CalleeBFI, BasicBlock *OutliningCallBB,
422 int NonWeightedOutliningRcost, OptimizationRemarkEmitter &ORE) {
// Testing hook: -skip-partial-inlining-cost-analysis bypasses all checks.
424 if (SkipCostAnalysis)
427 Instruction *Call = CS.getInstruction();
428 Function *Callee = CS.getCalledFunction();
429 Function *Caller = CS.getCaller();
430 auto &CalleeTTI = (*GetTTI)(*Callee);
// Reuse the regular inliner's cost model to classify the call site.
431 InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
432 *GetAssumptionCache, GetBFI, PSI);
// cost=always: leave it to the regular inliner to inline the whole body.
435 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
437 << " should always be fully inlined, not partially");
// cost=never: inlining (even partial) is forbidden here.
442 ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
443 << NV("Callee", F) << " not partially inlined into "
444 << NV("Caller", Caller)
445 << " because it should never be inlined (cost=never)");
// Over threshold: too costly even for the duplicated (hot) part.
450 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
451 << NV("Callee", F) << " not partially inlined into "
452 << NV("Caller", Caller) << " because too costly to inline (cost="
453 << NV("Cost", IC.getCost()) << ", threshold="
454 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
457 const DataLayout &DL = Caller->getParent()->getDataLayout();
458 // The savings of eliminating the call:
459 int NonWeightedSavings = getCallsiteCost(CS, DL);
460 BlockFrequency NormWeightedSavings(NonWeightedSavings);
// Weight the outlining overhead by how often the outlined call executes
// relative to the callee's entry.
463 getOutliningCallBBRelativeFreq(F, OI, Callee, CalleeBFI, OutliningCallBB);
464 auto NormWeightedRcost =
465 BlockFrequency(NonWeightedOutliningRcost) * RelativeFreq;
467 // Weighted saving is smaller than weighted cost, return false
468 if (NormWeightedSavings < NormWeightedRcost) {
470 OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call)
471 << NV("Callee", F) << " not partially inlined into "
472 << NV("Caller", Caller) << " runtime overhead (overhead="
473 << NV("Overhead", (unsigned)NormWeightedRcost.getFrequency())
475 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")"
476 << " of making the outlined call is too high");
// All checks passed: report the favorable cost and proceed.
481 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
482 << NV("Callee", F) << " can be partially inlined into "
483 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
485 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
489 // TODO: Ideally we should share Inliner's InlineCost Analysis code.
490 // For now use a simplified version. The returned 'InlineCost' will be used
491 // to estimate the size cost as well as runtime cost of the BB.
492 int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
494 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
495 for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// Debug intrinsics are free: they carry no size or runtime cost.
496 if (isa<DbgInfoIntrinsic>(I))
// Calls and invokes are charged via the shared call-site cost helper.
499 if (CallInst *CI = dyn_cast<CallInst>(I)) {
500 InlineCost += getCallsiteCost(CallSite(CI), DL);
504 if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
505 InlineCost += getCallsiteCost(CallSite(II), DL);
// A switch costs roughly one instruction per case plus the default.
509 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
510 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
// Everything else is charged one generic instruction cost.
513 InlineCost += InlineConstants::InstrCost;
// Computes (call-sequence size cost, runtime overhead, original region size)
// for the outlining of F's cold region into OutlinedFunction; see the
// declaration comment for the meaning of each tuple element.
518 std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
519 Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
520 BasicBlock *OutliningCallBB) {
521 // First compute the cost of the outlined region 'OI' in the original
523 int OutlinedRegionCost = 0;
// The outlined region is every block of F except the return block and
// the guarding entry blocks.
524 for (BasicBlock &BB : *F) {
525 if (&BB != OI->ReturnBlock &&
526 // Assuming Entry set is small -- do a linear search here:
527 std::find(OI->Entries.begin(), OI->Entries.end(), &BB) ==
529 OutlinedRegionCost += computeBBInlineCost(&BB);
533 // Now compute the cost of the call sequence to the outlined function
534 // 'OutlinedFunction' in BB 'OutliningCallBB':
535 int OutliningFuncCallCost = computeBBInlineCost(OutliningCallBB);
537 // Now compute the cost of the extracted/outlined function itself:
538 int OutlinedFunctionCost = 0;
539 for (BasicBlock &BB : *OutlinedFunction) {
540 OutlinedFunctionCost += computeBBInlineCost(&BB);
// The extracted function can only grow (argument setup, new return block),
// so its cost must dominate the original region's cost.
543 assert(OutlinedFunctionCost >= OutlinedRegionCost &&
544 "Outlined function cost should be no less than the outlined region");
545 int OutliningRuntimeOverhead =
546 OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
548 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
552 // Create the callsite to profile count map which is
553 // used to update the original function's entry count,
554 // after the function is partially inlined into the callsite.
555 void PartialInlinerImpl::computeCallsiteToProfCountMap(
556 Function *DuplicateFunction,
557 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) {
558 std::vector<User *> Users(DuplicateFunction->user_begin(),
559 DuplicateFunction->user_end());
// Cache one BFI per caller; users of the same caller are processed in
// runs, so recompute only when the caller changes.
560 Function *CurrentCaller = nullptr;
561 std::unique_ptr<BlockFrequencyInfo> TempBFI;
562 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
564 auto ComputeCurrBFI = [&,this](Function *Caller) {
565 // For the old pass manager:
// No GetBFI callback available, so build DT/LI/BPI/BFI by hand.
567 DominatorTree DT(*Caller);
569 BranchProbabilityInfo BPI(*Caller, LI);
570 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
571 CurrentCallerBFI = TempBFI.get();
// New pass manager: fetch the cached analysis result.
574 CurrentCallerBFI = &(*GetBFI)(*Caller);
578 for (User *User : Users) {
579 CallSite CS = getCallSite(User);
580 Function *Caller = CS.getCaller();
581 if (CurrentCaller != Caller) {
582 CurrentCaller = Caller;
583 ComputeCurrBFI(Caller);
585 assert(CurrentCallerBFI && "CallerBFI is not set");
587 BasicBlock *CallBB = CS.getInstruction()->getParent();
588 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
590 CallSiteToProfCountMap[User] = *Count;
// No profile count available for this block: record zero.
592 CallSiteToProfCountMap[User] = 0;
// Top-level driver for one function: clone F, split the return-block PHIs,
// extract the cold region with CodeExtractor, then try to inline the
// slimmed clone into F's callers. Returns the outlined function on
// success, otherwise cleans up the speculative artifacts.
596 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// Address-taken functions may be called indirectly; RAUW below would be
// unsound, so bail out.
598 if (F->hasAddressTaken())
601 // Let inliner handle it
602 if (F->hasFnAttribute(Attribute::AlwaysInline))
605 if (F->hasFnAttribute(Attribute::NoInline))
// Cold functions are not worth duplicating into callers.
608 if (PSI->isFunctionEntryCold(F))
611 if (F->user_begin() == F->user_end())
614 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
619 // Clone the function, so that we can hack away on it.
620 ValueToValueMapTy VMap;
621 Function *DuplicateFunction = CloneFunction(F, VMap);
622 BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
623 BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
624 DenseSet<BasicBlock *> NewEntries;
625 for (BasicBlock *BB : OI->Entries) {
626 NewEntries.insert(cast<BasicBlock>(VMap[BB]));
629 // Go ahead and update all uses to the duplicate, so that we can just
630 // use the inliner functionality when we're done hacking.
631 F->replaceAllUsesWith(DuplicateFunction);
633 auto getFirstPHI = [](BasicBlock *BB) {
634 BasicBlock::iterator I = BB->begin();
635 PHINode *FirstPhi = nullptr;
636 while (I != BB->end()) {
637 PHINode *Phi = dyn_cast<PHINode>(I);
647 // Special hackery is needed with PHI nodes that have inputs from more than
648 // one extracted block. For simplicity, just split the PHIs into a two-level
649 // sequence of PHIs, some of which will go in the extracted region, and some
650 // of which will go outside.
651 BasicBlock *PreReturn = NewReturnBlock;
652 // only split block when necessary:
653 PHINode *FirstPhi = getFirstPHI(PreReturn);
654 unsigned NumPredsFromEntries = OI->ReturnBlockPreds.size();
655 if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
657 NewReturnBlock = NewReturnBlock->splitBasicBlock(
658 NewReturnBlock->getFirstNonPHI()->getIterator());
659 BasicBlock::iterator I = PreReturn->begin();
660 Instruction *Ins = &NewReturnBlock->front();
661 while (I != PreReturn->end()) {
662 PHINode *OldPhi = dyn_cast<PHINode>(I);
// The outer PHI keeps one value per guarding entry plus one for the
// (to-be-extracted) PreReturn block.
667 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
668 OldPhi->replaceAllUsesWith(RetPhi);
669 Ins = NewReturnBlock->getFirstNonPHI();
671 RetPhi->addIncoming(&*I, PreReturn);
// Move the entry-block incoming values from the old PHI to the new one.
672 for (BasicBlock *E : OI->ReturnBlockPreds) {
673 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
674 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
675 OldPhi->removeIncomingValue(NewE);
// Retarget the guarding entries' branches at the split-off return block.
679 for (auto E : OI->ReturnBlockPreds) {
680 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
681 NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
685 // Returns true if the block is to be partial inlined into the caller
686 // (i.e. not to be extracted to the out of line function)
687 auto ToBeInlined = [&](BasicBlock *BB) {
688 return BB == NewReturnBlock || NewEntries.count(BB);
690 // Gather up the blocks that we're going to extract.
691 std::vector<BasicBlock *> ToExtract;
692 ToExtract.push_back(NewNonReturnBlock);
693 for (BasicBlock &BB : *DuplicateFunction)
694 if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
695 ToExtract.push_back(&BB);
697 // The CodeExtractor needs a dominator tree.
699 DT.recalculate(*DuplicateFunction);
701 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
703 BranchProbabilityInfo BPI(*DuplicateFunction, LI);
704 BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
706 // Extract the body of the if.
707 Function *OutlinedFunction =
708 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
709 .extractCodeRegion();
712 tryPartialInline(DuplicateFunction, F, OI.get(), OutlinedFunction, &BFI);
714 // Ditch the duplicate, since we're done with it, and rewrite all remaining
715 // users (function pointers, etc.) back to the original function.
716 DuplicateFunction->replaceAllUsesWith(F);
717 DuplicateFunction->eraseFromParent();
720 return OutlinedFunction;
722 // Remove the function that is speculatively created:
723 if (OutlinedFunction)
724 OutlinedFunction->eraseFromParent();
// Inline DuplicateFunction (the clone of F whose cold region is now an
// outlined call) into each profitable call site, maintaining F's entry
// count as call sites are absorbed. Returns true if any site was inlined.
729 bool PartialInlinerImpl::tryPartialInline(Function *DuplicateFunction,
731 FunctionOutliningInfo *OI,
732 Function *OutlinedFunction,
733 BlockFrequencyInfo *CalleeBFI) {
// CodeExtractor may have failed; nothing to do in that case.
734 if (OutlinedFunction == nullptr)
737 int NonWeightedRcost;
739 int OutlinedRegionSizeCost;
741 auto OutliningCallBB =
742 getOneCallSiteTo(OutlinedFunction).getInstruction()->getParent();
744 std::tie(SizeCost, NonWeightedRcost, OutlinedRegionSizeCost) =
745 computeOutliningCosts(F, OI, OutlinedFunction, OutliningCallBB);
747 // The call sequence to the outlined function is larger than the original
748 // outlined region size, it does not increase the chances of inlining
749 // 'F' with outlining (The inliner uses the size increase to model
750 // the cost of inlining a callee).
751 if (!SkipCostAnalysis && OutlinedRegionSizeCost < SizeCost) {
752 OptimizationRemarkEmitter ORE(F);
755 std::tie(DLoc, Block) = getOneDebugLoc(DuplicateFunction);
756 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall",
758 << ore::NV("Function", F)
759 << " not partially inlined into callers (Original Size = "
760 << ore::NV("OutlinedRegionOriginalSize", OutlinedRegionSizeCost)
761 << ", Size of call sequence to outlined function = "
762 << ore::NV("NewSize", SizeCost) << ")");
// unswitchFunction already redirected every use of F to the duplicate.
766 assert(F->user_begin() == F->user_end() &&
767 "F's users should all be replaced!");
768 std::vector<User *> Users(DuplicateFunction->user_begin(),
769 DuplicateFunction->user_end());
771 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
772 if (F->getEntryCount())
773 computeCallsiteToProfCountMap(DuplicateFunction, CallSiteToProfCountMap);
775 auto CalleeEntryCount = F->getEntryCount();
776 uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0);
777 bool AnyInline = false;
778 for (User *User : Users) {
779 CallSite CS = getCallSite(User);
// Respect the module-wide -max-partial-inlining budget.
781 if (IsLimitReached())
784 OptimizationRemarkEmitter ORE(CS.getCaller());
786 if (!shouldPartialInline(CS, F, OI, CalleeBFI, OutliningCallBB,
787 NonWeightedRcost, ORE))
791 OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction())
792 << ore::NV("Callee", F) << " partially inlined into "
793 << ore::NV("Caller", CS.getCaller()));
795 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI);
796 InlineFunction(CS, IFI);
798 // Now update the entry count:
799 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
800 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
// Saturating subtraction: the entry count never goes negative.
801 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
805 NumPartialInlining++;
// Publish the reduced entry count only if something was inlined.
810 if (AnyInline && CalleeEntryCount)
811 F->setEntryCount(CalleeEntryCountV);
// Module driver: visit every defined, used function; newly outlined
// functions are pushed back onto the worklist so they can themselves be
// considered for partial inlining.
816 bool PartialInlinerImpl::run(Module &M) {
817 if (DisablePartialInlining)
820 std::vector<Function *> Worklist;
821 Worklist.reserve(M.size());
822 for (Function &F : M)
823 if (!F.use_empty() && !F.isDeclaration())
824 Worklist.push_back(&F);
826 bool Changed = false;
827 while (!Worklist.empty()) {
828 Function *CurrFunc = Worklist.back();
// Uses may have disappeared since the worklist was built.
831 if (CurrFunc->use_empty())
// Skip recursive functions: inlining a self-call would duplicate the
// recursion into callers.
834 bool Recursive = false;
835 for (User *U : CurrFunc->users())
836 if (Instruction *I = dyn_cast<Instruction>(U))
837 if (I->getParent()->getParent() == CurrFunc) {
844 if (Function *NewFunc = unswitchFunction(CurrFunc)) {
845 Worklist.push_back(NewFunc);
853 char PartialInlinerLegacyPass::ID = 0;
// Register the legacy pass and its required analyses with the PassRegistry.
854 INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
855 "Partial Inliner", false, false)
856 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
857 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
858 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
859 INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
860 "Partial Inliner", false, false)
// Public factory used by the legacy pass pipeline builder.
862 ModulePass *llvm::createPartialInliningPass() {
863 return new PartialInlinerLegacyPass();
// New pass manager entry point: wire the analysis-manager results into
// PartialInlinerImpl. Unlike the legacy path, a BFI getter is available
// here and is passed through.
866 PreservedAnalyses PartialInlinerPass::run(Module &M,
867 ModuleAnalysisManager &AM) {
868 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
870 std::function<AssumptionCache &(Function &)> GetAssumptionCache =
871 [&FAM](Function &F) -> AssumptionCache & {
872 return FAM.getResult<AssumptionAnalysis>(F);
875 std::function<BlockFrequencyInfo &(Function &)> GetBFI =
876 [&FAM](Function &F) -> BlockFrequencyInfo & {
877 return FAM.getResult<BlockFrequencyAnalysis>(F);
880 std::function<TargetTransformInfo &(Function &)> GetTTI =
881 [&FAM](Function &F) -> TargetTransformInfo & {
882 return FAM.getResult<TargetIRAnalysis>(F);
885 ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
// Conservatively invalidate everything if any partial inlining happened.
887 if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
888 return PreservedAnalyses::none();
889 return PreservedAnalyses::all();