]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
Merge ^/head r317971 through r318379.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Transforms / IPO / PartialInlining.cpp
1 //===- PartialInlining.cpp - Inline parts of functions --------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs partial inlining, typically by inlining an if statement
11 // that surrounds the body of the function.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/Transforms/IPO/PartialInlining.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BlockFrequencyInfo.h"
18 #include "llvm/Analysis/BranchProbabilityInfo.h"
19 #include "llvm/Analysis/InlineCost.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/IR/CFG.h"
26 #include "llvm/IR/DiagnosticInfo.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/Pass.h"
31 #include "llvm/Transforms/IPO.h"
32 #include "llvm/Transforms/Utils/Cloning.h"
33 #include "llvm/Transforms/Utils/CodeExtractor.h"
34 using namespace llvm;
35
36 #define DEBUG_TYPE "partial-inlining"
37
38 STATISTIC(NumPartialInlined,
39           "Number of callsites functions partially inlined into.");
40
41 // Command line option to disable partial-inlining. The default is false:
42 static cl::opt<bool>
43     DisablePartialInlining("disable-partial-inlining", cl::init(false),
44                            cl::Hidden, cl::desc("Disable partial ininling"));
45
46 static cl::opt<unsigned> MaxNumInlineBlocks(
47     "max-num-inline-blocks", cl::init(5), cl::Hidden,
48     cl::desc("Max Number of Blocks  To be Partially Inlined"));
49
50 // Command line option to set the maximum number of partial inlining allowed
51 // for the module. The default value of -1 means no limit.
52 static cl::opt<int> MaxNumPartialInlining(
53     "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
54     cl::desc("Max number of partial inlining. The default is unlimited"));
55
56 namespace {
57
58 struct FunctionOutliningInfo {
59   FunctionOutliningInfo()
60       : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr),
61         ReturnBlockPreds() {}
62   // Returns the number of blocks to be inlined including all blocks
63   // in Entries and one return block.
64   unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; }
65
66   // A set of blocks including the function entry that guard
67   // the region to be outlined.
68   SmallVector<BasicBlock *, 4> Entries;
69   // The return block that is not included in the outlined region.
70   BasicBlock *ReturnBlock;
71   // The dominating block of the region ot be outlined.
72   BasicBlock *NonReturnBlock;
73   // The set of blocks in Entries that that are predecessors to ReturnBlock
74   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
75 };
76
// Module-level driver shared by the legacy and the new pass manager.  Holds
// the analysis accessors supplied by the owning pass, plus a running count
// of partial inlines used to enforce -max-partial-inlining.
struct PartialInlinerImpl {
  PartialInlinerImpl(
      std::function<AssumptionCache &(Function &)> *GetAC,
      std::function<TargetTransformInfo &(Function &)> *GTTI,
      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI,
      ProfileSummaryInfo *ProfSI)
      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}
  // Attempts partial inlining on every eligible function in M; returns
  // true if the module was changed.
  bool run(Module &M);
  // If F has the expected shape, outlines the non-return region of a clone
  // of F and inlines the remaining part into F's callers.  Returns the
  // extracted function, or null if F was not transformed.
  Function *unswitchFunction(Function *F);

  // Analyzes F's CFG; returns the outlining candidate description, or an
  // empty pointer when F does not match the expected shape.
  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F);

private:
  // Number of partial inlines performed so far (see IsLimitReached).
  int NumPartialInlining = 0;
  std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
  std::function<TargetTransformInfo &(Function &)> *GetTTI;
  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI;
  ProfileSummaryInfo *PSI;

  // Cost-model check (via getInlineCost) with optimization remarks.
  bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);
  // True when -max-partial-inlining is set and the budget is exhausted.
  bool IsLimitReached() {
    return (MaxNumPartialInlining != -1 &&
            NumPartialInlining >= MaxNumPartialInlining);
  }
};
102
103 struct PartialInlinerLegacyPass : public ModulePass {
104   static char ID; // Pass identification, replacement for typeid
105   PartialInlinerLegacyPass() : ModulePass(ID) {
106     initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
107   }
108
109   void getAnalysisUsage(AnalysisUsage &AU) const override {
110     AU.addRequired<AssumptionCacheTracker>();
111     AU.addRequired<ProfileSummaryInfoWrapperPass>();
112     AU.addRequired<TargetTransformInfoWrapperPass>();
113   }
114   bool runOnModule(Module &M) override {
115     if (skipModule(M))
116       return false;
117
118     AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
119     TargetTransformInfoWrapperPass *TTIWP =
120         &getAnalysis<TargetTransformInfoWrapperPass>();
121     ProfileSummaryInfo *PSI =
122         getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
123
124     std::function<AssumptionCache &(Function &)> GetAssumptionCache =
125         [&ACT](Function &F) -> AssumptionCache & {
126       return ACT->getAssumptionCache(F);
127     };
128
129     std::function<TargetTransformInfo &(Function &)> GetTTI =
130         [&TTIWP](Function &F) -> TargetTransformInfo & {
131       return TTIWP->getTTI(F);
132     };
133
134     return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);
135   }
136 };
137 }
138
139 std::unique_ptr<FunctionOutliningInfo>
140 PartialInlinerImpl::computeOutliningInfo(Function *F) {
141   BasicBlock *EntryBlock = &F->front();
142   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
143   if (!BR || BR->isUnconditional())
144     return std::unique_ptr<FunctionOutliningInfo>();
145
146   // Returns true if Succ is BB's successor
147   auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
148     return is_contained(successors(BB), Succ);
149   };
150
151   auto SuccSize = [](BasicBlock *BB) {
152     return std::distance(succ_begin(BB), succ_end(BB));
153   };
154
155   auto IsReturnBlock = [](BasicBlock *BB) {
156     TerminatorInst *TI = BB->getTerminator();
157     return isa<ReturnInst>(TI);
158   };
159
160   auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
161     if (IsReturnBlock(Succ1))
162       return std::make_tuple(Succ1, Succ2);
163     if (IsReturnBlock(Succ2))
164       return std::make_tuple(Succ2, Succ1);
165
166     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
167   };
168
169   // Detect a triangular shape:
170   auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) {
171     if (IsSuccessor(Succ1, Succ2))
172       return std::make_tuple(Succ1, Succ2);
173     if (IsSuccessor(Succ2, Succ1))
174       return std::make_tuple(Succ2, Succ1);
175
176     return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
177   };
178
179   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
180       llvm::make_unique<FunctionOutliningInfo>();
181
182   BasicBlock *CurrEntry = EntryBlock;
183   bool CandidateFound = false;
184   do {
185     // The number of blocks to be inlined has already reached
186     // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
187     // disables partial inlining for the function.
188     if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks)
189       break;
190
191     if (SuccSize(CurrEntry) != 2)
192       break;
193
194     BasicBlock *Succ1 = *succ_begin(CurrEntry);
195     BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
196
197     BasicBlock *ReturnBlock, *NonReturnBlock;
198     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
199
200     if (ReturnBlock) {
201       OutliningInfo->Entries.push_back(CurrEntry);
202       OutliningInfo->ReturnBlock = ReturnBlock;
203       OutliningInfo->NonReturnBlock = NonReturnBlock;
204       CandidateFound = true;
205       break;
206     }
207
208     BasicBlock *CommSucc;
209     BasicBlock *OtherSucc;
210     std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
211
212     if (!CommSucc)
213       break;
214
215     OutliningInfo->Entries.push_back(CurrEntry);
216     CurrEntry = OtherSucc;
217
218   } while (true);
219
220   if (!CandidateFound)
221     return std::unique_ptr<FunctionOutliningInfo>();
222
223   // Do sanity check of the entries: threre should not
224   // be any successors (not in the entry set) other than
225   // {ReturnBlock, NonReturnBlock}
226   assert(OutliningInfo->Entries[0] == &F->front());
227   DenseSet<BasicBlock *> Entries;
228   for (BasicBlock *E : OutliningInfo->Entries)
229     Entries.insert(E);
230
231   // Returns true of BB has Predecessor which is not
232   // in Entries set.
233   auto HasNonEntryPred = [Entries](BasicBlock *BB) {
234     for (auto Pred : predecessors(BB)) {
235       if (!Entries.count(Pred))
236         return true;
237     }
238     return false;
239   };
240   auto CheckAndNormalizeCandidate =
241       [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
242         for (BasicBlock *E : OutliningInfo->Entries) {
243           for (auto Succ : successors(E)) {
244             if (Entries.count(Succ))
245               continue;
246             if (Succ == OutliningInfo->ReturnBlock)
247               OutliningInfo->ReturnBlockPreds.push_back(E);
248             else if (Succ != OutliningInfo->NonReturnBlock)
249               return false;
250           }
251           // There should not be any outside incoming edges either:
252           if (HasNonEntryPred(E))
253             return false;
254         }
255         return true;
256       };
257
258   if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
259     return std::unique_ptr<FunctionOutliningInfo>();
260
261   // Now further growing the candidate's inlining region by
262   // peeling off dominating blocks from the outlining region:
263   while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) {
264     BasicBlock *Cand = OutliningInfo->NonReturnBlock;
265     if (SuccSize(Cand) != 2)
266       break;
267
268     if (HasNonEntryPred(Cand))
269       break;
270
271     BasicBlock *Succ1 = *succ_begin(Cand);
272     BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
273
274     BasicBlock *ReturnBlock, *NonReturnBlock;
275     std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
276     if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
277       break;
278
279     if (NonReturnBlock->getSinglePredecessor() != Cand)
280       break;
281
282     // Now grow and update OutlininigInfo:
283     OutliningInfo->Entries.push_back(Cand);
284     OutliningInfo->NonReturnBlock = NonReturnBlock;
285     OutliningInfo->ReturnBlockPreds.push_back(Cand);
286     Entries.insert(Cand);
287   }
288
289   return OutliningInfo;
290 }
291
292 bool PartialInlinerImpl::shouldPartialInline(CallSite CS,
293                                              OptimizationRemarkEmitter &ORE) {
294   // TODO : more sharing with shouldInline in Inliner.cpp
295   using namespace ore;
296   Instruction *Call = CS.getInstruction();
297   Function *Callee = CS.getCalledFunction();
298   Function *Caller = CS.getCaller();
299   auto &CalleeTTI = (*GetTTI)(*Callee);
300   InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI,
301                                 *GetAssumptionCache, GetBFI, PSI);
302
303   if (IC.isAlways()) {
304     ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
305              << NV("Callee", Callee)
306              << " should always be fully inlined, not partially");
307     return false;
308   }
309
310   if (IC.isNever()) {
311     ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
312              << NV("Callee", Callee) << " not partially inlined into "
313              << NV("Caller", Caller)
314              << " because it should never be inlined (cost=never)");
315     return false;
316   }
317
318   if (!IC) {
319     ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
320              << NV("Callee", Callee) << " not partially inlined into "
321              << NV("Caller", Caller) << " because too costly to inline (cost="
322              << NV("Cost", IC.getCost()) << ", threshold="
323              << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
324     return false;
325   }
326
327   ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call)
328            << NV("Callee", Callee) << " can be partially inlined into "
329            << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
330            << " (threshold="
331            << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
332   return true;
333 }
334
335 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
336
337   if (F->hasAddressTaken())
338     return nullptr;
339
340   // Let inliner handle it
341   if (F->hasFnAttribute(Attribute::AlwaysInline))
342     return nullptr;
343
344   if (F->hasFnAttribute(Attribute::NoInline))
345     return nullptr;
346
347   if (PSI->isFunctionEntryCold(F))
348     return nullptr;
349
350   std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
351       computeOutliningInfo(F);
352
353   if (!OutliningInfo)
354     return nullptr;
355
356   // Clone the function, so that we can hack away on it.
357   ValueToValueMapTy VMap;
358   Function *DuplicateFunction = CloneFunction(F, VMap);
359   BasicBlock *NewReturnBlock =
360       cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]);
361   BasicBlock *NewNonReturnBlock =
362       cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]);
363   DenseSet<BasicBlock *> NewEntries;
364   for (BasicBlock *BB : OutliningInfo->Entries) {
365     NewEntries.insert(cast<BasicBlock>(VMap[BB]));
366   }
367
368   // Go ahead and update all uses to the duplicate, so that we can just
369   // use the inliner functionality when we're done hacking.
370   F->replaceAllUsesWith(DuplicateFunction);
371
372   auto getFirstPHI = [](BasicBlock *BB) {
373     BasicBlock::iterator I = BB->begin();
374     PHINode *FirstPhi = nullptr;
375     while (I != BB->end()) {
376       PHINode *Phi = dyn_cast<PHINode>(I);
377       if (!Phi)
378         break;
379       if (!FirstPhi) {
380         FirstPhi = Phi;
381         break;
382       }
383     }
384     return FirstPhi;
385   };
386   // Special hackery is needed with PHI nodes that have inputs from more than
387   // one extracted block.  For simplicity, just split the PHIs into a two-level
388   // sequence of PHIs, some of which will go in the extracted region, and some
389   // of which will go outside.
390   BasicBlock *PreReturn = NewReturnBlock;
391   // only split block when necessary:
392   PHINode *FirstPhi = getFirstPHI(PreReturn);
393   unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size();
394   if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) {
395
396     NewReturnBlock = NewReturnBlock->splitBasicBlock(
397         NewReturnBlock->getFirstNonPHI()->getIterator());
398     BasicBlock::iterator I = PreReturn->begin();
399     Instruction *Ins = &NewReturnBlock->front();
400     while (I != PreReturn->end()) {
401       PHINode *OldPhi = dyn_cast<PHINode>(I);
402       if (!OldPhi)
403         break;
404
405       PHINode *RetPhi =
406           PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
407       OldPhi->replaceAllUsesWith(RetPhi);
408       Ins = NewReturnBlock->getFirstNonPHI();
409
410       RetPhi->addIncoming(&*I, PreReturn);
411       for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) {
412         BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
413         RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE);
414         OldPhi->removeIncomingValue(NewE);
415       }
416       ++I;
417     }
418     for (auto E : OutliningInfo->ReturnBlockPreds) {
419       BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
420       NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
421     }
422   }
423
424   // Returns true if the block is to be partial inlined into the caller
425   // (i.e. not to be extracted to the out of line function)
426   auto ToBeInlined = [=](BasicBlock *BB) {
427     return BB == NewReturnBlock || NewEntries.count(BB);
428   };
429   // Gather up the blocks that we're going to extract.
430   std::vector<BasicBlock *> ToExtract;
431   ToExtract.push_back(NewNonReturnBlock);
432   for (BasicBlock &BB : *DuplicateFunction)
433     if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)
434       ToExtract.push_back(&BB);
435
436   // The CodeExtractor needs a dominator tree.
437   DominatorTree DT;
438   DT.recalculate(*DuplicateFunction);
439
440   // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
441   LoopInfo LI(DT);
442   BranchProbabilityInfo BPI(*DuplicateFunction, LI);
443   BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
444
445   // Extract the body of the if.
446   Function *ExtractedFunction =
447       CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
448           .extractCodeRegion();
449
450   // Inline the top-level if test into all callers.
451   std::vector<User *> Users(DuplicateFunction->user_begin(),
452                             DuplicateFunction->user_end());
453
454   for (User *User : Users) {
455     CallSite CS;
456     if (CallInst *CI = dyn_cast<CallInst>(User))
457       CS = CallSite(CI);
458     else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
459       CS = CallSite(II);
460     else
461       llvm_unreachable("All uses must be calls");
462
463     if (IsLimitReached())
464       continue;
465
466     OptimizationRemarkEmitter ORE(CS.getCaller());
467     if (!shouldPartialInline(CS, ORE))
468       continue;
469
470     DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
471     BasicBlock *Block = CS.getParent();
472     ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
473              << ore::NV("Callee", F) << " partially inlined into "
474              << ore::NV("Caller", CS.getCaller()));
475
476     InlineFunctionInfo IFI(nullptr, GetAssumptionCache);
477     InlineFunction(CS, IFI);
478     NumPartialInlining++;
479     // update stats
480     NumPartialInlined++;
481   }
482
483   // Ditch the duplicate, since we're done with it, and rewrite all remaining
484   // users (function pointers, etc.) back to the original function.
485   DuplicateFunction->replaceAllUsesWith(F);
486   DuplicateFunction->eraseFromParent();
487
488
489   return ExtractedFunction;
490 }
491
492 bool PartialInlinerImpl::run(Module &M) {
493   if (DisablePartialInlining)
494     return false;
495
496   std::vector<Function *> Worklist;
497   Worklist.reserve(M.size());
498   for (Function &F : M)
499     if (!F.use_empty() && !F.isDeclaration())
500       Worklist.push_back(&F);
501
502   bool Changed = false;
503   while (!Worklist.empty()) {
504     Function *CurrFunc = Worklist.back();
505     Worklist.pop_back();
506
507     if (CurrFunc->use_empty())
508       continue;
509
510     bool Recursive = false;
511     for (User *U : CurrFunc->users())
512       if (Instruction *I = dyn_cast<Instruction>(U))
513         if (I->getParent()->getParent() == CurrFunc) {
514           Recursive = true;
515           break;
516         }
517     if (Recursive)
518       continue;
519
520     if (Function *NewFunc = unswitchFunction(CurrFunc)) {
521       Worklist.push_back(NewFunc);
522       Changed = true;
523     }
524   }
525
526   return Changed;
527 }
528
char PartialInlinerLegacyPass::ID = 0;
// Register the legacy pass and its required analyses with the pass registry.
INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
                      "Partial Inliner", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
                    "Partial Inliner", false, false)

// Factory used by the legacy pass-manager pipeline builders.
ModulePass *llvm::createPartialInliningPass() {
  return new PartialInlinerLegacyPass();
}
541
542 PreservedAnalyses PartialInlinerPass::run(Module &M,
543                                           ModuleAnalysisManager &AM) {
544   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
545
546   std::function<AssumptionCache &(Function &)> GetAssumptionCache =
547       [&FAM](Function &F) -> AssumptionCache & {
548     return FAM.getResult<AssumptionAnalysis>(F);
549   };
550
551   std::function<BlockFrequencyInfo &(Function &)> GetBFI =
552       [&FAM](Function &F) -> BlockFrequencyInfo & {
553     return FAM.getResult<BlockFrequencyAnalysis>(F);
554   };
555
556   std::function<TargetTransformInfo &(Function &)> GetTTI =
557       [&FAM](Function &F) -> TargetTransformInfo & {
558     return FAM.getResult<TargetIRAnalysis>(F);
559   };
560
561   ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
562
563   if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))
564     return PreservedAnalyses::none();
565   return PreservedAnalyses::all();
566 }