1 //===- MLInlineAdvisor.cpp - machine learned InlineAdvisor ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the interface between the inliner and a learned model.
10 // It delegates model evaluation to either the AOT compiled model (the
11 // 'release' mode) or a runtime-loaded model (the 'development' case).
13 //===----------------------------------------------------------------------===//
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
35 static cl::opt<std::string> InteractiveChannelBaseName(
36 "inliner-interactive-channel-base", cl::Hidden,
38 "Base file path for the interactive mode. The incoming filename should "
39 "have the name <inliner-interactive-channel-base>.in, while the "
40 "outgoing name should be <inliner-interactive-channel-base>.out"));
41 static const std::string InclDefaultMsg =
42 (Twine("In interactive mode, also send the default policy decision: ") +
43 DefaultDecisionName + ".")
46 InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden,
47 cl::desc(InclDefaultMsg));
49 #if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
51 #include "InlinerSizeModel.h" // NOLINT
52 using CompiledModelType = llvm::InlinerSizeModel;
54 using CompiledModelType = NoopSavedModelImpl;
57 std::unique_ptr<InlineAdvisor>
58 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
59 std::function<bool(CallBase &)> GetDefaultAdvice) {
60 if (!llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() &&
61 InteractiveChannelBaseName.empty())
63 std::unique_ptr<MLModelRunner> AOTRunner;
64 if (InteractiveChannelBaseName.empty())
65 AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
66 M.getContext(), FeatureMap, DecisionName);
68 auto Features = FeatureMap;
69 if (InteractiveIncludeDefault)
70 Features.push_back(DefaultDecisionSpec);
71 AOTRunner = std::make_unique<InteractiveModelRunner>(
72 M.getContext(), Features, InlineDecisionSpec,
73 InteractiveChannelBaseName + ".out",
74 InteractiveChannelBaseName + ".in");
76 return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner),
80 #define DEBUG_TYPE "inline-ml"
82 static cl::opt<float> SizeIncreaseThreshold(
83 "ml-advisor-size-increase-threshold", cl::Hidden,
84 cl::desc("Maximum factor by which expected native size may increase before "
85 "blocking any further inlining."),
88 static cl::opt<bool> KeepFPICache(
89 "ml-advisor-keep-fpi-cache", cl::Hidden,
91 "For test - keep the ML Inline advisor's FunctionPropertiesInfo cache"),
95 const std::vector<TensorSpec> llvm::FeatureMap{
96 #define POPULATE_NAMES(DTYPE, SHAPE, NAME, __) TensorSpec::createSpec<DTYPE>(#NAME, SHAPE),
97 // InlineCost features - these must come first
98 INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
101 INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
102 #undef POPULATE_NAMES
106 const char *const llvm::DecisionName = "inlining_decision";
107 const TensorSpec llvm::InlineDecisionSpec =
108 TensorSpec::createSpec<int64_t>(DecisionName, {1});
109 const char *const llvm::DefaultDecisionName = "inlining_default";
110 const TensorSpec llvm::DefaultDecisionSpec =
111 TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1});
112 const char *const llvm::RewardName = "delta_size";
114 CallBase *getInlinableCS(Instruction &I) {
115 if (auto *CS = dyn_cast<CallBase>(&I))
116 if (Function *Callee = CS->getCalledFunction()) {
117 if (!Callee->isDeclaration()) {
124 MLInlineAdvisor::MLInlineAdvisor(
125 Module &M, ModuleAnalysisManager &MAM,
126 std::unique_ptr<MLModelRunner> Runner,
127 std::function<bool(CallBase &)> GetDefaultAdvice)
129 M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
130 ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice),
131 CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
132 InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
134 ModelRunner->switchContext("");
135 // Extract the 'call site height' feature - the position of a call site
136 // relative to the farthest statically reachable SCC node. We don't mutate
137 // this value while inlining happens. Empirically, this feature proved
138 // critical in behavioral cloning - i.e. training a model to mimic the manual
139 // heuristic's decisions - and, thus, equally important for training for
142 for (auto I = scc_begin(&CGraph); !I.isAtEnd(); ++I) {
143 const std::vector<CallGraphNode *> &CGNodes = *I;
145 for (auto *CGNode : CGNodes) {
146 Function *F = CGNode->getFunction();
147 if (!F || F->isDeclaration())
149 for (auto &I : instructions(F)) {
150 if (auto *CS = getInlinableCS(I)) {
151 auto *Called = CS->getCalledFunction();
152 auto Pos = FunctionLevels.find(&CG.get(*Called));
153 // In bottom up traversal, an inlinable callee is either in the
154 // same SCC, or to a function in a visited SCC. So not finding its
155 // level means we haven't visited it yet, meaning it's in this SCC.
156 if (Pos == FunctionLevels.end())
158 Level = std::max(Level, Pos->second + 1);
162 for (auto *CGNode : CGNodes) {
163 Function *F = CGNode->getFunction();
164 if (F && !F->isDeclaration())
165 FunctionLevels[&CG.get(*F)] = Level;
168 for (auto KVP : FunctionLevels) {
169 AllNodes.insert(KVP.first);
170 EdgeCount += getLocalCalls(KVP.first->getFunction());
172 NodeCount = AllNodes.size();
175 unsigned MLInlineAdvisor::getInitialFunctionLevel(const Function &F) const {
176 return CG.lookup(F) ? FunctionLevels.at(CG.lookup(F)) : 0;
179 void MLInlineAdvisor::onPassEntry(LazyCallGraph::SCC *LastSCC) {
180 if (!LastSCC || ForceStop)
183 // Function passes executed between InlinerPass runs may have changed the
184 // module-wide features.
185 // The cgscc pass manager rules are such that:
186 // - if a pass leads to merging SCCs, then the pipeline is restarted on the
188 // - if a pass leads to splitting the SCC, then we continue with one of the
190 // This means that the NodesInLastSCC is a superset (not strict) of the nodes
191 // that subsequent passes would have processed
192 // - in addition, if new Nodes were created by a pass (e.g. CoroSplit),
193 // they'd be adjacent to Nodes in the last SCC. So we just need to check the
194 // boundary of Nodes in NodesInLastSCC for Nodes we haven't seen. We don't
195 // care about the nature of the Edge (call or ref).
196 NodeCount -= static_cast<int64_t>(NodesInLastSCC.size());
197 while (!NodesInLastSCC.empty()) {
198 const auto *N = *NodesInLastSCC.begin();
199 NodesInLastSCC.erase(N);
200 // The Function wrapped by N could have been deleted since we last saw it.
202 assert(!N->getFunction().isDeclaration());
206 EdgeCount += getLocalCalls(N->getFunction());
207 for (const auto &E : *(*N)) {
208 const auto *AdjNode = &E.getNode();
209 assert(!AdjNode->isDead() && !AdjNode->getFunction().isDeclaration());
210 auto I = AllNodes.insert(AdjNode);
212 NodesInLastSCC.insert(AdjNode);
216 EdgeCount -= EdgesOfLastSeenNodes;
217 EdgesOfLastSeenNodes = 0;
219 // (Re)use NodesInLastSCC to remember the nodes in the SCC right now,
220 // in case the SCC is split before onPassExit and some nodes are split out
221 assert(NodesInLastSCC.empty());
222 for (const auto &N : *LastSCC)
223 NodesInLastSCC.insert(&N);
226 void MLInlineAdvisor::onPassExit(LazyCallGraph::SCC *LastSCC) {
227 // No need to keep this around - function passes will invalidate it.
230 if (!LastSCC || ForceStop)
232 // Keep track of the nodes and edges we last saw. Then, in onPassEntry,
233 // we update the node count and edge count from the subset of these nodes that
235 EdgesOfLastSeenNodes = 0;
237 // Check on nodes that were in SCC onPassEntry
238 for (auto I = NodesInLastSCC.begin(); I != NodesInLastSCC.end();) {
240 NodesInLastSCC.erase(*I++);
242 EdgesOfLastSeenNodes += getLocalCalls((*I++)->getFunction());
245 // Check on nodes that may have got added to SCC
246 for (const auto &N : *LastSCC) {
248 auto I = NodesInLastSCC.insert(&N);
250 EdgesOfLastSeenNodes += getLocalCalls(N.getFunction());
252 assert(NodeCount >= NodesInLastSCC.size());
253 assert(EdgeCount >= EdgesOfLastSeenNodes);
256 int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
257 return getCachedFPI(F).DirectCallsToDefinedFunctions;
260 // Update the internal state of the advisor, and force invalidate feature
261 // analysis. Currently, we maintain minimal (and very simple) global state - the
262 // number of functions and the number of static calls. We also keep track of the
263 // total IR size in this module, to stop misbehaving policies at a certain bloat
264 // factor (SizeIncreaseThreshold)
265 void MLInlineAdvisor::onSuccessfulInlining(const MLInlineAdvice &Advice,
266 bool CalleeWasDeleted) {
268 Function *Caller = Advice.getCaller();
269 Function *Callee = Advice.getCallee();
270 // The caller features aren't valid anymore.
272 PreservedAnalyses PA = PreservedAnalyses::all();
273 PA.abandon<FunctionPropertiesAnalysis>();
274 PA.abandon<DominatorTreeAnalysis>();
275 PA.abandon<LoopAnalysis>();
276 FAM.invalidate(*Caller, PA);
278 Advice.updateCachedCallerFPI(FAM);
279 int64_t IRSizeAfter =
280 getIRSize(*Caller) + (CalleeWasDeleted ? 0 : Advice.CalleeIRSize);
281 CurrentIRSize += IRSizeAfter - (Advice.CallerIRSize + Advice.CalleeIRSize);
282 if (CurrentIRSize > SizeIncreaseThreshold * InitialIRSize)
285 // We can delta-update module-wide features. We know the inlining only changed
286 // the caller, and maybe the callee (by deleting the latter).
287 // Nodes are simple to update.
288 // For edges, we 'forget' the edges that the caller and callee used to have
289 // before inlining, and add back what they currently have together.
290 int64_t NewCallerAndCalleeEdges =
291 getCachedFPI(*Caller).DirectCallsToDefinedFunctions;
293 if (CalleeWasDeleted)
296 NewCallerAndCalleeEdges +=
297 getCachedFPI(*Callee).DirectCallsToDefinedFunctions;
298 EdgeCount += (NewCallerAndCalleeEdges - Advice.CallerAndCalleeEdges);
299 assert(CurrentIRSize >= 0 && EdgeCount >= 0 && NodeCount >= 0);
302 int64_t MLInlineAdvisor::getModuleIRSize() const {
305 if (!F.isDeclaration())
310 FunctionPropertiesInfo &MLInlineAdvisor::getCachedFPI(Function &F) const {
312 FPICache.insert(std::make_pair(&F, FunctionPropertiesInfo()));
313 if (!InsertPair.second)
314 return InsertPair.first->second;
315 InsertPair.first->second = FAM.getResult<FunctionPropertiesAnalysis>(F);
316 return InsertPair.first->second;
319 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
320 if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
323 auto &Caller = *CB.getCaller();
324 auto &Callee = *CB.getCalledFunction();
326 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
327 return FAM.getResult<AssumptionAnalysis>(F);
329 auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
330 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
332 auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
333 // If this is a "never inline" case, there won't be any changes to internal
334 // state we need to track, so we can just return the base InlineAdvice, which
335 // will do nothing interesting.
336 // Same thing if this is a recursive case.
337 if (MandatoryKind == InlineAdvisor::MandatoryInliningKind::Never ||
339 return getMandatoryAdvice(CB, false);
342 MandatoryKind == InlineAdvisor::MandatoryInliningKind::Always;
344 // If we need to stop, we won't want to track anymore any state changes, so
345 // we just return the base InlineAdvice, which acts as a noop.
348 return OptimizationRemarkMissed(DEBUG_TYPE, "ForceStop", &CB)
349 << "Won't attempt inlining because module size grew too much.";
351 return std::make_unique<InlineAdvice>(this, CB, ORE, Mandatory);
354 int CostEstimate = 0;
356 auto IsCallSiteInlinable =
357 llvm::getInliningCostEstimate(CB, TIR, GetAssumptionCache);
358 if (!IsCallSiteInlinable) {
359 // We can't inline this for correctness reasons, so return the base
360 // InlineAdvice, as we don't care about tracking any state changes (which
362 return std::make_unique<InlineAdvice>(this, CB, ORE, false);
364 CostEstimate = *IsCallSiteInlinable;
367 const auto CostFeatures =
368 llvm::getInliningCostFeatures(CB, TIR, GetAssumptionCache);
370 return std::make_unique<InlineAdvice>(this, CB, ORE, false);
374 return getMandatoryAdvice(CB, true);
376 auto NrCtantParams = 0;
377 for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
378 NrCtantParams += (isa<Constant>(*I));
381 auto &CallerBefore = getCachedFPI(Caller);
382 auto &CalleeBefore = getCachedFPI(Callee);
384 *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_basic_block_count) =
385 CalleeBefore.BasicBlockCount;
386 *ModelRunner->getTensor<int64_t>(FeatureIndex::callsite_height) =
387 getInitialFunctionLevel(Caller);
388 *ModelRunner->getTensor<int64_t>(FeatureIndex::node_count) = NodeCount;
389 *ModelRunner->getTensor<int64_t>(FeatureIndex::nr_ctant_params) =
391 *ModelRunner->getTensor<int64_t>(FeatureIndex::edge_count) = EdgeCount;
392 *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_users) =
394 *ModelRunner->getTensor<int64_t>(
395 FeatureIndex::caller_conditionally_executed_blocks) =
396 CallerBefore.BlocksReachedFromConditionalInstruction;
397 *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_basic_block_count) =
398 CallerBefore.BasicBlockCount;
399 *ModelRunner->getTensor<int64_t>(
400 FeatureIndex::callee_conditionally_executed_blocks) =
401 CalleeBefore.BlocksReachedFromConditionalInstruction;
402 *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_users) =
404 *ModelRunner->getTensor<int64_t>(FeatureIndex::cost_estimate) = CostEstimate;
406 // Add the cost features
408 I < static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures); ++I) {
409 *ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
410 static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
412 // This one would have been set up to be right at the end.
413 if (!InteractiveChannelBaseName.empty() && InteractiveIncludeDefault)
414 *ModelRunner->getTensor<int64_t>(InlineCostFeatureIndex::NumberOfFeatures) =
415 GetDefaultAdvice(CB);
416 return getAdviceFromModel(CB, ORE);
419 std::unique_ptr<MLInlineAdvice>
420 MLInlineAdvisor::getAdviceFromModel(CallBase &CB,
421 OptimizationRemarkEmitter &ORE) {
422 return std::make_unique<MLInlineAdvice>(
423 this, CB, ORE, static_cast<bool>(ModelRunner->evaluate<int64_t>()));
426 std::unique_ptr<InlineAdvice>
427 MLInlineAdvisor::getSkipAdviceIfUnreachableCallsite(CallBase &CB) {
428 if (!FAM.getResult<DominatorTreeAnalysis>(*CB.getCaller())
429 .isReachableFromEntry(CB.getParent()))
430 return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), false);
434 std::unique_ptr<InlineAdvice> MLInlineAdvisor::getMandatoryAdvice(CallBase &CB,
436 // Make sure we track inlinings in all cases - mandatory or not.
437 if (auto Skip = getSkipAdviceIfUnreachableCallsite(CB))
439 if (Advice && !ForceStop)
440 return getMandatoryAdviceImpl(CB);
442 // If this is a "never inline" case, there won't be any changes to internal
443 // state we need to track, so we can just return the base InlineAdvice, which
444 // will do nothing interesting.
445 // Same if we are forced to stop - we don't track anymore.
446 return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
449 std::unique_ptr<MLInlineAdvice>
450 MLInlineAdvisor::getMandatoryAdviceImpl(CallBase &CB) {
451 return std::make_unique<MLInlineAdvice>(this, CB, getCallerORE(CB), true);
454 void MLInlineAdvisor::print(raw_ostream &OS) const {
455 OS << "[MLInlineAdvisor] Nodes: " << NodeCount << " Edges: " << EdgeCount
456 << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n";
457 OS << "[MLInlineAdvisor] FPI:\n";
458 for (auto I : FPICache) {
459 OS << I.first->getName() << ":\n";
466 MLInlineAdvice::MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
467 OptimizationRemarkEmitter &ORE,
469 : InlineAdvice(Advisor, CB, ORE, Recommendation),
470 CallerIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Caller)),
471 CalleeIRSize(Advisor->isForcedToStop() ? 0 : Advisor->getIRSize(*Callee)),
472 CallerAndCalleeEdges(Advisor->isForcedToStop()
474 : (Advisor->getLocalCalls(*Caller) +
475 Advisor->getLocalCalls(*Callee))),
476 PreInlineCallerFPI(Advisor->getCachedFPI(*Caller)) {
478 FPU.emplace(Advisor->getCachedFPI(*getCaller()), CB);
481 void MLInlineAdvice::reportContextForRemark(
482 DiagnosticInfoOptimizationBase &OR) {
484 OR << NV("Callee", Callee->getName());
485 for (size_t I = 0; I < NumberOfFeatures; ++I)
486 OR << NV(FeatureMap[I].name(),
487 *getAdvisor()->getModelRunner().getTensor<int64_t>(I));
488 OR << NV("ShouldInline", isInliningRecommended());
491 void MLInlineAdvice::updateCachedCallerFPI(FunctionAnalysisManager &FAM) const {
495 void MLInlineAdvice::recordInliningImpl() {
497 OptimizationRemark R(DEBUG_TYPE, "InliningSuccess", DLoc, Block);
498 reportContextForRemark(R);
501 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ false);
504 void MLInlineAdvice::recordInliningWithCalleeDeletedImpl() {
506 OptimizationRemark R(DEBUG_TYPE, "InliningSuccessWithCalleeDeleted", DLoc,
508 reportContextForRemark(R);
511 getAdvisor()->onSuccessfulInlining(*this, /*CalleeWasDeleted*/ true);
514 void MLInlineAdvice::recordUnsuccessfulInliningImpl(
515 const InlineResult &Result) {
516 getAdvisor()->getCachedFPI(*Caller) = PreInlineCallerFPI;
518 OptimizationRemarkMissed R(DEBUG_TYPE, "InliningAttemptedAndUnsuccessful",
520 reportContextForRemark(R);
524 void MLInlineAdvice::recordUnattemptedInliningImpl() {
527 OptimizationRemarkMissed R(DEBUG_TYPE, "IniningNotAttempted", DLoc, Block);
528 reportContextForRemark(R);