1 //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Loops should be simplified before this analysis.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/Analysis/BranchProbabilityInfo.h"
14 #include "llvm/ADT/PostOrderIterator.h"
15 #include "llvm/ADT/SCCIterator.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/PostDominators.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/IR/Attributes.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/CFG.h"
24 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/InitializePasses.h"
36 #include "llvm/Pass.h"
37 #include "llvm/Support/BranchProbability.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/raw_ostream.h"
49 #define DEBUG_TYPE "branch-prob"
51 static cl::opt<bool> PrintBranchProb(
52 "print-bpi", cl::init(false), cl::Hidden,
53 cl::desc("Print the branch probability info."));
55 cl::opt<std::string> PrintBranchProbFuncName(
56 "print-bpi-func-name", cl::Hidden,
57 cl::desc("The option to specify the name of the function "
58 "whose branch probability info is printed."));
60 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",
61 "Branch Probability Analysis", false, true)
62 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
63 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
64 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
65 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",
66 "Branch Probability Analysis", false, true)
68 BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass()
70 initializeBranchProbabilityInfoWrapperPassPass(
71 *PassRegistry::getPassRegistry());
74 char BranchProbabilityInfoWrapperPass::ID = 0;
76 // Weights are for internal use only. They are used by heuristics to help to
77 // estimate edges' probability. Example:
79 // Using "Loop Branch Heuristics" we predict weights of edges for the
94 // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
95 // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
96 static const uint32_t LBH_TAKEN_WEIGHT = 124;
97 static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
98 // Unlikely edges within a loop are half as likely as other edges
99 static const uint32_t LBH_UNLIKELY_WEIGHT = 62;
101 /// Unreachable-terminating branch taken probability.
103 /// This is the probability for a branch being taken to a block that terminates
104 /// (eventually) in unreachable. These are predicted as unlikely as possible.
105 /// All reachable probability will proportionally share the remaining part.
106 static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
108 /// Weight for a branch taken going into a cold block.
110 /// This is the weight for a branch taken toward a block marked
111 /// cold. A block is marked cold if it's postdominated by a
112 /// block containing a call to a cold function. Cold functions
113 /// are those marked with attribute 'cold'.
114 static const uint32_t CC_TAKEN_WEIGHT = 4;
116 /// Weight for a branch not-taken into a cold block.
118 /// This is the weight for a branch not taken toward a block marked
120 static const uint32_t CC_NONTAKEN_WEIGHT = 64;
122 static const uint32_t PH_TAKEN_WEIGHT = 20;
123 static const uint32_t PH_NONTAKEN_WEIGHT = 12;
125 static const uint32_t ZH_TAKEN_WEIGHT = 20;
126 static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
128 static const uint32_t FPH_TAKEN_WEIGHT = 20;
129 static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
131 /// This is the probability for an ordered floating point comparison.
132 static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
133 /// This is the probability for an unordered floating point comparison, it means
134 /// one or two of the operands are NaN. Usually it is used to test for an
135 /// exceptional case, so the result is unlikely.
136 static const uint32_t FPH_UNO_WEIGHT = 1;
138 /// Invoke-terminating normal branch taken weight
140 /// This is the weight for branching to the normal destination of an invoke
141 /// instruction. We expect this to happen most of the time. Set the weight to an
142 /// absurdly high value so that nested loops subsume it.
143 static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
145 /// Invoke-terminating normal branch not-taken weight.
147 /// This is the weight for branching to the unwind destination of an invoke
148 /// instruction. This is essentially never taken.
149 static const uint32_t IH_NONTAKEN_WEIGHT = 1;
151 static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
152 SmallVectorImpl<const BasicBlock *> &WorkList,
153 SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
154 SmallVector<BasicBlock *, 8> Descendants;
155 SmallPtrSet<const BasicBlock *, 16> NewItems;
157 PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
158 for (auto *BB : Descendants)
159 if (TargetSet.insert(BB).second)
160 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
161 if (!TargetSet.count(*PI))
162 NewItems.insert(*PI);
163 WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
166 /// Compute a set of basic blocks that are post-dominated by unreachables.
167 void BranchProbabilityInfo::computePostDominatedByUnreachable(
168 const Function &F, PostDominatorTree *PDT) {
169 SmallVector<const BasicBlock *, 8> WorkList;
171 const Instruction *TI = BB.getTerminator();
172 if (TI->getNumSuccessors() == 0) {
173 if (isa<UnreachableInst>(TI) ||
174 // If this block is terminated by a call to
175 // @llvm.experimental.deoptimize then treat it like an unreachable
176 // since the @llvm.experimental.deoptimize call is expected to
177 // practically never execute.
178 BB.getTerminatingDeoptimizeCall())
179 UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
183 while (!WorkList.empty()) {
184 const BasicBlock *BB = WorkList.pop_back_val();
185 if (PostDominatedByUnreachable.count(BB))
187 // If the terminator is an InvokeInst, check only the normal destination
188 // block as the unwind edge of InvokeInst is also very unlikely taken.
189 if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
190 if (PostDominatedByUnreachable.count(II->getNormalDest()))
191 UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
193 // If all the successors are unreachable, BB is unreachable as well.
194 else if (!successors(BB).empty() &&
195 llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
196 return PostDominatedByUnreachable.count(Succ);
198 UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
202 /// compute a set of basic blocks that are post-dominated by ColdCalls.
203 void BranchProbabilityInfo::computePostDominatedByColdCall(
204 const Function &F, PostDominatorTree *PDT) {
205 SmallVector<const BasicBlock *, 8> WorkList;
208 if (const CallInst *CI = dyn_cast<CallInst>(&I))
209 if (CI->hasFnAttr(Attribute::Cold))
210 UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
212 while (!WorkList.empty()) {
213 const BasicBlock *BB = WorkList.pop_back_val();
215 // If the terminator is an InvokeInst, check only the normal destination
216 // block as the unwind edge of InvokeInst is also very unlikely taken.
217 if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
218 if (PostDominatedByColdCall.count(II->getNormalDest()))
219 UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
221 // If all of successor are post dominated then BB is also done.
222 else if (!successors(BB).empty() &&
223 llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
224 return PostDominatedByColdCall.count(Succ);
226 UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
230 /// Calculate edge weights for successors lead to unreachable.
232 /// Predict that a successor which leads necessarily to an
233 /// unreachable-terminated block as extremely unlikely.
234 bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
235 const Instruction *TI = BB->getTerminator();
237 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
238 assert(!isa<InvokeInst>(TI) &&
239 "Invokes should have already been handled by calcInvokeHeuristics");
241 SmallVector<unsigned, 4> UnreachableEdges;
242 SmallVector<unsigned, 4> ReachableEdges;
244 for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
245 if (PostDominatedByUnreachable.count(*I))
246 UnreachableEdges.push_back(I.getSuccessorIndex());
248 ReachableEdges.push_back(I.getSuccessorIndex());
250 // Skip probabilities if all were reachable.
251 if (UnreachableEdges.empty())
254 SmallVector<BranchProbability, 4> EdgeProbabilities(
255 BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
256 if (ReachableEdges.empty()) {
257 BranchProbability Prob(1, UnreachableEdges.size());
258 for (unsigned SuccIdx : UnreachableEdges)
259 EdgeProbabilities[SuccIdx] = Prob;
260 setEdgeProbability(BB, EdgeProbabilities);
264 auto UnreachableProb = UR_TAKEN_PROB;
266 (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) /
267 ReachableEdges.size();
269 for (unsigned SuccIdx : UnreachableEdges)
270 EdgeProbabilities[SuccIdx] = UnreachableProb;
271 for (unsigned SuccIdx : ReachableEdges)
272 EdgeProbabilities[SuccIdx] = ReachableProb;
274 setEdgeProbability(BB, EdgeProbabilities);
278 // Propagate existing explicit probabilities from either profile data or
279 // 'expect' intrinsic processing. Examine metadata against unreachable
280 // heuristic. The probability of the edge coming to unreachable block is
281 // set to min of metadata and unreachable heuristic.
282 bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
283 const Instruction *TI = BB->getTerminator();
284 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
285 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) ||
286 isa<InvokeInst>(TI)))
289 MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
293 // Check that the number of successors is manageable.
294 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
296 // Ensure there are weights for all of the successors. Note that the first
297 // operand to the metadata node is a name, not a weight.
298 if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
301 // Build up the final weights that will be used in a temporary buffer.
302 // Compute the sum of all weights to later decide whether they need to
303 // be scaled to fit in 32 bits.
304 uint64_t WeightSum = 0;
305 SmallVector<uint32_t, 2> Weights;
306 SmallVector<unsigned, 2> UnreachableIdxs;
307 SmallVector<unsigned, 2> ReachableIdxs;
308 Weights.reserve(TI->getNumSuccessors());
309 for (unsigned I = 1, E = WeightsNode->getNumOperands(); I != E; ++I) {
310 ConstantInt *Weight =
311 mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(I));
314 assert(Weight->getValue().getActiveBits() <= 32 &&
315 "Too many bits for uint32_t");
316 Weights.push_back(Weight->getZExtValue());
317 WeightSum += Weights.back();
318 if (PostDominatedByUnreachable.count(TI->getSuccessor(I - 1)))
319 UnreachableIdxs.push_back(I - 1);
321 ReachableIdxs.push_back(I - 1);
323 assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
325 // If the sum of weights does not fit in 32 bits, scale every weight down
327 uint64_t ScalingFactor =
328 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
330 if (ScalingFactor > 1) {
332 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
333 Weights[I] /= ScalingFactor;
334 WeightSum += Weights[I];
337 assert(WeightSum <= UINT32_MAX &&
338 "Expected weights to scale down to 32 bits");
340 if (WeightSum == 0 || ReachableIdxs.size() == 0) {
341 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
343 WeightSum = TI->getNumSuccessors();
346 // Set the probability.
347 SmallVector<BranchProbability, 2> BP;
348 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
349 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) });
351 // Examine the metadata against unreachable heuristic.
352 // If the unreachable heuristic is more strong then we use it for this edge.
353 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) {
354 setEdgeProbability(BB, BP);
358 auto UnreachableProb = UR_TAKEN_PROB;
359 for (auto I : UnreachableIdxs)
360 if (UnreachableProb < BP[I]) {
361 BP[I] = UnreachableProb;
364 // Sum of all edge probabilities must be 1.0. If we modified the probability
365 // of some edges then we must distribute the introduced difference over the
368 // Proportional distribution: the relation between probabilities of the
369 // reachable edges is kept unchanged. That is for any reachable edges i and j:
370 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] =>
371 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K
372 // Where K is independent of i,j.
373 // newBP[i] == oldBP[i] * K
374 // We need to find K.
375 // Make sum of all reachables of the left and right parts:
376 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP)
377 // Sum of newBP must be equal to 1.0:
378 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 =>
379 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP)
380 // Where sum_of_unreachable(newBP) is what has been just changed.
382 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) =>
383 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP)
384 BranchProbability NewUnreachableSum = BranchProbability::getZero();
385 for (auto I : UnreachableIdxs)
386 NewUnreachableSum += BP[I];
388 BranchProbability NewReachableSum =
389 BranchProbability::getOne() - NewUnreachableSum;
391 BranchProbability OldReachableSum = BranchProbability::getZero();
392 for (auto I : ReachableIdxs)
393 OldReachableSum += BP[I];
395 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute?
396 if (OldReachableSum.isZero()) {
397 // If all oldBP[i] are zeroes then the proportional distribution results
398 // in all zero probabilities and the error stays big. In this case we
399 // evenly spread NewReachableSum over the reachable edges.
400 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size();
401 for (auto I : ReachableIdxs)
404 for (auto I : ReachableIdxs) {
405 // We use uint64_t to avoid double rounding error of the following
406 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum
407 // The formula is taken from the private constructor
408 // BranchProbability(uint32_t Numerator, uint32_t Denominator)
409 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) *
410 BP[I].getNumerator();
411 uint32_t Div = static_cast<uint32_t>(
412 divideNearest(Mul, OldReachableSum.getNumerator()));
413 BP[I] = BranchProbability::getRaw(Div);
418 setEdgeProbability(BB, BP);
423 /// Calculate edge weights for edges leading to cold blocks.
425 /// A cold block is one post-dominated by a block with a call to a
426 /// cold function. Those edges are unlikely to be taken, so we give
427 /// them relatively low weight.
429 /// Return true if we could compute the weights for cold edges.
430 /// Return false, otherwise.
431 bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
432 const Instruction *TI = BB->getTerminator();
434 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
435 assert(!isa<InvokeInst>(TI) &&
436 "Invokes should have already been handled by calcInvokeHeuristics");
438 // Determine which successors are post-dominated by a cold block.
439 SmallVector<unsigned, 4> ColdEdges;
440 SmallVector<unsigned, 4> NormalEdges;
441 for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
442 if (PostDominatedByColdCall.count(*I))
443 ColdEdges.push_back(I.getSuccessorIndex());
445 NormalEdges.push_back(I.getSuccessorIndex());
447 // Skip probabilities if no cold edges.
448 if (ColdEdges.empty())
451 SmallVector<BranchProbability, 4> EdgeProbabilities(
452 BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
453 if (NormalEdges.empty()) {
454 BranchProbability Prob(1, ColdEdges.size());
455 for (unsigned SuccIdx : ColdEdges)
456 EdgeProbabilities[SuccIdx] = Prob;
457 setEdgeProbability(BB, EdgeProbabilities);
461 auto ColdProb = BranchProbability::getBranchProbability(
463 (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
464 auto NormalProb = BranchProbability::getBranchProbability(
466 (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
468 for (unsigned SuccIdx : ColdEdges)
469 EdgeProbabilities[SuccIdx] = ColdProb;
470 for (unsigned SuccIdx : NormalEdges)
471 EdgeProbabilities[SuccIdx] = NormalProb;
473 setEdgeProbability(BB, EdgeProbabilities);
477 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
478 // between two pointer or pointer and NULL will fail.
479 bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
480 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
481 if (!BI || !BI->isConditional())
484 Value *Cond = BI->getCondition();
485 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
486 if (!CI || !CI->isEquality())
489 Value *LHS = CI->getOperand(0);
491 if (!LHS->getType()->isPointerTy())
494 assert(CI->getOperand(1)->getType()->isPointerTy());
496 BranchProbability TakenProb(PH_TAKEN_WEIGHT,
497 PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
498 BranchProbability UntakenProb(PH_NONTAKEN_WEIGHT,
499 PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
501 // p != 0 -> isProb = true
502 // p == 0 -> isProb = false
503 // p != q -> isProb = true
504 // p == q -> isProb = false;
505 bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
507 std::swap(TakenProb, UntakenProb);
510 BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
514 static int getSCCNum(const BasicBlock *BB,
515 const BranchProbabilityInfo::SccInfo &SccI) {
516 auto SccIt = SccI.SccNums.find(BB);
517 if (SccIt == SccI.SccNums.end())
519 return SccIt->second;
522 // Consider any block that is an entry point to the SCC as a header.
523 static bool isSCCHeader(const BasicBlock *BB, int SccNum,
524 BranchProbabilityInfo::SccInfo &SccI) {
525 assert(getSCCNum(BB, SccI) == SccNum);
527 // Lazily compute the set of headers for a given SCC and cache the results
528 // in the SccHeaderMap.
529 if (SccI.SccHeaders.size() <= static_cast<unsigned>(SccNum))
530 SccI.SccHeaders.resize(SccNum + 1);
531 auto &HeaderMap = SccI.SccHeaders[SccNum];
533 BranchProbabilityInfo::SccHeaderMap::iterator HeaderMapIt;
534 std::tie(HeaderMapIt, Inserted) = HeaderMap.insert(std::make_pair(BB, false));
536 bool IsHeader = llvm::any_of(make_range(pred_begin(BB), pred_end(BB)),
537 [&](const BasicBlock *Pred) {
538 return getSCCNum(Pred, SccI) != SccNum;
540 HeaderMapIt->second = IsHeader;
543 return HeaderMapIt->second;
546 // Compute the unlikely successors to the block BB in the loop L, specifically
547 // those that are unlikely because this is a loop, and add them to the
548 // UnlikelyBlocks set.
550 computeUnlikelySuccessors(const BasicBlock *BB, Loop *L,
551 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) {
552 // Sometimes in a loop we have a branch whose condition is made false by
553 // taking it. This is typically something like
560 // In this sort of situation taking the branch means that at the very least it
561 // won't be taken again in the next iteration of the loop, so we should
562 // consider it less likely than a typical branch.
564 // We detect this by looking back through the graph of PHI nodes that sets the
565 // value that the condition depends on, and seeing if we can reach a successor
566 // block which can be determined to make the condition false.
568 // FIXME: We currently consider unlikely blocks to be half as likely as other
569 // blocks, but if we consider the example above the likelyhood is actually
570 // 1/MAX. We could therefore be more precise in how unlikely we consider
571 // blocks to be, but it would require more careful examination of the form
572 // of the comparison expression.
573 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
574 if (!BI || !BI->isConditional())
577 // Check if the branch is based on an instruction compared with a constant
578 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
579 if (!CI || !isa<Instruction>(CI->getOperand(0)) ||
580 !isa<Constant>(CI->getOperand(1)))
583 // Either the instruction must be a PHI, or a chain of operations involving
584 // constants that ends in a PHI which we can then collapse into a single value
585 // if the PHI value is known.
586 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0));
587 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS);
588 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1));
589 // Collect the instructions until we hit a PHI
590 SmallVector<BinaryOperator *, 1> InstChain;
591 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) &&
592 isa<Constant>(CmpLHS->getOperand(1))) {
593 // Stop if the chain extends outside of the loop
594 if (!L->contains(CmpLHS))
596 InstChain.push_back(cast<BinaryOperator>(CmpLHS));
597 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0));
599 CmpPHI = dyn_cast<PHINode>(CmpLHS);
601 if (!CmpPHI || !L->contains(CmpPHI))
604 // Trace the phi node to find all values that come from successors of BB
605 SmallPtrSet<PHINode*, 8> VisitedInsts;
606 SmallVector<PHINode*, 8> WorkList;
607 WorkList.push_back(CmpPHI);
608 VisitedInsts.insert(CmpPHI);
609 while (!WorkList.empty()) {
610 PHINode *P = WorkList.back();
612 for (BasicBlock *B : P->blocks()) {
613 // Skip blocks that aren't part of the loop
616 Value *V = P->getIncomingValueForBlock(B);
617 // If the source is a PHI add it to the work list if we haven't
618 // already visited it.
619 if (PHINode *PN = dyn_cast<PHINode>(V)) {
620 if (VisitedInsts.insert(PN).second)
621 WorkList.push_back(PN);
624 // If this incoming value is a constant and B is a successor of BB, then
625 // we can constant-evaluate the compare to see if it makes the branch be
627 Constant *CmpLHSConst = dyn_cast<Constant>(V);
629 std::find(succ_begin(BB), succ_end(BB), B) == succ_end(BB))
631 // First collapse InstChain
632 for (Instruction *I : llvm::reverse(InstChain)) {
633 CmpLHSConst = ConstantExpr::get(I->getOpcode(), CmpLHSConst,
634 cast<Constant>(I->getOperand(1)), true);
640 // Now constant-evaluate the compare
641 Constant *Result = ConstantExpr::getCompare(CI->getPredicate(),
642 CmpLHSConst, CmpConst, true);
643 // If the result means we don't branch to the block then that block is
646 ((Result->isZeroValue() && B == BI->getSuccessor(0)) ||
647 (Result->isOneValue() && B == BI->getSuccessor(1))))
648 UnlikelyBlocks.insert(B);
653 // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
654 // as taken, exiting edges as not-taken.
655 bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB,
659 Loop *L = LI.getLoopFor(BB);
661 SccNum = getSCCNum(BB, SccI);
666 SmallPtrSet<const BasicBlock*, 8> UnlikelyBlocks;
668 computeUnlikelySuccessors(BB, L, UnlikelyBlocks);
670 SmallVector<unsigned, 8> BackEdges;
671 SmallVector<unsigned, 8> ExitingEdges;
672 SmallVector<unsigned, 8> InEdges; // Edges from header to the loop.
673 SmallVector<unsigned, 8> UnlikelyEdges;
675 for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
676 // Use LoopInfo if we have it, otherwise fall-back to SCC info to catch
677 // irreducible loops.
679 if (UnlikelyBlocks.count(*I) != 0)
680 UnlikelyEdges.push_back(I.getSuccessorIndex());
681 else if (!L->contains(*I))
682 ExitingEdges.push_back(I.getSuccessorIndex());
683 else if (L->getHeader() == *I)
684 BackEdges.push_back(I.getSuccessorIndex());
686 InEdges.push_back(I.getSuccessorIndex());
688 if (getSCCNum(*I, SccI) != SccNum)
689 ExitingEdges.push_back(I.getSuccessorIndex());
690 else if (isSCCHeader(*I, SccNum, SccI))
691 BackEdges.push_back(I.getSuccessorIndex());
693 InEdges.push_back(I.getSuccessorIndex());
697 if (BackEdges.empty() && ExitingEdges.empty() && UnlikelyEdges.empty())
700 // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and
701 // normalize them so that they sum up to one.
702 unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
703 (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) +
704 (UnlikelyEdges.empty() ? 0 : LBH_UNLIKELY_WEIGHT) +
705 (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT);
707 SmallVector<BranchProbability, 4> EdgeProbabilities(
708 BB->getTerminator()->getNumSuccessors(), BranchProbability::getUnknown());
709 if (uint32_t numBackEdges = BackEdges.size()) {
710 BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
711 auto Prob = TakenProb / numBackEdges;
712 for (unsigned SuccIdx : BackEdges)
713 EdgeProbabilities[SuccIdx] = Prob;
716 if (uint32_t numInEdges = InEdges.size()) {
717 BranchProbability TakenProb = BranchProbability(LBH_TAKEN_WEIGHT, Denom);
718 auto Prob = TakenProb / numInEdges;
719 for (unsigned SuccIdx : InEdges)
720 EdgeProbabilities[SuccIdx] = Prob;
723 if (uint32_t numExitingEdges = ExitingEdges.size()) {
724 BranchProbability NotTakenProb = BranchProbability(LBH_NONTAKEN_WEIGHT,
726 auto Prob = NotTakenProb / numExitingEdges;
727 for (unsigned SuccIdx : ExitingEdges)
728 EdgeProbabilities[SuccIdx] = Prob;
731 if (uint32_t numUnlikelyEdges = UnlikelyEdges.size()) {
732 BranchProbability UnlikelyProb = BranchProbability(LBH_UNLIKELY_WEIGHT,
734 auto Prob = UnlikelyProb / numUnlikelyEdges;
735 for (unsigned SuccIdx : UnlikelyEdges)
736 EdgeProbabilities[SuccIdx] = Prob;
739 setEdgeProbability(BB, EdgeProbabilities);
743 bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
744 const TargetLibraryInfo *TLI) {
745 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
746 if (!BI || !BI->isConditional())
749 Value *Cond = BI->getCondition();
750 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
754 auto GetConstantInt = [](Value *V) {
755 if (auto *I = dyn_cast<BitCastInst>(V))
756 return dyn_cast<ConstantInt>(I->getOperand(0));
757 return dyn_cast<ConstantInt>(V);
760 Value *RHS = CI->getOperand(1);
761 ConstantInt *CV = GetConstantInt(RHS);
765 // If the LHS is the result of AND'ing a value with a single bit bitmask,
766 // we don't have information about probabilities.
767 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
768 if (LHS->getOpcode() == Instruction::And)
769 if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
770 if (AndRHS->getValue().isPowerOf2())
773 // Check if the LHS is the return value of a library function
774 LibFunc Func = NumLibFuncs;
776 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0)))
777 if (Function *CalledFn = Call->getCalledFunction())
778 TLI->getLibFunc(*CalledFn, Func);
781 if (Func == LibFunc_strcasecmp ||
782 Func == LibFunc_strcmp ||
783 Func == LibFunc_strncasecmp ||
784 Func == LibFunc_strncmp ||
785 Func == LibFunc_memcmp) {
786 // strcmp and similar functions return zero, negative, or positive, if the
787 // first string is equal, less, or greater than the second. We consider it
788 // likely that the strings are not equal, so a comparison with zero is
789 // probably false, but also a comparison with any other number is also
790 // probably false given that what exactly is returned for nonzero values is
791 // not specified. Any kind of comparison other than equality we know
793 switch (CI->getPredicate()) {
794 case CmpInst::ICMP_EQ:
797 case CmpInst::ICMP_NE:
803 } else if (CV->isZero()) {
804 switch (CI->getPredicate()) {
805 case CmpInst::ICMP_EQ:
806 // X == 0 -> Unlikely
809 case CmpInst::ICMP_NE:
813 case CmpInst::ICMP_SLT:
817 case CmpInst::ICMP_SGT:
824 } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
825 // InstCombine canonicalizes X <= 0 into X < 1.
826 // X <= 0 -> Unlikely
828 } else if (CV->isMinusOne()) {
829 switch (CI->getPredicate()) {
830 case CmpInst::ICMP_EQ:
831 // X == -1 -> Unlikely
834 case CmpInst::ICMP_NE:
838 case CmpInst::ICMP_SGT:
839 // InstCombine canonicalizes X >= 0 into X > -1.
850 BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
851 ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
852 BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT,
853 ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
855 std::swap(TakenProb, UntakenProb);
858 BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
862 bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
863 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
864 if (!BI || !BI->isConditional())
867 Value *Cond = BI->getCondition();
868 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
872 uint32_t TakenWeight = FPH_TAKEN_WEIGHT;
873 uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT;
875 if (FCmp->isEquality()) {
876 // f1 == f2 -> Unlikely
877 // f1 != f2 -> Likely
878 isProb = !FCmp->isTrueWhenEqual();
879 } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
882 TakenWeight = FPH_ORD_WEIGHT;
883 NontakenWeight = FPH_UNO_WEIGHT;
884 } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
887 TakenWeight = FPH_ORD_WEIGHT;
888 NontakenWeight = FPH_UNO_WEIGHT;
893 BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight);
894 BranchProbability UntakenProb(NontakenWeight, TakenWeight + NontakenWeight);
896 std::swap(TakenProb, UntakenProb);
899 BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
903 bool BranchProbabilityInfo::calcInvokeHeuristics(const BasicBlock *BB) {
904 const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
908 BranchProbability TakenProb(IH_TAKEN_WEIGHT,
909 IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT);
911 BB, SmallVector<BranchProbability, 2>({TakenProb, TakenProb.getCompl()}));
915 void BranchProbabilityInfo::releaseMemory() {
920 bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA,
921 FunctionAnalysisManager::Invalidator &) {
922 // Check whether the analysis, all analyses on functions, or the function's
923 // CFG have been preserved.
924 auto PAC = PA.getChecker<BranchProbabilityAnalysis>();
925 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
926 PAC.preservedSet<CFGAnalyses>());
929 void BranchProbabilityInfo::print(raw_ostream &OS) const {
930 OS << "---- Branch Probabilities ----\n";
931 // We print the probabilities from the last function the analysis ran over,
932 // or the function it is currently running over.
933 assert(LastF && "Cannot print prior to running over a function");
934 for (const auto &BI : *LastF) {
935 for (const_succ_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE;
937 printEdgeProbability(OS << " ", &BI, *SI);
942 bool BranchProbabilityInfo::
943 isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
944 // Hot probability is at least 4/5 = 80%
945 // FIXME: Compare against a static "hot" BranchProbability.
946 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
950 BranchProbabilityInfo::getHotSucc(const BasicBlock *BB) const {
951 auto MaxProb = BranchProbability::getZero();
952 const BasicBlock *MaxSucc = nullptr;
954 for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
955 const BasicBlock *Succ = *I;
956 auto Prob = getEdgeProbability(BB, Succ);
957 if (Prob > MaxProb) {
963 // Hot probability is at least 4/5 = 80%
964 if (MaxProb > BranchProbability(4, 5))
970 /// Get the raw edge probability for the edge. If can't find it, return a
971 /// default probability 1/N where N is the number of successors. Here an edge is
972 /// specified using PredBlock and an
973 /// index to the successors.
975 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
976 unsigned IndexInSuccessors) const {
977 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors));
979 if (I != Probs.end())
982 return {1, static_cast<uint32_t>(succ_size(Src))};
986 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
987 const_succ_iterator Dst) const {
988 return getEdgeProbability(Src, Dst.getSuccessorIndex());
991 /// Get the raw edge probability calculated for the block pair. This returns the
992 /// sum of all raw edge probabilities from Src to Dst.
994 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src,
995 const BasicBlock *Dst) const {
996 auto Prob = BranchProbability::getZero();
997 bool FoundProb = false;
998 uint32_t EdgeCount = 0;
999 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
1002 auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex()));
1003 if (MapI != Probs.end()) {
1005 Prob += MapI->second;
1008 uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src));
1009 return FoundProb ? Prob : BranchProbability(EdgeCount, succ_num);
1012 /// Set the edge probability for a given edge specified by PredBlock and an
1013 /// index to the successors.
1014 void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src,
1015 unsigned IndexInSuccessors,
1016 BranchProbability Prob) {
1017 Probs[std::make_pair(Src, IndexInSuccessors)] = Prob;
1018 Handles.insert(BasicBlockCallbackVH(Src, this));
1019 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
1020 << IndexInSuccessors << " successor probability to " << Prob
1024 /// Set the edge probability for all edges at once.
1025 void BranchProbabilityInfo::setEdgeProbability(
1026 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) {
1027 assert(Src->getTerminator()->getNumSuccessors() == Probs.size());
1028 if (Probs.size() == 0)
1029 return; // Nothing to set.
1031 uint64_t TotalNumerator = 0;
1032 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) {
1033 setEdgeProbability(Src, SuccIdx, Probs[SuccIdx]);
1034 TotalNumerator += Probs[SuccIdx].getNumerator();
1037 // Because of rounding errors the total probability cannot be checked to be
1038 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator.
1039 // Instead, every single probability in Probs must be as accurate as possible.
1040 // This results in error 1/denominator at most, thus the total absolute error
1041 // should be within Probs.size / BranchProbability::getDenominator.
1042 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size());
1043 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size());
1047 BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
1048 const BasicBlock *Src,
1049 const BasicBlock *Dst) const {
1050 const BranchProbability Prob = getEdgeProbability(Src, Dst);
1051 OS << "edge " << Src->getName() << " -> " << Dst->getName()
1052 << " probability is " << Prob
1053 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
1058 void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) {
1059 for (const_succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
1060 auto MapI = Probs.find(std::make_pair(BB, I.getSuccessorIndex()));
1061 if (MapI != Probs.end())
1066 void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
1067 const TargetLibraryInfo *TLI,
1068 PostDominatorTree *PDT) {
1069 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
1071 LastF = &F; // Store the last function we ran on for printing.
1072 assert(PostDominatedByUnreachable.empty());
1073 assert(PostDominatedByColdCall.empty());
1075 // Record SCC numbers of blocks in the CFG to identify irreducible loops.
1076 // FIXME: We could only calculate this if the CFG is known to be irreducible
1077 // (perhaps cache this info in LoopInfo if we can easily calculate it there?).
1080 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd();
1082 // Ignore single-block SCCs since they either aren't loops or LoopInfo will
1084 const std::vector<const BasicBlock *> &Scc = *It;
1085 if (Scc.size() == 1)
1088 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":");
1089 for (auto *BB : Scc) {
1090 LLVM_DEBUG(dbgs() << " " << BB->getName());
1091 SccI.SccNums[BB] = SccNum;
1093 LLVM_DEBUG(dbgs() << "\n");
1096 std::unique_ptr<PostDominatorTree> PDTPtr;
1099 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1103 computePostDominatedByUnreachable(F, PDT);
1104 computePostDominatedByColdCall(F, PDT);
1106 // Walk the basic blocks in post-order so that we can build up state about
1107 // the successors of a block iteratively.
1108 for (auto BB : post_order(&F.getEntryBlock())) {
1109 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
1111 // If there is no at least two successors, no sense to set probability.
1112 if (BB->getTerminator()->getNumSuccessors() < 2)
1114 if (calcMetadataWeights(BB))
1116 if (calcInvokeHeuristics(BB))
1118 if (calcUnreachableHeuristics(BB))
1120 if (calcColdCallHeuristics(BB))
1122 if (calcLoopBranchHeuristics(BB, LI, SccI))
1124 if (calcPointerHeuristics(BB))
1126 if (calcZeroHeuristics(BB, TLI))
1128 if (calcFloatingPointHeuristics(BB))
1132 PostDominatedByUnreachable.clear();
1133 PostDominatedByColdCall.clear();
1135 if (PrintBranchProb &&
1136 (PrintBranchProbFuncName.empty() ||
1137 F.getName().equals(PrintBranchProbFuncName))) {
1142 void BranchProbabilityInfoWrapperPass::getAnalysisUsage(
1143 AnalysisUsage &AU) const {
1144 // We require DT so it's available when LI is available. The LI updating code
1145 // asserts that DT is also present so if we don't make sure that we have DT
1146 // here, that assert will trigger.
1147 AU.addRequired<DominatorTreeWrapperPass>();
1148 AU.addRequired<LoopInfoWrapperPass>();
1149 AU.addRequired<TargetLibraryInfoWrapperPass>();
1150 AU.addRequired<PostDominatorTreeWrapperPass>();
1151 AU.setPreservesAll();
1154 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) {
1155 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1156 const TargetLibraryInfo &TLI =
1157 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1158 PostDominatorTree &PDT =
1159 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
1160 BPI.calculate(F, LI, &TLI, &PDT);
1164 void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); }
1166 void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
1167 const Module *) const {
1171 AnalysisKey BranchProbabilityAnalysis::Key;
1172 BranchProbabilityInfo
1173 BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1174 BranchProbabilityInfo BPI;
1175 BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
1176 &AM.getResult<TargetLibraryAnalysis>(F),
1177 &AM.getResult<PostDominatorTreeAnalysis>(F));
1182 BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
1183 OS << "Printing analysis results of BPI for function "
1184 << "'" << F.getName() << "':"
1186 AM.getResult<BranchProbabilityAnalysis>(F).print(OS);
1187 return PreservedAnalyses::all();