1 //===-- ReachableCode.cpp - Code Reachability Analysis --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a flow-sensitive, path-insensitive analysis of
10 // determining reachable blocks within a CFG.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Analysis/Analyses/ReachableCode.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/ExprObjC.h"
18 #include "clang/AST/ParentMap.h"
19 #include "clang/AST/StmtCXX.h"
20 #include "clang/Analysis/AnalysisDeclContext.h"
21 #include "clang/Analysis/CFG.h"
22 #include "clang/Basic/Builtins.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Lex/Preprocessor.h"
25 #include "llvm/ADT/BitVector.h"
26 #include "llvm/ADT/SmallVector.h"
28 using namespace clang;
30 //===----------------------------------------------------------------------===//
31 // Core Reachability Analysis routines.
32 //===----------------------------------------------------------------------===//
34 static bool isEnumConstant(const Expr *Ex) {
35 const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex);
38 return isa<EnumConstantDecl>(DR->getDecl());
41 static bool isTrivialExpression(const Expr *Ex) {
42 Ex = Ex->IgnoreParenCasts();
43 return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) ||
44 isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) ||
45 isa<CharacterLiteral>(Ex) ||
49 static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) {
50 // Check if the block ends with a do...while() and see if 'S' is the
52 if (const Stmt *Term = B->getTerminatorStmt()) {
53 if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) {
54 const Expr *Cond = DS->getCond()->IgnoreParenCasts();
55 return Cond == S && isTrivialExpression(Cond);
61 static bool isBuiltinUnreachable(const Stmt *S) {
62 if (const auto *DRE = dyn_cast<DeclRefExpr>(S))
63 if (const auto *FDecl = dyn_cast<FunctionDecl>(DRE->getDecl()))
64 return FDecl->getIdentifier() &&
65 FDecl->getBuiltinID() == Builtin::BI__builtin_unreachable;
69 static bool isBuiltinAssumeFalse(const CFGBlock *B, const Stmt *S,
72 // Happens if S is B's terminator and B contains nothing else
73 // (e.g. a CFGBlock containing only a goto).
76 if (Optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) {
77 if (const auto *CE = dyn_cast<CallExpr>(CS->getStmt())) {
78 return CE->getCallee()->IgnoreCasts() == S && CE->isBuiltinAssumeFalse(C);
84 static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
85 // Look to see if the current control flow ends with a 'return', and see if
86 // 'S' is a substatement. The 'return' may not be the last element in the
87 // block, or may be in a subsequent block because of destructors.
88 const CFGBlock *Current = B;
90 for (CFGBlock::const_reverse_iterator I = Current->rbegin(),
93 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
94 if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) {
97 if (const Expr *RE = RS->getRetValue()) {
98 RE = RE->IgnoreParenCasts();
101 ParentMap PM(const_cast<Expr *>(RE));
102 // If 'S' is in the ParentMap, it is a subexpression of
103 // the return statement.
104 return PM.getParent(S);
110 // Note also that we are restricting the search for the return statement
111 // to stop at control-flow; only part of a return statement may be dead,
112 // without the whole return statement being dead.
113 if (Current->getTerminator().isTemporaryDtorsBranch()) {
114 // Temporary destructors have a predictable control flow, thus we want to
115 // look into the next block for the return statement.
116 // We look into the false branch, as we know the true branch only contains
117 // the call to the destructor.
118 assert(Current->succ_size() == 2);
119 Current = *(Current->succ_begin() + 1);
120 } else if (!Current->getTerminatorStmt() && Current->succ_size() == 1) {
121 // If there is only one successor, we're not dealing with outgoing control
122 // flow. Thus, look into the next block.
123 Current = *Current->succ_begin();
124 if (Current->pred_size() > 1) {
125 // If there is more than one predecessor, we're dealing with incoming
126 // control flow - if the return statement is in that block, it might
127 // well be reachable via a different control flow, thus it's not dead.
131 // We hit control flow or a dead end. Stop searching.
135 llvm_unreachable("Broke out of infinite loop.");
138 static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) {
139 assert(Loc.isMacroID());
141 while (Loc.isMacroID()) {
143 Loc = SM.getImmediateMacroCallerLoc(Loc);
148 /// Returns true if the statement is expanded from a configuration macro.
149 static bool isExpandedFromConfigurationMacro(const Stmt *S,
151 bool IgnoreYES_NO = false) {
152 // FIXME: This is not very precise. Here we just check to see if the
153 // value comes from a macro, but we can do much better. This is likely
154 // to be over conservative. This logic is factored into a separate function
155 // so that we can refine it later.
156 SourceLocation L = S->getBeginLoc();
158 SourceManager &SM = PP.getSourceManager();
160 // The Objective-C constant 'YES' and 'NO'
161 // are defined as macros. Do not treat them
162 // as configuration values.
163 SourceLocation TopL = getTopMostMacro(L, SM);
164 StringRef MacroName = PP.getImmediateMacroName(TopL);
165 if (MacroName == "YES" || MacroName == "NO")
167 } else if (!PP.getLangOpts().CPlusPlus) {
168 // Do not treat C 'false' and 'true' macros as configuration values.
169 SourceLocation TopL = getTopMostMacro(L, SM);
170 StringRef MacroName = PP.getImmediateMacroName(TopL);
171 if (MacroName == "false" || MacroName == "true")
179 static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP);
181 /// Returns true if the statement represents a configuration value.
183 /// A configuration value is something usually determined at compile-time
184 /// to conditionally always execute some branch. Such guards are for
185 /// "sometimes unreachable" code. Such code is usually not interesting
186 /// to report as unreachable, and may mask truly unreachable code within
188 static bool isConfigurationValue(const Stmt *S,
190 SourceRange *SilenceableCondVal = nullptr,
191 bool IncludeIntegers = true,
192 bool WrappedInParens = false) {
196 if (const auto *Ex = dyn_cast<Expr>(S))
197 S = Ex->IgnoreImplicit();
199 if (const auto *Ex = dyn_cast<Expr>(S))
200 S = Ex->IgnoreCasts();
202 // Special case looking for the sigil '()' around an integer literal.
203 if (const ParenExpr *PE = dyn_cast<ParenExpr>(S))
204 if (!PE->getBeginLoc().isMacroID())
205 return isConfigurationValue(PE->getSubExpr(), PP, SilenceableCondVal,
206 IncludeIntegers, true);
208 if (const Expr *Ex = dyn_cast<Expr>(S))
209 S = Ex->IgnoreCasts();
211 bool IgnoreYES_NO = false;
213 switch (S->getStmtClass()) {
214 case Stmt::CallExprClass: {
215 const FunctionDecl *Callee =
216 dyn_cast_or_null<FunctionDecl>(cast<CallExpr>(S)->getCalleeDecl());
217 return Callee ? Callee->isConstexpr() : false;
219 case Stmt::DeclRefExprClass:
220 return isConfigurationValue(cast<DeclRefExpr>(S)->getDecl(), PP);
221 case Stmt::ObjCBoolLiteralExprClass:
224 case Stmt::CXXBoolLiteralExprClass:
225 case Stmt::IntegerLiteralClass: {
226 const Expr *E = cast<Expr>(S);
227 if (IncludeIntegers) {
228 if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid())
229 *SilenceableCondVal = E->getSourceRange();
230 return WrappedInParens || isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO);
234 case Stmt::MemberExprClass:
235 return isConfigurationValue(cast<MemberExpr>(S)->getMemberDecl(), PP);
236 case Stmt::UnaryExprOrTypeTraitExprClass:
238 case Stmt::BinaryOperatorClass: {
239 const BinaryOperator *B = cast<BinaryOperator>(S);
240 // Only include raw integers (not enums) as configuration
241 // values if they are used in a logical or comparison operator
243 IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp());
244 return isConfigurationValue(B->getLHS(), PP, SilenceableCondVal,
246 isConfigurationValue(B->getRHS(), PP, SilenceableCondVal,
249 case Stmt::UnaryOperatorClass: {
250 const UnaryOperator *UO = cast<UnaryOperator>(S);
251 if (UO->getOpcode() != UO_LNot && UO->getOpcode() != UO_Minus)
253 bool SilenceableCondValNotSet =
254 SilenceableCondVal && SilenceableCondVal->getBegin().isInvalid();
255 bool IsSubExprConfigValue =
256 isConfigurationValue(UO->getSubExpr(), PP, SilenceableCondVal,
257 IncludeIntegers, WrappedInParens);
258 // Update the silenceable condition value source range only if the range
259 // was set directly by the child expression.
260 if (SilenceableCondValNotSet &&
261 SilenceableCondVal->getBegin().isValid() &&
262 *SilenceableCondVal ==
263 UO->getSubExpr()->IgnoreCasts()->getSourceRange())
264 *SilenceableCondVal = UO->getSourceRange();
265 return IsSubExprConfigValue;
272 static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP) {
273 if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D))
274 return isConfigurationValue(ED->getInitExpr(), PP);
275 if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
276 // As a heuristic, treat globals as configuration values. Note
277 // that we only will get here if Sema evaluated this
278 // condition to a constant expression, which means the global
279 // had to be declared in a way to be a truly constant value.
280 // We could generalize this to local variables, but it isn't
281 // clear if those truly represent configuration values that
282 // gate unreachable code.
283 if (!VD->hasLocalStorage())
286 // As a heuristic, locals that have been marked 'const' explicitly
287 // can be treated as configuration values as well.
288 return VD->getType().isLocalConstQualified();
293 /// Returns true if we should always explore all successors of a block.
294 static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B,
296 if (const Stmt *Term = B->getTerminatorStmt()) {
297 if (isa<SwitchStmt>(Term))
299 // Specially handle '||' and '&&'.
300 if (isa<BinaryOperator>(Term)) {
301 return isConfigurationValue(Term, PP);
305 const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false);
306 return isConfigurationValue(Cond, PP);
309 static unsigned scanFromBlock(const CFGBlock *Start,
310 llvm::BitVector &Reachable,
312 bool IncludeSometimesUnreachableEdges) {
316 SmallVector<const CFGBlock*, 32> WL;
318 // The entry block may have already been marked reachable
320 if (!Reachable[Start->getBlockID()]) {
322 Reachable[Start->getBlockID()] = true;
327 // Find the reachable blocks from 'Start'.
328 while (!WL.empty()) {
329 const CFGBlock *item = WL.pop_back_val();
331 // There are cases where we want to treat all successors as reachable.
332 // The idea is that some "sometimes unreachable" code is not interesting,
333 // and that we should forge ahead and explore those branches anyway.
334 // This allows us to potentially uncover some "always unreachable" code
335 // within the "sometimes unreachable" code.
336 // Look at the successors and mark then reachable.
337 Optional<bool> TreatAllSuccessorsAsReachable;
338 if (!IncludeSometimesUnreachableEdges)
339 TreatAllSuccessorsAsReachable = false;
341 for (CFGBlock::const_succ_iterator I = item->succ_begin(),
342 E = item->succ_end(); I != E; ++I) {
343 const CFGBlock *B = *I;
345 const CFGBlock *UB = I->getPossiblyUnreachableBlock();
349 if (!TreatAllSuccessorsAsReachable.hasValue()) {
351 TreatAllSuccessorsAsReachable =
352 shouldTreatSuccessorsAsReachable(item, *PP);
355 if (TreatAllSuccessorsAsReachable.getValue()) {
363 unsigned blockID = B->getBlockID();
364 if (!Reachable[blockID]) {
365 Reachable.set(blockID);
375 static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start,
377 llvm::BitVector &Reachable) {
378 return scanFromBlock(Start, Reachable, &PP, true);
381 //===----------------------------------------------------------------------===//
382 // Dead Code Scanner.
383 //===----------------------------------------------------------------------===//
387 llvm::BitVector Visited;
388 llvm::BitVector &Reachable;
389 SmallVector<const CFGBlock *, 10> WorkList;
393 typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
396 DeferredLocsTy DeferredLocs;
399 DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP, ASTContext &C)
400 : Visited(reachable.size()),
401 Reachable(reachable),
404 void enqueue(const CFGBlock *block);
405 unsigned scanBackwards(const CFGBlock *Start,
406 clang::reachable_code::Callback &CB);
408 bool isDeadCodeRoot(const CFGBlock *Block);
410 const Stmt *findDeadCode(const CFGBlock *Block);
412 void reportDeadCode(const CFGBlock *B,
414 clang::reachable_code::Callback &CB);
418 void DeadCodeScan::enqueue(const CFGBlock *block) {
419 unsigned blockID = block->getBlockID();
420 if (Reachable[blockID] || Visited[blockID])
422 Visited[blockID] = true;
423 WorkList.push_back(block);
426 bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) {
427 bool isDeadRoot = true;
429 for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
430 E = Block->pred_end(); I != E; ++I) {
431 if (const CFGBlock *PredBlock = *I) {
432 unsigned blockID = PredBlock->getBlockID();
433 if (Visited[blockID]) {
437 if (!Reachable[blockID]) {
439 Visited[blockID] = true;
440 WorkList.push_back(PredBlock);
449 static bool isValidDeadStmt(const Stmt *S) {
450 if (S->getBeginLoc().isInvalid())
452 if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
453 return BO->getOpcode() != BO_Comma;
457 const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
458 for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
459 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
460 const Stmt *S = CS->getStmt();
461 if (isValidDeadStmt(S))
465 CFGTerminator T = Block->getTerminator();
466 if (T.isStmtBranch()) {
467 const Stmt *S = T.getStmt();
468 if (S && isValidDeadStmt(S))
475 static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1,
476 const std::pair<const CFGBlock *, const Stmt *> *p2) {
477 if (p1->second->getBeginLoc() < p2->second->getBeginLoc())
479 if (p2->second->getBeginLoc() < p1->second->getBeginLoc())
484 unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
485 clang::reachable_code::Callback &CB) {
490 while (!WorkList.empty()) {
491 const CFGBlock *Block = WorkList.pop_back_val();
493 // It is possible that this block has been marked reachable after
495 if (Reachable[Block->getBlockID()])
498 // Look for any dead code within the block.
499 const Stmt *S = findDeadCode(Block);
502 // No dead code. Possibly an empty block. Look at dead predecessors.
503 for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
504 E = Block->pred_end(); I != E; ++I) {
505 if (const CFGBlock *predBlock = *I)
511 // Specially handle macro-expanded code.
512 if (S->getBeginLoc().isMacroID()) {
513 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
517 if (isDeadCodeRoot(Block)) {
518 reportDeadCode(Block, S, CB);
519 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
522 // Record this statement as the possibly best location in a
523 // strongly-connected component of dead code for emitting a
525 DeferredLocs.push_back(std::make_pair(Block, S));
529 // If we didn't find a dead root, then report the dead code with the
530 // earliest location.
531 if (!DeferredLocs.empty()) {
532 llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp);
533 for (DeferredLocsTy::iterator I = DeferredLocs.begin(),
534 E = DeferredLocs.end(); I != E; ++I) {
535 const CFGBlock *Block = I->first;
536 if (Reachable[Block->getBlockID()])
538 reportDeadCode(Block, I->second, CB);
539 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
546 static SourceLocation GetUnreachableLoc(const Stmt *S,
549 R1 = R2 = SourceRange();
551 if (const Expr *Ex = dyn_cast<Expr>(S))
552 S = Ex->IgnoreParenImpCasts();
554 switch (S->getStmtClass()) {
555 case Expr::BinaryOperatorClass: {
556 const BinaryOperator *BO = cast<BinaryOperator>(S);
557 return BO->getOperatorLoc();
559 case Expr::UnaryOperatorClass: {
560 const UnaryOperator *UO = cast<UnaryOperator>(S);
561 R1 = UO->getSubExpr()->getSourceRange();
562 return UO->getOperatorLoc();
564 case Expr::CompoundAssignOperatorClass: {
565 const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S);
566 R1 = CAO->getLHS()->getSourceRange();
567 R2 = CAO->getRHS()->getSourceRange();
568 return CAO->getOperatorLoc();
570 case Expr::BinaryConditionalOperatorClass:
571 case Expr::ConditionalOperatorClass: {
572 const AbstractConditionalOperator *CO =
573 cast<AbstractConditionalOperator>(S);
574 return CO->getQuestionLoc();
576 case Expr::MemberExprClass: {
577 const MemberExpr *ME = cast<MemberExpr>(S);
578 R1 = ME->getSourceRange();
579 return ME->getMemberLoc();
581 case Expr::ArraySubscriptExprClass: {
582 const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S);
583 R1 = ASE->getLHS()->getSourceRange();
584 R2 = ASE->getRHS()->getSourceRange();
585 return ASE->getRBracketLoc();
587 case Expr::CStyleCastExprClass: {
588 const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S);
589 R1 = CSC->getSubExpr()->getSourceRange();
590 return CSC->getLParenLoc();
592 case Expr::CXXFunctionalCastExprClass: {
593 const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S);
594 R1 = CE->getSubExpr()->getSourceRange();
595 return CE->getBeginLoc();
597 case Stmt::CXXTryStmtClass: {
598 return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc();
600 case Expr::ObjCBridgedCastExprClass: {
601 const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S);
602 R1 = CSC->getSubExpr()->getSourceRange();
603 return CSC->getLParenLoc();
607 R1 = S->getSourceRange();
608 return S->getBeginLoc();
611 void DeadCodeScan::reportDeadCode(const CFGBlock *B,
613 clang::reachable_code::Callback &CB) {
614 // Classify the unreachable code found, or suppress it in some cases.
615 reachable_code::UnreachableKind UK = reachable_code::UK_Other;
617 if (isa<BreakStmt>(S)) {
618 UK = reachable_code::UK_Break;
619 } else if (isTrivialDoWhile(B, S) || isBuiltinUnreachable(S) ||
620 isBuiltinAssumeFalse(B, S, C)) {
623 else if (isDeadReturn(B, S)) {
624 UK = reachable_code::UK_Return;
627 SourceRange SilenceableCondVal;
629 if (UK == reachable_code::UK_Other) {
630 // Check if the dead code is part of the "loop target" of
631 // a for/for-range loop. This is the block that contains
632 // the increment code.
633 if (const Stmt *LoopTarget = B->getLoopTarget()) {
634 SourceLocation Loc = LoopTarget->getBeginLoc();
635 SourceRange R1(Loc, Loc), R2;
637 if (const ForStmt *FS = dyn_cast<ForStmt>(LoopTarget)) {
638 const Expr *Inc = FS->getInc();
639 Loc = Inc->getBeginLoc();
640 R2 = Inc->getSourceRange();
643 CB.HandleUnreachable(reachable_code::UK_Loop_Increment,
644 Loc, SourceRange(), SourceRange(Loc, Loc), R2);
648 // Check if the dead block has a predecessor whose branch has
649 // a configuration value that *could* be modified to
650 // silence the warning.
651 CFGBlock::const_pred_iterator PI = B->pred_begin();
652 if (PI != B->pred_end()) {
653 if (const CFGBlock *PredBlock = PI->getPossiblyUnreachableBlock()) {
654 const Stmt *TermCond =
655 PredBlock->getTerminatorCondition(/* strip parens */ false);
656 isConfigurationValue(TermCond, PP, &SilenceableCondVal);
662 SourceLocation Loc = GetUnreachableLoc(S, R1, R2);
663 CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2);
666 //===----------------------------------------------------------------------===//
667 // Reachability APIs.
668 //===----------------------------------------------------------------------===//
670 namespace clang { namespace reachable_code {
672 void Callback::anchor() { }
674 unsigned ScanReachableFromBlock(const CFGBlock *Start,
675 llvm::BitVector &Reachable) {
676 return scanFromBlock(Start, Reachable, /* SourceManager* */ nullptr, false);
679 void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
682 CFG *cfg = AC.getCFG();
686 // Scan for reachable blocks from the entrance of the CFG.
687 // If there are no unreachable blocks, we're done.
688 llvm::BitVector reachable(cfg->getNumBlockIDs());
689 unsigned numReachable =
690 scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable);
691 if (numReachable == cfg->getNumBlockIDs())
694 // If there aren't explicit EH edges, we should include the 'try' dispatch
696 if (!AC.getCFGBuildOptions().AddEHEdges) {
697 for (CFG::try_block_iterator I = cfg->try_blocks_begin(),
698 E = cfg->try_blocks_end() ; I != E; ++I) {
699 numReachable += scanMaybeReachableFromBlock(*I, PP, reachable);
701 if (numReachable == cfg->getNumBlockIDs())
705 // There are some unreachable blocks. We need to find the root blocks that
706 // contain code that should be considered unreachable.
707 for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
708 const CFGBlock *block = *I;
709 // A block may have been marked reachable during this loop.
710 if (reachable[block->getBlockID()])
713 DeadCodeScan DS(reachable, PP, AC.getASTContext());
714 numReachable += DS.scanBackwards(block, CB);
716 if (numReachable == cfg->getNumBlockIDs())
721 }} // end namespace clang::reachable_code