1 //===-- ReachableCode.cpp - Code Reachability Analysis --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a flow-sensitive, path-insensitive analysis for
10 // determining reachable blocks within a CFG.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Analysis/Analyses/ReachableCode.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/ExprObjC.h"
18 #include "clang/AST/ParentMap.h"
19 #include "clang/AST/StmtCXX.h"
20 #include "clang/Analysis/AnalysisDeclContext.h"
21 #include "clang/Analysis/CFG.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Lex/Preprocessor.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/SmallVector.h"
27 using namespace clang;
29 //===----------------------------------------------------------------------===//
30 // Core Reachability Analysis routines.
31 //===----------------------------------------------------------------------===//
/// Tests whether \p Ex is a DeclRefExpr whose referenced declaration is an
/// EnumConstantDecl.
33 static bool isEnumConstant(const Expr *Ex) {
34 const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Ex);
// NOTE(review): DR is dereferenced below and dyn_cast can yield null; confirm
// that a null guard sits between the cast and this return.
37 return isa<EnumConstantDecl>(DR->getDecl());
/// Reports whether \p Ex, once parentheses and casts are stripped, is one of
/// the literal kinds tested below (integer, string, C++ bool, Objective-C bool,
/// character).
/// NOTE(review): the disjunction ends in a dangling '||'; confirm what the
/// final operand is before relying on this summary.
40 static bool isTrivialExpression(const Expr *Ex) {
41 Ex = Ex->IgnoreParenCasts();
42 return isa<IntegerLiteral>(Ex) || isa<StringLiteral>(Ex) ||
43 isa<CXXBoolLiteralExpr>(Ex) || isa<ObjCBoolLiteralExpr>(Ex) ||
44 isa<CharacterLiteral>(Ex) ||
/// Tests whether \p S is the condition of a do...while statement that
/// terminates \p B, and that condition is a trivial expression.
/// NOTE(review): the result for blocks that do not end in a DoStmt is
/// presumably false - confirm.
48 static bool isTrivialDoWhile(const CFGBlock *B, const Stmt *S) {
49 // Check if the block ends with a do...while() and see if 'S' is the
// condition of that loop.
51 if (const Stmt *Term = B->getTerminatorStmt()) {
52 if (const DoStmt *DS = dyn_cast<DoStmt>(Term)) {
// Compare against the condition with parentheses and casts removed.
53 const Expr *Cond = DS->getCond()->IgnoreParenCasts();
54 return Cond == S && isTrivialExpression(Cond);
/// Tests whether \p S is a DeclRefExpr naming a FunctionDecl that has an
/// identifier and whose builtin ID is __builtin_unreachable.
60 static bool isBuiltinUnreachable(const Stmt *S) {
61 if (const auto *DRE = dyn_cast<DeclRefExpr>(S))
62 if (const auto *FDecl = dyn_cast<FunctionDecl>(DRE->getDecl()))
// The identifier check comes first, so the builtin ID is only queried for
// declarations that have an identifier.
63 return FDecl->getIdentifier() &&
64 FDecl->getBuiltinID() == Builtin::BI__builtin_unreachable;
/// Tests whether \p S is the callee (ignoring casts) of a CallExpr that is the
/// last element of \p B and for which CallExpr::isBuiltinAssumeFalse holds.
68 static bool isBuiltinAssumeFalse(const CFGBlock *B, const Stmt *S,
71 // Happens if S is B's terminator and B contains nothing else
72 // (e.g. a CFGBlock containing only a goto).
// Only a statement element at the back of the block can be the call.
75 if (Optional<CFGStmt> CS = B->back().getAs<CFGStmt>()) {
76 if (const auto *CE = dyn_cast<CallExpr>(CS->getStmt())) {
77 return CE->getCallee()->IgnoreCasts() == S && CE->isBuiltinAssumeFalse(C);
/// Decides whether \p S is part of a 'return' statement reached from \p B.
///
/// Elements of the current block are walked in reverse looking for a
/// ReturnStmt; for a return with a value, the answer is whether \p S has a
/// parent in a ParentMap built over that (paren- and cast-stripped) value.
/// When no return is found, the search may move on to a successor block under
/// the conditions described by the comments below.
83 static bool isDeadReturn(const CFGBlock *B, const Stmt *S) {
84 // Look to see if the current control flow ends with a 'return', and see if
85 // 'S' is a substatement. The 'return' may not be the last element in the
86 // block, or may be in a subsequent block because of destructors.
87 const CFGBlock *Current = B;
// Scan the block's elements from last to first.
89 for (CFGBlock::const_reverse_iterator I = Current->rbegin(),
92 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
93 if (const ReturnStmt *RS = dyn_cast<ReturnStmt>(CS->getStmt())) {
96 if (const Expr *RE = RS->getRetValue()) {
97 RE = RE->IgnoreParenCasts();
// Const is cast away to build the ParentMap over the return value.
100 ParentMap PM(const_cast<Expr *>(RE));
101 // If 'S' is in the ParentMap, it is a subexpression of
102 // the return statement.
103 return PM.getParent(S);
109 // Note also that we are restricting the search for the return statement
110 // to stop at control-flow; only part of a return statement may be dead,
111 // without the whole return statement being dead.
112 if (Current->getTerminator().isTemporaryDtorsBranch()) {
113 // Temporary destructors have a predictable control flow, thus we want to
114 // look into the next block for the return statement.
115 // We look into the false branch, as we know the true branch only contains
116 // the call to the destructor.
117 assert(Current->succ_size() == 2);
// The second successor is the false branch.
118 Current = *(Current->succ_begin() + 1);
119 } else if (!Current->getTerminatorStmt() && Current->succ_size() == 1) {
120 // If there is only one successor, we're not dealing with outgoing control
121 // flow. Thus, look into the next block.
122 Current = *Current->succ_begin();
123 if (Current->pred_size() > 1) {
124 // If there is more than one predecessor, we're dealing with incoming
125 // control flow - if the return statement is in that block, it might
126 // well be reachable via a different control flow, thus it's not dead.
130 // We hit control flow or a dead end. Stop searching.
// Every path through the search loop is expected to return, so control should
// never arrive here.
134 llvm_unreachable("Broke out of infinite loop.");
/// Walks outward through macro expansions starting at \p Loc by repeatedly
/// asking \p SM for the immediate macro caller location, for as long as the
/// location is still a macro location.
/// \p Loc must be a macro location on entry (asserted).
/// NOTE(review): the value returned after the loop is not evident from the
/// lines here; confirm whether it is the last macro location seen.
137 static SourceLocation getTopMostMacro(SourceLocation Loc, SourceManager &SM) {
138 assert(Loc.isMacroID());
140 while (Loc.isMacroID()) {
142 Loc = SM.getImmediateMacroCallerLoc(Loc);
147 /// Returns true if the statement is expanded from a configuration macro.
///
/// The decision is based on the begin location of \p S. The macro names
/// "YES"/"NO" and, when not compiling C++, "false"/"true" are looked up at the
/// top-most macro location and receive special treatment.
/// NOTE(review): \p IgnoreYES_NO presumably selects the "YES"/"NO" branch -
/// confirm against the branch condition.
148 static bool isExpandedFromConfigurationMacro(const Stmt *S,
150 bool IgnoreYES_NO = false) {
151 // FIXME: This is not very precise. Here we just check to see if the
152 // value comes from a macro, but we can do much better. This is likely
153 // to be overly conservative. This logic is factored into a separate function
154 // so that we can refine it later.
155 SourceLocation L = S->getBeginLoc();
157 SourceManager &SM = PP.getSourceManager();
159 // The Objective-C constant 'YES' and 'NO'
160 // are defined as macros. Do not treat them
161 // as configuration values.
162 SourceLocation TopL = getTopMostMacro(L, SM);
163 StringRef MacroName = PP.getImmediateMacroName(TopL);
164 if (MacroName == "YES" || MacroName == "NO")
166 } else if (!PP.getLangOpts().CPlusPlus) {
167 // Do not treat C 'false' and 'true' macros as configuration values.
168 SourceLocation TopL = getTopMostMacro(L, SM);
169 StringRef MacroName = PP.getImmediateMacroName(TopL);
170 if (MacroName == "false" || MacroName == "true")
// Forward declaration: the Stmt overload below calls this ValueDecl overload
// before its definition appears.
178 static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP);
180 /// Returns true if the statement represents a configuration value.
182 /// A configuration value is something usually determined at compile-time
183 /// to conditionally always execute some branch. Such guards are for
184 /// "sometimes unreachable" code. Such code is usually not interesting
185 /// to report as unreachable, and may mask truly unreachable code within
///
/// \param S the statement (typically a branch condition) to classify.
/// \param SilenceableCondVal if non-null, receives the source range of a
///        literal (possibly extended to an enclosing '!') found in \p S; it is
///        only written while its begin location is still invalid.
/// \param IncludeIntegers whether boolean/integer literals may count; cleared
///        for operands of binary operators that are neither logical nor
///        comparison operators.
/// \param WrappedInParens set when recursing through a non-macro ParenExpr.
187 static bool isConfigurationValue(const Stmt *S,
189 SourceRange *SilenceableCondVal = nullptr,
190 bool IncludeIntegers = true,
191 bool WrappedInParens = false) {
// Look through implicit nodes first, then through casts.
195 if (const auto *Ex = dyn_cast<Expr>(S))
196 S = Ex->IgnoreImplicit();
198 if (const auto *Ex = dyn_cast<Expr>(S))
199 S = Ex->IgnoreCasts();
201 // Special case looking for the sigil '()' around an integer literal.
202 if (const ParenExpr *PE = dyn_cast<ParenExpr>(S))
// Parentheses that come from a macro expansion do not count as the sigil.
203 if (!PE->getBeginLoc().isMacroID())
204 return isConfigurationValue(PE->getSubExpr(), PP, SilenceableCondVal,
205 IncludeIntegers, true);
207 if (const Expr *Ex = dyn_cast<Expr>(S))
208 S = Ex->IgnoreCasts();
210 bool IgnoreYES_NO = false;
212 switch (S->getStmtClass()) {
213 case Stmt::CallExprClass: {
// A call counts only if its callee resolves to a constexpr function.
214 const FunctionDecl *Callee =
215 dyn_cast_or_null<FunctionDecl>(cast<CallExpr>(S)->getCalleeDecl());
216 return Callee ? Callee->isConstexpr() : false;
218 case Stmt::DeclRefExprClass:
219 return isConfigurationValue(cast<DeclRefExpr>(S)->getDecl(), PP);
220 case Stmt::ObjCBoolLiteralExprClass:
223 case Stmt::CXXBoolLiteralExprClass:
224 case Stmt::IntegerLiteralClass: {
225 const Expr *E = cast<Expr>(S);
226 if (IncludeIntegers) {
// Record the literal's range only if no range has been recorded yet.
227 if (SilenceableCondVal && !SilenceableCondVal->getBegin().isValid())
228 *SilenceableCondVal = E->getSourceRange();
229 return WrappedInParens || isExpandedFromConfigurationMacro(E, PP, IgnoreYES_NO);
233 case Stmt::MemberExprClass:
234 return isConfigurationValue(cast<MemberExpr>(S)->getMemberDecl(), PP);
235 case Stmt::UnaryExprOrTypeTraitExprClass:
237 case Stmt::BinaryOperatorClass: {
238 const BinaryOperator *B = cast<BinaryOperator>(S);
239 // Only include raw integers (not enums) as configuration
240 // values if they are used in a logical or comparison operator
242 IncludeIntegers &= (B->isLogicalOp() || B->isComparisonOp());
// NOTE(review): the operator joining the LHS and RHS results is not visible
// here - confirm how the two are combined.
243 return isConfigurationValue(B->getLHS(), PP, SilenceableCondVal,
245 isConfigurationValue(B->getRHS(), PP, SilenceableCondVal,
248 case Stmt::UnaryOperatorClass: {
249 const UnaryOperator *UO = cast<UnaryOperator>(S);
// Logical-not is the only unary operator handled by the code below.
250 if (UO->getOpcode() != UO_LNot)
// Remember whether the range was still unset before visiting the operand.
252 bool SilenceableCondValNotSet =
253 SilenceableCondVal && SilenceableCondVal->getBegin().isInvalid();
254 bool IsSubExprConfigValue =
255 isConfigurationValue(UO->getSubExpr(), PP, SilenceableCondVal,
256 IncludeIntegers, WrappedInParens);
257 // Update the silenceable condition value source range only if the range
258 // was set directly by the child expression.
259 if (SilenceableCondValNotSet &&
260 SilenceableCondVal->getBegin().isValid() &&
261 *SilenceableCondVal ==
262 UO->getSubExpr()->IgnoreCasts()->getSourceRange())
// Widen the recorded range to cover the whole '!' expression.
263 *SilenceableCondVal = UO->getSourceRange();
264 return IsSubExprConfigValue;
/// Declaration overload of isConfigurationValue.
///
/// Enum constants are classified by their initializer expression. Variables
/// are classified by storage: non-local storage takes the "globals" heuristic
/// branch, and remaining variables count when their type is locally
/// const-qualified.
271 static bool isConfigurationValue(const ValueDecl *D, Preprocessor &PP) {
272 if (const EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D))
273 return isConfigurationValue(ED->getInitExpr(), PP);
274 if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
275 // As a heuristic, treat globals as configuration values. Note
276 // that we only will get here if Sema evaluated this
277 // condition to a constant expression, which means the global
278 // had to be declared in a way to be a truly constant value.
279 // We could generalize this to local variables, but it isn't
280 // clear if those truly represent configuration values that
281 // gate unreachable code.
282 if (!VD->hasLocalStorage())
285 // As a heuristic, locals that have been marked 'const' explicitly
286 // can be treated as configuration values as well.
287 return VD->getType().isLocalConstQualified();
292 /// Returns true if we should always explore all successors of a block.
///
/// Switch terminators and binary-operator terminators are handled specially;
/// otherwise the decision is whether the block's terminator condition (with
/// parentheses kept) is a configuration value.
293 static bool shouldTreatSuccessorsAsReachable(const CFGBlock *B,
295 if (const Stmt *Term = B->getTerminatorStmt()) {
296 if (isa<SwitchStmt>(Term))
298 // Specially handle '||' and '&&'.
299 if (isa<BinaryOperator>(Term)) {
// For these terminators the whole operator expression is classified.
300 return isConfigurationValue(Term, PP);
// Parentheses are kept because isConfigurationValue inspects them itself.
304 const Stmt *Cond = B->getTerminatorCondition(/* stripParens */ false);
305 return isConfigurationValue(Cond, PP);
/// Worklist traversal that marks blocks reachable from \p Start in
/// \p Reachable.
///
/// \param Start block the traversal begins at.
/// \param Reachable bit per block ID; bits are set as blocks are reached.
/// \param IncludeSometimesUnreachableEdges when false, successors are never
///        treated as unconditionally reachable.
/// NOTE(review): the unsigned result is presumably the number of blocks newly
/// marked reachable - confirm.
308 static unsigned scanFromBlock(const CFGBlock *Start,
309 llvm::BitVector &Reachable,
311 bool IncludeSometimesUnreachableEdges) {
315 SmallVector<const CFGBlock*, 32> WL;
317 // The entry block may have already been marked reachable
319 if (!Reachable[Start->getBlockID()]) {
321 Reachable[Start->getBlockID()] = true;
326 // Find the reachable blocks from 'Start'.
327 while (!WL.empty()) {
328 const CFGBlock *item = WL.pop_back_val();
330 // There are cases where we want to treat all successors as reachable.
331 // The idea is that some "sometimes unreachable" code is not interesting,
332 // and that we should forge ahead and explore those branches anyway.
333 // This allows us to potentially uncover some "always unreachable" code
334 // within the "sometimes unreachable" code.
335 // Look at the successors and mark them reachable.
// Left unset so that shouldTreatSuccessorsAsReachable is queried only when
// needed, and not again once a value is stored; pre-set to false when
// sometimes-unreachable edges are excluded.
336 Optional<bool> TreatAllSuccessorsAsReachable;
337 if (!IncludeSometimesUnreachableEdges)
338 TreatAllSuccessorsAsReachable = false;
340 for (CFGBlock::const_succ_iterator I = item->succ_begin(),
341 E = item->succ_end(); I != E; ++I) {
342 const CFGBlock *B = *I;
// The edge may also record a block that is only possibly unreachable.
344 const CFGBlock *UB = I->getPossiblyUnreachableBlock();
348 if (!TreatAllSuccessorsAsReachable.hasValue()) {
350 TreatAllSuccessorsAsReachable =
351 shouldTreatSuccessorsAsReachable(item, *PP);
354 if (TreatAllSuccessorsAsReachable.getValue()) {
362 unsigned blockID = B->getBlockID();
363 if (!Reachable[blockID]) {
364 Reachable.set(blockID);
/// Convenience wrapper around scanFromBlock that passes the address of \p PP
/// and enables IncludeSometimesUnreachableEdges.
374 static unsigned scanMaybeReachableFromBlock(const CFGBlock *Start,
376 llvm::BitVector &Reachable) {
377 return scanFromBlock(Start, Reachable, &PP, true);
380 //===----------------------------------------------------------------------===//
381 // Dead Code Scanner.
382 //===----------------------------------------------------------------------===//
// Bit per block ID; set by enqueue() and isDeadCodeRoot() when they queue a
// block.
386 llvm::BitVector Visited;
// Reachability bits owned by the caller (held by reference).
387 llvm::BitVector &Reachable;
// Blocks still to be examined by scanBackwards().
388 SmallVector<const CFGBlock *, 10> WorkList;
// (block, statement) pairs recorded by scanBackwards() for deferred reporting.
392 typedef SmallVector<std::pair<const CFGBlock *, const Stmt *>, 12>
395 DeferredLocsTy DeferredLocs;
// Visited is sized to match the caller's reachability vector.
398 DeadCodeScan(llvm::BitVector &reachable, Preprocessor &PP, ASTContext &C)
399 : Visited(reachable.size()),
400 Reachable(reachable),
// Queues a block unless it is already reachable or visited.
403 void enqueue(const CFGBlock *block);
// Drives the scan from Start and reports findings through CB.
404 unsigned scanBackwards(const CFGBlock *Start,
405 clang::reachable_code::Callback &CB);
// Examines Block's predecessors; see the definition for details.
407 bool isDeadCodeRoot(const CFGBlock *Block);
// Picks a statement in Block suitable for reporting.
409 const Stmt *findDeadCode(const CFGBlock *Block);
// Classifies a dead statement and forwards it to the callback.
411 void reportDeadCode(const CFGBlock *B,
413 clang::reachable_code::Callback &CB);
/// Marks \p block visited and appends it to the work list, unless the block is
/// already marked reachable or visited.
417 void DeadCodeScan::enqueue(const CFGBlock *block) {
418 unsigned blockID = block->getBlockID();
419 if (Reachable[blockID] || Visited[blockID])
421 Visited[blockID] = true;
422 WorkList.push_back(block);
/// Inspects the non-null predecessors of \p Block. Predecessors that are not
/// marked reachable are marked visited and pushed on the work list.
/// NOTE(review): the conditions under which isDeadRoot is cleared are not
/// visible in these lines - confirm before documenting the return value.
425 bool DeadCodeScan::isDeadCodeRoot(const clang::CFGBlock *Block) {
426 bool isDeadRoot = true;
428 for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
429 E = Block->pred_end(); I != E; ++I) {
// Predecessor entries may be null; skip those.
430 if (const CFGBlock *PredBlock = *I) {
431 unsigned blockID = PredBlock->getBlockID();
432 if (Visited[blockID]) {
436 if (!Reachable[blockID]) {
438 Visited[blockID] = true;
439 WorkList.push_back(PredBlock);
/// Filters statements considered for reporting: the begin location is checked
/// for validity, and a BinaryOperator qualifies only if it is not a comma
/// operator.
/// NOTE(review): the results of the invalid-location branch and of the
/// fall-through path are not visible here - confirm.
448 static bool isValidDeadStmt(const Stmt *S) {
449 if (S->getBeginLoc().isInvalid())
451 if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(S))
452 return BO->getOpcode() != BO_Comma;
/// Searches \p Block for a statement accepted by isValidDeadStmt: first among
/// the block's statement elements in order, then the statement of a
/// statement-branch terminator.
456 const Stmt *DeadCodeScan::findDeadCode(const clang::CFGBlock *Block) {
457 for (CFGBlock::const_iterator I = Block->begin(), E = Block->end(); I!=E; ++I)
// Only CFGStmt elements carry a statement to test.
458 if (Optional<CFGStmt> CS = I->getAs<CFGStmt>()) {
459 const Stmt *S = CS->getStmt();
460 if (isValidDeadStmt(S))
// Fall back to the terminator when it is a statement branch.
464 CFGTerminator T = Block->getTerminator();
465 if (T.isStmtBranch()) {
466 const Stmt *S = T.getStmt();
467 if (S && isValidDeadStmt(S))
/// Comparator passed to llvm::array_pod_sort for (block, statement) pairs;
/// compares the begin locations of the two statements in both directions.
/// NOTE(review): presumably yields -1 / 1 / 0 for less / greater / equal -
/// confirm the returned values.
474 static int SrcCmp(const std::pair<const CFGBlock *, const Stmt *> *p1,
475 const std::pair<const CFGBlock *, const Stmt *> *p2) {
476 if (p1->second->getBeginLoc() < p2->second->getBeginLoc())
478 if (p2->second->getBeginLoc() < p1->second->getBeginLoc())
/// Processes the work list looking for dead code to report through \p CB.
///
/// For each popped block that is still unreachable, a representative dead
/// statement is looked up. Macro-expanded statements and dead-code roots cause
/// the blocks reachable from that block to be marked (roots are also
/// reported); other statements are deferred. Deferred entries are later sorted
/// by source location and reported if their block is still unreachable.
/// NOTE(review): the return value is presumably the accumulated 'count' -
/// confirm.
483 unsigned DeadCodeScan::scanBackwards(const clang::CFGBlock *Start,
484 clang::reachable_code::Callback &CB) {
489 while (!WorkList.empty()) {
490 const CFGBlock *Block = WorkList.pop_back_val();
492 // It is possible that this block has been marked reachable after
494 if (Reachable[Block->getBlockID()])
497 // Look for any dead code within the block.
498 const Stmt *S = findDeadCode(Block);
501 // No dead code. Possibly an empty block. Look at dead predecessors.
502 for (CFGBlock::const_pred_iterator I = Block->pred_begin(),
503 E = Block->pred_end(); I != E; ++I) {
504 if (const CFGBlock *predBlock = *I)
510 // Specially handle macro-expanded code.
511 if (S->getBeginLoc().isMacroID()) {
// Mark everything reachable from here; no report is made in the lines shown.
512 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
516 if (isDeadCodeRoot(Block)) {
// Report the root, then mark what it reaches.
517 reportDeadCode(Block, S, CB);
518 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
521 // Record this statement as the possibly best location in a
522 // strongly-connected component of dead code for emitting a
524 DeferredLocs.push_back(std::make_pair(Block, S));
528 // If we didn't find a dead root, then report the dead code with the
529 // earliest location.
530 if (!DeferredLocs.empty()) {
531 llvm::array_pod_sort(DeferredLocs.begin(), DeferredLocs.end(), SrcCmp);
532 for (DeferredLocsTy::iterator I = DeferredLocs.begin(),
533 E = DeferredLocs.end(); I != E; ++I) {
534 const CFGBlock *Block = I->first;
// An earlier report in this loop may have marked this block reachable.
535 if (Reachable[Block->getBlockID()])
537 reportDeadCode(Block, I->second, CB);
538 count += scanMaybeReachableFromBlock(Block, PP, Reachable);
/// Chooses the source location to report for \p S and fills \p R1 / \p R2
/// with related source ranges.
///
/// Both ranges are reset first. Expressions are inspected with parentheses and
/// implicit casts stripped. Statement classes without a dedicated case use the
/// statement's own range and begin location.
545 static SourceLocation GetUnreachableLoc(const Stmt *S,
548 R1 = R2 = SourceRange();
550 if (const Expr *Ex = dyn_cast<Expr>(S))
551 S = Ex->IgnoreParenImpCasts();
553 switch (S->getStmtClass()) {
554 case Expr::BinaryOperatorClass: {
555 const BinaryOperator *BO = cast<BinaryOperator>(S);
556 return BO->getOperatorLoc();
558 case Expr::UnaryOperatorClass: {
559 const UnaryOperator *UO = cast<UnaryOperator>(S);
560 R1 = UO->getSubExpr()->getSourceRange();
561 return UO->getOperatorLoc();
563 case Expr::CompoundAssignOperatorClass: {
564 const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(S);
565 R1 = CAO->getLHS()->getSourceRange();
566 R2 = CAO->getRHS()->getSourceRange();
567 return CAO->getOperatorLoc();
569 case Expr::BinaryConditionalOperatorClass:
570 case Expr::ConditionalOperatorClass: {
// Both conditional forms share the AbstractConditionalOperator interface.
571 const AbstractConditionalOperator *CO =
572 cast<AbstractConditionalOperator>(S);
573 return CO->getQuestionLoc();
575 case Expr::MemberExprClass: {
576 const MemberExpr *ME = cast<MemberExpr>(S);
577 R1 = ME->getSourceRange();
578 return ME->getMemberLoc();
580 case Expr::ArraySubscriptExprClass: {
581 const ArraySubscriptExpr *ASE = cast<ArraySubscriptExpr>(S);
582 R1 = ASE->getLHS()->getSourceRange();
583 R2 = ASE->getRHS()->getSourceRange();
584 return ASE->getRBracketLoc();
586 case Expr::CStyleCastExprClass: {
587 const CStyleCastExpr *CSC = cast<CStyleCastExpr>(S);
588 R1 = CSC->getSubExpr()->getSourceRange();
589 return CSC->getLParenLoc();
591 case Expr::CXXFunctionalCastExprClass: {
592 const CXXFunctionalCastExpr *CE = cast <CXXFunctionalCastExpr>(S);
593 R1 = CE->getSubExpr()->getSourceRange();
594 return CE->getBeginLoc();
596 case Stmt::CXXTryStmtClass: {
// A try statement is reported at the 'catch' of its first handler.
597 return cast<CXXTryStmt>(S)->getHandler(0)->getCatchLoc();
599 case Expr::ObjCBridgedCastExprClass: {
600 const ObjCBridgedCastExpr *CSC = cast<ObjCBridgedCastExpr>(S);
601 R1 = CSC->getSubExpr()->getSourceRange();
602 return CSC->getLParenLoc();
// Fallback: highlight the whole statement and report its begin location.
606 R1 = S->getSourceRange();
607 return S->getBeginLoc();
/// Classifies the dead statement found in \p B and forwards it to \p CB.
///
/// The kind starts as UK_Other and becomes UK_Break for a BreakStmt or
/// UK_Return when isDeadReturn holds; trivial do-while conditions and the
/// unreachable/assume-false builtins get their own branch. For UK_Other, dead
/// code in a loop target is reported as UK_Loop_Increment, and the first
/// predecessor's terminator condition is examined for a silenceable
/// configuration value.
610 void DeadCodeScan::reportDeadCode(const CFGBlock *B,
612 clang::reachable_code::Callback &CB) {
613 // Classify the unreachable code found, or suppress it in some cases.
614 reachable_code::UnreachableKind UK = reachable_code::UK_Other;
616 if (isa<BreakStmt>(S)) {
617 UK = reachable_code::UK_Break;
618 } else if (isTrivialDoWhile(B, S) || isBuiltinUnreachable(S) ||
619 isBuiltinAssumeFalse(B, S, C)) {
622 else if (isDeadReturn(B, S)) {
623 UK = reachable_code::UK_Return;
// Stays an invalid (default) range unless a configuration value is found below.
626 SourceRange SilenceableCondVal;
628 if (UK == reachable_code::UK_Other) {
629 // Check if the dead code is part of the "loop target" of
630 // a for/for-range loop. This is the block that contains
631 // the increment code.
632 if (const Stmt *LoopTarget = B->getLoopTarget()) {
633 SourceLocation Loc = LoopTarget->getBeginLoc();
634 SourceRange R1(Loc, Loc), R2;
636 if (const ForStmt *FS = dyn_cast<ForStmt>(LoopTarget)) {
// For a 'for' statement, point at the increment expression instead.
637 const Expr *Inc = FS->getInc();
638 Loc = Inc->getBeginLoc();
639 R2 = Inc->getSourceRange();
642 CB.HandleUnreachable(reachable_code::UK_Loop_Increment,
643 Loc, SourceRange(), SourceRange(Loc, Loc), R2);
647 // Check if the dead block has a predecessor whose branch has
648 // a configuration value that *could* be modified to
649 // silence the warning.
// Only the first predecessor edge is consulted.
650 CFGBlock::const_pred_iterator PI = B->pred_begin();
651 if (PI != B->pred_end()) {
652 if (const CFGBlock *PredBlock = PI->getPossiblyUnreachableBlock()) {
653 const Stmt *TermCond =
654 PredBlock->getTerminatorCondition(/* strip parens */ false);
// Called for its effect on SilenceableCondVal; the boolean result is unused.
655 isConfigurationValue(TermCond, PP, &SilenceableCondVal);
661 SourceLocation Loc = GetUnreachableLoc(S, R1, R2);
662 CB.HandleUnreachable(UK, Loc, SilenceableCondVal, R1, R2);
665 //===----------------------------------------------------------------------===//
666 // Reachability APIs.
667 //===----------------------------------------------------------------------===//
669 namespace clang { namespace reachable_code {
// Out-of-line definition with an empty body.
// NOTE(review): presumably present to anchor the class's vtable to this
// translation unit - confirm.
671 void Callback::anchor() { }
/// Public entry point that forwards to scanFromBlock with no preprocessor and
/// with sometimes-unreachable edges excluded.
673 unsigned ScanReachableFromBlock(const CFGBlock *Start,
674 llvm::BitVector &Reachable) {
675 return scanFromBlock(Start, Reachable, /* PP */ nullptr, false);
/// Finds unreachable code in the CFG of \p AC and reports it.
///
/// Blocks reachable from the CFG entry are marked first; when the CFG was
/// built without explicit EH edges, the try blocks are used as additional
/// starting points. Each block still unmarked is then handed to a
/// DeadCodeScan, which reports through the callback. The reachable count is
/// compared against the total number of block IDs after each phase.
678 void FindUnreachableCode(AnalysisDeclContext &AC, Preprocessor &PP,
681 CFG *cfg = AC.getCFG();
685 // Scan for reachable blocks from the entrance of the CFG.
686 // If there are no unreachable blocks, we're done.
687 llvm::BitVector reachable(cfg->getNumBlockIDs());
688 unsigned numReachable =
689 scanMaybeReachableFromBlock(&cfg->getEntry(), PP, reachable);
690 if (numReachable == cfg->getNumBlockIDs())
693 // If there aren't explicit EH edges, we should include the 'try' dispatch
695 if (!AC.getCFGBuildOptions().AddEHEdges) {
696 for (CFG::try_block_iterator I = cfg->try_blocks_begin(),
697 E = cfg->try_blocks_end() ; I != E; ++I) {
698 numReachable += scanMaybeReachableFromBlock(*I, PP, reachable);
700 if (numReachable == cfg->getNumBlockIDs())
704 // There are some unreachable blocks. We need to find the root blocks that
705 // contain code that should be considered unreachable.
706 for (CFG::iterator I = cfg->begin(), E = cfg->end(); I != E; ++I) {
707 const CFGBlock *block = *I;
708 // A block may have been marked reachable during this loop.
709 if (reachable[block->getBlockID()])
// A fresh scanner is created for each unreachable starting block.
712 DeadCodeScan DS(reachable, PP, AC.getASTContext());
713 numReachable += DS.scanBackwards(block, CB);
715 if (numReachable == cfg->getNumBlockIDs())
720 }} // end namespace clang::reachable_code