1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This checker defines the attack surface for generic taint propagation.
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are underconstrained.
15 //===----------------------------------------------------------------------===//
18 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
22 #include "clang/StaticAnalyzer/Core/Checker.h"
23 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27 #include <initializer_list>
30 using namespace clang;
32 using namespace taint;
/// Call-expression checker for generic taint propagation. It subscribes to
/// both the pre-statement and the post-statement CallExpr callbacks (see the
/// Checker<...> base list below).
35 class GenericTaintChecker
36 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
38 static void *getTag() {
43 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
45 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
47 void printState(raw_ostream &Out, ProgramStateRef State,
48 const char *NL, const char *Sep) const override;
51 static const unsigned InvalidArgIndex = UINT_MAX;
52 /// Denotes the return value.
53 static const unsigned ReturnValueIndex = UINT_MAX - 1;
55 mutable std::unique_ptr<BugType> BT;
56 void initBugType() const {
58 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
61 /// Catch taint related bugs. Check if tainted data is passed to a
63 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
65 /// Add taint sources on a pre-visit.
66 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
68 /// Propagate taint generated at pre-visit.
69 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
71 /// Check if the region the expression evaluates to is the standard input,
72 /// and thus, is tainted.
73 static bool isStdin(const Expr *E, CheckerContext &C);
75 /// Given a pointer argument, return the value it points to.
76 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
78 /// Check for CWE-134: Uncontrolled Format String.
79 static const char MsgUncontrolledFormatString[];
80 bool checkUncontrolledFormatString(const CallExpr *CE,
81 CheckerContext &C) const;
84 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
85 /// CWE-78, "Failure to Sanitize Data into an OS Command"
86 static const char MsgSanitizeSystemArgs[];
87 bool checkSystemCall(const CallExpr *CE, StringRef Name,
88 CheckerContext &C) const;
90 /// Check if tainted data is used as a buffer size in strn.. functions,
92 static const char MsgTaintedBufferSize[];
93 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
94 CheckerContext &C) const;
96 /// Generate a report if the expression is tainted or points to tainted data.
97 bool generateReportIfTainted(const Expr *E, const char Msg[],
98 CheckerContext &C) const;
100 using ArgVector = SmallVector<unsigned, 2>;
102 /// A struct used to specify taint propagation rules for a function.
104 /// If any of the possible taint source arguments is tainted, all of the
105 /// destination arguments should also be tainted. Use InvalidArgIndex in the
106 /// src list to specify that all of the arguments can introduce taint. Use
107 /// InvalidArgIndex in the dst arguments to signify that all the non-const
108 /// pointer and reference arguments might be tainted on return. If
109 /// ReturnValueIndex is added to the dst list, the return value will be
111 struct TaintPropagationRule {
112 enum class VariadicType { None, Src, Dst };
114 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
117 /// List of arguments which can be taint sources and should be checked.
119 /// List of arguments which should be tainted on function return.
121 /// Index for the first variadic parameter if it exists.
122 unsigned VariadicIndex;
123 /// Show when a function has variadic parameters. If it has, it marks all
124 /// of them as source or destination.
125 VariadicType VarType;
126 /// Special function for tainted source determination. If defined, it can
127 /// override the default behavior.
128 PropagationFuncType PropagationFunc;
130 TaintPropagationRule()
131 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
132 PropagationFunc(nullptr) {}
134 TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135 std::initializer_list<unsigned> &&Dst,
136 VariadicType Var = VariadicType::None,
137 unsigned VarIndex = InvalidArgIndex,
138 PropagationFuncType Func = nullptr)
139 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
140 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
142 /// Get the propagation rule for a given function.
143 static TaintPropagationRule
144 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
147 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
148 void addDstArg(unsigned A) { DstArgs.push_back(A); }
150 bool isNull() const {
151 return SrcArgs.empty() && DstArgs.empty() &&
152 VariadicType::None == VarType;
155 bool isDestinationArgument(unsigned ArgNum) const {
156 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
159 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
161 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
164 if (!E->getType().getTypePtr()->isPointerType())
167 Optional<SVal> V = getPointedToSVal(C, E);
168 return (V && isTainted(State, *V));
171 /// Pre-process a function which propagates taint according to the
173 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
175 // Functions for custom taintedness propagation.
176 static bool postSocket(bool IsTainted, const CallExpr *CE,
// Out-of-class definitions for the static index constants and the diagnostic
// message strings declared in GenericTaintChecker.
181 const unsigned GenericTaintChecker::ReturnValueIndex;
182 const unsigned GenericTaintChecker::InvalidArgIndex;
184 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
185 "Untrusted data is used as a format string "
186 "(CWE-134: Uncontrolled Format String)";
188 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
189 "Untrusted data is passed to a system call "
190 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
192 const char GenericTaintChecker::MsgTaintedBufferSize[] =
193 "Untrusted data is used to specify the buffer size "
194 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
195 "for character data and the null terminator)";
197 } // end of anonymous namespace
199 /// A set which is used to pass information from call pre-visit instruction
200 /// to the call post-visit. The values are unsigned integers, which are either
201 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
202 /// points to data, which should be tainted on return.
203 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
/// Builds the taint propagation rule for the callee described by \p FDecl and
/// \p Name. The visible lookup happens in three stages:
///   1. an exact-name llvm::StringSwitch over \p Name;
///   2. a switch over the value of FDecl->getMemoryFunctionKind() for the
///      memory setting/copying builtins;
///   3. a chain of C.isCLibraryFunction() checks for functions that may be
///      defined as builtins.
/// A default-constructed (empty) rule is returned when nothing matches.
205 GenericTaintChecker::TaintPropagationRule
206 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
208 // TODO: Currently, we might lose precision here: we always mark a return
209 // value as tainted even if it's just a pointer, pointing to tainted data.
211 // Check for exact name match for functions without builtin substitutes.
212 TaintPropagationRule Rule =
213 llvm::StringSwitch<TaintPropagationRule>(Name)
215 // TODO: Add support for vfscanf & family.
216 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
220 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221 .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
226 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
228 &TaintPropagationRule::postSocket))
229 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
230 // Propagating functions
231 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
234 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
235 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
239 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
240 .Case("getdelim", TaintPropagationRule({3}, {0}))
241 .Case("getline", TaintPropagationRule({2}, {0}))
242 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
244 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
245 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
250 .Default(TaintPropagationRule());
255 // Check if it's one of the memory setting/copying functions.
256 // This check is specialized but faster than calling isCLibraryFunction.
258 if ((BId = FDecl->getMemoryFunctionKind()))
260 case Builtin::BImemcpy:
261 case Builtin::BImemmove:
262 case Builtin::BIstrncpy:
263 case Builtin::BIstrncat:
264 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
265 case Builtin::BIstrlcpy:
266 case Builtin::BIstrlcat:
267 return TaintPropagationRule({1, 2}, {0});
268 case Builtin::BIstrndup:
269 return TaintPropagationRule({0, 1}, {ReturnValueIndex});
275 // Process all other functions which could be defined as builtins.
277 if (C.isCLibraryFunction(FDecl, "snprintf"))
278 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
280 else if (C.isCLibraryFunction(FDecl, "sprintf"))
281 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
283 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
284 C.isCLibraryFunction(FDecl, "stpcpy") ||
285 C.isCLibraryFunction(FDecl, "strcat"))
286 return TaintPropagationRule({1}, {0, ReturnValueIndex});
287 else if (C.isCLibraryFunction(FDecl, "bcopy"))
288 return TaintPropagationRule({0, 2}, {1});
289 else if (C.isCLibraryFunction(FDecl, "strdup") ||
290 C.isCLibraryFunction(FDecl, "strdupa"))
291 return TaintPropagationRule({0}, {ReturnValueIndex});
292 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
293 return TaintPropagationRule({0}, {ReturnValueIndex});
296 // Skipping the following functions, since they might be used for cleansing
297 // or smart memory copy:
298 // - memccpy - copying until hitting a special character.
300 return TaintPropagationRule();
/// Pre-statement callback for call expressions. Per the comments below, the
/// taint-related error checks come first; afterwards addSourcesPre() is called
/// for the same call.
303 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
304 CheckerContext &C) const {
305 // Check for taintedness related errors first: system call, uncontrolled
306 // format string, tainted buffer size.
310 // Marks the function's arguments and/or return value tainted if it is present in
312 addSourcesPre(CE, C);
/// Post-statement callback for call expressions; forwards to
/// propagateFromPre() for the same call.
315 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
316 CheckerContext &C) const {
317 // Set the marked values as tainted. The return value is only accessible from
319 propagateFromPre(CE, C);
/// Prints the taint information of \p State to \p Out by delegating to
/// printTaint(), passing \p NL and \p Sep through unchanged.
322 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
323 const char *NL, const char *Sep) const {
324 printTaint(State, Out, NL, Sep);
/// Looks up the taint propagation rule for the callee of \p CE. When the rule
/// is not null, the rule's process() computes a new state, which is handed to
/// C.addTransition().
327 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
328 CheckerContext &C) const {
329 ProgramStateRef State = nullptr;
330 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
// Guard: a missing callee declaration, or one whose kind is not
// Decl::Function, is treated separately from the rule lookup below.
331 if (!FDecl || FDecl->getKind() != Decl::Function)
334 StringRef Name = C.getCalleeName(FDecl);
338 // First, try generating a propagation rule for this function.
339 TaintPropagationRule Rule =
340 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
341 if (!Rule.isNull()) {
342 State = Rule.process(CE, C);
345 C.addTransition(State);
351 C.addTransition(State);
/// Applies the taint that was scheduled in the TaintArgsOnPostVisit set.
/// ReturnValueIndex entries taint the call expression itself; other entries
/// are argument indices whose pointed-to value is tainted. The set is removed
/// from the state afterwards, and a transition is added if the state changed.
354 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
355 CheckerContext &C) const {
356 ProgramStateRef State = C.getState();
358 // Depending on what was tainted at pre-visit, we determined a set of
359 // arguments which should be tainted after the function returns. These are
360 // stored in the state as TaintArgsOnPostVisit set.
361 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
362 if (TaintArgs.isEmpty())
365 for (unsigned ArgNum : TaintArgs) {
366 // Special handling for the tainted return value.
367 if (ArgNum == ReturnValueIndex) {
368 State = addTaint(State, CE, C.getLocationContext());
372 // The arguments are pointer arguments. The data they are pointing at is
373 // tainted after the call.
// Guard against indices beyond the arguments actually passed to the call.
374 if (CE->getNumArgs() < (ArgNum + 1))
376 const Expr *Arg = CE->getArg(ArgNum);
377 Optional<SVal> V = getPointedToSVal(C, Arg);
379 State = addTaint(State, *V);
382 // Clear up the taint info from the state.
383 State = State->remove<TaintArgsOnPostVisit>();
385 if (State != C.getState()) {
386 C.addTransition(State);
/// Runs the taint-related bug checks for the call \p CE, in this order:
/// uncontrolled format string, system call arguments, tainted buffer size.
/// The latter two need the callee's FunctionDecl and name.
392 bool GenericTaintChecker::checkPre(const CallExpr *CE,
393 CheckerContext &C) const {
395 if (checkUncontrolledFormatString(CE, C))
398 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
399 if (!FDecl || FDecl->getKind() != Decl::Function)
402 StringRef Name = C.getCalleeName(FDecl);
406 if (checkSystemCall(CE, Name, C))
409 if (checkTaintedBufferSize(CE, FDecl, C))
/// Loads the value stored at the location that \p Arg evaluates to. The load
/// uses the pointee type of \p Arg's canonical type, with char substituted for
/// void pointees.
415 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
417 ProgramStateRef State = C.getState();
418 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
419 if (AddrVal.isUnknownOrUndef())
422 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
426 QualType ArgTy = Arg->getType().getCanonicalType();
427 if (!ArgTy->isPointerType())
430 QualType ValTy = ArgTy->getPointeeType();
432 // Do not dereference void pointers. Treat them as byte pointers instead.
433 // FIXME: we might want to consider more than just the first byte.
434 if (ValTy->isVoidType())
435 ValTy = C.getASTContext().CharTy;
437 return State->getSVal(*AddrLoc, ValTy);
/// Evaluates this rule against the call \p CE. It first determines whether
/// any source (a listed source argument, or a variadic argument when VarType
/// is Src) is tainted, then lets PropagationFunc adjust that result. Finally
/// it records in the TaintArgsOnPostVisit set which destinations (return
/// value, listed arguments, variadic pointer/reference arguments) are to be
/// tainted after the call.
441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442 CheckerContext &C) const {
443 ProgramStateRef State = C.getState();
445 // Check for taint in arguments.
// IsTainted starts out true and is only overwritten inside the loops below,
// so a rule with an empty SrcArgs list leaves it true after this loop.
446 bool IsTainted = true;
447 for (unsigned ArgNum : SrcArgs) {
448 if (ArgNum >= CE->getNumArgs())
450 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
454 // Check for taint in variadic arguments.
455 if (!IsTainted && VariadicType::Src == VarType) {
456 // Check if any of the arguments is tainted
457 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
458 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
464 IsTainted = PropagationFunc(IsTainted, CE, C);
469 // Mark the arguments which should be tainted after the function returns.
470 for (unsigned ArgNum : DstArgs) {
471 // Should mark the return value?
472 if (ArgNum == ReturnValueIndex) {
473 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
477 // Mark the given argument.
478 assert(ArgNum < CE->getNumArgs());
479 State = State->add<TaintArgsOnPostVisit>(ArgNum);
482 // Mark all variadic arguments tainted if present.
483 if (VariadicType::Dst == VarType) {
484 // For all pointer and references that were passed in:
485 // If they are not pointing to const data, mark data as tainted.
486 // TODO: So far we are just going one level down; ideally we'd need to
488 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
489 const Expr *Arg = CE->getArg(i);
490 // Process pointer argument.
491 const Type *ArgTy = Arg->getType().getTypePtr();
492 QualType PType = ArgTy->getPointeeType();
493 if ((!PType.isNull() && !PType.isConstQualified()) ||
494 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
495 State = State->add<TaintArgsOnPostVisit>(i);
502 // If argument 0 (protocol domain) is network, the return value should get taint.
// The domain is identified textually: the macro name or spelling found at the
// source location of argument 0 is compared against a fixed list of names.
503 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
506 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
507 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
508 // White list the internal communication protocols.
509 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
510 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
/// Checks whether \p E evaluates to a symbolic region whose symbol is a
/// SymbolRegionValue over a DeclRegion, where the declaration is an extern "C"
/// variable whose name contains "stdin" and whose type is a pointer to the
/// AST context's FILE type.
516 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
517 ProgramStateRef State = C.getState();
518 SVal Val = C.getSVal(E);
520 // stdin is a pointer, so it would be a region.
521 const MemRegion *MemReg = Val.getAsRegion();
523 // The region should be symbolic, we do not know its value.
524 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
528 // Get its symbol and find the declaration region it's pointing to.
529 const SymbolRegionValue *Sm =
530 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
533 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
537 // This region corresponds to a declaration, find out if it's a global/extern
538 // variable named stdin with the proper type.
539 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
540 D = D->getCanonicalDecl();
// Note: the name test is a substring match, not an exact comparison.
541 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
542 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
543 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544 C.getASTContext().getFILEType().getCanonicalType())
/// Determines which argument of the call \p CE is a printf-style format
/// string and stores its index in \p ArgNum. Two sources are consulted: the
/// callee's FormatAttr attributes of type "printf", and a name heuristic for
/// functions whose name contains "setproctitle".
551 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
552 const CheckerContext &C,
553 unsigned int &ArgNum) {
554 // Find if the function contains a format string argument.
555 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
556 // vsnprintf, syslog, custom annotated functions.
557 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
560 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
// The attribute's format index is shifted down by one to obtain the call
// argument index (presumably the attribute index is 1-based; confirm).
561 ArgNum = Format->getFormatIdx() - 1;
562 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
566 // Or if a function is named setproctitle (this is a heuristic).
567 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
/// Emits a non-fatal bug report with message \p Msg when \p E points to
/// tainted data or is itself tainted. The pointed-to value is checked first;
/// whichever value is found tainted is handed to a TaintBugVisitor attached
/// to the report, and the source range of \p E is added to the report.
575 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
577 CheckerContext &C) const {
581 ProgramStateRef State = C.getState();
582 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
584 if (PointedToSVal && isTainted(State, *PointedToSVal))
585 TaintedSVal = *PointedToSVal;
586 else if (isTainted(State, E, C.getLocationContext()))
587 TaintedSVal = C.getSVal(E);
591 // Generate diagnostic.
592 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
594 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
595 report->addRange(E->getSourceRange());
596 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
597 C.emitReport(std::move(report));
/// Locates the format string argument of the call \p CE via
/// getPrintfFormatArgumentNum() and reports MsgUncontrolledFormatString if
/// that argument is tainted or points to tainted data.
603 bool GenericTaintChecker::checkUncontrolledFormatString(
604 const CallExpr *CE, CheckerContext &C) const {
605 // Check if the function contains a format string argument.
606 unsigned int ArgNum = 0;
607 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
610 // If either the format string content or the pointer itself are tainted,
612 return generateReportIfTainted(CE->getArg(ArgNum),
613 MsgUncontrolledFormatString, C);
/// Maps the callee name \p Name to the index of the argument to inspect (via
/// an llvm::StringSwitch) and reports MsgSanitizeSystemArgs if that argument
/// of \p CE is tainted or points to tainted data.
616 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
617 CheckerContext &C) const {
618 // TODO: It might make sense to run this check on demand. In some cases,
619 // we should check if the environment has been cleansed here. We also might
620 // need to know if the user was reset before these calls (seteuid).
621 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
// UINT_MAX acts as the "no argument to check" marker here; the second
// condition guards against calls with too few arguments.
634 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
637 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
640 // TODO: Should this check be a part of the CString checker?
641 // If yes, should taint be a global setting?
/// Picks the buffer-size argument of the callee \p FDecl, first from its
/// memory-function builtin kind and then from library-function name checks,
/// and reports MsgTaintedBufferSize if that argument of \p CE is tainted or
/// points to tainted data.
642 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
643 const FunctionDecl *FDecl,
644 CheckerContext &C) const {
645 // If the function has a buffer size argument, set ArgNum.
646 unsigned ArgNum = InvalidArgIndex;
648 if ((BId = FDecl->getMemoryFunctionKind()))
650 case Builtin::BImemcpy:
651 case Builtin::BImemmove:
652 case Builtin::BIstrncpy:
655 case Builtin::BIstrndup:
// Fall back to name-based library-function checks while ArgNum is still
// InvalidArgIndex.
662 if (ArgNum == InvalidArgIndex) {
663 if (C.isCLibraryFunction(FDecl, "malloc") ||
664 C.isCLibraryFunction(FDecl, "calloc") ||
665 C.isCLibraryFunction(FDecl, "alloca"))
667 else if (C.isCLibraryFunction(FDecl, "memccpy"))
669 else if (C.isCLibraryFunction(FDecl, "realloc"))
671 else if (C.isCLibraryFunction(FDecl, "bcopy"))
// Report only when an argument index was chosen and the call actually has
// that many arguments.
675 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
676 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
/// Registers GenericTaintChecker with the given checker manager.
679 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
680 mgr.registerChecker<GenericTaintChecker>();
683 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {