1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This checker defines the attack surface for generic taint propagation.
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
15 //===----------------------------------------------------------------------===//
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
27 #include "llvm/Support/YAMLTraits.h"
30 #include <unordered_map>
33 using namespace clang;
35 using namespace taint;
/// Taint checker built on the pre- and post-statement callbacks for CallExpr.
/// The pre-visit looks for tainted data reaching known sinks and records
/// which values a call should taint; the post-visit applies that taint.
38 class GenericTaintChecker
39 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
41 static void *getTag() {
/// Post-call callback: applies the taint recorded during the pre-visit.
46 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
/// Pre-call callback: sink checks, taint sources/propagation and filters.
48 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
/// Prints the taint information stored in \p State.
50 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
51 const char *Sep) const override;
/// Argument indexes as used by the checker. Unsigned, so the sentinels
/// InvalidArgIndex and ReturnValueIndex can be stored next to real indexes.
53 using ArgVector = SmallVector<unsigned, 2>;
/// Argument indexes as read from the configuration, before they go through
/// convertToArgVector.
54 using SignedArgVector = SmallVector<int, 2>;
/// Whether a function's variadic arguments are taint sources (Src), taint
/// destinations (Dst) or play no role (None).
56 enum class VariadicType { None, Src, Dst };
58 /// Used to parse the configuration file.
/// Holds the three lists read from the "Propagations", "Filters" and "Sinks"
/// keys of the configuration.
59 struct TaintConfiguration {
/// (function name, scope, argument indexes), read from the keys "Name",
/// "Scope" and "Args".
60 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
/// Destination arguments of a propagation entry. Signed, unlike the source
/// arguments; converted with convertToArgVector when the entry is stored.
/// NOTE(review): presumably -1 stands for the return value - confirm.
66 SignedArgVector DstArgs;
71 std::vector<Propagation> Propagations;
72 std::vector<NameScopeArgs> Filters;
73 std::vector<NameScopeArgs> Sinks;
// Copyable and movable; all special members are defaulted.
75 TaintConfiguration() = default;
76 TaintConfiguration(const TaintConfiguration &) = default;
77 TaintConfiguration(TaintConfiguration &&) = default;
78 TaintConfiguration &operator=(const TaintConfiguration &) = default;
79 TaintConfiguration &operator=(TaintConfiguration &&) = default;
82 /// Convert SignedArgVector to ArgVector.
/// \p Mgr and \p Option are used to report an invalid option value.
83 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
84 SignedArgVector Args);
/// Fill CustomPropagations, CustomFilters and CustomSinks from \p Config.
87 void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
88 TaintConfiguration &&Config);
/// Sentinel argument index: the largest unsigned value.
90 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
91 /// Denotes the return value.
92 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
/// Bug type used for the reports. Mutable so that the const member
/// initBugType can set it.
96 mutable std::unique_ptr<BugType> BT;
97 void initBugType() const {
99 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
/// Describes the callee of a call expression: its declaration, its name and
/// its qualified name. Instances are obtained through create().
102 struct FunctionData {
// Copy/move construction only; assignment is deleted (the data members are
// const).
103 FunctionData() = delete;
104 FunctionData(const FunctionData &) = default;
105 FunctionData(FunctionData &&) = default;
106 FunctionData &operator=(const FunctionData &) = delete;
107 FunctionData &operator=(FunctionData &&) = delete;
/// Build the FunctionData for the callee of \p CE. The callee declaration
/// must exist and be of kind Decl::Function or Decl::CXXMethod, and both its
/// name and its qualified name must be non-empty.
/// NOTE(review): presumably an empty Optional is returned otherwise - the
/// failure paths are not visible here.
109 static Optional<FunctionData> create(const CallExpr *CE,
110 const CheckerContext &C) {
111 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
112 if (!FDecl || (FDecl->getKind() != Decl::Function &&
113 FDecl->getKind() != Decl::CXXMethod))
116 StringRef Name = C.getCalleeName(FDecl);
117 std::string FullName = FDecl->getQualifiedNameAsString();
118 if (Name.empty() || FullName.empty())
121 return FunctionData{FDecl, Name, FullName};
/// True if the qualified name starts with \p Scope. This is a plain prefix
/// comparison on the string.
124 bool isInScope(StringRef Scope) const {
125 return StringRef(FullName).startswith(Scope);
128 const FunctionDecl *const FDecl;
129 const StringRef Name;
130 const std::string FullName;
133 /// Catch taint related bugs. Check if tainted data is passed to a
134 /// system call etc. Returns true on matching.
135 bool checkPre(const CallExpr *CE, const FunctionData &FData,
136 CheckerContext &C) const;
138 /// Add taint sources on a pre-visit. Returns true on matching.
139 bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
140 CheckerContext &C) const;
142 /// Mark filter's arguments not tainted on a pre-visit. Returns true on
/// matching.
144 bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
145 CheckerContext &C) const;
147 /// Propagate taint generated at pre-visit. Returns true on matching.
148 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
150 /// Check if the region the expression evaluates to is the standard input,
151 /// and thus, is tainted.
152 static bool isStdin(const Expr *E, CheckerContext &C);
154 /// Given a pointer argument, return the value it points to.
155 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
157 /// Check for CWE-134: Uncontrolled Format String.
158 static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
159 "Untrusted data is used as a format string "
160 "(CWE-134: Uncontrolled Format String)";
161 bool checkUncontrolledFormatString(const CallExpr *CE,
162 CheckerContext &C) const;
165 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
166 /// CWE-78, "Failure to Sanitize Data into an OS Command"
167 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
168 "Untrusted data is passed to a system call "
169 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
/// \p Name is the callee name used to pick the argument to inspect.
170 bool checkSystemCall(const CallExpr *CE, StringRef Name,
171 CheckerContext &C) const;
173 /// Check if tainted data is used as a buffer size in strn.. functions,
/// memory-copying functions and allocators.
175 static constexpr llvm::StringLiteral MsgTaintedBufferSize =
176 "Untrusted data is used to specify the buffer size "
177 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
178 "for character data and the null terminator)";
179 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
180 CheckerContext &C) const;
182 /// Check if tainted data is used as a custom sink's parameter.
183 static constexpr llvm::StringLiteral MsgCustomSink =
184 "Untrusted data is passed to a user-defined sink";
185 bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
186 CheckerContext &C) const;
188 /// Generate a report if the expression is tainted or points to tainted data.
/// \p Msg is the text of the report.
189 bool generateReportIfTainted(const Expr *E, StringRef Msg,
190 CheckerContext &C) const;
192 struct TaintPropagationRule;
/// Maps a function name to a (scope, data) pair. A multimap, so one name can
/// carry several entries that differ in scope.
193 template <typename T>
194 using ConfigDataMap =
195 std::unordered_multimap<std::string, std::pair<std::string, T>>;
/// Function name -> (scope, propagation rule).
196 using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
/// Function name -> (scope, argument indexes).
197 using NameArgMap = ConfigDataMap<ArgVector>;
199 /// Find a function with the given name and scope. Returns the first match
200 /// or the end of the map.
201 template <typename T>
202 static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
203 const FunctionData &FData);
205 /// A struct used to specify taint propagation rules for a function.
207 /// If any of the possible taint source arguments is tainted, all of the
208 /// destination arguments should also be tainted. Use InvalidArgIndex in the
209 /// src list to specify that all of the arguments can introduce taint. Use
210 /// InvalidArgIndex in the dst arguments to signify that all the non-const
211 /// pointer and reference arguments might be tainted on return. If
212 /// ReturnValueIndex is added to the dst list, the return value will be
/// tainted.
214 struct TaintPropagationRule {
/// Signature of a custom propagation function. It receives the taintedness
/// computed so far and the call, and returns the taintedness to use.
215 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
218 /// List of arguments which can be taint sources and should be checked.
220 /// List of arguments which should be tainted on function return.
222 /// Index for the first variadic parameter if it exists.
223 unsigned VariadicIndex;
224 /// Show when a function has variadic parameters. If it has, it marks all
225 /// of them as source or destination.
226 VariadicType VarType;
227 /// Special function for tainted source determination. If defined, it can
228 /// override the default behavior.
229 PropagationFuncType PropagationFunc;
/// Creates the null rule: no variadic handling and no custom function.
231 TaintPropagationRule()
232 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
233 PropagationFunc(nullptr) {}
/// Creates a rule from source and destination argument lists, optionally
/// with variadic handling and a custom propagation function.
235 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
236 VariadicType Var = VariadicType::None,
237 unsigned VarIndex = InvalidArgIndex,
238 PropagationFuncType Func = nullptr)
239 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
240 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
242 /// Get the propagation rule for a given function.
243 static TaintPropagationRule
244 getTaintPropagationRule(const NameRuleMap &CustomPropagations,
245 const FunctionData &FData, CheckerContext &C);
247 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
248 void addDstArg(unsigned A) { DstArgs.push_back(A); }
/// True if the rule has no source arguments, no destination arguments and
/// no variadic handling.
250 bool isNull() const {
251 return SrcArgs.empty() && DstArgs.empty() &&
252 VariadicType::None == VarType;
/// True if \p ArgNum is listed among the destination arguments.
255 bool isDestinationArgument(unsigned ArgNum) const {
256 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
/// Checks, in order: whether \p E itself is tainted or is stdin; then, for
/// pointer-typed expressions, whether the value pointed to is tainted.
259 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
261 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
264 if (!E->getType().getTypePtr()->isPointerType())
267 Optional<SVal> V = getPointedToSVal(C, E);
268 return (V && isTainted(State, *V));
271 /// Pre-process a function which propagates taint according to the
/// taint rule.
273 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
275 // Functions for custom taintedness propagation.
276 static bool postSocket(bool IsTainted, const CallExpr *CE,
280 /// Defines a map between the propagation function's name, scope
281 /// and TaintPropagationRule.
282 NameRuleMap CustomPropagations;
284 /// Defines a map between the filter function's name, scope and filtering
/// args.
286 NameArgMap CustomFilters;
288 /// Defines a map between the sink function's name, scope and sinking args.
289 NameArgMap CustomSinks;
// Out-of-class definitions of the static data members that are declared and
// initialized inside GenericTaintChecker.
292 const unsigned GenericTaintChecker::ReturnValueIndex;
293 const unsigned GenericTaintChecker::InvalidArgIndex;
295 // FIXME: these lines can be removed in C++17
296 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
297 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
298 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
299 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
300 } // end of anonymous namespace
// Shorthand used by the YAML declarations that follow.
302 using TaintConfig = GenericTaintChecker::TaintConfiguration;
// Let the YAML layer read std::vector of these element types as sequences.
304 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
305 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
/// YAML mapping for the whole configuration. All three keys are optional.
309 template <> struct MappingTraits<TaintConfig> {
310 static void mapping(IO &IO, TaintConfig &Config) {
311 IO.mapOptional("Propagations", Config.Propagations);
312 IO.mapOptional("Filters", Config.Filters);
313 IO.mapOptional("Sinks", Config.Sinks);
/// YAML mapping for one propagation entry. Only "Name" is required.
317 template <> struct MappingTraits<TaintConfig::Propagation> {
318 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
319 IO.mapRequired("Name", Propagation.Name);
320 IO.mapOptional("Scope", Propagation.Scope);
321 IO.mapOptional("SrcArgs", Propagation.SrcArgs);
322 IO.mapOptional("DstArgs", Propagation.DstArgs);
// When absent: no variadic handling and no variadic index.
323 IO.mapOptional("VariadicType", Propagation.VarType,
324 GenericTaintChecker::VariadicType::None);
325 IO.mapOptional("VariadicIndex", Propagation.VarIndex,
326 GenericTaintChecker::InvalidArgIndex);
/// YAML spelling of VariadicType: "None", "Src" or "Dst".
330 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
331 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
332 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
333 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
334 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
/// YAML mapping for a filter or sink entry. "Name" and "Args" are required,
/// "Scope" is optional. The tuple elements are (name, scope, args).
338 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
339 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
340 IO.mapRequired("Name", std::get<0>(NSA));
341 IO.mapOptional("Scope", std::get<1>(NSA));
342 IO.mapRequired("Args", std::get<2>(NSA));
348 /// A set which is used to pass information from call pre-visit instruction
349 /// to the call post-visit. The values are unsigned integers, which are either
350 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
351 /// points to data, which should be tainted on return.
352 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
/// Translate signed argument numbers from the configuration into the unsigned
/// indexes used by the checker. Each input number yields one output entry:
/// ReturnValueIndex, InvalidArgIndex (together with an invalid-option report
/// through \p Mgr for \p Option), or the number itself cast to unsigned.
/// NOTE(review): the conditions that select between these cases are not
/// visible here; the report text suggests -1 is the smallest accepted
/// number - confirm.
354 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
355 CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
357 for (int Arg : Args) {
359 Result.push_back(ReturnValueIndex);
361 Result.push_back(InvalidArgIndex);
362 Mgr.reportInvalidCheckerOptionValue(
364 "an argument number for propagation rules greater or equal to -1");
366 Result.push_back(static_cast<unsigned>(Arg));
/// Populate CustomPropagations, CustomFilters and CustomSinks from \p Config.
/// \p Config is consumed: argument lists and filter/sink scopes are moved out
/// of it. \p Mgr and \p Option are passed on to convertToArgVector.
371 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
372 const std::string &Option,
373 TaintConfiguration &&Config) {
374 for (auto &P : Config.Propagations) {
375 GenericTaintChecker::CustomPropagations.emplace(
// Only the destination arguments go through convertToArgVector; the source
// arguments are moved in unchanged.
377 std::make_pair(P.Scope, TaintPropagationRule{
378 std::move(P.SrcArgs),
379 convertToArgVector(Mgr, Option, P.DstArgs),
380 P.VarType, P.VarIndex}));
// Filters and sinks are stored as (scope, argument indexes).
383 for (auto &F : Config.Filters) {
384 GenericTaintChecker::CustomFilters.emplace(
386 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
389 for (auto &S : Config.Sinks) {
390 GenericTaintChecker::CustomSinks.emplace(
392 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
/// Look up \p FData in \p Map. Candidates are the entries stored under
/// FData.Name; a candidate matches if its scope is empty or the qualified
/// name of the function starts with that scope. Returns an iterator to the
/// first matching candidate, or Map.end() if there is none.
396 template <typename T>
397 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
398 const FunctionData &FData) {
399 auto Range = Map.equal_range(FData.Name);
401 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
// Entry.second is the (scope, data) pair.
402 const auto &Value = Entry.second;
403 StringRef Scope = Value.first;
404 return Scope.empty() || FData.isInScope(Scope);
// Report "no match" as Map.end() rather than the end of the equal range, so
// callers can compare against the end of the map.
406 return It != Range.second ? It : Map.end();
/// Select the propagation rule for the callee described by \p FData.
/// The sources appear in this order: a built-in table keyed by the qualified
/// name, memory/string functions identified through getMemoryFunctionKind(),
/// C library functions matched with isCLibraryFunction(), and the
/// user-supplied \p CustomPropagations. A default-constructed (null) rule is
/// returned at the end if nothing else was returned before.
///
/// Rules are written as (source argument indexes, destination argument
/// indexes[, variadic kind, first variadic index[, custom function]]).
409 GenericTaintChecker::TaintPropagationRule
410 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
411 const NameRuleMap &CustomPropagations, const FunctionData &FData,
413 // TODO: Currently, we might lose precision here: we always mark a return
414 // value as tainted even if it's just a pointer, pointing to tainted data.
416 // Check for exact name match for functions without builtin substitutes.
417 // Use qualified name, because these are C functions without namespace.
418 TaintPropagationRule Rule =
419 llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
421 // TODO: Add support for vfscanf & family.
// Rules with an empty source list: process() starts from "tainted", so
// with no source arguments to inspect the destinations are marked unless
// a custom propagation function decides otherwise.
422 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
423 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
424 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
425 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
426 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
427 .Case("getchar_unlocked",
428 TaintPropagationRule({}, {ReturnValueIndex}))
429 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
430 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
431 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
433 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
435 &TaintPropagationRule::postSocket))
436 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
437 // Propagating functions
438 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
439 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
440 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
441 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
442 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
443 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
444 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
445 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
446 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
447 .Case("getdelim", TaintPropagationRule({3}, {0}))
448 .Case("getline", TaintPropagationRule({2}, {0}))
449 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
451 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
452 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
453 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
454 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
455 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
456 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
457 .Default(TaintPropagationRule());
462 // Check if it's one of the memory setting/copying functions.
463 // This check is specialized but faster than calling isCLibraryFunction.
464 const FunctionDecl *FDecl = FData.FDecl;
466 if ((BId = FDecl->getMemoryFunctionKind()))
468 case Builtin::BImemcpy:
469 case Builtin::BImemmove:
470 case Builtin::BIstrncpy:
471 case Builtin::BIstrncat:
472 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
473 case Builtin::BIstrlcpy:
474 case Builtin::BIstrlcat:
475 return TaintPropagationRule({1, 2}, {0});
476 case Builtin::BIstrndup:
477 return TaintPropagationRule({0, 1}, {ReturnValueIndex});
483 // Process all other functions which could be defined as builtins.
485 if (C.isCLibraryFunction(FDecl, "snprintf"))
486 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
488 else if (C.isCLibraryFunction(FDecl, "sprintf"))
489 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
491 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
492 C.isCLibraryFunction(FDecl, "stpcpy") ||
493 C.isCLibraryFunction(FDecl, "strcat"))
494 return TaintPropagationRule({1}, {0, ReturnValueIndex});
495 else if (C.isCLibraryFunction(FDecl, "bcopy"))
496 return TaintPropagationRule({0, 2}, {1});
497 else if (C.isCLibraryFunction(FDecl, "strdup") ||
498 C.isCLibraryFunction(FDecl, "strdupa"))
499 return TaintPropagationRule({0}, {ReturnValueIndex});
500 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
501 return TaintPropagationRule({0}, {ReturnValueIndex});
504 // Skipping the following functions, since they might be used for cleansing
505 // or smart memory copy:
506 // - memccpy - copying until hitting a special character.
// Last, consult the rules supplied through the configuration.
508 auto It = findFunctionInConfig(CustomPropagations, FData);
509 if (It != CustomPropagations.end()) {
510 const auto &Value = It->second;
// Nothing matched: hand back the null rule.
514 return TaintPropagationRule();
/// Pre-call callback. Builds the FunctionData for the callee and then runs,
/// in this order, the sink checks (checkPre), the taint source/propagation
/// handling (addSourcesPre) and the filter handling (addFiltersPre).
517 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
518 CheckerContext &C) const {
519 Optional<FunctionData> FData = FunctionData::create(CE, C);
523 // Check for taintedness related errors first: system call, uncontrolled
524 // format string, tainted buffer size.
525 if (checkPre(CE, *FData, C))
528 // Marks the function's arguments and/or return value tainted if it is
// covered by a propagation rule.
530 if (addSourcesPre(CE, *FData, C))
533 addFiltersPre(CE, *FData, C);
/// Post-call callback. Applies the taint that the pre-visit recorded.
536 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
537 CheckerContext &C) const {
538 // Set the marked values as tainted. The return value is only accessible
// from the post-statement callback.
540 propagateFromPre(CE, C);
/// Print the taint information of \p State to \p Out by forwarding all
/// arguments to printTaint.
543 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
544 const char *NL, const char *Sep) const {
545 printTaint(State, Out, NL, Sep);
/// Look up the propagation rule for the callee and, if the rule is not null,
/// let it process the call and add the resulting state as a transition.
548 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
549 const FunctionData &FData,
550 CheckerContext &C) const {
551 // First, try generating a propagation rule for this function.
552 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
553 this->CustomPropagations, FData, C);
554 if (!Rule.isNull()) {
// process() returns the state with the arguments to taint on post-visit
// recorded in it.
555 ProgramStateRef State = Rule.process(CE, C);
557 C.addTransition(State);
/// If the callee is listed in CustomFilters, remove the taint from the values
/// its configured arguments point to. A transition is added only when the
/// state actually changed.
564 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
565 const FunctionData &FData,
566 CheckerContext &C) const {
567 auto It = findFunctionInConfig(CustomFilters, FData);
568 if (It == CustomFilters.end())
571 ProgramStateRef State = C.getState();
// It->second is the (scope, argument indexes) pair of the matching filter.
572 const auto &Value = It->second;
573 const ArgVector &Args = Value.second;
574 for (unsigned ArgNum : Args) {
// Configured index is beyond the arguments of this call.
575 if (ArgNum >= CE->getNumArgs())
578 const Expr *Arg = CE->getArg(ArgNum);
579 Optional<SVal> V = getPointedToSVal(C, Arg);
581 State = removeTaint(State, *V);
584 if (State != C.getState()) {
585 C.addTransition(State);
/// Apply the taint recorded in the TaintArgsOnPostVisit set: taint the call
/// expression itself for ReturnValueIndex, and the pointed-to value for each
/// recorded argument index. The set is removed from the state afterwards and
/// a transition is added if the state changed.
591 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
592 CheckerContext &C) const {
593 ProgramStateRef State = C.getState();
595 // Depending on what was tainted at pre-visit, we determined a set of
596 // arguments which should be tainted after the function returns. These are
597 // stored in the state as TaintArgsOnPostVisit set.
598 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
599 if (TaintArgs.isEmpty())
602 for (unsigned ArgNum : TaintArgs) {
603 // Special handling for the tainted return value.
604 if (ArgNum == ReturnValueIndex) {
605 State = addTaint(State, CE, C.getLocationContext());
609 // The arguments are pointer arguments. The data they are pointing at is
610 // tainted after the call.
// Recorded index is beyond the arguments of this call.
611 if (CE->getNumArgs() < (ArgNum + 1))
613 const Expr *Arg = CE->getArg(ArgNum);
614 Optional<SVal> V = getPointedToSVal(C, Arg);
616 State = addTaint(State, *V);
619 // Clear up the taint info from the state.
620 State = State->remove<TaintArgsOnPostVisit>();
622 if (State != C.getState()) {
623 C.addTransition(State);
/// Run the sink checks in a fixed order: uncontrolled format string, system
/// call, tainted buffer size, user-defined sinks.
629 bool GenericTaintChecker::checkPre(const CallExpr *CE,
630 const FunctionData &FData,
631 CheckerContext &C) const {
633 if (checkUncontrolledFormatString(CE, C))
636 if (checkSystemCall(CE, FData.Name, C))
639 if (checkTaintedBufferSize(CE, FData.FDecl, C))
642 if (checkCustomSinks(CE, FData, C))
/// Return the value stored at the location \p Arg evaluates to. For
/// pointer-typed arguments the load uses the pointee type (char for void
/// pointers); for other arguments the location is loaded without a type.
648 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
650 ProgramStateRef State = C.getState();
// Parentheses around the argument are ignored when evaluating it.
651 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
652 if (AddrVal.isUnknownOrUndef())
655 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
659 QualType ArgTy = Arg->getType().getCanonicalType();
// Not a pointer type, but the value is a location: load from it directly.
660 if (!ArgTy->isPointerType())
661 return State->getSVal(*AddrLoc);
663 QualType ValTy = ArgTy->getPointeeType();
665 // Do not dereference void pointers. Treat them as byte pointers instead.
666 // FIXME: we might want to consider more than just the first byte.
667 if (ValTy->isVoidType())
668 ValTy = C.getASTContext().CharTy;
670 return State->getSVal(*AddrLoc, ValTy);
/// Evaluate this rule for the call \p CE. Decides whether the call counts as
/// tainted (source arguments, then variadic source arguments, then the
/// optional custom function) and records in TaintArgsOnPostVisit which
/// arguments and/or the return value should be tainted on the post-visit.
/// Returns the resulting state.
674 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
675 CheckerContext &C) const {
676 ProgramStateRef State = C.getState();
678 // Check for taint in arguments.
// Starts out true: if no source argument is inspected (empty list, or all
// indexes out of range), the call counts as tainted.
679 bool IsTainted = true;
680 for (unsigned ArgNum : SrcArgs) {
681 if (ArgNum >= CE->getNumArgs())
684 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
688 // Check for taint in variadic arguments.
689 if (!IsTainted && VariadicType::Src == VarType) {
690 // Check if any of the arguments is tainted
691 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
692 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
// The custom function receives the result so far and supplies the final one.
698 IsTainted = PropagationFunc(IsTainted, CE, C);
703 // Mark the arguments which should be tainted after the function returns.
704 for (unsigned ArgNum : DstArgs) {
705 // Should mark the return value?
706 if (ArgNum == ReturnValueIndex) {
707 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
711 if (ArgNum >= CE->getNumArgs())
714 // Mark the given argument.
715 State = State->add<TaintArgsOnPostVisit>(ArgNum);
718 // Mark all variadic arguments tainted if present.
719 if (VariadicType::Dst == VarType) {
720 // For all pointer and references that were passed in:
721 // If they are not pointing to const data, mark data as tainted.
722 // TODO: So far we are just going one level down; ideally we'd need to
// recurse.
724 for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
725 const Expr *Arg = CE->getArg(i);
726 // Process pointer argument.
727 const Type *ArgTy = Arg->getType().getTypePtr();
728 QualType PType = ArgTy->getPointeeType();
729 if ((!PType.isNull() && !PType.isConstQualified()) ||
730 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
731 State = State->add<TaintArgsOnPostVisit>(i);
738 // If argument 0(protocol domain) is network, the return value should get taint.
// The domain is identified by the macro name or spelling found at the source
// location of argument 0; the incoming taintedness is not used.
739 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
742 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
743 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
744 // White list the internal communication protocols.
745 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
746 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
/// Decide whether \p E evaluates to the standard input stream. Walks from
/// the value of \p E to a symbolic region, to the region its symbol was read
/// from, and to the variable declared there, then checks that variable's
/// name, linkage and type.
752 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
753 ProgramStateRef State = C.getState();
754 SVal Val = C.getSVal(E);
756 // stdin is a pointer, so it would be a region.
757 const MemRegion *MemReg = Val.getAsRegion();
759 // The region should be symbolic, we do not know its value.
760 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
764 // Get its symbol and find the declaration region it's pointing to.
765 const SymbolRegionValue *Sm =
766 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
769 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
773 // This region corresponds to a declaration, find out if it's a global/extern
774 // variable named stdin with the proper type.
775 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
776 D = D->getCanonicalDecl();
// Substring match: any extern "C" variable whose name contains "stdin"
// passes this test.
777 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
// The variable must be a pointer whose pointee is the FILE type.
778 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
779 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
780 C.getASTContext().getFILEType().getCanonicalType())
/// Find the index of the format string argument of the call \p CE, using the
/// callee's format attributes of type "printf" and, as a fallback, a name
/// heuristic for setproctitle. The index is written to ArgNum.
/// NOTE(review): the return statements are not visible here; presumably the
/// result tells whether a format argument was found - confirm.
787 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
788 const CheckerContext &C,
790 // Find if the function contains a format string argument.
791 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
792 // vsnprintf, syslog, custom annotated functions.
793 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
796 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
// Presumably the attribute index is 1-based, hence the -1 to get an argument
// index.
797 ArgNum = Format->getFormatIdx() - 1;
798 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
802 // Or if a function is named setproctitle (this is a heuristic).
803 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
/// Emit a report with the text \p Msg if \p E points to tainted data or is
/// itself tainted. The pointed-to value is examined first; the expression's
/// own value is used only if that one is not tainted. The report is attached
/// to a non-fatal error node and highlights the source range of \p E.
811 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
812 CheckerContext &C) const {
816 ProgramStateRef State = C.getState();
817 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
819 if (PointedToSVal && isTainted(State, *PointedToSVal))
820 TaintedSVal = *PointedToSVal;
821 else if (isTainted(State, E, C.getLocationContext()))
822 TaintedSVal = C.getSVal(E);
826 // Generate diagnostic.
827 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
829 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
830 report->addRange(E->getSourceRange());
// The visitor is given the tainted value selected earlier in this function.
831 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
832 C.emitReport(std::move(report));
/// Report MsgUncontrolledFormatString if the format string argument of the
/// call, as located by getPrintfFormatArgumentNum, is tainted.
838 bool GenericTaintChecker::checkUncontrolledFormatString(
839 const CallExpr *CE, CheckerContext &C) const {
840 // Check if the function contains a format string argument.
842 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
845 // If either the format string content or the pointer itself are tainted,
// warn.
847 return generateReportIfTainted(CE->getArg(ArgNum),
848 MsgUncontrolledFormatString, C);
/// Report MsgSanitizeSystemArgs if the relevant argument of a known system
/// call is tainted. The argument index is selected from \p Name; names that
/// are not in the table map to InvalidArgIndex.
851 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
852 CheckerContext &C) const {
853 // TODO: It might make sense to run this check on demand. In some cases,
854 // we should check if the environment has been cleansed here. We also might
855 // need to know if the user was reset before these calls(seteuid).
856 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
867 .Default(InvalidArgIndex);
// Unknown function, or the call has too few arguments for the index.
869 if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
872 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
875 // TODO: Should this check be a part of the CString checker?
876 // If yes, should taint be a global setting?
/// Report MsgTaintedBufferSize if the size argument of a memory, string or
/// allocation function is tainted. The size argument is located first
/// through getMemoryFunctionKind() and, if that yields nothing, through
/// isCLibraryFunction() name checks.
877 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
878 const FunctionDecl *FDecl,
879 CheckerContext &C) const {
880 // If the function has a buffer size argument, set ArgNum.
881 unsigned ArgNum = InvalidArgIndex;
883 if ((BId = FDecl->getMemoryFunctionKind()))
885 case Builtin::BImemcpy:
886 case Builtin::BImemmove:
887 case Builtin::BIstrncpy:
890 case Builtin::BIstrndup:
// Fall back to matching library functions by name.
897 if (ArgNum == InvalidArgIndex) {
898 if (C.isCLibraryFunction(FDecl, "malloc") ||
899 C.isCLibraryFunction(FDecl, "calloc") ||
900 C.isCLibraryFunction(FDecl, "alloca"))
902 else if (C.isCLibraryFunction(FDecl, "memccpy"))
904 else if (C.isCLibraryFunction(FDecl, "realloc"))
906 else if (C.isCLibraryFunction(FDecl, "bcopy"))
// Report only if a size argument was identified and the call provides it.
910 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
911 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
/// If the callee is listed in CustomSinks, check each of its configured
/// arguments and report MsgCustomSink for tainted ones.
914 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
915 const FunctionData &FData,
916 CheckerContext &C) const {
917 auto It = findFunctionInConfig(CustomSinks, FData);
918 if (It == CustomSinks.end())
// It->second is the (scope, argument indexes) pair of the matching sink.
921 const auto &Value = It->second;
922 const GenericTaintChecker::ArgVector &Args = Value.second;
923 for (unsigned ArgNum : Args) {
// Configured index is beyond the arguments of this call.
924 if (ArgNum >= CE->getNumArgs())
927 if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
/// Register the checker with \p Mgr, read its "Config" string option, load
/// the configuration it names and hand the result to parseConfiguration.
934 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
935 auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
936 std::string Option{"Config"};
937 StringRef ConfigFile =
938 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
939 llvm::Optional<TaintConfig> Config =
940 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
// The parsed configuration is moved into the checker.
942 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
945 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {