1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This checker defines the attack surface for generic taint propagation.
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
15 //===----------------------------------------------------------------------===//
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
33 #include <unordered_map>
36 using namespace clang;
38 using namespace taint;
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
43 static void *getTag() {
48 void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49 void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
51 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52 const char *Sep) const override;
54 using ArgVector = SmallVector<unsigned, 2>;
55 using SignedArgVector = SmallVector<int, 2>;
57 enum class VariadicType { None, Src, Dst };
59 /// Used to parse the configuration file.
60 struct TaintConfiguration {
61 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
67 SignedArgVector DstArgs;
72 std::vector<Propagation> Propagations;
73 std::vector<NameScopeArgs> Filters;
74 std::vector<NameScopeArgs> Sinks;
76 TaintConfiguration() = default;
77 TaintConfiguration(const TaintConfiguration &) = default;
78 TaintConfiguration(TaintConfiguration &&) = default;
79 TaintConfiguration &operator=(const TaintConfiguration &) = default;
80 TaintConfiguration &operator=(TaintConfiguration &&) = default;
83 /// Convert SignedArgVector to ArgVector.
84 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85 const SignedArgVector &Args);
88 void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89 TaintConfiguration &&Config);
91 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92 /// Denotes the return vale.
93 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
97 mutable std::unique_ptr<BugType> BT;
98 void initBugType() const {
100 BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
104 struct FunctionData {
105 FunctionData() = delete;
106 FunctionData(const FunctionData &) = default;
107 FunctionData(FunctionData &&) = default;
108 FunctionData &operator=(const FunctionData &) = delete;
109 FunctionData &operator=(FunctionData &&) = delete;
111 static Optional<FunctionData> create(const CallEvent &Call,
112 const CheckerContext &C) {
116 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
117 if (!FDecl || (FDecl->getKind() != Decl::Function &&
118 FDecl->getKind() != Decl::CXXMethod))
121 StringRef Name = C.getCalleeName(FDecl);
122 std::string FullName = FDecl->getQualifiedNameAsString();
123 if (Name.empty() || FullName.empty())
126 return FunctionData{FDecl, Name, FullName};
129 bool isInScope(StringRef Scope) const {
130 return StringRef(FullName).startswith(Scope);
133 const FunctionDecl *const FDecl;
134 const StringRef Name;
135 const std::string FullName;
138 /// Catch taint related bugs. Check if tainted data is passed to a
139 /// system call etc. Returns true on matching.
140 bool checkPre(const CallEvent &Call, const FunctionData &FData,
141 CheckerContext &C) const;
143 /// Add taint sources on a pre-visit. Returns true on matching.
144 bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
145 CheckerContext &C) const;
147 /// Mark filter's arguments not tainted on a pre-visit. Returns true on
149 bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
150 CheckerContext &C) const;
152 /// Propagate taint generated at pre-visit. Returns true on matching.
153 static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
155 /// Check if the region the expression evaluates to is the standard input,
156 /// and thus, is tainted.
157 static bool isStdin(const Expr *E, CheckerContext &C);
159 /// Given a pointer argument, return the value it points to.
160 static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
162 /// Check for CWE-134: Uncontrolled Format String.
163 static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
164 "Untrusted data is used as a format string "
165 "(CWE-134: Uncontrolled Format String)";
166 bool checkUncontrolledFormatString(const CallEvent &Call,
167 CheckerContext &C) const;
170 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
171 /// CWE-78, "Failure to Sanitize Data into an OS Command"
172 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
173 "Untrusted data is passed to a system call "
174 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
175 bool checkSystemCall(const CallEvent &Call, StringRef Name,
176 CheckerContext &C) const;
178 /// Check if tainted data is used as a buffer size ins strn.. functions,
180 static constexpr llvm::StringLiteral MsgTaintedBufferSize =
181 "Untrusted data is used to specify the buffer size "
182 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
183 "for character data and the null terminator)";
184 bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
186 /// Check if tainted data is used as a custom sink's parameter.
187 static constexpr llvm::StringLiteral MsgCustomSink =
188 "Untrusted data is passed to a user-defined sink";
189 bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
190 CheckerContext &C) const;
192 /// Generate a report if the expression is tainted or points to tainted data.
193 bool generateReportIfTainted(const Expr *E, StringRef Msg,
194 CheckerContext &C) const;
196 struct TaintPropagationRule;
197 template <typename T>
198 using ConfigDataMap =
199 std::unordered_multimap<std::string, std::pair<std::string, T>>;
200 using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
201 using NameArgMap = ConfigDataMap<ArgVector>;
203 /// Find a function with the given name and scope. Returns the first match
204 /// or the end of the map.
205 template <typename T>
206 static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
207 const FunctionData &FData);
209 /// A struct used to specify taint propagation rules for a function.
211 /// If any of the possible taint source arguments is tainted, all of the
212 /// destination arguments should also be tainted. Use InvalidArgIndex in the
213 /// src list to specify that all of the arguments can introduce taint. Use
214 /// InvalidArgIndex in the dst arguments to signify that all the non-const
215 /// pointer and reference arguments might be tainted on return. If
216 /// ReturnValueIndex is added to the dst list, the return value will be
218 struct TaintPropagationRule {
219 using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
222 /// List of arguments which can be taint sources and should be checked.
224 /// List of arguments which should be tainted on function return.
226 /// Index for the first variadic parameter if exist.
227 unsigned VariadicIndex;
228 /// Show when a function has variadic parameters. If it has, it marks all
229 /// of them as source or destination.
230 VariadicType VarType;
231 /// Special function for tainted source determination. If defined, it can
232 /// override the default behavior.
233 PropagationFuncType PropagationFunc;
235 TaintPropagationRule()
236 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
237 PropagationFunc(nullptr) {}
239 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
240 VariadicType Var = VariadicType::None,
241 unsigned VarIndex = InvalidArgIndex,
242 PropagationFuncType Func = nullptr)
243 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
244 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
246 /// Get the propagation rule for a given function.
247 static TaintPropagationRule
248 getTaintPropagationRule(const NameRuleMap &CustomPropagations,
249 const FunctionData &FData, CheckerContext &C);
251 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
252 void addDstArg(unsigned A) { DstArgs.push_back(A); }
254 bool isNull() const {
255 return SrcArgs.empty() && DstArgs.empty() &&
256 VariadicType::None == VarType;
259 bool isDestinationArgument(unsigned ArgNum) const {
260 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
263 static bool isTaintedOrPointsToTainted(const Expr *E,
264 const ProgramStateRef &State,
266 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
269 if (!E->getType().getTypePtr()->isPointerType())
272 Optional<SVal> V = getPointeeOf(C, E);
273 return (V && isTainted(State, *V));
276 /// Pre-process a function which propagates taint according to the
278 ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
280 // Functions for custom taintedness propagation.
281 static bool postSocket(bool IsTainted, const CallEvent &Call,
285 /// Defines a map between the propagation function's name, scope
286 /// and TaintPropagationRule.
287 NameRuleMap CustomPropagations;
289 /// Defines a map between the filter function's name, scope and filtering
291 NameArgMap CustomFilters;
293 /// Defines a map between the sink function's name, scope and sinking args.
294 NameArgMap CustomSinks;
297 const unsigned GenericTaintChecker::ReturnValueIndex;
298 const unsigned GenericTaintChecker::InvalidArgIndex;
300 // FIXME: these lines can be removed in C++17
301 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
302 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
303 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
305 } // end of anonymous namespace
307 using TaintConfig = GenericTaintChecker::TaintConfiguration;
309 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
310 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314 template <> struct MappingTraits<TaintConfig> {
315 static void mapping(IO &IO, TaintConfig &Config) {
316 IO.mapOptional("Propagations", Config.Propagations);
317 IO.mapOptional("Filters", Config.Filters);
318 IO.mapOptional("Sinks", Config.Sinks);
322 template <> struct MappingTraits<TaintConfig::Propagation> {
323 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
324 IO.mapRequired("Name", Propagation.Name);
325 IO.mapOptional("Scope", Propagation.Scope);
326 IO.mapOptional("SrcArgs", Propagation.SrcArgs);
327 IO.mapOptional("DstArgs", Propagation.DstArgs);
328 IO.mapOptional("VariadicType", Propagation.VarType,
329 GenericTaintChecker::VariadicType::None);
330 IO.mapOptional("VariadicIndex", Propagation.VarIndex,
331 GenericTaintChecker::InvalidArgIndex);
335 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
336 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
337 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
338 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
339 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
344 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
345 IO.mapRequired("Name", std::get<0>(NSA));
346 IO.mapOptional("Scope", std::get<1>(NSA));
347 IO.mapRequired("Args", std::get<2>(NSA));
353 /// A set which is used to pass information from call pre-visit instruction
354 /// to the call post-visit. The values are unsigned integers, which are either
355 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
356 /// points to data, which should be tainted on return.
357 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
359 GenericTaintChecker::ArgVector
360 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
361 const std::string &Option,
362 const SignedArgVector &Args) {
364 for (int Arg : Args) {
366 Result.push_back(ReturnValueIndex);
368 Result.push_back(InvalidArgIndex);
369 Mgr.reportInvalidCheckerOptionValue(
371 "an argument number for propagation rules greater or equal to -1");
373 Result.push_back(static_cast<unsigned>(Arg));
378 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
379 const std::string &Option,
380 TaintConfiguration &&Config) {
381 for (auto &P : Config.Propagations) {
382 GenericTaintChecker::CustomPropagations.emplace(
384 std::make_pair(P.Scope, TaintPropagationRule{
385 std::move(P.SrcArgs),
386 convertToArgVector(Mgr, Option, P.DstArgs),
387 P.VarType, P.VarIndex}));
390 for (auto &F : Config.Filters) {
391 GenericTaintChecker::CustomFilters.emplace(
393 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
396 for (auto &S : Config.Sinks) {
397 GenericTaintChecker::CustomSinks.emplace(
399 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403 template <typename T>
404 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
405 const FunctionData &FData) {
406 auto Range = Map.equal_range(std::string(FData.Name));
408 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
409 const auto &Value = Entry.second;
410 StringRef Scope = Value.first;
411 return Scope.empty() || FData.isInScope(Scope);
413 return It != Range.second ? It : Map.end();
416 GenericTaintChecker::TaintPropagationRule
417 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
418 const NameRuleMap &CustomPropagations, const FunctionData &FData,
420 // TODO: Currently, we might lose precision here: we always mark a return
421 // value as tainted even if it's just a pointer, pointing to tainted data.
423 // Check for exact name match for functions without builtin substitutes.
424 // Use qualified name, because these are C functions without namespace.
425 TaintPropagationRule Rule =
426 llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
428 // TODO: Add support for vfscanf & family.
429 .Case("fdopen", {{}, {ReturnValueIndex}})
430 .Case("fopen", {{}, {ReturnValueIndex}})
431 .Case("freopen", {{}, {ReturnValueIndex}})
432 .Case("getch", {{}, {ReturnValueIndex}})
433 .Case("getchar", {{}, {ReturnValueIndex}})
434 .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
435 .Case("getenv", {{}, {ReturnValueIndex}})
436 .Case("gets", {{}, {0, ReturnValueIndex}})
437 .Case("scanf", {{}, {}, VariadicType::Dst, 1})
442 &TaintPropagationRule::postSocket})
443 .Case("wgetch", {{}, {ReturnValueIndex}})
444 // Propagating functions
445 .Case("atoi", {{0}, {ReturnValueIndex}})
446 .Case("atol", {{0}, {ReturnValueIndex}})
447 .Case("atoll", {{0}, {ReturnValueIndex}})
448 .Case("fgetc", {{0}, {ReturnValueIndex}})
449 .Case("fgetln", {{0}, {ReturnValueIndex}})
450 .Case("fgets", {{2}, {0, ReturnValueIndex}})
451 .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
452 .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
453 .Case("getc", {{0}, {ReturnValueIndex}})
454 .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
455 .Case("getdelim", {{3}, {0}})
456 .Case("getline", {{2}, {0}})
457 .Case("getw", {{0}, {ReturnValueIndex}})
458 .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
459 .Case("read", {{0, 2}, {1, ReturnValueIndex}})
460 .Case("strchr", {{0}, {ReturnValueIndex}})
461 .Case("strrchr", {{0}, {ReturnValueIndex}})
462 .Case("tolower", {{0}, {ReturnValueIndex}})
463 .Case("toupper", {{0}, {ReturnValueIndex}})
470 // Check if it's one of the memory setting/copying functions.
471 // This check is specialized but faster then calling isCLibraryFunction.
472 const FunctionDecl *FDecl = FData.FDecl;
474 if ((BId = FDecl->getMemoryFunctionKind())) {
476 case Builtin::BImemcpy:
477 case Builtin::BImemmove:
478 case Builtin::BIstrncpy:
479 case Builtin::BIstrncat:
480 return {{1, 2}, {0, ReturnValueIndex}};
481 case Builtin::BIstrlcpy:
482 case Builtin::BIstrlcat:
483 return {{1, 2}, {0}};
484 case Builtin::BIstrndup:
485 return {{0, 1}, {ReturnValueIndex}};
492 // Process all other functions which could be defined as builtins.
494 const auto OneOf = [FDecl](const auto &... Name) {
495 // FIXME: use fold expression in C++17
496 using unused = int[];
498 static_cast<void>(unused{
499 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
502 if (OneOf("snprintf"))
503 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
504 if (OneOf("sprintf"))
505 return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
506 if (OneOf("strcpy", "stpcpy", "strcat"))
507 return {{1}, {0, ReturnValueIndex}};
509 return {{0, 2}, {1}};
510 if (OneOf("strdup", "strdupa", "wcsdup"))
511 return {{0}, {ReturnValueIndex}};
514 // Skipping the following functions, since they might be used for cleansing or
515 // smart memory copy:
516 // - memccpy - copying until hitting a special character.
518 auto It = findFunctionInConfig(CustomPropagations, FData);
519 if (It != CustomPropagations.end())
520 return It->second.second;
524 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
525 CheckerContext &C) const {
526 Optional<FunctionData> FData = FunctionData::create(Call, C);
530 // Check for taintedness related errors first: system call, uncontrolled
531 // format string, tainted buffer size.
532 if (checkPre(Call, *FData, C))
535 // Marks the function's arguments and/or return value tainted if it present in
537 if (addSourcesPre(Call, *FData, C))
540 addFiltersPre(Call, *FData, C);
543 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
544 CheckerContext &C) const {
545 // Set the marked values as tainted. The return value only accessible from
547 propagateFromPre(Call, C);
550 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
551 const char *NL, const char *Sep) const {
552 printTaint(State, Out, NL, Sep);
555 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
556 const FunctionData &FData,
557 CheckerContext &C) const {
558 // First, try generating a propagation rule for this function.
559 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
560 this->CustomPropagations, FData, C);
561 if (!Rule.isNull()) {
562 ProgramStateRef State = Rule.process(Call, C);
564 C.addTransition(State);
571 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
572 const FunctionData &FData,
573 CheckerContext &C) const {
574 auto It = findFunctionInConfig(CustomFilters, FData);
575 if (It == CustomFilters.end())
578 ProgramStateRef State = C.getState();
579 const auto &Value = It->second;
580 const ArgVector &Args = Value.second;
581 for (unsigned ArgNum : Args) {
582 if (ArgNum >= Call.getNumArgs())
585 const Expr *Arg = Call.getArgExpr(ArgNum);
586 Optional<SVal> V = getPointeeOf(C, Arg);
588 State = removeTaint(State, *V);
591 if (State != C.getState()) {
592 C.addTransition(State);
598 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
600 ProgramStateRef State = C.getState();
602 // Depending on what was tainted at pre-visit, we determined a set of
603 // arguments which should be tainted after the function returns. These are
604 // stored in the state as TaintArgsOnPostVisit set.
605 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
606 if (TaintArgs.isEmpty())
609 for (unsigned ArgNum : TaintArgs) {
610 // Special handling for the tainted return value.
611 if (ArgNum == ReturnValueIndex) {
612 State = addTaint(State, Call.getReturnValue());
616 // The arguments are pointer arguments. The data they are pointing at is
617 // tainted after the call.
618 if (Call.getNumArgs() < (ArgNum + 1))
620 const Expr *Arg = Call.getArgExpr(ArgNum);
621 Optional<SVal> V = getPointeeOf(C, Arg);
623 State = addTaint(State, *V);
626 // Clear up the taint info from the state.
627 State = State->remove<TaintArgsOnPostVisit>();
629 if (State != C.getState()) {
630 C.addTransition(State);
636 bool GenericTaintChecker::checkPre(const CallEvent &Call,
637 const FunctionData &FData,
638 CheckerContext &C) const {
639 if (checkUncontrolledFormatString(Call, C))
642 if (checkSystemCall(Call, FData.Name, C))
645 if (checkTaintedBufferSize(Call, C))
648 return checkCustomSinks(Call, FData, C);
651 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
653 ProgramStateRef State = C.getState();
654 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
655 if (AddrVal.isUnknownOrUndef())
658 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
662 QualType ArgTy = Arg->getType().getCanonicalType();
663 if (!ArgTy->isPointerType())
664 return State->getSVal(*AddrLoc);
666 QualType ValTy = ArgTy->getPointeeType();
668 // Do not dereference void pointers. Treat them as byte pointers instead.
669 // FIXME: we might want to consider more than just the first byte.
670 if (ValTy->isVoidType())
671 ValTy = C.getASTContext().CharTy;
673 return State->getSVal(*AddrLoc, ValTy);
677 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
678 CheckerContext &C) const {
679 ProgramStateRef State = C.getState();
681 // Check for taint in arguments.
682 bool IsTainted = true;
683 for (unsigned ArgNum : SrcArgs) {
684 if (ArgNum >= Call.getNumArgs())
688 isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
692 // Check for taint in variadic arguments.
693 if (!IsTainted && VariadicType::Src == VarType) {
694 // Check if any of the arguments is tainted
695 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
697 isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
703 IsTainted = PropagationFunc(IsTainted, Call, C);
708 // Mark the arguments which should be tainted after the function returns.
709 for (unsigned ArgNum : DstArgs) {
710 // Should mark the return value?
711 if (ArgNum == ReturnValueIndex) {
712 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
716 if (ArgNum >= Call.getNumArgs())
719 // Mark the given argument.
720 State = State->add<TaintArgsOnPostVisit>(ArgNum);
723 // Mark all variadic arguments tainted if present.
724 if (VariadicType::Dst == VarType) {
725 // For all pointer and references that were passed in:
726 // If they are not pointing to const data, mark data as tainted.
727 // TODO: So far we are just going one level down; ideally we'd need to
729 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
730 const Expr *Arg = Call.getArgExpr(i);
731 // Process pointer argument.
732 const Type *ArgTy = Arg->getType().getTypePtr();
733 QualType PType = ArgTy->getPointeeType();
734 if ((!PType.isNull() && !PType.isConstQualified()) ||
735 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
736 State = State->add<TaintArgsOnPostVisit>(i);
744 // If argument 0(protocol domain) is network, the return value should get taint.
745 bool GenericTaintChecker::TaintPropagationRule::postSocket(
746 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
747 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
748 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
749 // White list the internal communication protocols.
750 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
751 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
756 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
757 ProgramStateRef State = C.getState();
758 SVal Val = C.getSVal(E);
760 // stdin is a pointer, so it would be a region.
761 const MemRegion *MemReg = Val.getAsRegion();
763 // The region should be symbolic, we do not know it's value.
764 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
768 // Get it's symbol and find the declaration region it's pointing to.
769 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
772 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
776 // This region corresponds to a declaration, find out if it's a global/extern
777 // variable named stdin with the proper type.
778 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
779 D = D->getCanonicalDecl();
780 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
781 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
782 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
783 C.getASTContext().getFILEType().getCanonicalType())
790 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
791 const CheckerContext &C,
793 // Find if the function contains a format string argument.
794 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
795 // vsnprintf, syslog, custom annotated functions.
796 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
799 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
800 ArgNum = Format->getFormatIdx() - 1;
801 if ((Format->getType()->getName() == "printf") &&
802 Call.getNumArgs() > ArgNum)
806 // Or if a function is named setproctitle (this is a heuristic).
807 if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
815 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
816 CheckerContext &C) const {
820 ProgramStateRef State = C.getState();
821 Optional<SVal> PointedToSVal = getPointeeOf(C, E);
823 if (PointedToSVal && isTainted(State, *PointedToSVal))
824 TaintedSVal = *PointedToSVal;
825 else if (isTainted(State, E, C.getLocationContext()))
826 TaintedSVal = C.getSVal(E);
830 // Generate diagnostic.
831 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
833 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
834 report->addRange(E->getSourceRange());
835 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
836 C.emitReport(std::move(report));
842 bool GenericTaintChecker::checkUncontrolledFormatString(
843 const CallEvent &Call, CheckerContext &C) const {
844 // Check if the function contains a format string argument.
846 if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
849 // If either the format string content or the pointer itself are tainted,
851 return generateReportIfTainted(Call.getArgExpr(ArgNum),
852 MsgUncontrolledFormatString, C);
855 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
856 CheckerContext &C) const {
857 // TODO: It might make sense to run this check on demand. In some cases,
858 // we should check if the environment has been cleansed here. We also might
859 // need to know if the user was reset before these calls(seteuid).
860 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
871 .Default(InvalidArgIndex);
873 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
876 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
880 // TODO: Should this check be a part of the CString checker?
881 // If yes, should taint be a global setting?
882 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
883 CheckerContext &C) const {
884 const auto *FDecl = Call.getDecl()->getAsFunction();
885 // If the function has a buffer size argument, set ArgNum.
886 unsigned ArgNum = InvalidArgIndex;
888 if ((BId = FDecl->getMemoryFunctionKind())) {
890 case Builtin::BImemcpy:
891 case Builtin::BImemmove:
892 case Builtin::BIstrncpy:
895 case Builtin::BIstrndup:
903 if (ArgNum == InvalidArgIndex) {
904 using CCtx = CheckerContext;
905 if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
906 CCtx::isCLibraryFunction(FDecl, "calloc") ||
907 CCtx::isCLibraryFunction(FDecl, "alloca"))
909 else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
911 else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
913 else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
917 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
918 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
922 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
923 const FunctionData &FData,
924 CheckerContext &C) const {
925 auto It = findFunctionInConfig(CustomSinks, FData);
926 if (It == CustomSinks.end())
929 const auto &Value = It->second;
930 const GenericTaintChecker::ArgVector &Args = Value.second;
931 for (unsigned ArgNum : Args) {
932 if (ArgNum >= Call.getNumArgs())
935 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
942 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
943 auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
944 std::string Option{"Config"};
945 StringRef ConfigFile =
946 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
947 llvm::Optional<TaintConfig> Config =
948 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
950 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
953 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {