1 //===----- UninitializedObjectChecker.cpp ------------------------*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a checker that reports uninitialized fields in objects
11 // created after a constructor call.
13 // This checker has two options:
14 // - "Pedantic" (boolean). If it's not set or is set to false, the checker
15 // won't emit warnings for objects that don't have at least one initialized
16 // field. This may be set with
18 // `-analyzer-config alpha.cplusplus.UninitializedObject:Pedantic=true`.
20 // - "NotesAsWarnings" (boolean). If set to true, the checker will emit a
21 // warning for each uninitialized field, as opposed to emitting one warning
22 // per constructor call, and listing the uninitialized fields that belong
23 // to it in notes. Defaults to false.
25 // `-analyzer-config alpha.cplusplus.UninitializedObject:NotesAsWarnings=true`.
27 //===----------------------------------------------------------------------===//
29 #include "ClangSACheckers.h"
30 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
31 #include "clang/StaticAnalyzer/Core/Checker.h"
32 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
35 using namespace clang;
36 using namespace clang::ento;
/// Checker registered for check::EndFunction; its callback looks for
/// uninitialized fields in the object a constructor has just finished building
/// (see the file header for the available options).
40 class UninitializedObjectChecker : public Checker<check::EndFunction> {
// Bug type shared by every report this checker emits.
41 std::unique_ptr<BuiltinBug> BT_uninitField;
44 // These fields will be initialized when registering the checker.
// registerUninitializedObjectChecker assigns them from the analyzer options.
46 bool ShouldConvertNotesToWarnings;
// Creates the bug type under the name "Uninitialized fields".
48 UninitializedObjectChecker()
49 : BT_uninitField(new BuiltinBug(this, "Uninitialized fields")) {}
/// End-of-function callback. The declaration of the ending function is cast
/// to CXXConstructorDecl there, and the constructed object is then searched
/// for uninitialized fields.
50 void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;
53 /// Represents a field chain. A field chain is a list of fields where the
54 /// first element of the chain is the object under checking (not stored), and
55 /// every other element is a field, and the element that precedes it is the
56 /// object that contains it.
58 /// Note that this class is immutable, and new fields may only be added through
59 /// constructor calls.
60 class FieldChainInfo {
// The chain is kept in an immutable list; the field added last sits at the
// head of the list.
61 using FieldChain = llvm::ImmutableList<const FieldRegion *>;
// Only meaningful when the last field of the chain is a pointer; see
// isDereferenced().
65 const bool IsDereferenced = false;
/// Default-constructs the chain; getUninitFields starts the search with such
/// an object.
68 FieldChainInfo() = default;
/// Copies Other's chain and sets the dereferenced flag to the given value.
70 FieldChainInfo(const FieldChainInfo &Other, const bool IsDereferenced)
71 : Chain(Other.Chain), IsDereferenced(IsDereferenced) {}
/// Copies Other's chain and adds FR to it, making FR the new end of the
/// chain. FR must not already be part of the chain.
73 FieldChainInfo(const FieldChainInfo &Other, const FieldRegion *FR,
74 const bool IsDereferenced = false);
/// Returns true if FR is already part of this chain.
76 bool contains(const FieldRegion *FR) const { return Chain.contains(FR); }
/// Returns true if the last field of the chain is declared with a pointer
/// type. The chain must not be empty.
77 bool isPointer() const;
79 /// If this is a fieldchain whose last element is an uninitialized region of a
80 /// pointer type, `IsDereferenced` will store whether the pointer itself or
81 /// the pointee is uninitialized.
82 bool isDereferenced() const;
/// Returns the declaration of the last field of the chain. The chain must not
/// be empty.
83 const FieldDecl *getEndOfChain() const;
/// Writes the chain to `Out`: every element but the last through `printTail`,
/// followed by the name of the last field.
84 void print(llvm::raw_ostream &Out) const;
87 /// Prints every element except the last to `Out`. Since ImmutableLists store
88 /// elements in reverse order, and have no reverse iterators, we use a
89 /// recursive function to print the fieldchain correctly. The last element in
90 /// the chain is to be printed by `print`.
91 static void printTail(llvm::raw_ostream &Out,
92 const llvm::ImmutableListImpl<const FieldRegion *> *L);
// The comparator reads Chain directly.
93 friend struct FieldChainInfoComparator;
/// Strict ordering used by UninitFieldSet. Two chains are compared by the
/// FieldRegion pointer at the head of their lists (the last field of the
/// chain) and by nothing else, so chains ending in the same region are
/// equivalent as far as the set is concerned.
96 struct FieldChainInfoComparator {
97 bool operator()(const FieldChainInfo &lhs, const FieldChainInfo &rhs) const {
// An empty chain has no head to compare.
98 assert(!lhs.Chain.isEmpty() && !rhs.Chain.isEmpty() &&
99 "Attempted to store an empty fieldchain!");
100 return *lhs.Chain.begin() < *rhs.Chain.begin();
/// Set of the field chains found so far, ordered by FieldChainInfoComparator.
104 using UninitFieldSet = std::set<FieldChainInfo, FieldChainInfoComparator>;
106 /// Searches for and stores uninitialized fields in a non-union object.
107 class FindUninitializedFields {
// Program state that every region and value lookup goes through.
108 ProgramStateRef State;
// Region of the object under checking; the search starts here.
109 const TypedValueRegion *const ObjectR;
// Outside pedantic mode the findings are discarded unless at least one field
// was seen initialized (see getUninitFields).
111 const bool IsPedantic;
112 bool IsAnyFieldInitialized = false;
// Chains collected so far, one per uninitialized field.
114 UninitFieldSet UninitFields;
/// Stores the state, the region of the object to check and the pedantic flag.
117 FindUninitializedFields(ProgramStateRef State,
118 const TypedValueRegion *const R, bool IsPedantic);
/// Runs the search from ObjectR and gives access to the collected chains.
119 const UninitFieldSet &getUninitFields();
122 /// Adds a FieldChainInfo object to UninitFields. Return true if an insertion
/// took place.
124 bool addFieldToUninits(FieldChainInfo LocalChain);
126 // For the purposes of this checker, we'll regard the object under checking as
127 // a directed tree, where
128 // * the root is the object under checking
129 // * every node is an object that is
131 // - a non-union record
132 // - a pointer/reference
134 // - of a primitive type, which we'll define later in a helper function.
135 // * the parent of each node is the object that contains it
136 // * every leaf is an array, a primitive object, a nullptr or an undefined
146 // int *iptr = new int;
152 // The directed tree:
158 // A-->iptr->(int value)
162 // From this we'll construct a set of fieldchains, where each fieldchain
163 // represents an uninitialized field. An uninitialized field may be a
164 // primitive object, a pointer, a pointee or a union without a single
165 // initialized field.
166 // In the above example, for the default constructor call we'll end up with
167 // these fieldchains:
170 // this->iptr (pointee uninit)
171 // this->bptr (pointer uninit)
173 // We'll traverse each node of the above graph with the appropriate one of
// the methods declared below.
176 /// This method checks a region of a union object, and returns true if no
177 /// field is initialized within the region.
178 bool isUnionUninit(const TypedValueRegion *R);
180 /// This method checks a region of a non-union object, and returns true if
181 /// an uninitialized field is found within the region.
182 bool isNonUnionUninit(const TypedValueRegion *R, FieldChainInfo LocalChain);
184 /// This method checks a region of a pointer or reference object, and returns
185 /// true if the ptr/ref object itself or any field within the pointee's region
186 /// is uninitialized.
187 bool isPointerOrReferenceUninit(const FieldRegion *FR,
188 FieldChainInfo LocalChain);
190 /// This method returns true if the value of a primitive object is
/// NOTE(review): the end of this sentence is missing from this listing;
/// presumably "undefined" -- confirm against the definition.
192 bool isPrimitiveUninit(const SVal &V);
194 // Note that we don't have a method for arrays -- the elements of an array are
195 // often left uninitialized intentionally even when it is of a C++ record
196 // type, so we'll assume that an array is always initialized.
197 // TODO: Add a support for nonloc::LocAsInteger.
200 } // end of anonymous namespace
202 // Static variable instantiations.
// Factory through which the FieldChainInfo constructor extends its immutable
// list of fields.
204 static llvm::ImmutableListFactory<const FieldRegion *> Factory;
206 // Utility function declarations.
208 /// Returns the object that was constructed by CtorDecl, or None if that isn't
/// possible, i.e. when the value of '*this' is not a nonloc::LazyCompoundVal.
210 static Optional<nonloc::LazyCompoundVal>
211 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context);
213 /// Checks whether the constructor under checking is called by another
/// constructor, by looking for a CXXConstructorDecl among the enclosing
/// location contexts.
215 static bool isCalledByConstructor(const CheckerContext &Context);
217 /// Returns whether FD can be (transitively) dereferenced to a void pointer type
218 /// (void*, void**, ...). The type of the region behind a void pointer isn't
219 /// known, and thus FD can not be analyzed.
220 static bool isVoidPointer(const FieldDecl *FD);
222 /// Returns true if T is a primitive type. This category exists for objects
223 /// that only need to be checked for having an undefined value, such as ints
224 /// and doubles, so that they can be analyzed with ease.
225 /// This also helps ensure that every special field type is handled:
/// isNonUnionUninit reaches llvm_unreachable for a field whose type fits
/// neither this nor any of the other categories.
/// Builtin types, enumerations and member pointers count as primitive.
227 static bool isPrimitiveType(const QualType &T) {
228 return T->isBuiltinType() || T->isEnumeralType() || T->isMemberPointerType();
231 /// Constructs a note message for a given FieldChainInfo object.
/// The message is written to Out and begins with the kind of the finding
/// (uninitialized field, pointer or pointee) followed by 'this->.
232 static void printNoteMessage(llvm::raw_ostream &Out,
233 const FieldChainInfo &Chain);
235 /// Returns Field's name. This is a helper function to get the correct name
236 /// even if Field is a captured lambda variable.
237 static StringRef getVariableName(const FieldDecl *Field);
239 //===----------------------------------------------------------------------===//
240 // Methods for UninitializedObjectChecker.
241 //===----------------------------------------------------------------------===//
/// End-of-function callback of the checker. For a constructor, the constructed
/// object is searched for uninitialized fields, and the findings are emitted
/// either as one warning with a note per field or, when
/// ShouldConvertNotesToWarnings is set, as one warning per field.
// NOTE(review): the statements guarded by several `if`s below are not visible
// in this listing; they are presumably bare returns -- confirm against the
// full file.
243 void UninitializedObjectChecker::checkEndFunction(
244 const ReturnStmt *RS, CheckerContext &Context) const {
// Declaration of the function that is ending; null unless it is a
// constructor.
246 const auto *CtorDecl = dyn_cast_or_null<CXXConstructorDecl>(
247 Context.getLocationContext()->getDecl());
// Constructors that are not user-provided, and constructors of unions, are
// singled out here (presumably skipped, see the note above).
251 if (!CtorDecl->isUserProvided())
254 if (CtorDecl->getParent()->isUnion())
257 // This avoids essentially the same error being reported multiple times.
258 if (isCalledByConstructor(Context))
// Value of '*this', available only if it is a LazyCompoundVal.
261 Optional<nonloc::LazyCompoundVal> Object = getObjectVal(CtorDecl, Context);
// NOTE(review): the remaining constructor argument(s) of F are not visible in
// this listing.
265 FindUninitializedFields F(Context.getState(), Object->getRegion(),
268 const UninitFieldSet &UninitFields = F.getUninitFields();
270 if (UninitFields.empty())
273 // There are uninitialized fields in the record.
// The error node is non-fatal and is shared by every report emitted below.
275 ExplodedNode *Node = Context.generateNonFatalErrorNode(Context.getState());
// Reports are uniqued on a location built from the call site of the
// constructor.
279 PathDiagnosticLocation LocUsedForUniqueing;
280 const Stmt *CallSite = Context.getStackFrame()->getCallSite();
282 LocUsedForUniqueing = PathDiagnosticLocation::createBegin(
283 CallSite, Context.getSourceManager(), Node->getLocationContext());
285 // For Plist consumers that don't support notes just yet, we'll convert notes
// to warnings: each chain gets a report of its own, whose message is the text
// that would otherwise have been a note.
287 if (ShouldConvertNotesToWarnings) {
288 for (const auto &Chain : UninitFields) {
289 SmallString<100> WarningBuf;
290 llvm::raw_svector_ostream WarningOS(WarningBuf);
292 printNoteMessage(WarningOS, Chain);
294 auto Report = llvm::make_unique<BugReport>(
295 *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
296 Node->getLocationContext()->getDecl());
297 Context.emitReport(std::move(Report));
// Single warning stating how many fields are uninitialized; "field" is
// pluralized unless there is exactly one.
302 SmallString<100> WarningBuf;
303 llvm::raw_svector_ostream WarningOS(WarningBuf);
304 WarningOS << UninitFields.size() << " uninitialized field"
305 << (UninitFields.size() == 1 ? "" : "s")
306 << " at the end of the constructor call";
308 auto Report = llvm::make_unique<BugReport>(
309 *BT_uninitField, WarningOS.str(), Node, LocUsedForUniqueing,
310 Node->getLocationContext()->getDecl());
// One note per chain, located at the declaration of the last field of the
// chain.
312 for (const auto &Chain : UninitFields) {
313 SmallString<200> NoteBuf;
314 llvm::raw_svector_ostream NoteOS(NoteBuf);
316 printNoteMessage(NoteOS, Chain);
318 Report->addNote(NoteOS.str(),
319 PathDiagnosticLocation::create(Chain.getEndOfChain(),
320 Context.getSourceManager()));
322 Context.emitReport(std::move(Report));
325 //===----------------------------------------------------------------------===//
326 // Methods for FindUninitializedFields.
327 //===----------------------------------------------------------------------===//
/// Stores the program state, the region of the object to check and the
/// pedantic flag. No analysis happens until getUninitFields is called.
329 FindUninitializedFields::FindUninitializedFields(
330 ProgramStateRef State, const TypedValueRegion *const R, bool IsPedantic)
331 : State(State), ObjectR(R), IsPedantic(IsPedantic) {}
/// Runs the search on ObjectR, starting from an empty field chain, and gives
/// access to the collected chains.
// NOTE(review): the return statement is not visible in this listing.
333 const UninitFieldSet &FindUninitializedFields::getUninitFields() {
334 isNonUnionUninit(ObjectR, FieldChainInfo());
// Outside pedantic mode, findings are dropped when not a single field was
// seen initialized.
336 if (!IsPedantic && !IsAnyFieldInitialized)
337 UninitFields.clear();
/// Records Chain as an uninitialized field.
/// \returns true if Chain was inserted into UninitFields.
342 bool FindUninitializedFields::addFieldToUninits(FieldChainInfo Chain) {
// Chains whose last field is declared in a system header are treated
// separately.
// NOTE(review): the guarded statement is not visible in this listing
// (presumably `return false;`) -- confirm.
343 if (State->getStateManager().getContext().getSourceManager().isInSystemHeader(
344 Chain.getEndOfChain()->getLocation()))
// The insertion fails if the set already holds a chain that ends in the same
// field region.
347 return UninitFields.insert(Chain).second;
/// Walks the fields of the non-union record stored in R, handling each field
/// according to its type, and then repeats the walk for every base class of
/// the record.
/// \param R region of the record to check.
/// \param LocalChain fields leading from the object under checking to R.
/// \returns true if an uninitialized field was recorded for R itself, for a
/// record nested in it, or for one of its bases.
// NOTE(review): this listing omits short lines; the statements that end each
// type branch (presumably `continue;`) and the statement guarded by the cycle
// check are not visible -- confirm against the full file.
350 bool FindUninitializedFields::isNonUnionUninit(const TypedValueRegion *R,
351 FieldChainInfo LocalChain) {
352 assert(R->getValueType()->isRecordType() &&
353 !R->getValueType()->isUnionType() &&
354 "This method only checks non-union record objects!");
356 const RecordDecl *RD =
357 R->getValueType()->getAs<RecordType>()->getDecl()->getDefinition();
358 assert(RD && "Referred record has no definition");
360 bool ContainsUninitField = false;
362 // Are all of this non-union's fields initialized?
363 for (const FieldDecl *I : RD->fields()) {
// Region of field I inside R.
365 const auto FieldVal =
366 State->getLValue(I, loc::MemRegionVal(R)).castAs<loc::MemRegionVal>();
367 const auto *FR = FieldVal.getRegionAs<FieldRegion>();
368 QualType T = I->getType();
370 // If LocalChain already contains FR, then we encountered a cyclic
371 // reference. In this case, region FR is already under checking at an
372 // earlier node in the directed tree.
373 if (LocalChain.contains(FR))
// Nested record: recurse with the field added to the chain.
376 if (T->isStructureOrClassType()) {
377 if (isNonUnionUninit(FR, {LocalChain, FR}))
378 ContainsUninitField = true;
// Union: the union field itself is recorded when isUnionUninit reports it as
// uninitialized.
382 if (T->isUnionType()) {
383 if (isUnionUninit(FR)) {
384 if (addFieldToUninits({LocalChain, FR}))
385 ContainsUninitField = true;
387 IsAnyFieldInitialized = true;
// Arrays are not inspected; they count as initialized.
391 if (T->isArrayType()) {
392 IsAnyFieldInitialized = true;
// Pointers and references: LocalChain is passed on unchanged because the
// callee adds FR to the chain itself when it records a finding.
396 if (T->isPointerType() || T->isReferenceType()) {
397 if (isPointerOrReferenceUninit(FR, LocalChain))
398 ContainsUninitField = true;
// Primitive: decided from the value stored in the field.
402 if (isPrimitiveType(T)) {
403 SVal V = State->getSVal(FieldVal);
405 if (isPrimitiveUninit(V)) {
406 if (addFieldToUninits({LocalChain, FR}))
407 ContainsUninitField = true;
// Every field type is expected to match one of the branches above.
412 llvm_unreachable("All cases are handled!");
416 // FIXME: As of now, because of `isCalledByConstructor`, objects whose type
417 // is a descendant of another type will emit warnings for uninitialized
418 // inherited members.
419 // This is not the only way to analyze bases of an object -- if we didn't
420 // filter them out, and didn't analyze the bases, this checker would run for
421 // each base of the object in order of base initialization and in theory would
422 // find every uninitialized field. This approach could also make handling
423 // diamond inheritances more easily.
425 // This rule (that a descendant type's constructor is responsible for
426 // initializing inherited data members) is not obvious, and should it should
// Base classes only exist for C++ records, hence the cast.
428 const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD);
430 return ContainsUninitField;
// Bases are checked with the same chain as the derived object, so inherited
// fields are not prefixed with the name of their base.
432 for (const CXXBaseSpecifier &BaseSpec : CXXRD->bases()) {
433 const auto *BaseRegion = State->getLValue(BaseSpec, R)
434 .castAs<loc::MemRegionVal>()
435 .getRegionAs<TypedValueRegion>();
437 if (isNonUnionUninit(BaseRegion, LocalChain))
438 ContainsUninitField = true;
441 return ContainsUninitField;
/// Checks the region of a union object. Only the precondition on the type of R
/// is asserted in the visible code; inspecting the members of the union is
/// still to be done.
// NOTE(review): the return statement is not visible in this listing.
444 bool FindUninitializedFields::isUnionUninit(const TypedValueRegion *R) {
445 assert(R->getValueType()->isUnionType() &&
446 "This method only checks union objects!");
447 // TODO: Implement support for union fields.
/// Checks a pointer or reference field: first the value of the field itself,
/// then the object it refers to.
/// \param FR region of the pointer/reference field.
/// \param LocalChain fields leading from the object under checking to the
/// record that contains FR.
// NOTE(review): this listing omits short lines; several guards and the
// `return` statements that follow `IsAnyFieldInitialized = true;` are not
// visible -- confirm against the full file.
451 // Note that pointers/references don't contain fields themselves, so in this
452 // function we won't add anything to LocalChain.
453 bool FindUninitializedFields::isPointerOrReferenceUninit(
454 const FieldRegion *FR, FieldChainInfo LocalChain) {
456 assert((FR->getDecl()->getType()->isPointerType() ||
457 FR->getDecl()->getType()->isReferenceType()) &&
458 "This method only checks pointer/reference objects!");
// Value stored in the pointer/reference field itself.
460 SVal V = State->getSVal(FR);
// An unknown value or a null pointer counts as an initialized field.
462 if (V.isUnknown() || V.isZeroConstant()) {
463 IsAnyFieldInitialized = true;
// The field itself is recorded as uninitialized (IsDereferenced keeps its
// default, false). The condition guarding this line is not visible here.
468 return addFieldToUninits({LocalChain, FR});
471 const FieldDecl *FD = FR->getDecl();
473 // TODO: The dynamic type of a void pointer may be retrieved with
474 // `getDynamicTypeInfo`.
475 if (isVoidPointer(FD)) {
476 IsAnyFieldInitialized = true;
480 assert(V.getAs<Loc>() && "V should be Loc at this point!");
482 // At this point the pointer itself is initialized and points to a valid
483 // location, we'll now check the pointee.
484 SVal DerefdV = State->getSVal(V.castAs<Loc>());
// Keep dereferencing for as long as the loaded value is itself a location.
486 // TODO: Dereferencing should be done according to the dynamic type.
487 while (Optional<Loc> L = DerefdV.getAs<Loc>()) {
488 DerefdV = State->getSVal(*L);
491 // If V is a pointer pointing to a record type.
492 if (Optional<nonloc::LazyCompoundVal> RecordV =
493 DerefdV.getAs<nonloc::LazyCompoundVal>()) {
495 const TypedValueRegion *R = RecordV->getRegion();
497 // We can't reason about symbolic regions, assume it's initialized.
498 // Note that this also avoids a potential infinite recursion, because
499 // constructors for list-like classes are checked without being called, and
500 // the Static Analyzer will construct a symbolic region for Node *next; or
501 // similar code snippets.
502 if (R->getSymbolicBase()) {
503 IsAnyFieldInitialized = true;
507 const QualType T = R->getValueType();
// Pointee is a record: check its fields with FR added to the chain.
509 if (T->isStructureOrClassType())
510 return isNonUnionUninit(R, {LocalChain, FR});
// Pointee is a union: a finding is recorded against the pointee, hence
// IsDereferenced is true.
512 if (T->isUnionType()) {
513 if (isUnionUninit(R)) {
514 return addFieldToUninits({LocalChain, FR, /*IsDereferenced*/ true});
516 IsAnyFieldInitialized = true;
// Pointee is an array: counts as initialized.
521 if (T->isArrayType()) {
522 IsAnyFieldInitialized = true;
526 llvm_unreachable("All cases are handled!");
529 // TODO: If possible, it should be asserted that the DerefdV at this point is
// Remaining case: the pointee is checked as a primitive value, and a finding
// is recorded against the pointee.
532 if (isPrimitiveUninit(DerefdV))
533 return addFieldToUninits({LocalChain, FR, /*IsDereferenced*/ true});
535 IsAnyFieldInitialized = true;
/// Inspects the value V of a primitive object.
// NOTE(review): the test performed on V and both return statements are
// missing from this listing. The only visible statement marks that an
// initialized field was seen, presumably on the path where V is defined --
// confirm against the full file.
539 bool FindUninitializedFields::isPrimitiveUninit(const SVal &V) {
543 IsAnyFieldInitialized = true;
547 //===----------------------------------------------------------------------===//
548 // Methods for FieldChainInfo.
549 //===----------------------------------------------------------------------===//
/// Builds a chain consisting of Other's chain extended with FR.
/// \param Other chain to extend; it is left untouched.
/// \param FR field that becomes the new end of the chain; it must not already
/// be part of the chain.
/// \param IsDereferenced value of the dereferenced flag of the new chain.
551 FieldChainInfo::FieldChainInfo(const FieldChainInfo &Other,
552 const FieldRegion *FR, const bool IsDereferenced)
553 : FieldChainInfo(Other, IsDereferenced) {
554 assert(!contains(FR) && "Can't add a field that is already a part of the "
555 "fieldchain! Is this a cyclic reference?");
// FR goes to the head of the list, which is where the end of the chain is
// read from.
556 Chain = Factory.add(FR, Other.Chain);
/// Returns true if the last field of the chain is declared with a pointer
/// type. The chain must not be empty.
559 bool FieldChainInfo::isPointer() const {
560 assert(!Chain.isEmpty() && "Empty fieldchain!");
// The head of the list is the last field of the chain.
561 return (*Chain.begin())->getDecl()->getType()->isPointerType();
/// Returns the dereferenced flag of the chain. May only be called on chains
/// for which isPointer() is true.
564 bool FieldChainInfo::isDereferenced() const {
565 assert(isPointer() && "Only pointers may or may not be dereferenced!");
566 return IsDereferenced;
/// Returns the declaration of the last field of the chain. The chain must not
/// be empty.
569 const FieldDecl *FieldChainInfo::getEndOfChain() const {
570 assert(!Chain.isEmpty() && "Empty fieldchain!");
// The head of the list is the last field of the chain.
571 return (*Chain.begin())->getDecl();
/// Writes the chain to Out: the elements before the last one are printed by
/// printTail, followed by the name of the last field.
// NOTE(review): the statements between the signature and the declaration of L
// are not visible in this listing.
574 // TODO: This function constructs an incorrect fieldchain string in the
577 // struct Base { int x; };
578 // struct D1 : Base {}; struct D2 : Base {};
580 // struct MostDerived : D1, D2 {
584 // A call to MostDerived::MostDerived() will cause two notes that say
585 // "uninitialized field 'this->x'", but we can't refer to 'x' directly,
586 // we need an explicit namespace resolution whether the uninit field was
587 // 'D1::x' or 'D2::x'.
588 void FieldChainInfo::print(llvm::raw_ostream &Out) const {
// The head of the list is the last field of the chain; everything after it
// has to be printed first.
592 const llvm::ImmutableListImpl<const FieldRegion *> *L =
593 Chain.getInternalPointer();
594 printTail(Out, L->getTail());
595 Out << getVariableName(L->getHead()->getDecl());
/// Prints the fields of list L to Out, each followed by "->" if the field is
/// declared with a pointer type and by "." otherwise.
// NOTE(review): the check that ends the recursion is not visible in this
// listing.
598 void FieldChainInfo::printTail(
599 llvm::raw_ostream &Out,
600 const llvm::ImmutableListImpl<const FieldRegion *> *L) {
// Recurse before printing, so that the elements stored further back in the
// list come out first.
604 printTail(Out, L->getTail());
605 const FieldDecl *Field = L->getHead()->getDecl();
606 Out << getVariableName(Field);
607 Out << (Field->getType()->isPointerType() ? "->" : ".");
610 //===----------------------------------------------------------------------===//
611 // Utility functions.
612 //===----------------------------------------------------------------------===//
/// Follows the pointee types of FD's type, looking for a void pointer type on
/// the way.
// NOTE(review): both return statements are missing from this listing;
// presumably `return true;` on a match and `return false;` after the loop.
614 static bool isVoidPointer(const FieldDecl *FD) {
615 QualType T = FD->getType();
// One level of indirection is peeled per iteration, until there is no pointee
// type left.
617 while (!T.isNull()) {
618 if (T->isVoidPointerType())
620 T = T->getPointeeType();
/// Loads the value of the object under construction.
/// \param CtorDecl the constructor whose object is requested.
/// \param Context supplies the program state and the current stack frame.
/// \returns the value of '*this' if it is a nonloc::LazyCompoundVal, an empty
/// Optional otherwise.
625 static Optional<nonloc::LazyCompoundVal>
626 getObjectVal(const CXXConstructorDecl *CtorDecl, CheckerContext &Context) {
// Location at which the 'this' pointer of the current stack frame is stored.
628 Loc ThisLoc = Context.getSValBuilder().getCXXThis(CtorDecl->getParent(),
629 Context.getStackFrame());
630 // Getting the value for 'this'.
631 SVal This = Context.getState()->getSVal(ThisLoc);
633 // Getting the value for '*this'.
634 SVal Object = Context.getState()->getSVal(This.castAs<Loc>());
636 return Object.getAs<nonloc::LazyCompoundVal>();
/// Looks for a constructor among the location contexts that enclose the
/// current one, starting with the parent of the current context.
// NOTE(review): the loop header and the return statements are not visible in
// this listing.
639 // TODO: We should also check that if the constructor was called by another
640 // constructor, whether those two are in any relation to one another. In its
641 // current state, this introduces some false negatives.
642 static bool isCalledByConstructor(const CheckerContext &Context) {
643 const LocationContext *LC = Context.getLocationContext()->getParent();
646 if (isa<CXXConstructorDecl>(LC->getDecl()))
// Move one context further out.
649 LC = LC->getParent();
/// Writes the beginning of the note for Chain to Out. The wording depends on
/// the end of the chain: "pointee" for a dereferenced pointer, "pointer" for a
/// pointer that is not dereferenced, "field" for everything else.
// NOTE(review): the statements that follow the prefix (presumably printing the
// chain and the closing quote) are not visible in this listing.
654 static void printNoteMessage(llvm::raw_ostream &Out,
655 const FieldChainInfo &Chain) {
656 if (Chain.isPointer()) {
657 if (Chain.isDereferenced())
658 Out << "uninitialized pointee 'this->";
660 Out << "uninitialized pointer 'this->";
662 Out << "uninitialized field 'this->";
/// Returns the name to print for Field: the name of the captured variable if
/// Field belongs to a lambda, Field's own name otherwise.
667 static StringRef getVariableName(const FieldDecl *Field) {
668 // If Field is a captured lambda variable, Field->getName() will return with
669 // an empty string. We can however acquire its name from the lambda's
// captures.
671 const auto *CXXParent = dyn_cast<CXXRecordDecl>(Field->getParent());
673 if (CXXParent && CXXParent->isLambda()) {
674 assert(CXXParent->captures_begin());
// The capture is looked up at the position given by the index of the field.
675 auto It = CXXParent->captures_begin() + Field->getFieldIndex();
676 return It->getCapturedVar()->getName();
679 return Field->getName();
/// Registers the checker with Mgr and reads its two boolean options,
/// "Pedantic" and "NotesAsWarnings", from the analyzer options. Both default
/// to false.
682 void ento::registerUninitializedObjectChecker(CheckerManager &Mgr) {
683 auto Chk = Mgr.registerChecker<UninitializedObjectChecker>();
684 Chk->IsPedantic = Mgr.getAnalyzerOptions().getBooleanOption(
685 "Pedantic", /*DefaultVal*/ false, Chk);
686 Chk->ShouldConvertNotesToWarnings = Mgr.getAnalyzerOptions().getBooleanOption(
687 "NotesAsWarnings", /*DefaultVal*/ false, Chk);