1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines APIs for analyzing the format strings of printf, fscanf,
12 // The structure of format strings for fprintf is described in C99 7.19.6.1.
14 // The structure of format strings for fscanf is described in C99 7.19.6.2.
16 //===----------------------------------------------------------------------===//
18 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
19 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
21 #include "clang/AST/CanonicalType.h"
27 //===----------------------------------------------------------------------===//
28 /// Common components of both fprintf and fscanf format strings.
29 namespace analyze_format_string {
31 /// Class representing optional flags with location and representation
/// Builds a flag whose printable form is \p Representation. The pointer is
/// stored as-is (not copied), and the flag starts out unset.
/// NOTE(review): `position` is not named in this initializer list; confirm it
/// has a default member initializer where it is declared.
35 OptionalFlag(const char *Representation)
36 : representation(Representation), flag(false) {}
/// Returns the current state of the flag.
37 bool isSet() const { return flag; }
/// Marks the flag as set.
38 void set() { flag = true; }
/// Marks the flag as unset.
39 void clear() { flag = false; }
40 void setPosition(const char *position) {
43 this->position = position;
45 const char *getPosition() const {
/// Returns the representation string that was passed to the constructor.
49 const char *toString() const { return representation; }
51 // Overloaded operators for bool-like qualities
/// Yields the same value as isSet(). Because the conversion is explicit it
/// applies in conditions and under `!`, but never as an implicit conversion
/// to an arithmetic type.
52 explicit operator bool() const { return flag; }
53 OptionalFlag& operator=(const bool &rhs) {
55 return *this; // Return a reference to myself.
58 const char *representation;
63 /// Represents the length modifier in a format string in scanf/printf.
64 class LengthModifier {
70 AsShortLong, // 'hl' (OpenCL float/int vector element)
73 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
77 AsInt32, // 'I32' (MSVCRT, like __int32)
78 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL)
79 AsInt64, // 'I64' (MSVCRT, like __int64)
81 AsAllocate, // for '%as', GNU extension to C90 scanf
82 AsMAllocate, // for '%ms', GNU extension to scanf
83 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
84 AsWideChar = AsLong // for '%ls', only makes sense for printf
88 : Position(nullptr), kind(None) {}
/// Records \p pos as the modifier's Position and \p k as its kind.
89 LengthModifier(const char *pos, Kind k)
90 : Position(pos), kind(k) {}
92 const char *getStart() const {
96 unsigned getLength() const {
/// Returns the kind of this length modifier.
111 Kind getKind() const { return kind; }
/// Replaces the kind; Position is left untouched.
112 void setKind(Kind k) { kind = k; }
114 const char *toString() const;
117 const char *Position;
121 class ConversionSpecifier {
124 InvalidSpecifier = 0,
125 // C99 conversion specifiers.
128 DArg, // Apple extension
134 OArg, // Apple extension
136 UArg, // Apple extension
160 // Apple extension: P specifies to os_log that the data being pointed to is
161 // to be copied by os_log. The precision indicates the number of bytes to
165 // ** Printf-specific **
167 ZArg, // MS extension
169 // Objective-C specific specifiers.
171 ObjCBeg = ObjCObjArg,
172 ObjCEnd = ObjCObjArg,
174 // FreeBSD kernel specific specifiers.
180 // GlibC specific specifiers.
183 PrintfConvBeg = ObjCObjArg,
184 PrintfConvEnd = PrintErrno,
186 // ** Scanf-specific **
188 ScanfConvBeg = ScanListArg,
189 ScanfConvEnd = ScanListArg
/// Builds an InvalidSpecifier with null Position and null EndScanList.
/// \p isPrintf is stored in IsPrintf (reported by isPrintfKind()) and defaults
/// to true.
/// NOTE(review): this constructor is not explicit, so a bool converts
/// implicitly to a ConversionSpecifier; confirm that is intended.
192 ConversionSpecifier(bool isPrintf = true)
193 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
194 kind(InvalidSpecifier) {}
/// Builds a specifier of kind \p k with Position set to \p pos. EndScanList
/// starts out null; \p isPrintf is stored in IsPrintf.
196 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
197 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
199 const char *getStart() const {
203 StringRef getCharacters() const {
204 return StringRef(getStart(), getLength());
207 bool consumesDataArgument() const {
214 case InvalidSpecifier:
/// Returns the kind of this conversion specifier.
221 Kind getKind() const { return kind; }
/// Replaces the kind; Position, EndScanList and IsPrintf are left untouched.
222 void setKind(Kind k) { kind = k; }
223 unsigned getLength() const {
224 return EndScanList ? EndScanList - Position : 1;
/// Stores \p pos in EndScanList. While EndScanList is non-null, getLength()
/// reports EndScanList - Position instead of 1.
226 void setEndScanList(const char *pos) { EndScanList = pos; }
/// True when the kind lies in [IntArgBeg, IntArgEnd], or is one of the two
/// kinds FreeBSDrArg / FreeBSDyArg that are tested explicitly.
228 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
229 kind == FreeBSDrArg || kind == FreeBSDyArg; }
/// True when the kind lies in [UIntArgBeg, UIntArgEnd].
230 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
/// True when the kind lies in [IntArgBeg, UIntArgEnd].
/// Assumes the signed and unsigned integer kinds are contiguous in Kind, with
/// the signed range first -- the enumerator order is not visible here, TODO
/// confirm.
/// NOTE(review): unlike isIntArg(), this does not test FreeBSDrArg or
/// FreeBSDyArg explicitly; confirm whether those kinds should count here.
231 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
232 bool isDoubleArg() const {
233 return kind >= DoubleArgBeg && kind <= DoubleArgEnd;
236 const char *toString() const;
/// Returns the IsPrintf flag fixed at construction. The classof() hooks of
/// PrintfConversionSpecifier and ScanfConversionSpecifier use this value to
/// tell the two subclasses apart.
238 bool isPrintfKind() const { return IsPrintf; }
240 Optional<ConversionSpecifier> getStandardSpecifier() const;
244 const char *Position;
245 const char *EndScanList;
251 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
252 AnyCharTy, CStrTy, WCStrTy, WIntTy };
254 /// How well a given conversion specifier matches its argument.
256 /// The conversion specifier and the argument types are incompatible. For
257 /// instance, "%d" and float.
259 /// The conversion specifier and the argument type are compatible. For
260 /// instance, "%d" and _Bool.
262 /// The conversion specifier and the argument type are disallowed by the C
263 /// standard, but are in practice harmless. For instance, "%p" and int*.
265 /// The conversion specifier and the argument type are compatible, but the
266 /// combination still seems likely to be an error. For instance, "%hd" and _Bool.
267 NoMatchTypeConfusion,
273 const char *Name = nullptr;
276 /// The TypeKind identifies certain well-known types like size_t and
278 enum class TypeKind { DontCare, SizeT, PtrdiffT };
279 TypeKind TK = TypeKind::DontCare;
/// Builds an ArgType of kind \p K (UnknownTy by default) with optional name
/// \p N. The type member T is not set by this constructor.
282 ArgType(Kind K = UnknownTy, const char *N = nullptr) : K(K), Name(N) {}
/// Builds a SpecificTy ArgType for the type \p T with optional name \p N.
283 ArgType(QualType T, const char *N = nullptr) : K(SpecificTy), T(T), Name(N) {}
/// Builds a SpecificTy ArgType for \p T; Name keeps its default of nullptr.
/// Not explicit, so a CanQualType converts implicitly to ArgType.
284 ArgType(CanQualType T) : K(SpecificTy), T(T) {}
/// Returns an ArgType whose kind is InvalidTy.
286 static ArgType Invalid() { return ArgType(InvalidTy); }
/// True for every kind except InvalidTy (so UnknownTy counts as valid).
287 bool isValid() const { return K != InvalidTy; }
/// True when TK has been tagged TypeKind::SizeT (see makeSizeT).
289 bool isSizeT() const { return TK == TypeKind::SizeT; }
/// True when TK has been tagged TypeKind::PtrdiffT (see makePtrdiffT).
291 bool isPtrdiffT() const { return TK == TypeKind::PtrdiffT; }
293 /// Create an ArgType which corresponds to the type pointer to A.
294 static ArgType PtrTo(const ArgType& A) {
295 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
301 /// Create an ArgType which corresponds to the size_t/ssize_t type.
302 static ArgType makeSizeT(const ArgType &A) {
304 Res.TK = TypeKind::SizeT;
308 /// Create an ArgType which corresponds to the ptrdiff_t/unsigned ptrdiff_t
310 static ArgType makePtrdiffT(const ArgType &A) {
312 Res.TK = TypeKind::PtrdiffT;
316 MatchKind matchesType(ASTContext &C, QualType argTy) const;
318 QualType getRepresentativeType(ASTContext &C) const;
320 ArgType makeVectorType(ASTContext &C, unsigned NumElts) const;
322 std::string getRepresentativeTypeName(ASTContext &C) const;
325 class OptionalAmount {
327 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
329 OptionalAmount(HowSpecified howSpecified,
331 const char *amountStart,
332 unsigned amountLength,
333 bool usesPositionalArg)
334 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
335 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
/// Builds an amount that carries no value: start is null, length and amt are
/// zero, and both flags are cleared. \p valid selects the state: true (the
/// default) yields NotSpecified, false yields Invalid. This also serves as the
/// default constructor.
337 OptionalAmount(bool valid = true)
338 : start(nullptr), length(0), hs(valid ? NotSpecified : Invalid), amt(0),
339 UsesPositionalArg(false), UsesDotPrefix(false) {}
/// Builds a Constant amount with value \p Amount that has no source text
/// (start is null, length is zero) and neither flag set.
341 explicit OptionalAmount(unsigned Amount)
342 : start(nullptr), length(0), hs(Constant), amt(Amount),
343 UsesPositionalArg(false), UsesDotPrefix(false) {}
345 bool isInvalid() const {
346 return hs == Invalid;
/// Returns how the amount was specified (NotSpecified, Constant, Arg or
/// Invalid).
349 HowSpecified getHowSpecified() const { return hs; }
/// Overwrites the HowSpecified state; no other member is changed.
350 void setHowSpecified(HowSpecified h) { hs = h; }

/// True only when the amount is in the Arg state. getArgIndex() and
/// getPositionalArgIndex() assert this.
352 bool hasDataArgument() const { return hs == Arg; }
354 unsigned getArgIndex() const {
355 assert(hasDataArgument());
359 unsigned getConstantAmount() const {
360 assert(hs == Constant);
364 const char *getStart() const {
365 // We include the . character if it is given.
366 return start - UsesDotPrefix;
369 unsigned getConstantLength() const {
370 assert(hs == Constant);
371 return length + UsesDotPrefix;
374 ArgType getArgType(ASTContext &Ctx) const;
376 void toString(raw_ostream &os) const;
/// Returns the UsesPositionalArg flag. It is taken from the usesPositionalArg
/// constructor parameter and is false for amounts built by the other
/// constructors. The member is already declared bool, so it is returned
/// directly, matching usesDotPrefix().
378 bool usesPositionalArg() const { return UsesPositionalArg; }
379 unsigned getPositionalArgIndex() const {
380 assert(hasDataArgument());
/// Returns the UsesDotPrefix flag. When set, getStart() steps back by one
/// character and getConstantLength() grows by one so the '.' is included.
384 bool usesDotPrefix() const { return UsesDotPrefix; }
/// Sets the UsesDotPrefix flag; there is no corresponding way to clear it in
/// the part of the class visible here.
385 void setUsesDotPrefix() { UsesDotPrefix = true; }
392 bool UsesPositionalArg : 1;
397 class FormatSpecifier {
400 OptionalAmount FieldWidth;
401 ConversionSpecifier CS;
402 OptionalAmount VectorNumElts;
404 /// Positional arguments, an IEEE extension:
405 /// IEEE Std 1003.1, 2004 Edition
406 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
407 bool UsesPositionalArg;
/// Builds an empty specifier for the printf (\p isPrintf true) or scanf
/// family. CS starts as an InvalidSpecifier carrying that flag, positional
/// arguments are off, and argIndex is 0.
/// VectorNumElts is initialized from `false`, which selects
/// OptionalAmount(bool valid) and therefore starts in the Invalid state.
/// FieldWidth is not listed, so it is default-constructed (NotSpecified).
410 FormatSpecifier(bool isPrintf)
411 : CS(isPrintf), VectorNumElts(false),
412 UsesPositionalArg(false), argIndex(0) {}
414 void setLengthModifier(LengthModifier lm) {
/// Sets UsesPositionalArg to true; nothing visible here resets it.
418 void setUsesPositionalArg() { UsesPositionalArg = true; }
420 void setArgIndex(unsigned i) {
424 unsigned getArgIndex() const {
428 unsigned getPositionalArgIndex() const {
432 const LengthModifier &getLengthModifier() const {
436 const OptionalAmount &getFieldWidth() const {
440 void setVectorNumElts(const OptionalAmount &Amt) {
444 const OptionalAmount &getVectorNumElts() const {
445 return VectorNumElts;
448 void setFieldWidth(const OptionalAmount &Amt) {
/// Returns the UsesPositionalArg flag (false until setUsesPositionalArg() is
/// called).
452 bool usesPositionalArg() const { return UsesPositionalArg; }
454 bool hasValidLengthModifier(const TargetInfo &Target,
455 const LangOptions &LO) const;
457 bool hasStandardLengthModifier() const;
459 Optional<LengthModifier> getCorrectedLengthModifier() const;
461 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
463 bool hasStandardLengthConversionCombination() const;
465 /// For a TypedefType QT, if it is a named integer type such as size_t,
466 /// assign the appropriate value to LM and return true.
467 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
470 } // end analyze_format_string namespace
472 //===----------------------------------------------------------------------===//
473 /// Pieces specific to fprintf format strings.
475 namespace analyze_printf {
477 class PrintfConversionSpecifier :
478 public analyze_format_string::ConversionSpecifier {
/// Builds an invalid printf specifier: IsPrintf is true, the position is null
/// and the kind is InvalidSpecifier.
480 PrintfConversionSpecifier()
481 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
/// Builds a printf specifier (IsPrintf is true) of kind \p k with its
/// position set to \p pos.
483 PrintfConversionSpecifier(const char *pos, Kind k)
484 : ConversionSpecifier(true, pos, k) {}
/// True when the kind lies in [ObjCBeg, ObjCEnd]. Both bounds are currently
/// defined as ObjCObjArg, so this matches exactly that one kind.
486 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
/// True when the kind lies in [DoubleArgBeg, DoubleArgEnd].
/// NOTE(review): this is the same test as ConversionSpecifier::isDoubleArg()
/// and hides that base-class member; confirm the redeclaration is still
/// needed.
487 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
488 kind <= DoubleArgEnd; }
490 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
491 return CS->isPrintfKind();
495 using analyze_format_string::ArgType;
496 using analyze_format_string::LengthModifier;
497 using analyze_format_string::OptionalAmount;
498 using analyze_format_string::OptionalFlag;
500 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
501 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
502 OptionalFlag IsLeftJustified; // '-'
503 OptionalFlag HasPlusPrefix; // '+'
504 OptionalFlag HasSpacePrefix; // ' '
505 OptionalFlag HasAlternativeForm; // '#'
506 OptionalFlag HasLeadingZeroes; // '0'
507 OptionalFlag HasObjCTechnicalTerm; // '[tt]'
508 OptionalFlag IsPrivate; // '{private}'
509 OptionalFlag IsPublic; // '{public}'
510 OptionalFlag IsSensitive; // '{sensitive}'
511 OptionalAmount Precision;
514 ArgType getScalarArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
518 : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"),
519 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
520 HasAlternativeForm("#"), HasLeadingZeroes("0"),
521 HasObjCTechnicalTerm("tt"), IsPrivate("private"), IsPublic("public"),
522 IsSensitive("sensitive") {}
524 static PrintfSpecifier Parse(const char *beg, const char *end);
526 // Methods for incrementally constructing the PrintfSpecifier.
527 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
530 void setHasThousandsGrouping(const char *position) {
531 HasThousandsGrouping.setPosition(position);
533 void setIsLeftJustified(const char *position) {
534 IsLeftJustified.setPosition(position);
536 void setHasPlusPrefix(const char *position) {
537 HasPlusPrefix.setPosition(position);
539 void setHasSpacePrefix(const char *position) {
540 HasSpacePrefix.setPosition(position);
542 void setHasAlternativeForm(const char *position) {
543 HasAlternativeForm.setPosition(position);
545 void setHasLeadingZeros(const char *position) {
546 HasLeadingZeroes.setPosition(position);
548 void setHasObjCTechnicalTerm(const char *position) {
549 HasObjCTechnicalTerm.setPosition(position);
/// Forwards \p position to IsPrivate.setPosition().
/// NOTE(review): setPosition()'s full body is not visible here; confirm
/// whether it also marks the flag as set.
551 void setIsPrivate(const char *position) { IsPrivate.setPosition(position); }
/// Forwards \p position to IsPublic.setPosition(); same caveat as above.
552 void setIsPublic(const char *position) { IsPublic.setPosition(position); }
553 void setIsSensitive(const char *position) {
554 IsSensitive.setPosition(position);
/// Sets the inherited UsesPositionalArg flag to true.
/// NOTE(review): the body is identical to
/// FormatSpecifier::setUsesPositionalArg(); confirm whether this
/// redeclaration is needed (e.g. for access reasons).
556 void setUsesPositionalArg() { UsesPositionalArg = true; }
558 // Methods for querying the format specifier.
560 const PrintfConversionSpecifier &getConversionSpecifier() const {
561 return cast<PrintfConversionSpecifier>(CS);
564 void setPrecision(const OptionalAmount &Amt) {
566 Precision.setUsesDotPrefix();
569 const OptionalAmount &getPrecision() const {
573 bool consumesDataArgument() const {
574 return getConversionSpecifier().consumesDataArgument();
577 /// Returns the builtin type that a data argument
578 /// paired with this format specifier should have. This method
579 /// will return null if the format specifier does not have
580 /// a matching data argument or the matching argument matches
581 /// more than one type.
582 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
584 const OptionalFlag &hasThousandsGrouping() const {
585 return HasThousandsGrouping;
/// Read-only accessors for the individual flags. Each returns a reference to
/// the member OptionalFlag, so the result stays valid only as long as this
/// PrintfSpecifier does. The flag characters are those given to the flags'
/// constructors: "-", "+", "#", "0", " ", "tt", "private", "public" and
/// "sensitive" respectively.
587 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
588 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
589 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
// Note the spelling difference: accessor "Zeros", member "Zeroes".
590 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
591 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
592 const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
593 const OptionalFlag &isPrivate() const { return IsPrivate; }
594 const OptionalFlag &isPublic() const { return IsPublic; }
595 const OptionalFlag &isSensitive() const { return IsSensitive; }
/// Returns the inherited UsesPositionalArg flag; same body as
/// FormatSpecifier::usesPositionalArg().
596 bool usesPositionalArg() const { return UsesPositionalArg; }
/// Returns the stored mask type as a StringRef.
598 StringRef getMaskType() const { return MaskType; }
/// Stores \p S as the mask type.
/// NOTE(review): MaskType's declaration is not visible here; if it is a
/// StringRef, the caller must keep the referenced characters alive -- confirm.
599 void setMaskType(StringRef S) { MaskType = S; }
601 /// Changes the specifier and length according to a QualType, retaining any
602 /// flags or options. Returns true on success, or false when a conversion
603 /// was not successful.
604 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
607 void toString(raw_ostream &os) const;
609 // Validation methods - to check if any element results in undefined behavior
610 bool hasValidPlusPrefix() const;
611 bool hasValidAlternativeForm() const;
612 bool hasValidLeadingZeros() const;
613 bool hasValidSpacePrefix() const;
614 bool hasValidLeftJustified() const;
615 bool hasValidThousandsGroupingPrefix() const;
617 bool hasValidPrecision() const;
618 bool hasValidFieldWidth() const;
620 } // end analyze_printf namespace
622 //===----------------------------------------------------------------------===//
623 /// Pieces specific to fscanf format strings.
625 namespace analyze_scanf {
627 class ScanfConversionSpecifier :
628 public analyze_format_string::ConversionSpecifier {
/// Builds an invalid scanf specifier: IsPrintf is false, the position is null
/// and the kind is InvalidSpecifier.
630 ScanfConversionSpecifier()
631 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
/// Builds a scanf specifier (IsPrintf is false) of kind \p k with its
/// position set to \p pos.
633 ScanfConversionSpecifier(const char *pos, Kind k)
634 : ConversionSpecifier(false, pos, k) {}
636 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
637 return !CS->isPrintfKind();
641 using analyze_format_string::ArgType;
642 using analyze_format_string::LengthModifier;
643 using analyze_format_string::OptionalAmount;
644 using analyze_format_string::OptionalFlag;
646 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
647 OptionalFlag SuppressAssignment; // '*'
650 FormatSpecifier(/* isPrintf = */ false),
651 SuppressAssignment("*") {}
653 void setSuppressAssignment(const char *position) {
654 SuppressAssignment.setPosition(position);
657 const OptionalFlag &getSuppressAssignment() const {
658 return SuppressAssignment;
661 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
665 const ScanfConversionSpecifier &getConversionSpecifier() const {
666 return cast<ScanfConversionSpecifier>(CS);
669 bool consumesDataArgument() const {
670 return CS.consumesDataArgument() && !SuppressAssignment;
673 ArgType getArgType(ASTContext &Ctx) const;
675 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
678 void toString(raw_ostream &os) const;
680 static ScanfSpecifier Parse(const char *beg, const char *end);
683 } // end analyze_scanf namespace
685 //===----------------------------------------------------------------------===//
686 // Parsing and processing of format strings (both fprintf and fscanf).
688 namespace analyze_format_string {
/// Distinguishes a field-width position from a precision position. It is the
/// last parameter of FormatStringHandler::HandleInvalidPosition().
690 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
692 class FormatStringHandler {
694 FormatStringHandler() {}
695 virtual ~FormatStringHandler();
/// The callbacks below all have empty bodies in this base class, so a handler
/// that does not override one of them simply ignores that notification.
/// NOTE(review): when each callback fires is decided by the parsers, which
/// are not visible in this header -- see their definitions for the exact
/// conditions.
697 virtual void HandleNullChar(const char *nullCharacter) {}
699 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
701 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
702 PositionContext p) {}
704 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
706 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
707 unsigned specifierLen) {}
709 virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
710 unsigned flagsLen) {}
712 virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
/// Does nothing in this base class; override to receive the notification.
/// The arguments are three pointers: the start and end of the flags, and the
/// position of the conversion.
715 virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
716 const char *flagsEnd,
717 const char *conversionPosition) {}
718 // Printf-specific handlers.
720 virtual bool HandleInvalidPrintfConversionSpecifier(
721 const analyze_printf::PrintfSpecifier &FS,
722 const char *startSpecifier,
723 unsigned specifierLen) {
727 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
728 const char *startSpecifier,
729 unsigned specifierLen) {
733 /// Handle mask types whose sizes are not between one and eight bytes.
734 virtual void handleInvalidMaskType(StringRef MaskType) {}
736 // Scanf-specific handlers.
738 virtual bool HandleInvalidScanfConversionSpecifier(
739 const analyze_scanf::ScanfSpecifier &FS,
740 const char *startSpecifier,
741 unsigned specifierLen) {
745 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
746 const char *startSpecifier,
747 unsigned specifierLen) {
/// Does nothing in this base class; override to receive the notification.
751 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
754 bool ParsePrintfString(FormatStringHandler &H,
755 const char *beg, const char *end, const LangOptions &LO,
756 const TargetInfo &Target, bool isFreeBSDKPrintf);
758 bool ParseFormatStringHasSArg(const char *beg, const char *end,
759 const LangOptions &LO, const TargetInfo &Target);
761 bool ParseScanfString(FormatStringHandler &H,
762 const char *beg, const char *end, const LangOptions &LO,
763 const TargetInfo &Target);
765 /// Return true if the given string has at least one formatting specifier.
766 bool parseFormatStringHasFormattingSpecifiers(const char *Begin,
768 const LangOptions &LO,
769 const TargetInfo &Target);
771 } // end analyze_format_string namespace
772 } // end clang namespace