1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
13 // The structure of format strings for fprintf is described in C99 7.19.6.1.
15 // The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
22 #include "clang/AST/CanonicalType.h"
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
32 /// Class representing optional flags with location and representation
36 OptionalFlag(const char *Representation) // Starts cleared and keeps the given spelling.
37 : representation(Representation), flag(false) {}
38 bool isSet() const { return flag; }
39 void set() { flag = true; }
40 void clear() { flag = false; }
41 void setPosition(const char *position) { // Stores the pointer in this->position.
44 this->position = position;
46 const char *getPosition() const {
50 const char *toString() const { return representation; } // Spelling passed to the constructor.
52 // Overloaded operators for bool like qualities
53 explicit operator bool() const { return flag; } // Explicit: usable in conditions, no implicit conversion elsewhere.
54 OptionalFlag& operator=(const bool &rhs) { // Assignment from a plain bool.
56 return *this; // Returns *this so assignments can be chained.
59 const char *representation; // Text returned by toString().
64 /// Represents the length modifier in a format string in scanf/printf.
65 class LengthModifier {
73 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
77 AsInt32, // 'I32' (MSVCRT, like __int32)
78 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL)
79 AsInt64, // 'I64' (MSVCRT, like __int64)
81 AsAllocate, // for '%as', GNU extension to C90 scanf
82 AsMAllocate, // for '%ms', GNU extension to scanf
83 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
84 AsWideChar = AsLong // alias of AsLong (same enumerator value); for '%ls', only makes sense for printf
88 : Position(nullptr), kind(None) {} // Default state: no position, kind None.
89 LengthModifier(const char *pos, Kind k)
90 : Position(pos), kind(k) {} // Stores pos as Position and k as the kind.
92 const char *getStart() const {
96 unsigned getLength() const {
111 Kind getKind() const { return kind; }
112 void setKind(Kind k) { kind = k; }
114 const char *toString() const; // Declaration only; no body in this header section.
117 const char *Position; // Set from the constructor argument; nullptr by default.
121 class ConversionSpecifier { // Conversion specifier used for both printf and scanf; IsPrintf records which one.
124 InvalidSpecifier = 0,
125 // C99 conversion specifiers.
128 DArg, // Apple extension
134 OArg, // Apple extension
136 UArg, // Apple extension
160 // Apple extension: P specifies to os_log that the data being pointed to is
161 // to be copied by os_log. The precision indicates the number of bytes to
165 // ** Printf-specific **
167 ZArg, // MS extension
169 // Objective-C specific specifiers.
171 ObjCBeg = ObjCObjArg, // The Objective-C range holds the single value ObjCObjArg.
172 ObjCEnd = ObjCObjArg,
174 // FreeBSD kernel specific specifiers.
180 // GlibC specific specifiers.
183 PrintfConvBeg = ObjCObjArg, // Printf-only range: ObjCObjArg ...
184 PrintfConvEnd = PrintErrno, // ... through PrintErrno.
186 // ** Scanf-specific **
188 ScanfConvBeg = ScanListArg, // The scanf-only range holds the single value ScanListArg.
189 ScanfConvEnd = ScanListArg
192 ConversionSpecifier(bool isPrintf = true)
193 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
194 kind(InvalidSpecifier) {} // Default state: no position, no scan list, invalid kind.
196 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
197 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
199 const char *getStart() const {
203 StringRef getCharacters() const {
204 return StringRef(getStart(), getLength()); // getLength() characters beginning at getStart().
207 bool consumesDataArgument() const {
214 case InvalidSpecifier:
221 Kind getKind() const { return kind; }
222 void setKind(Kind k) { kind = k; }
223 unsigned getLength() const {
224 return EndScanList ? EndScanList - Position : 1; // 1 unless a scan-list end was set; then the distance from Position.
226 void setEndScanList(const char *pos) { EndScanList = pos; }
228 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
229 kind == FreeBSDrArg || kind == FreeBSDyArg; } // IntArg range plus the two FreeBSD kinds.
230 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
231 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } // NOTE(review): unlike isIntArg, FreeBSDrArg/FreeBSDyArg are not tested explicitly - confirm they lie in this range.
232 bool isDoubleArg() const {
233 return kind >= DoubleArgBeg && kind <= DoubleArgEnd;
236 const char *toString() const; // Declaration only; no body in this header section.
238 bool isPrintfKind() const { return IsPrintf; }
240 Optional<ConversionSpecifier> getStandardSpecifier() const; // Optional result: may hold no value.
244 const char *Position;
245 const char *EndScanList; // nullptr unless setEndScanList() was called.
251 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
252 AnyCharTy, CStrTy, WCStrTy, WIntTy }; // Enumerator order matters: PtrTo() compares kinds with >=.
254 enum MatchKind { NoMatch = 0, Match = 1, NoMatchPedantic }; // NoMatchPedantic takes the next value, 2.
262 ArgType(Kind k = UnknownTy, const char *n = nullptr)
263 : K(k), Name(n), Ptr(false) {} // Kind-only form; defaults to UnknownTy with no name.
264 ArgType(QualType t, const char *n = nullptr)
265 : K(SpecificTy), T(t), Name(n), Ptr(false) {} // Specific type t with an optional name.
266 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {} // Non-explicit, so CanQualType converts implicitly; no name.
268 static ArgType Invalid() { return ArgType(InvalidTy); }
269 bool isValid() const { return K != InvalidTy; } // Note: UnknownTy also counts as valid here.
271 /// Create an ArgType which corresponds to the type pointer to A.
272 static ArgType PtrTo(const ArgType& A) {
273 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); // NOTE(review): >= InvalidTy rejects only UnknownTy, yet the message also names invalid - confirm intent.
279 MatchKind matchesType(ASTContext &C, QualType argTy) const; // Declaration only.
281 QualType getRepresentativeType(ASTContext &C) const; // Declaration only.
283 std::string getRepresentativeTypeName(ASTContext &C) const; // Declaration only.
286 class OptionalAmount { // An amount that is unspecified, a constant, taken from an argument, or invalid (see HowSpecified).
288 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
290 OptionalAmount(HowSpecified howSpecified,
292 const char *amountStart,
293 unsigned amountLength,
294 bool usesPositionalArg)
295 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
296 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} // Dot prefix stays off until setUsesDotPrefix().
298 OptionalAmount(bool valid = true)
299 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
300 UsesPositionalArg(0), UsesDotPrefix(0) {} // Empty amount: NotSpecified when valid, Invalid otherwise.
302 bool isInvalid() const {
303 return hs == Invalid;
306 HowSpecified getHowSpecified() const { return hs; }
307 void setHowSpecified(HowSpecified h) { hs = h; }
309 bool hasDataArgument() const { return hs == Arg; }
311 unsigned getArgIndex() const {
312 assert(hasDataArgument()); // Precondition: the amount comes from an argument (hs == Arg).
316 unsigned getConstantAmount() const {
317 assert(hs == Constant); // Precondition: the amount is a constant.
321 const char *getStart() const {
322 // We include the . character if it is given.
323 return start - UsesDotPrefix; // UsesDotPrefix converts to 0 or 1, so this steps back one char when the dot is used.
326 unsigned getConstantLength() const {
327 assert(hs == Constant);
328 return length + UsesDotPrefix; // Adds one for the dot when the dot prefix is used.
331 ArgType getArgType(ASTContext &Ctx) const; // Declaration only.
333 void toString(raw_ostream &os) const; // Declaration only.
335 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } // Reads the 1-bit field back as bool.
336 unsigned getPositionalArgIndex() const {
337 assert(hasDataArgument()); // Precondition: the amount comes from an argument (hs == Arg).
341 bool usesDotPrefix() const { return UsesDotPrefix; }
342 void setUsesDotPrefix() { UsesDotPrefix = true; }
349 bool UsesPositionalArg : 1; // 1-bit field initialised by both constructors.
354 class FormatSpecifier { // Base class of PrintfSpecifier and ScanfSpecifier.
357 OptionalAmount FieldWidth;
358 ConversionSpecifier CS;
359 /// Positional arguments, an IEEE extension:
360 /// IEEE Std 1003.1, 2004 Edition
361 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
362 bool UsesPositionalArg;
365 FormatSpecifier(bool isPrintf)
366 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} // CS is tagged via isPrintf; starts non-positional with argIndex 0.
368 void setLengthModifier(LengthModifier lm) {
372 void setUsesPositionalArg() { UsesPositionalArg = true; }
374 void setArgIndex(unsigned i) {
378 unsigned getArgIndex() const {
382 unsigned getPositionalArgIndex() const {
386 const LengthModifier &getLengthModifier() const {
390 const OptionalAmount &getFieldWidth() const {
394 void setFieldWidth(const OptionalAmount &Amt) {
398 bool usesPositionalArg() const { return UsesPositionalArg; }
400 bool hasValidLengthModifier(const TargetInfo &Target) const; // Declaration only.
402 bool hasStandardLengthModifier() const; // Declaration only.
404 Optional<LengthModifier> getCorrectedLengthModifier() const; // Optional result: may hold no value.
406 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; // Declaration only.
408 bool hasStandardLengthConversionCombination() const; // Declaration only.
410 /// For a TypedefType QT, if it is a named integer type such as size_t,
411 /// assign the appropriate value to LM and return true.
412 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
415 } // end analyze_format_string namespace
417 //===----------------------------------------------------------------------===//
418 /// Pieces specific to fprintf format strings.
420 namespace analyze_printf {
422 class PrintfConversionSpecifier : // ConversionSpecifier whose constructors always pass isPrintf = true.
423 public analyze_format_string::ConversionSpecifier {
425 PrintfConversionSpecifier()
426 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {} // Default: no position, invalid kind.
428 PrintfConversionSpecifier(const char *pos, Kind k)
429 : ConversionSpecifier(true, pos, k) {}
431 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
432 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
433 kind <= DoubleArgEnd; } // Same range test as ConversionSpecifier::isDoubleArg().
435 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
436 return CS->isPrintfKind(); // The base-class IsPrintf flag decides membership.
440 using analyze_format_string::ArgType; // These using-declarations let analyze_printf name the shared types unqualified.
441 using analyze_format_string::LengthModifier;
442 using analyze_format_string::OptionalAmount;
443 using analyze_format_string::OptionalFlag;
445 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { // printf-side FormatSpecifier: adds the flags below and a precision.
446 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
447 OptionalFlag IsLeftJustified; // '-'
448 OptionalFlag HasPlusPrefix; // '+'
449 OptionalFlag HasSpacePrefix; // ' '
450 OptionalFlag HasAlternativeForm; // '#'
451 OptionalFlag HasLeadingZeroes; // '0'
452 OptionalFlag HasObjCTechnicalTerm; // '[tt]'
453 OptionalFlag IsPrivate; // '{private}'
454 OptionalFlag IsPublic; // '{public}'
455 OptionalAmount Precision; // setPrecision() marks it as using a dot prefix.
458 : FormatSpecifier(/* isPrintf = */ true), HasThousandsGrouping("'"),
459 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
460 HasAlternativeForm("#"), HasLeadingZeroes("0"),
461 HasObjCTechnicalTerm("tt"), IsPrivate("private"), IsPublic("public") {} // Each flag is constructed with its textual spelling.
463 static PrintfSpecifier Parse(const char *beg, const char *end); // Declaration only.
465 // Methods for incrementally constructing the PrintfSpecifier.
466 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
469 void setHasThousandsGrouping(const char *position) {
470 HasThousandsGrouping.setPosition(position);
472 void setIsLeftJustified(const char *position) {
473 IsLeftJustified.setPosition(position);
475 void setHasPlusPrefix(const char *position) {
476 HasPlusPrefix.setPosition(position);
478 void setHasSpacePrefix(const char *position) {
479 HasSpacePrefix.setPosition(position);
481 void setHasAlternativeForm(const char *position) {
482 HasAlternativeForm.setPosition(position);
484 void setHasLeadingZeros(const char *position) { // Spelling differs from the member: Zeros here, Zeroes there.
485 HasLeadingZeroes.setPosition(position);
487 void setHasObjCTechnicalTerm(const char *position) {
488 HasObjCTechnicalTerm.setPosition(position);
490 void setIsPrivate(const char *position) { IsPrivate.setPosition(position); }
491 void setIsPublic(const char *position) { IsPublic.setPosition(position); }
492 void setUsesPositionalArg() { UsesPositionalArg = true; } // Same body as FormatSpecifier::setUsesPositionalArg().
494 // Methods for querying the format specifier.
496 const PrintfConversionSpecifier &getConversionSpecifier() const {
497 return cast<PrintfConversionSpecifier>(CS); // Downcasts the base-class CS member; PrintfConversionSpecifier::classof() is the matching type test.
500 void setPrecision(const OptionalAmount &Amt) {
502 Precision.setUsesDotPrefix(); // Marks the stored precision as written with a dot.
505 const OptionalAmount &getPrecision() const {
509 bool consumesDataArgument() const {
510 return getConversionSpecifier().consumesDataArgument(); // Delegates to the conversion specifier.
513 /// \brief Returns the builtin type that a data argument
514 /// paired with this format specifier should have. This method
515 /// will return null if the format specifier does not have
516 /// a matching data argument or the matching argument matches
517 /// more than one type.
518 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
520 const OptionalFlag &hasThousandsGrouping() const {
521 return HasThousandsGrouping;
523 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
524 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
525 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
526 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
527 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
528 const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
529 const OptionalFlag &isPrivate() const { return IsPrivate; }
530 const OptionalFlag &isPublic() const { return IsPublic; }
531 bool usesPositionalArg() const { return UsesPositionalArg; } // Same result as FormatSpecifier::usesPositionalArg().
533 /// Changes the specifier and length according to a QualType, retaining any
534 /// flags or options. Returns true on success, or false when a conversion
535 /// was not successful.
536 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
539 void toString(raw_ostream &os) const; // Declaration only.
541 // Validation methods - to check if any element results in undefined behavior
542 bool hasValidPlusPrefix() const;
543 bool hasValidAlternativeForm() const;
544 bool hasValidLeadingZeros() const;
545 bool hasValidSpacePrefix() const;
546 bool hasValidLeftJustified() const;
547 bool hasValidThousandsGroupingPrefix() const;
549 bool hasValidPrecision() const;
550 bool hasValidFieldWidth() const;
552 } // end analyze_printf namespace
554 //===----------------------------------------------------------------------===//
555 /// Pieces specific to fscanf format strings.
557 namespace analyze_scanf {
559 class ScanfConversionSpecifier : // ConversionSpecifier whose constructors always pass isPrintf = false.
560 public analyze_format_string::ConversionSpecifier {
562 ScanfConversionSpecifier()
563 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {} // Default: no position, invalid kind.
565 ScanfConversionSpecifier(const char *pos, Kind k)
566 : ConversionSpecifier(false, pos, k) {}
568 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
569 return !CS->isPrintfKind(); // Any specifier not tagged as printf is treated as scanf.
573 using analyze_format_string::ArgType; // These using-declarations let analyze_scanf name the shared types unqualified.
574 using analyze_format_string::LengthModifier;
575 using analyze_format_string::OptionalAmount;
576 using analyze_format_string::OptionalFlag;
578 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { // scanf-side FormatSpecifier: adds the assignment-suppression flag.
579 OptionalFlag SuppressAssignment; // '*'
582 FormatSpecifier(/* isPrintf = */ false),
583 SuppressAssignment("*") {} // The flag is constructed with its textual spelling.
585 void setSuppressAssignment(const char *position) {
586 SuppressAssignment.setPosition(position);
589 const OptionalFlag &getSuppressAssignment() const {
590 return SuppressAssignment;
593 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
597 const ScanfConversionSpecifier &getConversionSpecifier() const {
598 return cast<ScanfConversionSpecifier>(CS); // Downcasts the base-class CS member; ScanfConversionSpecifier::classof() is the matching type test.
601 bool consumesDataArgument() const {
602 return CS.consumesDataArgument() && !SuppressAssignment; // False when assignment is suppressed; the ! goes through OptionalFlag::operator bool.
605 ArgType getArgType(ASTContext &Ctx) const; // Declaration only.
607 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
610 void toString(raw_ostream &os) const; // Declaration only.
612 static ScanfSpecifier Parse(const char *beg, const char *end); // Declaration only.
615 } // end analyze_scanf namespace
617 //===----------------------------------------------------------------------===//
618 // Parsing and processing of format strings (both fprintf and fscanf).
620 namespace analyze_format_string {
622 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; // Parameter type of FormatStringHandler::HandleInvalidPosition().
624 class FormatStringHandler { // Callback interface; the void hooks written with {} below do nothing by default.
626 FormatStringHandler() {}
627 virtual ~FormatStringHandler(); // Declared here without a body.
629 virtual void HandleNullChar(const char *nullCharacter) {}
631 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
633 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
634 PositionContext p) {}
636 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
638 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
639 unsigned specifierLen) {}
641 virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
642 unsigned flagsLen) {}
644 virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
647 virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
648 const char *flagsEnd,
649 const char *conversionPosition) {}
650 // Printf-specific handlers.
652 virtual bool HandleInvalidPrintfConversionSpecifier(
653 const analyze_printf::PrintfSpecifier &FS,
654 const char *startSpecifier,
655 unsigned specifierLen) {
659 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
660 const char *startSpecifier,
661 unsigned specifierLen) {
665 // Scanf-specific handlers.
667 virtual bool HandleInvalidScanfConversionSpecifier(
668 const analyze_scanf::ScanfSpecifier &FS,
669 const char *startSpecifier,
670 unsigned specifierLen) {
674 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
675 const char *startSpecifier,
676 unsigned specifierLen) {
680 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
683 bool ParsePrintfString(FormatStringHandler &H, // Declaration only. Presumably reports on the text between beg and end through H - confirm in the definition.
684 const char *beg, const char *end, const LangOptions &LO,
685 const TargetInfo &Target, bool isFreeBSDKPrintf);
687 bool ParseFormatStringHasSArg(const char *beg, const char *end, // Declaration only. Takes no FormatStringHandler, unlike the other two parse functions.
688 const LangOptions &LO, const TargetInfo &Target);
690 bool ParseScanfString(FormatStringHandler &H, // Declaration only. Presumably reports on the text between beg and end through H - confirm in the definition.
691 const char *beg, const char *end, const LangOptions &LO,
692 const TargetInfo &Target);
694 } // end analyze_format_string namespace
695 } // end clang namespace