1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// The structure of format strings for fprintf is described in C99 7.19.6.1.
// The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
22 #include "clang/AST/CanonicalType.h"
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
32 /// Class representing optional flags with location and representation
36 OptionalFlag(const char *Representation)
37 : representation(Representation), flag(false) {}
38 bool isSet() { return flag; }
39 void set() { flag = true; }
40 void clear() { flag = false; }
41 void setPosition(const char *position) {
44 this->position = position;
46 const char *getPosition() const {
50 const char *toString() const { return representation; }
52 // Overloaded operators for bool like qualities
53 explicit operator bool() const { return flag; }
54 OptionalFlag& operator=(const bool &rhs) {
56 return *this; // Return a reference to myself.
59 const char *representation;
64 /// Represents the length modifier in a format string in scanf/printf.
65 class LengthModifier {
73 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
77 AsInt32, // 'I32' (MSVCRT, like __int32)
78 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL)
79 AsInt64, // 'I64' (MSVCRT, like __int64)
81 AsAllocate, // for '%as', GNU extension to C90 scanf
82 AsMAllocate, // for '%ms', GNU extension to scanf
83 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z
84 AsWideChar = AsLong // for '%ls', only makes sense for printf
88 : Position(nullptr), kind(None) {}
89 LengthModifier(const char *pos, Kind k)
90 : Position(pos), kind(k) {}
92 const char *getStart() const {
96 unsigned getLength() const {
111 Kind getKind() const { return kind; }
112 void setKind(Kind k) { kind = k; }
114 const char *toString() const;
117 const char *Position;
121 class ConversionSpecifier {
124 InvalidSpecifier = 0,
125 // C99 conversion specifiers.
128 DArg, // Apple extension
130 IntArgBeg = dArg, IntArgEnd = iArg,
133 OArg, // Apple extension
135 UArg, // Apple extension
138 UIntArgBeg = oArg, UIntArgEnd = XArg,
148 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
157 // ** Printf-specific **
159 ZArg, // MS extension
161 // Objective-C specific specifiers.
163 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
165 // FreeBSD kernel specific specifiers.
171 // GlibC specific specifiers.
174 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
176 // ** Scanf-specific **
178 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
181 ConversionSpecifier(bool isPrintf = true)
182 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
183 kind(InvalidSpecifier) {}
185 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
186 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
188 const char *getStart() const {
192 StringRef getCharacters() const {
193 return StringRef(getStart(), getLength());
196 bool consumesDataArgument() const {
208 Kind getKind() const { return kind; }
209 void setKind(Kind k) { kind = k; }
210 unsigned getLength() const {
211 return EndScanList ? EndScanList - Position : 1;
214 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
215 kind == FreeBSDrArg || kind == FreeBSDyArg; }
216 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
217 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
218 const char *toString() const;
220 bool isPrintfKind() const { return IsPrintf; }
222 Optional<ConversionSpecifier> getStandardSpecifier() const;
226 const char *Position;
227 const char *EndScanList;
/// Category tag stored in an ArgType.
enum Kind {
  UnknownTy,
  InvalidTy,
  SpecificTy,
  ObjCPointerTy,
  CPointerTy,
  AnyCharTy,
  CStrTy,
  WCStrTy,
  WIntTy
};
/// Result type of matchesType().
enum MatchKind {
  NoMatch = 0,
  Match = 1,
  NoMatchPedantic
};
244 ArgType(Kind k = UnknownTy, const char *n = nullptr)
245 : K(k), Name(n), Ptr(false) {}
246 ArgType(QualType t, const char *n = nullptr)
247 : K(SpecificTy), T(t), Name(n), Ptr(false) {}
248 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
250 static ArgType Invalid() { return ArgType(InvalidTy); }
251 bool isValid() const { return K != InvalidTy; }
253 /// Create an ArgType which corresponds to the type pointer to A.
254 static ArgType PtrTo(const ArgType& A) {
255 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
261 MatchKind matchesType(ASTContext &C, QualType argTy) const;
263 QualType getRepresentativeType(ASTContext &C) const;
265 std::string getRepresentativeTypeName(ASTContext &C) const;
268 class OptionalAmount {
/// The ways an amount can be given; reported by getHowSpecified().
enum HowSpecified {
  NotSpecified,
  Constant,
  Arg,
  Invalid
};
272 OptionalAmount(HowSpecified howSpecified,
274 const char *amountStart,
275 unsigned amountLength,
276 bool usesPositionalArg)
277 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
278 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
280 OptionalAmount(bool valid = true)
281 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
282 UsesPositionalArg(0), UsesDotPrefix(0) {}
284 bool isInvalid() const {
285 return hs == Invalid;
288 HowSpecified getHowSpecified() const { return hs; }
289 void setHowSpecified(HowSpecified h) { hs = h; }
291 bool hasDataArgument() const { return hs == Arg; }
293 unsigned getArgIndex() const {
294 assert(hasDataArgument());
298 unsigned getConstantAmount() const {
299 assert(hs == Constant);
303 const char *getStart() const {
304 // We include the . character if it is given.
305 return start - UsesDotPrefix;
308 unsigned getConstantLength() const {
309 assert(hs == Constant);
310 return length + UsesDotPrefix;
313 ArgType getArgType(ASTContext &Ctx) const;
315 void toString(raw_ostream &os) const;
317 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
318 unsigned getPositionalArgIndex() const {
319 assert(hasDataArgument());
323 bool usesDotPrefix() const { return UsesDotPrefix; }
324 void setUsesDotPrefix() { UsesDotPrefix = true; }
331 bool UsesPositionalArg : 1;
336 class FormatSpecifier {
339 OptionalAmount FieldWidth;
340 ConversionSpecifier CS;
341 /// Positional arguments, an IEEE extension:
342 /// IEEE Std 1003.1, 2004 Edition
343 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
344 bool UsesPositionalArg;
347 FormatSpecifier(bool isPrintf)
348 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
350 void setLengthModifier(LengthModifier lm) {
354 void setUsesPositionalArg() { UsesPositionalArg = true; }
356 void setArgIndex(unsigned i) {
360 unsigned getArgIndex() const {
364 unsigned getPositionalArgIndex() const {
368 const LengthModifier &getLengthModifier() const {
372 const OptionalAmount &getFieldWidth() const {
376 void setFieldWidth(const OptionalAmount &Amt) {
380 bool usesPositionalArg() const { return UsesPositionalArg; }
382 bool hasValidLengthModifier(const TargetInfo &Target) const;
384 bool hasStandardLengthModifier() const;
386 Optional<LengthModifier> getCorrectedLengthModifier() const;
388 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
390 bool hasStandardLengthConversionCombination() const;
392 /// For a TypedefType QT, if it is a named integer type such as size_t,
393 /// assign the appropriate value to LM and return true.
394 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
397 } // end analyze_format_string namespace
399 //===----------------------------------------------------------------------===//
400 /// Pieces specific to fprintf format strings.
402 namespace analyze_printf {
404 class PrintfConversionSpecifier :
405 public analyze_format_string::ConversionSpecifier {
407 PrintfConversionSpecifier()
408 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
410 PrintfConversionSpecifier(const char *pos, Kind k)
411 : ConversionSpecifier(true, pos, k) {}
413 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
414 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
415 kind <= DoubleArgEnd; }
416 unsigned getLength() const {
// Conversion specifiers are currently represented only by
// single characters, but we stay flexible.
422 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
423 return CS->isPrintfKind();
427 using analyze_format_string::ArgType;
428 using analyze_format_string::LengthModifier;
429 using analyze_format_string::OptionalAmount;
430 using analyze_format_string::OptionalFlag;
432 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
433 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
434 OptionalFlag IsLeftJustified; // '-'
435 OptionalFlag HasPlusPrefix; // '+'
436 OptionalFlag HasSpacePrefix; // ' '
437 OptionalFlag HasAlternativeForm; // '#'
438 OptionalFlag HasLeadingZeroes; // '0'
439 OptionalFlag HasObjCTechnicalTerm; // '[tt]'
440 OptionalAmount Precision;
443 FormatSpecifier(/* isPrintf = */ true),
444 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
445 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0"),
446 HasObjCTechnicalTerm("tt") {}
448 static PrintfSpecifier Parse(const char *beg, const char *end);
450 // Methods for incrementally constructing the PrintfSpecifier.
451 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
454 void setHasThousandsGrouping(const char *position) {
455 HasThousandsGrouping.setPosition(position);
457 void setIsLeftJustified(const char *position) {
458 IsLeftJustified.setPosition(position);
460 void setHasPlusPrefix(const char *position) {
461 HasPlusPrefix.setPosition(position);
463 void setHasSpacePrefix(const char *position) {
464 HasSpacePrefix.setPosition(position);
466 void setHasAlternativeForm(const char *position) {
467 HasAlternativeForm.setPosition(position);
469 void setHasLeadingZeros(const char *position) {
470 HasLeadingZeroes.setPosition(position);
472 void setHasObjCTechnicalTerm(const char *position) {
473 HasObjCTechnicalTerm.setPosition(position);
475 void setUsesPositionalArg() { UsesPositionalArg = true; }
477 // Methods for querying the format specifier.
479 const PrintfConversionSpecifier &getConversionSpecifier() const {
480 return cast<PrintfConversionSpecifier>(CS);
483 void setPrecision(const OptionalAmount &Amt) {
485 Precision.setUsesDotPrefix();
488 const OptionalAmount &getPrecision() const {
492 bool consumesDataArgument() const {
493 return getConversionSpecifier().consumesDataArgument();
496 /// \brief Returns the builtin type that a data argument
497 /// paired with this format specifier should have. This method
498 /// will return null if the format specifier does not have
499 /// a matching data argument or the matching argument matches
500 /// more than one type.
501 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
503 const OptionalFlag &hasThousandsGrouping() const {
504 return HasThousandsGrouping;
506 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
507 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
508 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
509 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
510 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
511 const OptionalFlag &hasObjCTechnicalTerm() const { return HasObjCTechnicalTerm; }
512 bool usesPositionalArg() const { return UsesPositionalArg; }
514 /// Changes the specifier and length according to a QualType, retaining any
515 /// flags or options. Returns true on success, or false when a conversion
516 /// was not successful.
517 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
520 void toString(raw_ostream &os) const;
522 // Validation methods - to check if any element results in undefined behavior
523 bool hasValidPlusPrefix() const;
524 bool hasValidAlternativeForm() const;
525 bool hasValidLeadingZeros() const;
526 bool hasValidSpacePrefix() const;
527 bool hasValidLeftJustified() const;
528 bool hasValidThousandsGroupingPrefix() const;
530 bool hasValidPrecision() const;
531 bool hasValidFieldWidth() const;
533 } // end analyze_printf namespace
535 //===----------------------------------------------------------------------===//
536 /// Pieces specific to fscanf format strings.
538 namespace analyze_scanf {
540 class ScanfConversionSpecifier :
541 public analyze_format_string::ConversionSpecifier {
543 ScanfConversionSpecifier()
544 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
546 ScanfConversionSpecifier(const char *pos, Kind k)
547 : ConversionSpecifier(false, pos, k) {}
549 void setEndScanList(const char *pos) { EndScanList = pos; }
551 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
552 return !CS->isPrintfKind();
556 using analyze_format_string::ArgType;
557 using analyze_format_string::LengthModifier;
558 using analyze_format_string::OptionalAmount;
559 using analyze_format_string::OptionalFlag;
561 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
562 OptionalFlag SuppressAssignment; // '*'
565 FormatSpecifier(/* isPrintf = */ false),
566 SuppressAssignment("*") {}
568 void setSuppressAssignment(const char *position) {
569 SuppressAssignment.setPosition(position);
572 const OptionalFlag &getSuppressAssignment() const {
573 return SuppressAssignment;
576 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
580 const ScanfConversionSpecifier &getConversionSpecifier() const {
581 return cast<ScanfConversionSpecifier>(CS);
584 bool consumesDataArgument() const {
585 return CS.consumesDataArgument() && !SuppressAssignment;
588 ArgType getArgType(ASTContext &Ctx) const;
590 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
593 void toString(raw_ostream &os) const;
595 static ScanfSpecifier Parse(const char *beg, const char *end);
598 } // end analyze_scanf namespace
600 //===----------------------------------------------------------------------===//
601 // Parsing and processing of format strings (both fprintf and fscanf).
603 namespace analyze_format_string {
/// Tag passed to FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
607 class FormatStringHandler {
609 FormatStringHandler() {}
610 virtual ~FormatStringHandler();
612 virtual void HandleNullChar(const char *nullCharacter) {}
614 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
616 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
617 PositionContext p) {}
619 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
621 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
622 unsigned specifierLen) {}
624 virtual void HandleEmptyObjCModifierFlag(const char *startFlags,
625 unsigned flagsLen) {}
627 virtual void HandleInvalidObjCModifierFlag(const char *startFlag,
630 virtual void HandleObjCFlagsWithNonObjCConversion(const char *flagsStart,
631 const char *flagsEnd,
632 const char *conversionPosition) {}
633 // Printf-specific handlers.
635 virtual bool HandleInvalidPrintfConversionSpecifier(
636 const analyze_printf::PrintfSpecifier &FS,
637 const char *startSpecifier,
638 unsigned specifierLen) {
642 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
643 const char *startSpecifier,
644 unsigned specifierLen) {
648 // Scanf-specific handlers.
650 virtual bool HandleInvalidScanfConversionSpecifier(
651 const analyze_scanf::ScanfSpecifier &FS,
652 const char *startSpecifier,
653 unsigned specifierLen) {
657 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
658 const char *startSpecifier,
659 unsigned specifierLen) {
663 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
666 bool ParsePrintfString(FormatStringHandler &H,
667 const char *beg, const char *end, const LangOptions &LO,
668 const TargetInfo &Target, bool isFreeBSDKPrintf);
670 bool ParseFormatStringHasSArg(const char *beg, const char *end,
671 const LangOptions &LO, const TargetInfo &Target);
673 bool ParseScanfString(FormatStringHandler &H,
674 const char *beg, const char *end, const LangOptions &LO,
675 const TargetInfo &Target);
677 } // end analyze_format_string namespace
678 } // end clang namespace