1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// and related functions.
13 // The structure of format strings for fprintf is described in C99 7.19.6.1.
15 // The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
20 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
22 #include "clang/AST/CanonicalType.h"
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
32 /// Class representing optional flags with location and representation
/// information.
/// Creates a flag that is initially cleared. \p Representation is stored and
/// later returned unchanged by toString().
36 OptionalFlag(const char *Representation)
37 : representation(Representation), flag(false) {}
/// Returns true if the flag is currently set. This is a pure query, so it is
/// const-qualified; that lets it be called through the const OptionalFlag
/// references handed out by the specifier accessors.
38 bool isSet() const { return flag; }
/// Marks the flag as set.
39 void set() { flag = true; }
/// Marks the flag as not set.
40 void clear() { flag = false; }
/// Records \p position as the location associated with this flag.
41 void setPosition(const char *position) {
// The parameter shadows the member, hence the explicit this->.
43 this->position = position;
45 const char *getPosition() const {
/// Returns the representation string stored in this flag.
49 const char *toString() const { return representation; }
51 // Overloaded operators for bool-like qualities
// Conversion to bool yields the current state of the flag.
// NOTE(review): LLVM_EXPLICIT is defined outside this view; presumably it
// expands to 'explicit' where the compiler supports it -- confirm.
52 LLVM_EXPLICIT operator bool() const { return flag; }
53 OptionalFlag& operator=(const bool &rhs) {
55 return *this; // Return a reference to myself.
58 const char *representation;
63 /// Represents the length modifier in a format string in scanf/printf.
64 class LengthModifier {
72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
76 AsInt32, // 'I32' (MSVCRT, like __int32)
77 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL)
78 AsInt64, // 'I64' (MSVCRT, like __int64)
80 AsAllocate, // for '%as', GNU extension to C90 scanf
81 AsMAllocate, // for '%ms', GNU extension to scanf
82 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
83 AsWideChar = AsLong // for '%ls', only makes sense for printf
87 : Position(nullptr), kind(None) {}
/// Creates a length modifier of kind \p k whose Position is \p pos.
88 LengthModifier(const char *pos, Kind k)
89 : Position(pos), kind(k) {}
91 const char *getStart() const {
95 unsigned getLength() const {
/// Returns the kind of this length modifier.
110 Kind getKind() const { return kind; }
/// Replaces the kind of this length modifier with \p k.
111 void setKind(Kind k) { kind = k; }
113 const char *toString() const;
116 const char *Position;
120 class ConversionSpecifier {
123 InvalidSpecifier = 0,
124 // C99 conversion specifiers.
127 DArg, // Apple extension
129 IntArgBeg = dArg, IntArgEnd = iArg,
132 OArg, // Apple extension
134 UArg, // Apple extension
137 UIntArgBeg = oArg, UIntArgEnd = XArg,
147 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
156 // ** Printf-specific **
158 ZArg, // MS extension
160 // Objective-C specific specifiers.
162 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
164 // FreeBSD kernel specific specifiers.
170 // GlibC specific specifiers.
173 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
175 // ** Scanf-specific **
177 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
/// Creates a specifier of kind InvalidSpecifier with null Position and
/// EndScanList. \p isPrintf (true by default) is stored in IsPrintf.
180 ConversionSpecifier(bool isPrintf = true)
181 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
182 kind(InvalidSpecifier) {}
/// Creates a specifier of kind \p k located at \p pos. \p isPrintf is stored
/// in IsPrintf, and EndScanList starts out null.
184 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
185 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
187 const char *getStart() const {
/// Returns the specifier's text as a StringRef that begins at getStart() and
/// spans getLength() characters.
191 StringRef getCharacters() const {
192 return StringRef(getStart(), getLength());
195 bool consumesDataArgument() const {
/// Returns the kind of this conversion specifier.
207 Kind getKind() const { return kind; }
/// Replaces the kind of this conversion specifier with \p k.
208 void setKind(Kind k) { kind = k; }
/// Returns EndScanList - Position when EndScanList is non-null, and 1
/// otherwise.
209 unsigned getLength() const {
210 return EndScanList ? EndScanList - Position : 1;
/// True for kinds in the range [IntArgBeg, IntArgEnd] and for the
/// FreeBSDrArg and FreeBSDyArg kinds.
213 bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
214 kind == FreeBSDrArg || kind == FreeBSDyArg; }
/// True for kinds in the range [UIntArgBeg, UIntArgEnd].
215 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
/// True for kinds in the range [IntArgBeg, UIntArgEnd].
// NOTE(review): unlike isIntArg(), FreeBSDrArg/FreeBSDyArg are not named
// here; confirm whether excluding them is intended.
216 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
217 const char *toString() const;
/// Returns the IsPrintf value this specifier was constructed with.
219 bool isPrintfKind() const { return IsPrintf; }
221 Optional<ConversionSpecifier> getStandardSpecifier() const;
225 const char *Position;
226 const char *EndScanList;
/// Category of type an ArgType describes. UnknownTy is the default kind,
/// InvalidTy is the kind produced by Invalid() and rejected by isValid(), and
/// SpecificTy is used when an ArgType is built from a QualType/CanQualType.
232 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
233 AnyCharTy, CStrTy, WCStrTy, WIntTy };
/// Creates an ArgType of kind \p k (UnknownTy by default) with optional name
/// \p n. The Ptr member starts out false.
240 ArgType(Kind k = UnknownTy, const char *n = nullptr)
241 : K(k), Name(n), Ptr(false) {}
/// Creates a SpecificTy ArgType for the type \p t with optional name \p n.
242 ArgType(QualType t, const char *n = nullptr)
243 : K(SpecificTy), T(t), Name(n), Ptr(false) {}
/// Creates an unnamed SpecificTy ArgType from the CanQualType \p t.
244 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
/// Returns an ArgType whose kind is InvalidTy.
246 static ArgType Invalid() { return ArgType(InvalidTy); }
/// Returns false only when the kind is InvalidTy; every other kind, including
/// UnknownTy, counts as valid.
247 bool isValid() const { return K != InvalidTy; }
249 /// Create an ArgType which corresponds to the type pointer to A.
250 static ArgType PtrTo(const ArgType& A) {
// NOTE(review): Kind is ordered UnknownTy, InvalidTy, SpecificTy, ..., so the
// condition below rejects only UnknownTy and still admits InvalidTy, while
// the message says invalid ArgTypes are not allowed -- confirm which of the
// two is intended before tightening the check.
251 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
257 bool matchesType(ASTContext &C, QualType argTy) const;
259 QualType getRepresentativeType(ASTContext &C) const;
261 std::string getRepresentativeTypeName(ASTContext &C) const;
264 class OptionalAmount {
/// How the amount was given. The default constructor yields NotSpecified (or
/// Invalid when asked for an invalid amount), hasDataArgument() tests for Arg,
/// and getConstantAmount()/getConstantLength() assert Constant.
266 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
268 OptionalAmount(HowSpecified howSpecified,
270 const char *amountStart,
271 unsigned amountLength,
272 bool usesPositionalArg)
273 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
274 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
/// Creates an amount with no text (null start, zero length, zero amt) and
/// with UsesPositionalArg and UsesDotPrefix cleared. The result is
/// NotSpecified when \p valid is true and Invalid otherwise.
276 OptionalAmount(bool valid = true)
277 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
278 UsesPositionalArg(0), UsesDotPrefix(0) {}
/// Returns true when this amount's HowSpecified value is Invalid.
280 bool isInvalid() const {
281 return hs == Invalid;
/// Returns how this amount was specified.
284 HowSpecified getHowSpecified() const { return hs; }
/// Overwrites how this amount was specified with \p h.
285 void setHowSpecified(HowSpecified h) { hs = h; }
/// Returns true when the amount is specified as Arg.
287 bool hasDataArgument() const { return hs == Arg; }
289 unsigned getArgIndex() const {
290 assert(hasDataArgument());
294 unsigned getConstantAmount() const {
295 assert(hs == Constant);
/// Returns the start of the amount's text, moved back by one character when
/// a dot prefix has been recorded via setUsesDotPrefix().
299 const char *getStart() const {
300 // We include the . character if it is given.
301 return start - UsesDotPrefix;
/// Returns the stored text length, plus one when a dot prefix has been
/// recorded. Asserts that the amount is Constant.
304 unsigned getConstantLength() const {
305 assert(hs == Constant);
306 return length + UsesDotPrefix;
309 ArgType getArgType(ASTContext &Ctx) const;
311 void toString(raw_ostream &os) const;
/// Returns the UsesPositionalArg flag. The member is already a bool
/// bit-field, so it is returned directly without a cast, in the same way as
/// usesDotPrefix().
313 bool usesPositionalArg() const { return UsesPositionalArg; }
314 unsigned getPositionalArgIndex() const {
315 assert(hasDataArgument());
/// Returns whether a dot prefix has been recorded for this amount.
319 bool usesDotPrefix() const { return UsesDotPrefix; }
/// Records that this amount carries a dot prefix; getStart() and
/// getConstantLength() then account for the extra character.
320 void setUsesDotPrefix() { UsesDotPrefix = true; }
327 bool UsesPositionalArg : 1;
332 class FormatSpecifier {
335 OptionalAmount FieldWidth;
336 ConversionSpecifier CS;
337 /// Positional arguments, an IEEE extension:
338 /// IEEE Std 1003.1, 2004 Edition
339 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
340 bool UsesPositionalArg;
/// Creates a specifier whose conversion specifier CS is constructed with
/// \p isPrintf, with UsesPositionalArg false and argIndex 0.
343 FormatSpecifier(bool isPrintf)
344 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
346 void setLengthModifier(LengthModifier lm) {
/// Sets the UsesPositionalArg flag to true.
350 void setUsesPositionalArg() { UsesPositionalArg = true; }
352 void setArgIndex(unsigned i) {
356 unsigned getArgIndex() const {
360 unsigned getPositionalArgIndex() const {
364 const LengthModifier &getLengthModifier() const {
368 const OptionalAmount &getFieldWidth() const {
372 void setFieldWidth(const OptionalAmount &Amt) {
/// Returns the UsesPositionalArg flag.
376 bool usesPositionalArg() const { return UsesPositionalArg; }
378 bool hasValidLengthModifier(const TargetInfo &Target) const;
380 bool hasStandardLengthModifier() const;
382 Optional<LengthModifier> getCorrectedLengthModifier() const;
384 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
386 bool hasStandardLengthConversionCombination() const;
388 /// For a TypedefType QT, if it is a named integer type such as size_t,
389 /// assign the appropriate value to LM and return true.
390 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
393 } // end analyze_format_string namespace
395 //===----------------------------------------------------------------------===//
396 /// Pieces specific to fprintf format strings.
398 namespace analyze_printf {
400 class PrintfConversionSpecifier :
401 public analyze_format_string::ConversionSpecifier {
/// Creates a printf conversion specifier of kind InvalidSpecifier with a
/// null position.
403 PrintfConversionSpecifier()
404 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
/// Creates a printf conversion specifier of kind \p k located at \p pos.
406 PrintfConversionSpecifier(const char *pos, Kind k)
407 : ConversionSpecifier(true, pos, k) {}
/// True for kinds in the range [ObjCBeg, ObjCEnd].
409 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
/// True for kinds in the range [DoubleArgBeg, DoubleArgEnd].
410 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
411 kind <= DoubleArgEnd; }
412 unsigned getLength() const {
413 // Conversion specifiers are currently only represented by
414 // single characters, but we stay flexible.
/// Type test for PrintfConversionSpecifier: a ConversionSpecifier qualifies
/// when isPrintfKind() is true. Presumably this is the hook consulted by
/// cast<PrintfConversionSpecifier>(...) -- confirm against the LLVM casting
/// utilities.
418 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
419 return CS->isPrintfKind();
423 using analyze_format_string::ArgType;
424 using analyze_format_string::LengthModifier;
425 using analyze_format_string::OptionalAmount;
426 using analyze_format_string::OptionalFlag;
428 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
429 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
430 OptionalFlag IsLeftJustified; // '-'
431 OptionalFlag HasPlusPrefix; // '+'
432 OptionalFlag HasSpacePrefix; // ' '
433 OptionalFlag HasAlternativeForm; // '#'
434 OptionalFlag HasLeadingZeroes; // '0'
435 OptionalAmount Precision;
438 FormatSpecifier(/* isPrintf = */ true),
439 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
440 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
442 static PrintfSpecifier Parse(const char *beg, const char *end);
444 // Methods for incrementally constructing the PrintfSpecifier.
445 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
/// Sets the thousands-grouping flag and records \p position on it.
448 void setHasThousandsGrouping(const char *position) {
449 HasThousandsGrouping = true;
450 HasThousandsGrouping.setPosition(position);
/// Sets the left-justified ('-') flag and records \p position on it.
452 void setIsLeftJustified(const char *position) {
453 IsLeftJustified = true;
454 IsLeftJustified.setPosition(position);
/// Sets the plus-prefix ('+') flag and records \p position on it.
456 void setHasPlusPrefix(const char *position) {
457 HasPlusPrefix = true;
458 HasPlusPrefix.setPosition(position);
/// Sets the space-prefix (' ') flag and records \p position on it.
460 void setHasSpacePrefix(const char *position) {
461 HasSpacePrefix = true;
462 HasSpacePrefix.setPosition(position);
/// Sets the alternative-form ('#') flag and records \p position on it.
464 void setHasAlternativeForm(const char *position) {
465 HasAlternativeForm = true;
466 HasAlternativeForm.setPosition(position);
/// Sets the leading-zeroes ('0') flag and records \p position on it.
468 void setHasLeadingZeros(const char *position) {
469 HasLeadingZeroes = true;
470 HasLeadingZeroes.setPosition(position);
/// Sets the inherited UsesPositionalArg flag to true.
// NOTE(review): FormatSpecifier declares a setUsesPositionalArg() with the
// same body; this redeclaration looks redundant -- confirm before removing.
472 void setUsesPositionalArg() { UsesPositionalArg = true; }
474 // Methods for querying the format specifier.
/// Returns the stored conversion specifier CS viewed as a
/// PrintfConversionSpecifier.
476 const PrintfConversionSpecifier &getConversionSpecifier() const {
477 return cast<PrintfConversionSpecifier>(CS);
480 void setPrecision(const OptionalAmount &Amt) {
482 Precision.setUsesDotPrefix();
485 const OptionalAmount &getPrecision() const {
/// Forwards to consumesDataArgument() on the conversion specifier.
489 bool consumesDataArgument() const {
490 return getConversionSpecifier().consumesDataArgument();
493 /// \brief Returns the builtin type that a data argument
494 /// paired with this format specifier should have. This method
495 /// will return null if the format specifier does not have
496 /// a matching data argument or the matching argument matches
497 /// more than one type.
// NOTE(review): the return type is ArgType by value, so "null" above
// presumably means a non-specific or invalid ArgType rather than a null
// pointer -- confirm against the implementation.
498 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
/// Returns the thousands-grouping flag.
500 const OptionalFlag &hasThousandsGrouping() const {
501 return HasThousandsGrouping;
/// Returns the left-justified ('-') flag.
503 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
/// Returns the plus-prefix ('+') flag.
504 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
/// Returns the alternative-form ('#') flag.
505 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
/// Returns the leading-zeroes ('0') flag.
506 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
/// Returns the space-prefix (' ') flag.
507 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
/// Returns the inherited UsesPositionalArg flag.
// NOTE(review): FormatSpecifier declares a usesPositionalArg() with the same
// body; this redeclaration looks redundant -- confirm before removing.
508 bool usesPositionalArg() const { return UsesPositionalArg; }
510 /// Changes the specifier and length according to a QualType, retaining any
511 /// flags or options. Returns true on success, or false when a conversion
512 /// was not successful.
513 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
516 void toString(raw_ostream &os) const;
518 // Validation methods - to check if any element results in undefined behavior
519 bool hasValidPlusPrefix() const;
520 bool hasValidAlternativeForm() const;
521 bool hasValidLeadingZeros() const;
522 bool hasValidSpacePrefix() const;
523 bool hasValidLeftJustified() const;
524 bool hasValidThousandsGroupingPrefix() const;
526 bool hasValidPrecision() const;
527 bool hasValidFieldWidth() const;
529 } // end analyze_printf namespace
531 //===----------------------------------------------------------------------===//
532 /// Pieces specific to fscanf format strings.
534 namespace analyze_scanf {
536 class ScanfConversionSpecifier :
537 public analyze_format_string::ConversionSpecifier {
/// Creates a scanf conversion specifier of kind InvalidSpecifier with a null
/// position.
539 ScanfConversionSpecifier()
540 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
/// Creates a scanf conversion specifier of kind \p k located at \p pos.
542 ScanfConversionSpecifier(const char *pos, Kind k)
543 : ConversionSpecifier(false, pos, k) {}
/// Stores \p pos as EndScanList; once it is non-null, the base class
/// getLength() reports EndScanList - Position instead of 1.
545 void setEndScanList(const char *pos) { EndScanList = pos; }
/// Type test for ScanfConversionSpecifier: a ConversionSpecifier qualifies
/// when isPrintfKind() is false. Presumably this is the hook consulted by
/// cast<ScanfConversionSpecifier>(...) -- confirm against the LLVM casting
/// utilities.
547 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
548 return !CS->isPrintfKind();
552 using analyze_format_string::ArgType;
553 using analyze_format_string::LengthModifier;
554 using analyze_format_string::OptionalAmount;
555 using analyze_format_string::OptionalFlag;
557 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
558 OptionalFlag SuppressAssignment; // '*'
561 FormatSpecifier(/* isPrintf = */ false),
562 SuppressAssignment("*") {}
/// Sets the assignment-suppression ('*') flag and records \p position on it.
564 void setSuppressAssignment(const char *position) {
565 SuppressAssignment = true;
566 SuppressAssignment.setPosition(position);
/// Returns the assignment-suppression ('*') flag.
569 const OptionalFlag &getSuppressAssignment() const {
570 return SuppressAssignment;
573 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
/// Returns the stored conversion specifier CS viewed as a
/// ScanfConversionSpecifier.
577 const ScanfConversionSpecifier &getConversionSpecifier() const {
578 return cast<ScanfConversionSpecifier>(CS);
/// Returns true only when the conversion specifier consumes a data argument
/// and the assignment-suppression flag is not set.
581 bool consumesDataArgument() const {
582 return CS.consumesDataArgument() && !SuppressAssignment;
585 ArgType getArgType(ASTContext &Ctx) const;
587 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
590 void toString(raw_ostream &os) const;
592 static ScanfSpecifier Parse(const char *beg, const char *end);
595 } // end analyze_scanf namespace
597 //===----------------------------------------------------------------------===//
598 // Parsing and processing of format strings (both fprintf and fscanf).
600 namespace analyze_format_string {
/// Context value passed to FormatStringHandler::HandleInvalidPosition.
/// Presumably it tells whether the position belongs to a field width or to a
/// precision -- confirm against the parser.
602 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
604 class FormatStringHandler {
/// Default constructor; performs no work.
606 FormatStringHandler() {}
607 virtual ~FormatStringHandler();
/// Overridable hook, presumably invoked when the parser meets a null
/// character inside the format string (confirm against the parser). The
/// default implementation does nothing.
609 virtual void HandleNullChar(const char *nullCharacter) {}
/// Overridable hook, presumably invoked for a positional-argument marker
/// spanning \p posLen characters from \p startPos (confirm against the
/// parser). The default implementation does nothing.
611 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
/// Overridable hook, presumably invoked for a malformed positional-argument
/// marker, with \p p giving the context it appeared in (confirm against the
/// parser). The default implementation does nothing.
613 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
614 PositionContext p) {}
/// Overridable hook, presumably invoked for a positional-argument index of
/// zero (confirm against the parser). The default implementation does
/// nothing.
616 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
/// Overridable hook, presumably invoked when a format specifier ends before
/// it is complete (confirm against the parser). The default implementation
/// does nothing.
618 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
619 unsigned specifierLen) {}
621 // Printf-specific handlers.
623 virtual bool HandleInvalidPrintfConversionSpecifier(
624 const analyze_printf::PrintfSpecifier &FS,
625 const char *startSpecifier,
626 unsigned specifierLen) {
630 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
631 const char *startSpecifier,
632 unsigned specifierLen) {
636 // Scanf-specific handlers.
638 virtual bool HandleInvalidScanfConversionSpecifier(
639 const analyze_scanf::ScanfSpecifier &FS,
640 const char *startSpecifier,
641 unsigned specifierLen) {
645 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
646 const char *startSpecifier,
647 unsigned specifierLen) {
/// Overridable hook, presumably invoked when a scanf scan list is left
/// unterminated between \p start and \p end (confirm against the parser).
/// The default implementation does nothing.
651 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
654 bool ParsePrintfString(FormatStringHandler &H,
655 const char *beg, const char *end, const LangOptions &LO,
656 const TargetInfo &Target, bool isFreeBSDKPrintf);
658 bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO,
659 const TargetInfo &Target);
661 bool ParseScanfString(FormatStringHandler &H,
662 const char *beg, const char *end, const LangOptions &LO,
663 const TargetInfo &Target);
665 } // end analyze_format_string namespace
666 } // end clang namespace