1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
13 // The structure of format strings for fprintf is described in C99 7.19.6.1.
15 // The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
32 /// Class representing optional flags with location and representation
///
/// Constructor: the flag starts out unset (flag == false) and stores
/// \p Representation, the string that toString() hands back.
36 OptionalFlag(const char *Representation)
37 : representation(Representation), flag(false) {}
/// Reports whether the flag is currently set. Const-qualified because it only
/// reads state, so it can be called through a const OptionalFlag reference.
38 bool isSet() const { return flag; }
/// Marks the flag as set.
39 void set() { flag = true; }
/// Marks the flag as not set.
40 void clear() { flag = false; }
/// Records \p position as the position associated with this flag.
41 void setPosition(const char *position) {
43 this->position = position;
/// Accessor for the recorded position.
45 const char *getPosition() const {
/// Returns the representation string supplied at construction.
49 const char *toString() const { return representation; }
51 // Overloaded operators for bool-like qualities
/// Conversion to bool (declared with LLVM_EXPLICIT); yields the flag value.
52 LLVM_EXPLICIT operator bool() const { return flag; }
/// Assignment from a bool; returns *this so the result can be used further.
/// NOTE(review): presumably stores rhs into the flag -- the storing statement
/// is not shown here, confirm.
53 OptionalFlag& operator=(const bool &rhs) {
55 return *this; // Return a reference to myself.
/// String form of the flag: initialised by the constructor, returned by
/// toString().
58 const char *representation;
63 /// Represents the length modifier in a format string in scanf/printf.
///
/// A modifier is a Kind plus a pointer (Position) to where it was written.
64 class LengthModifier {
72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
76 AsInt32, // 'I32' (MSVCRT, like __int32)
77 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL)
78 AsInt64, // 'I64' (MSVCRT, like __int64)
80 AsAllocate, // for '%as', GNU extension to C90 scanf
81 AsMAllocate, // for '%ms', GNU extension to scanf
// AsWideChar is an alias: it has the same enumerator value as AsLong.
82 AsWideChar = AsLong // for '%ls', only makes sense for printf
// Default state: no position (nullptr) and kind None.
86 : Position(nullptr), kind(None) {}
/// Builds a modifier of kind \p k whose text is located at \p pos.
87 LengthModifier(const char *pos, Kind k)
88 : Position(pos), kind(k) {}
/// Accessor for the start of the modifier's text.
90 const char *getStart() const {
/// Length of the modifier's text, in characters.
94 unsigned getLength() const {
/// Returns the stored kind.
109 Kind getKind() const { return kind; }
/// Replaces the stored kind with \p k; Position is left untouched.
110 void setKind(Kind k) { kind = k; }
/// Declared here only; the definition lives outside this header.
112 const char *toString() const;
/// Where the modifier was written; nullptr for a default-constructed object.
115 const char *Position;
/// Describes one conversion specifier: its Kind, where it starts (Position),
/// whether it belongs to printf or scanf (IsPrintf) and, optionally, where a
/// scan list ends (EndScanList).
119 class ConversionSpecifier {
122 InvalidSpecifier = 0,
123 // C99 conversion specifiers.
126 DArg, // Apple extension
// The *Beg / *End enumerators are aliases that bracket a range of kinds; the
// isXxxArg() predicates test membership with >= / <= comparisons.
128 IntArgBeg = dArg, IntArgEnd = iArg,
131 OArg, // Apple extension
133 UArg, // Apple extension
136 UIntArgBeg = oArg, UIntArgEnd = XArg,
146 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
155 // ** Printf-specific **
157 // Objective-C specific specifiers.
159 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
161 // FreeBSD specific specifiers
166 // GlibC specific specifiers.
169 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
171 // ** Scanf-specific **
173 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
/// Builds an invalid specifier (kind InvalidSpecifier, no position, no scan
/// list end). \p isPrintf defaults to true.
176 ConversionSpecifier(bool isPrintf = true)
177 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
178 kind(InvalidSpecifier) {}
/// Builds a specifier of kind \p k located at \p pos; the scan list end
/// starts out as nullptr.
180 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
181 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
/// Accessor for the start of the specifier's text.
183 const char *getStart() const {
/// The specifier's text as a StringRef of getLength() characters beginning
/// at getStart().
187 StringRef getCharacters() const {
188 return StringRef(getStart(), getLength());
/// Whether this conversion consumes a data argument.
191 bool consumesDataArgument() const {
/// Returns the stored kind.
203 Kind getKind() const { return kind; }
/// Replaces the stored kind with \p k.
204 void setKind(Kind k) { kind = k; }
/// Length of the specifier: the distance from Position to EndScanList when
/// a scan list end was recorded, otherwise 1.
205 unsigned getLength() const {
206 return EndScanList ? EndScanList - Position : 1;
/// True when kind lies in [IntArgBeg, IntArgEnd].
209 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
/// True when kind lies in [UIntArgBeg, UIntArgEnd].
210 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
/// True when kind lies in [IntArgBeg, UIntArgEnd].
/// NOTE(review): this assumes no non-integer kind sits between the signed
/// and unsigned ranges in the enum -- confirm against the full enum.
211 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
/// Declared here only; the definition lives outside this header.
212 const char *toString() const;
/// Returns the IsPrintf value given at construction; the derived classes'
/// classof() functions use it to tell printf from scanf specifiers.
214 bool isPrintfKind() const { return IsPrintf; }
/// Declared here only; the definition lives outside this header.
216 Optional<ConversionSpecifier> getStandardSpecifier() const;
/// Start of the specifier's text (nullptr when built without a position).
220 const char *Position;
/// End of a scan list, or nullptr when none was recorded (see getLength()).
221 const char *EndScanList;
/// Classification of an argument type. The enumerator order is significant:
/// PtrTo() compares kinds with >=.
227 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
228 AnyCharTy, CStrTy, WCStrTy, WIntTy };
/// Builds an ArgType of kind \p k (UnknownTy by default) with optional name
/// \p n; the pointer marker starts out false.
235 ArgType(Kind k = UnknownTy, const char *n = nullptr)
236 : K(k), Name(n), Ptr(false) {}
/// Builds a SpecificTy ArgType for the type \p t with optional name \p n.
237 ArgType(QualType t, const char *n = nullptr)
238 : K(SpecificTy), T(t), Name(n), Ptr(false) {}
/// Builds an unnamed SpecificTy ArgType for the canonical type \p t.
239 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
/// Returns an ArgType whose kind is InvalidTy.
241 static ArgType Invalid() { return ArgType(InvalidTy); }
/// True unless the kind is InvalidTy; note that UnknownTy counts as valid.
242 bool isValid() const { return K != InvalidTy; }
244 /// Create an ArgType which corresponds to the type pointer to A.
245 static ArgType PtrTo(const ArgType& A) {
// NOTE(review): with the Kind order above, `>= InvalidTy` rejects only
// UnknownTy; InvalidTy itself passes although the message names it as
// disallowed. Confirm whether `> InvalidTy` was intended.
246 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
/// Declared here only; the definition lives outside this header.
252 bool matchesType(ASTContext &C, QualType argTy) const;
/// Declared here only; the definition lives outside this header.
254 QualType getRepresentativeType(ASTContext &C) const;
/// Declared here only; the definition lives outside this header.
256 std::string getRepresentativeTypeName(ASTContext &C) const;
/// An amount that may or may not be present in a specifier, together with how
/// it was specified and where its text sits in the format string.
259 class OptionalAmount {
/// How the amount was given: not at all, as a constant, through a data
/// argument, or invalidly.
261 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
/// Fully specified amount: stores how it was specified, its value, the
/// range of its text (start/length) and whether it uses a positional
/// argument. The dot-prefix bit starts cleared.
263 OptionalAmount(HowSpecified howSpecified,
265 const char *amountStart,
266 unsigned amountLength,
267 bool usesPositionalArg)
268 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
269 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
/// Amount without text: NotSpecified when \p valid is true, Invalid
/// otherwise. All other fields are zero/null.
271 OptionalAmount(bool valid = true)
272 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
273 UsesPositionalArg(0), UsesDotPrefix(0) {}
/// True when the amount was constructed or marked as Invalid.
275 bool isInvalid() const {
276 return hs == Invalid;
/// Returns how the amount was specified.
279 HowSpecified getHowSpecified() const { return hs; }
/// Overrides how the amount was specified.
280 void setHowSpecified(HowSpecified h) { hs = h; }
/// True when the amount is taken from a data argument (hs == Arg).
282 bool hasDataArgument() const { return hs == Arg; }
/// Only meaningful for argument-based amounts; asserts hasDataArgument().
284 unsigned getArgIndex() const {
285 assert(hasDataArgument());
/// Only meaningful for constant amounts; asserts hs == Constant.
289 unsigned getConstantAmount() const {
290 assert(hs == Constant);
/// Start of the amount's text, moved back by one character when a '.'
/// prefix was recorded.
294 const char *getStart() const {
295 // We include the . character if it is given.
296 return start - UsesDotPrefix;
/// Length of a constant amount's text, plus one when a '.' prefix was
/// recorded. Asserts hs == Constant.
299 unsigned getConstantLength() const {
300 assert(hs == Constant);
301 return length + UsesDotPrefix;
/// Declared here only; the definition lives outside this header.
304 ArgType getArgType(ASTContext &Ctx) const;
/// Declared here only; the definition lives outside this header.
306 void toString(raw_ostream &os) const;
/// Returns the positional-argument bit as a bool (it is stored as a
/// one-bit bit-field).
308 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
/// Only meaningful for argument-based amounts; asserts hasDataArgument().
309 unsigned getPositionalArgIndex() const {
310 assert(hasDataArgument());
/// True once setUsesDotPrefix() has been called.
314 bool usesDotPrefix() const { return UsesDotPrefix; }
/// Records that the amount was written with a leading '.'.
315 void setUsesDotPrefix() { UsesDotPrefix = true; }
/// One-bit flag: set when the amount uses a positional argument.
322 bool UsesPositionalArg : 1;
/// State shared by printf and scanf specifiers: a field width, a conversion
/// specifier, a positional-argument marker and an argument index.
327 class FormatSpecifier {
330 OptionalAmount FieldWidth;
331 ConversionSpecifier CS;
332 /// Positional arguments, an IEEE extension:
333 /// IEEE Std 1003.1, 2004 Edition
334 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
335 bool UsesPositionalArg;
/// Starts with a conversion specifier of the requested flavour
/// (\p isPrintf), no positional argument and argument index 0.
338 FormatSpecifier(bool isPrintf)
339 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
/// Setter for the length modifier (taken by value).
341 void setLengthModifier(LengthModifier lm) {
/// Marks the specifier as using a positional argument; there is no way to
/// clear the marker again through this interface.
345 void setUsesPositionalArg() { UsesPositionalArg = true; }
/// Setter for the argument index.
347 void setArgIndex(unsigned i) {
/// Accessor for the argument index.
351 unsigned getArgIndex() const {
/// Accessor for the positional argument index.
355 unsigned getPositionalArgIndex() const {
/// Accessor for the length modifier (returned by const reference).
359 const LengthModifier &getLengthModifier() const {
/// Accessor for the field width (returned by const reference).
363 const OptionalAmount &getFieldWidth() const {
/// Setter for the field width.
367 void setFieldWidth(const OptionalAmount &Amt) {
/// True once setUsesPositionalArg() has been called.
371 bool usesPositionalArg() const { return UsesPositionalArg; }
// The predicates below are declared here only; their definitions live
// outside this header.
373 bool hasValidLengthModifier(const TargetInfo &Target) const;
375 bool hasStandardLengthModifier() const;
377 Optional<LengthModifier> getCorrectedLengthModifier() const;
379 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
381 bool hasStandardLengthConversionCombination() const;
383 /// For a TypedefType QT, if it is a named integer type such as size_t,
384 /// assign the appropriate value to LM and return true.
385 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
388 } // end analyze_format_string namespace
390 //===----------------------------------------------------------------------===//
391 /// Pieces specific to fprintf format strings.
393 namespace analyze_printf {
/// A ConversionSpecifier that is always constructed with isPrintf == true.
395 class PrintfConversionSpecifier :
396 public analyze_format_string::ConversionSpecifier {
/// Builds an invalid printf specifier with no position.
398 PrintfConversionSpecifier()
399 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
/// Builds a printf specifier of kind \p k located at \p pos.
401 PrintfConversionSpecifier(const char *pos, Kind k)
402 : ConversionSpecifier(true, pos, k) {}
/// True when kind lies in [ObjCBeg, ObjCEnd].
404 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
/// True when kind lies in [DoubleArgBeg, DoubleArgEnd].
405 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
406 kind <= DoubleArgEnd; }
/// Length of the specifier's text; the base class also declares a
/// getLength() with the same signature.
407 unsigned getLength() const {
408 // Conversion specifiers are currently only represented by
409 // single characters, but we stay flexible.
/// Type test used by the cast<> machinery: a ConversionSpecifier counts as
/// a printf specifier exactly when isPrintfKind() is true.
413 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
414 return CS->isPrintfKind();
418 using analyze_format_string::ArgType;
419 using analyze_format_string::LengthModifier;
420 using analyze_format_string::OptionalAmount;
421 using analyze_format_string::OptionalFlag;
/// A printf format specifier: the shared FormatSpecifier state plus the printf
/// flags and a precision. Each flag is an OptionalFlag constructed with the
/// character that represents it.
423 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
424 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
425 OptionalFlag IsLeftJustified; // '-'
426 OptionalFlag HasPlusPrefix; // '+'
427 OptionalFlag HasSpacePrefix; // ' '
428 OptionalFlag HasAlternativeForm; // '#'
429 OptionalFlag HasLeadingZeroes; // '0'
430 OptionalAmount Precision;
// Constructor initialisers: the base is created in printf mode and every
// flag receives its textual representation; all flags start unset.
433 FormatSpecifier(/* isPrintf = */ true),
434 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
435 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
/// Declared here only; the definition lives outside this header.
437 static PrintfSpecifier Parse(const char *beg, const char *end);
439 // Methods for incrementally constructing the PrintfSpecifier.
440 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
// Each setHasX / setIsX below follows the same two steps: assign true to the
// flag, then record \p position on that flag.
443 void setHasThousandsGrouping(const char *position) {
444 HasThousandsGrouping = true;
445 HasThousandsGrouping.setPosition(position);
447 void setIsLeftJustified(const char *position) {
448 IsLeftJustified = true;
449 IsLeftJustified.setPosition(position);
451 void setHasPlusPrefix(const char *position) {
452 HasPlusPrefix = true;
453 HasPlusPrefix.setPosition(position);
455 void setHasSpacePrefix(const char *position) {
456 HasSpacePrefix = true;
457 HasSpacePrefix.setPosition(position);
459 void setHasAlternativeForm(const char *position) {
460 HasAlternativeForm = true;
461 HasAlternativeForm.setPosition(position);
463 void setHasLeadingZeros(const char *position) {
464 HasLeadingZeroes = true;
465 HasLeadingZeroes.setPosition(position);
// Same body as FormatSpecifier::setUsesPositionalArg().
467 void setUsesPositionalArg() { UsesPositionalArg = true; }
469 // Methods for querying the format specifier.
/// Returns the stored conversion specifier viewed as a
/// PrintfConversionSpecifier (via cast<>).
471 const PrintfConversionSpecifier &getConversionSpecifier() const {
472 return cast<PrintfConversionSpecifier>(CS);
/// Precision setter; the stored Precision is marked as written with a '.'
/// prefix.
475 void setPrecision(const OptionalAmount &Amt) {
477 Precision.setUsesDotPrefix();
/// Accessor for the precision (returned by const reference).
480 const OptionalAmount &getPrecision() const {
/// Forwards to the conversion specifier's consumesDataArgument().
484 bool consumesDataArgument() const {
485 return getConversionSpecifier().consumesDataArgument();
488 /// \brief Returns the builtin type that a data argument
489 /// paired with this format specifier should have. This method
490 /// will return null if the format specifier does not have
491 /// a matching data argument or the matching argument matches
492 /// more than one type.
// NOTE(review): the return type is ArgType by value, so "null" above
// presumably means a default-constructed or invalid ArgType -- confirm.
493 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
// Flag accessors: each returns the corresponding OptionalFlag by const
// reference, so callers can read both its state and its position.
495 const OptionalFlag &hasThousandsGrouping() const {
496 return HasThousandsGrouping;
498 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
499 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
500 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
501 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
502 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
// Same body as FormatSpecifier::usesPositionalArg().
503 bool usesPositionalArg() const { return UsesPositionalArg; }
505 /// Changes the specifier and length according to a QualType, retaining any
506 /// flags or options. Returns true on success, or false when a conversion
507 /// was not successful.
508 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
/// Declared here only; the definition lives outside this header.
511 void toString(raw_ostream &os) const;
513 // Validation methods - to check if any element results in undefined behavior
514 bool hasValidPlusPrefix() const;
515 bool hasValidAlternativeForm() const;
516 bool hasValidLeadingZeros() const;
517 bool hasValidSpacePrefix() const;
518 bool hasValidLeftJustified() const;
519 bool hasValidThousandsGroupingPrefix() const;
521 bool hasValidPrecision() const;
522 bool hasValidFieldWidth() const;
524 } // end analyze_printf namespace
526 //===----------------------------------------------------------------------===//
527 /// Pieces specific to fscanf format strings.
529 namespace analyze_scanf {
/// A ConversionSpecifier that is always constructed with isPrintf == false.
531 class ScanfConversionSpecifier :
532 public analyze_format_string::ConversionSpecifier {
/// Builds an invalid scanf specifier with no position.
534 ScanfConversionSpecifier()
535 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
/// Builds a scanf specifier of kind \p k located at \p pos.
537 ScanfConversionSpecifier(const char *pos, Kind k)
538 : ConversionSpecifier(false, pos, k) {}
/// Records where the scan list ends; once this is non-null, the base class
/// getLength() reports EndScanList - Position instead of 1.
540 void setEndScanList(const char *pos) { EndScanList = pos; }
/// Type test used by the cast<> machinery: a ConversionSpecifier counts as
/// a scanf specifier exactly when isPrintfKind() is false.
542 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
543 return !CS->isPrintfKind();
547 using analyze_format_string::ArgType;
548 using analyze_format_string::LengthModifier;
549 using analyze_format_string::OptionalAmount;
550 using analyze_format_string::OptionalFlag;
/// A scanf format specifier: the shared FormatSpecifier state plus the
/// assignment-suppression flag.
552 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
553 OptionalFlag SuppressAssignment; // '*'
// Constructor initialisers: the base is created in scanf mode and the
// suppression flag gets "*" as its representation; it starts unset.
556 FormatSpecifier(/* isPrintf = */ false),
557 SuppressAssignment("*") {}
/// Marks assignment as suppressed and records \p position on the flag.
559 void setSuppressAssignment(const char *position) {
560 SuppressAssignment = true;
561 SuppressAssignment.setPosition(position);
/// Returns the suppression flag by const reference.
564 const OptionalFlag &getSuppressAssignment() const {
565 return SuppressAssignment;
/// Setter for the conversion specifier.
568 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
/// Returns the stored conversion specifier viewed as a
/// ScanfConversionSpecifier (via cast<>).
572 const ScanfConversionSpecifier &getConversionSpecifier() const {
573 return cast<ScanfConversionSpecifier>(CS);
/// A data argument is consumed only when the conversion consumes one and
/// assignment is not suppressed.
576 bool consumesDataArgument() const {
577 return CS.consumesDataArgument() && !SuppressAssignment;
/// Declared here only; the definition lives outside this header.
580 ArgType getArgType(ASTContext &Ctx) const;
582 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
/// Declared here only; the definition lives outside this header.
585 void toString(raw_ostream &os) const;
/// Declared here only; the definition lives outside this header.
587 static ScanfSpecifier Parse(const char *beg, const char *end);
590 } // end analyze_scanf namespace
592 //===----------------------------------------------------------------------===//
593 // Parsing and processing of format strings (both fprintf and fscanf).
595 namespace analyze_format_string {
/// Passed to FormatStringHandler::HandleInvalidPosition(). Going by the
/// enumerator names it presumably says whether the offending position belongs
/// to a field width or to a precision -- confirm against the parser.
597 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
/// Callback interface handed to ParsePrintfString() / ParseScanfString().
/// Every hook is virtual. The void hooks have empty default bodies, so a
/// subclass only overrides the events it cares about.
599 class FormatStringHandler {
601 FormatStringHandler() {}
/// Virtual destructor; declared here, defined outside this header.
602 virtual ~FormatStringHandler();
/// Hook taking a pointer to a null character; does nothing by default.
604 virtual void HandleNullChar(const char *nullCharacter) {}
/// Hook taking the start and length of a position; does nothing by default.
606 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
/// Hook for an invalid position; \p p carries the PositionContext. Does
/// nothing by default.
608 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
609 PositionContext p) {}
/// Hook for a zero position; does nothing by default.
611 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
/// Hook for an incomplete specifier, given as start pointer and length;
/// does nothing by default.
613 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
614 unsigned specifierLen) {}
616 // Printf-specific handlers.
// NOTE(review): the meaning of the bool results of the four handlers below
// cannot be determined from the lines shown here -- confirm against the
// parser before relying on it.
618 virtual bool HandleInvalidPrintfConversionSpecifier(
619 const analyze_printf::PrintfSpecifier &FS,
620 const char *startSpecifier,
621 unsigned specifierLen) {
625 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
626 const char *startSpecifier,
627 unsigned specifierLen) {
631 // Scanf-specific handlers.
633 virtual bool HandleInvalidScanfConversionSpecifier(
634 const analyze_scanf::ScanfSpecifier &FS,
635 const char *startSpecifier,
636 unsigned specifierLen) {
640 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
641 const char *startSpecifier,
642 unsigned specifierLen) {
/// Hook for an incomplete scan list, given as a start/end pointer pair;
/// does nothing by default.
646 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
/// Entry point for printf-style format strings. Takes the handler \p H that
/// receives the callbacks, the string as a \p beg / \p end pointer pair, and
/// the language options and target description.
/// NOTE(review): declared here only; the meaning of the bool result is not
/// visible in this header -- confirm against the definition.
649 bool ParsePrintfString(FormatStringHandler &H,
650 const char *beg, const char *end, const LangOptions &LO,
651 const TargetInfo &Target);
/// Entry point for scanf-style format strings. Takes the handler \p H that
/// receives the callbacks, the string as a \p beg / \p end pointer pair, and
/// the language options and target description.
/// NOTE(review): declared here only; the meaning of the bool result is not
/// visible in this header -- confirm against the definition.
653 bool ParseScanfString(FormatStringHandler &H,
654 const char *beg, const char *end, const LangOptions &LO,
655 const TargetInfo &Target);
657 } // end analyze_format_string namespace
658 } // end clang namespace