1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// The structure of format strings for fprintf is described in C99 7.19.6.1.
// The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
28 //===----------------------------------------------------------------------===//
29 /// Common components of both fprintf and fscanf format strings.
30 namespace analyze_format_string {
32 /// Class representing optional flags with location and representation
36 OptionalFlag(const char *Representation)
37 : representation(Representation), flag(false) {}
38 bool isSet() { return flag; }
39 void set() { flag = true; }
40 void clear() { flag = false; }
41 void setPosition(const char *position) {
43 this->position = position;
45 const char *getPosition() const {
49 const char *toString() const { return representation; }
51 // Overloaded operators for bool like qualities
52 operator bool() const { return flag; }
53 OptionalFlag& operator=(const bool &rhs) {
55 return *this; // Return a reference to myself.
58 const char *representation;
63 /// Represents the length modifier in a format string in scanf/printf.
64 class LengthModifier {
72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
77 AsAllocate, // for '%as', GNU extension to C90 scanf
78 AsMAllocate, // for '%ms', GNU extension to scanf
79 AsWideChar = AsLong // for '%ls', only makes sense for printf
83 : Position(0), kind(None) {}
84 LengthModifier(const char *pos, Kind k)
85 : Position(pos), kind(k) {}
87 const char *getStart() const {
91 unsigned getLength() const {
103 Kind getKind() const { return kind; }
104 void setKind(Kind k) { kind = k; }
106 const char *toString() const;
109 const char *Position;
113 class ConversionSpecifier {
116 InvalidSpecifier = 0,
117 // C99 conversion specifiers.
120 DArg, // Apple extension
122 IntArgBeg = dArg, IntArgEnd = iArg,
125 OArg, // Apple extension
127 UArg, // Apple extension
130 UIntArgBeg = oArg, UIntArgEnd = XArg,
140 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
149 // ** Printf-specific **
151 // Objective-C specific specifiers.
153 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
155 // FreeBSD specific specifiers
160 // GlibC specific specifiers.
163 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
165 // ** Scanf-specific **
167 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
170 ConversionSpecifier(bool isPrintf = true)
171 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
173 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
174 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
176 const char *getStart() const {
180 StringRef getCharacters() const {
181 return StringRef(getStart(), getLength());
184 bool consumesDataArgument() const {
196 Kind getKind() const { return kind; }
197 void setKind(Kind k) { kind = k; }
198 unsigned getLength() const {
199 return EndScanList ? EndScanList - Position : 1;
202 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
203 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
204 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
205 const char *toString() const;
207 bool isPrintfKind() const { return IsPrintf; }
209 Optional<ConversionSpecifier> getStandardSpecifier() const;
213 const char *Position;
214 const char *EndScanList;
220 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
221 AnyCharTy, CStrTy, WCStrTy, WIntTy };
228 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {}
229 ArgType(QualType t, const char *n = 0)
230 : K(SpecificTy), T(t), Name(n), Ptr(false) {}
231 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {}
233 static ArgType Invalid() { return ArgType(InvalidTy); }
234 bool isValid() const { return K != InvalidTy; }
236 /// Create an ArgType which corresponds to the type pointer to A.
237 static ArgType PtrTo(const ArgType& A) {
238 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
244 bool matchesType(ASTContext &C, QualType argTy) const;
246 QualType getRepresentativeType(ASTContext &C) const;
248 std::string getRepresentativeTypeName(ASTContext &C) const;
251 class OptionalAmount {
253 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
255 OptionalAmount(HowSpecified howSpecified,
257 const char *amountStart,
258 unsigned amountLength,
259 bool usesPositionalArg)
260 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
261 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
263 OptionalAmount(bool valid = true)
264 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
265 UsesPositionalArg(0), UsesDotPrefix(0) {}
267 bool isInvalid() const {
268 return hs == Invalid;
271 HowSpecified getHowSpecified() const { return hs; }
272 void setHowSpecified(HowSpecified h) { hs = h; }
274 bool hasDataArgument() const { return hs == Arg; }
276 unsigned getArgIndex() const {
277 assert(hasDataArgument());
281 unsigned getConstantAmount() const {
282 assert(hs == Constant);
286 const char *getStart() const {
287 // We include the . character if it is given.
288 return start - UsesDotPrefix;
291 unsigned getConstantLength() const {
292 assert(hs == Constant);
293 return length + UsesDotPrefix;
296 ArgType getArgType(ASTContext &Ctx) const;
298 void toString(raw_ostream &os) const;
300 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
301 unsigned getPositionalArgIndex() const {
302 assert(hasDataArgument());
306 bool usesDotPrefix() const { return UsesDotPrefix; }
307 void setUsesDotPrefix() { UsesDotPrefix = true; }
314 bool UsesPositionalArg : 1;
319 class FormatSpecifier {
322 OptionalAmount FieldWidth;
323 ConversionSpecifier CS;
324 /// Positional arguments, an IEEE extension:
325 /// IEEE Std 1003.1, 2004 Edition
326 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
327 bool UsesPositionalArg;
330 FormatSpecifier(bool isPrintf)
331 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
333 void setLengthModifier(LengthModifier lm) {
337 void setUsesPositionalArg() { UsesPositionalArg = true; }
339 void setArgIndex(unsigned i) {
343 unsigned getArgIndex() const {
347 unsigned getPositionalArgIndex() const {
351 const LengthModifier &getLengthModifier() const {
355 const OptionalAmount &getFieldWidth() const {
359 void setFieldWidth(const OptionalAmount &Amt) {
363 bool usesPositionalArg() const { return UsesPositionalArg; }
365 bool hasValidLengthModifier(const TargetInfo &Target) const;
367 bool hasStandardLengthModifier() const;
369 Optional<LengthModifier> getCorrectedLengthModifier() const;
371 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
373 bool hasStandardLengthConversionCombination() const;
375 /// For a TypedefType QT, if it is a named integer type such as size_t,
376 /// assign the appropriate value to LM and return true.
377 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
380 } // end analyze_format_string namespace
382 //===----------------------------------------------------------------------===//
383 /// Pieces specific to fprintf format strings.
385 namespace analyze_printf {
387 class PrintfConversionSpecifier :
388 public analyze_format_string::ConversionSpecifier {
390 PrintfConversionSpecifier()
391 : ConversionSpecifier(true, 0, InvalidSpecifier) {}
393 PrintfConversionSpecifier(const char *pos, Kind k)
394 : ConversionSpecifier(true, pos, k) {}
396 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
397 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
398 kind <= DoubleArgEnd; }
399 unsigned getLength() const {
// Conversion specifiers are currently represented only by
// single characters, but we stay flexible.
405 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
406 return CS->isPrintfKind();
410 using analyze_format_string::ArgType;
411 using analyze_format_string::LengthModifier;
412 using analyze_format_string::OptionalAmount;
413 using analyze_format_string::OptionalFlag;
415 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
416 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
417 OptionalFlag IsLeftJustified; // '-'
418 OptionalFlag HasPlusPrefix; // '+'
419 OptionalFlag HasSpacePrefix; // ' '
420 OptionalFlag HasAlternativeForm; // '#'
421 OptionalFlag HasLeadingZeroes; // '0'
422 OptionalAmount Precision;
425 FormatSpecifier(/* isPrintf = */ true),
426 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
427 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
429 static PrintfSpecifier Parse(const char *beg, const char *end);
431 // Methods for incrementally constructing the PrintfSpecifier.
432 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
435 void setHasThousandsGrouping(const char *position) {
436 HasThousandsGrouping = true;
437 HasThousandsGrouping.setPosition(position);
439 void setIsLeftJustified(const char *position) {
440 IsLeftJustified = true;
441 IsLeftJustified.setPosition(position);
443 void setHasPlusPrefix(const char *position) {
444 HasPlusPrefix = true;
445 HasPlusPrefix.setPosition(position);
447 void setHasSpacePrefix(const char *position) {
448 HasSpacePrefix = true;
449 HasSpacePrefix.setPosition(position);
451 void setHasAlternativeForm(const char *position) {
452 HasAlternativeForm = true;
453 HasAlternativeForm.setPosition(position);
455 void setHasLeadingZeros(const char *position) {
456 HasLeadingZeroes = true;
457 HasLeadingZeroes.setPosition(position);
459 void setUsesPositionalArg() { UsesPositionalArg = true; }
461 // Methods for querying the format specifier.
463 const PrintfConversionSpecifier &getConversionSpecifier() const {
464 return cast<PrintfConversionSpecifier>(CS);
467 void setPrecision(const OptionalAmount &Amt) {
469 Precision.setUsesDotPrefix();
472 const OptionalAmount &getPrecision() const {
476 bool consumesDataArgument() const {
477 return getConversionSpecifier().consumesDataArgument();
480 /// \brief Returns the builtin type that a data argument
481 /// paired with this format specifier should have. This method
482 /// will return null if the format specifier does not have
483 /// a matching data argument or the matching argument matches
484 /// more than one type.
485 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
487 const OptionalFlag &hasThousandsGrouping() const {
488 return HasThousandsGrouping;
490 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
491 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
492 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
493 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
494 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
495 bool usesPositionalArg() const { return UsesPositionalArg; }
497 /// Changes the specifier and length according to a QualType, retaining any
498 /// flags or options. Returns true on success, or false when a conversion
499 /// was not successful.
500 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
503 void toString(raw_ostream &os) const;
505 // Validation methods - to check if any element results in undefined behavior
506 bool hasValidPlusPrefix() const;
507 bool hasValidAlternativeForm() const;
508 bool hasValidLeadingZeros() const;
509 bool hasValidSpacePrefix() const;
510 bool hasValidLeftJustified() const;
511 bool hasValidThousandsGroupingPrefix() const;
513 bool hasValidPrecision() const;
514 bool hasValidFieldWidth() const;
516 } // end analyze_printf namespace
518 //===----------------------------------------------------------------------===//
519 /// Pieces specific to fscanf format strings.
521 namespace analyze_scanf {
523 class ScanfConversionSpecifier :
524 public analyze_format_string::ConversionSpecifier {
526 ScanfConversionSpecifier()
527 : ConversionSpecifier(false, 0, InvalidSpecifier) {}
529 ScanfConversionSpecifier(const char *pos, Kind k)
530 : ConversionSpecifier(false, pos, k) {}
532 void setEndScanList(const char *pos) { EndScanList = pos; }
534 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
535 return !CS->isPrintfKind();
539 using analyze_format_string::ArgType;
540 using analyze_format_string::LengthModifier;
541 using analyze_format_string::OptionalAmount;
542 using analyze_format_string::OptionalFlag;
544 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
545 OptionalFlag SuppressAssignment; // '*'
548 FormatSpecifier(/* isPrintf = */ false),
549 SuppressAssignment("*") {}
551 void setSuppressAssignment(const char *position) {
552 SuppressAssignment = true;
553 SuppressAssignment.setPosition(position);
556 const OptionalFlag &getSuppressAssignment() const {
557 return SuppressAssignment;
560 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
564 const ScanfConversionSpecifier &getConversionSpecifier() const {
565 return cast<ScanfConversionSpecifier>(CS);
568 bool consumesDataArgument() const {
569 return CS.consumesDataArgument() && !SuppressAssignment;
572 ArgType getArgType(ASTContext &Ctx) const;
574 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
576 void toString(raw_ostream &os) const;
578 static ScanfSpecifier Parse(const char *beg, const char *end);
581 } // end analyze_scanf namespace
583 //===----------------------------------------------------------------------===//
584 // Parsing and processing of format strings (both fprintf and fscanf).
586 namespace analyze_format_string {
588 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
590 class FormatStringHandler {
592 FormatStringHandler() {}
593 virtual ~FormatStringHandler();
595 virtual void HandleNullChar(const char *nullCharacter) {}
597 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
599 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
600 PositionContext p) {}
602 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
604 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
605 unsigned specifierLen) {}
607 // Printf-specific handlers.
609 virtual bool HandleInvalidPrintfConversionSpecifier(
610 const analyze_printf::PrintfSpecifier &FS,
611 const char *startSpecifier,
612 unsigned specifierLen) {
616 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
617 const char *startSpecifier,
618 unsigned specifierLen) {
622 // Scanf-specific handlers.
624 virtual bool HandleInvalidScanfConversionSpecifier(
625 const analyze_scanf::ScanfSpecifier &FS,
626 const char *startSpecifier,
627 unsigned specifierLen) {
631 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
632 const char *startSpecifier,
633 unsigned specifierLen) {
637 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
640 bool ParsePrintfString(FormatStringHandler &H,
641 const char *beg, const char *end, const LangOptions &LO,
642 const TargetInfo &Target);
644 bool ParseScanfString(FormatStringHandler &H,
645 const char *beg, const char *end, const LangOptions &LO,
646 const TargetInfo &Target);
648 } // end analyze_format_string namespace
649 } // end clang namespace