1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// and related functions.
13 // The structure of format strings for fprintf is described in C99 7.19.6.1.
15 // The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
26 //===----------------------------------------------------------------------===//
27 /// Common components of both fprintf and fscanf format strings.
28 namespace analyze_format_string {
30 /// Class representing optional flags with location and representation
/// information.
/// Creates a flag with the given textual representation; it starts unset.
34 OptionalFlag(const char *Representation)
35 : representation(Representation), flag(false) {}
/// Query, set, and clear the flag's state.
36 bool isSet() { return flag; }
37 void set() { flag = true; }
38 void clear() { flag = false; }
/// Stores the given position pointer in the flag.
39 void setPosition(const char *position) {
41 this->position = position;
43 const char *getPosition() const {
/// Returns the representation string supplied at construction.
47 const char *toString() const { return representation; }
49 // Overloaded operators for bool like qualities
/// Lets the flag be tested directly in boolean contexts.
50 operator bool() const { return flag; }
/// Assignment from a bool; yields a reference to this flag.
51 OptionalFlag& operator=(const bool &rhs) {
53 return *this; // Return a reference to myself.
// Textual form of the flag, as returned by toString().
56 const char *representation;
61 /// Represents the length modifier in a format string in scanf/printf.
62 class LengthModifier {
// Kinds of length modifier; the trailing comment on each enumerator gives
// the spelling it stands for.
70 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types)
75 AsAllocate, // for '%as', GNU extension to C90 scanf
76 AsMAllocate, // for '%ms', GNU extension to scanf
// AsWideChar shares its value with AsLong.
77 AsWideChar = AsLong // for '%ls', only makes sense for printf
// Initializes a modifier with a null position and kind None.
81 : Position(0), kind(None) {}
/// Creates a modifier of kind \p k whose position is \p pos.
82 LengthModifier(const char *pos, Kind k)
83 : Position(pos), kind(k) {}
85 const char *getStart() const {
89 unsigned getLength() const {
/// Accessors for the modifier's kind.
101 Kind getKind() const { return kind; }
102 void setKind(Kind k) { kind = k; }
/// Returns a string form of the modifier (defined out of line).
104 const char *toString() const;
// Position pointer supplied at construction.
107 const char *Position;
/// Describes the conversion specifier of a format directive: its kind, its
/// position, and whether it was created for printf or for scanf.
111 class ConversionSpecifier {
114 InvalidSpecifier = 0,
115 // C99 conversion specifiers.
// The *Beg / *End enumerators mark inclusive ranges of kinds; range tests
// such as isUIntArg() compare 'kind' against them.
119 IntArgBeg = cArg, IntArgEnd = iArg,
125 UIntArgBeg = oArg, UIntArgEnd = XArg,
135 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
144 // ** Printf-specific **
146 // Objective-C specific specifiers.
148 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
150 // GlibC specific specifiers.
153 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
155 // ** Scanf-specific **
157 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
/// Creates an invalid specifier with null position and scan-list end.
160 ConversionSpecifier(bool isPrintf)
161 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
/// Creates a specifier of kind \p k at position \p pos; the scan-list end
/// starts out null.
163 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
164 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
166 const char *getStart() const {
/// Returns the specifier's text: getLength() characters starting at
/// getStart().
170 StringRef getCharacters() const {
171 return StringRef(getStart(), getLength());
174 bool consumesDataArgument() const {
/// Accessors for the specifier's kind.
185 Kind getKind() const { return kind; }
186 void setKind(Kind k) { kind = k; }
/// Length is 1 unless EndScanList is non-null, in which case it is the
/// distance from Position to EndScanList.
187 unsigned getLength() const {
188 return EndScanList ? EndScanList - Position : 1;
/// True when the kind lies in the inclusive range [UIntArgBeg, UIntArgEnd].
191 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
192 const char *toString() const;
/// Reports the isPrintf value this specifier was constructed with.
194 bool isPrintfKind() const { return IsPrintf; }
// Position of the specifier and, when set, the end of its scan list.
198 const char *Position;
199 const char *EndScanList;
/// Describes the argument type a format directive expects: either one of
/// the general kinds below or a specific QualType, optionally with a name.
203 class ArgTypeResult {
205 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
206 AnyCharTy, CStrTy, WCStrTy, WIntTy };
// The bool argument's value is ignored: this overload always produces an
// InvalidTy result and is what Invalid() calls.
211 ArgTypeResult(bool) : K(InvalidTy), Name(0) {}
// Kind-based constructors; the kind defaults to UnknownTy and the name to
// null.
213 ArgTypeResult(Kind k = UnknownTy) : K(k), Name(0) {}
214 ArgTypeResult(Kind k, const char *n) : K(k), Name(n) {}
// Type-based constructors; each produces a SpecificTy result wrapping the
// given type.
215 ArgTypeResult(QualType t) : K(SpecificTy), T(t), Name(0) {}
216 ArgTypeResult(QualType t, const char *n) : K(SpecificTy), T(t), Name(n) {}
217 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t), Name(0) {}
/// Returns a result whose kind is InvalidTy.
219 static ArgTypeResult Invalid() { return ArgTypeResult(true); }
/// True for every kind except InvalidTy.
221 bool isValid() const { return K != InvalidTy; }
/// Returns a pointer to the wrapped type when the kind is SpecificTy, and
/// null otherwise.
223 const QualType *getSpecificType() const {
224 return K == SpecificTy ? &T : 0;
227 bool matchesType(ASTContext &C, QualType argTy) const;
/// True only when the kind is ObjCPointerTy.
229 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
231 QualType getRepresentativeType(ASTContext &C) const;
233 std::string getRepresentativeTypeName(ASTContext &C) const;
/// An amount that may be absent, a constant, taken from a data argument, or
/// invalid (see HowSpecified).
236 class OptionalAmount {
238 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
/// Creates an amount from its specification kind, value, source text
/// (start and length), and whether it uses a positional argument. The dot
/// prefix flag starts cleared.
240 OptionalAmount(HowSpecified howSpecified,
242 const char *amountStart,
243 unsigned amountLength,
244 bool usesPositionalArg)
245 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
246 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
/// Creates an empty amount: NotSpecified when \p valid is true, Invalid
/// otherwise. All other fields are zeroed.
248 OptionalAmount(bool valid = true)
249 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
250 UsesPositionalArg(0), UsesDotPrefix(0) {}
252 bool isInvalid() const {
253 return hs == Invalid;
256 HowSpecified getHowSpecified() const { return hs; }
257 void setHowSpecified(HowSpecified h) { hs = h; }
/// True when the amount is taken from a data argument (hs == Arg).
259 bool hasDataArgument() const { return hs == Arg; }
// Only meaningful when the amount comes from a data argument (asserted).
261 unsigned getArgIndex() const {
262 assert(hasDataArgument());
// Only meaningful for constant amounts (asserted).
266 unsigned getConstantAmount() const {
267 assert(hs == Constant);
// Steps back by one character when the dot prefix flag is set.
271 const char *getStart() const {
272 // We include the . character if it is given.
273 return start - UsesDotPrefix;
// Length of a constant amount, plus one when the dot prefix flag is set.
276 unsigned getConstantLength() const {
277 assert(hs == Constant);
278 return length + UsesDotPrefix;
281 ArgTypeResult getArgType(ASTContext &Ctx) const;
283 void toString(raw_ostream &os) const;
285 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
// Only meaningful when the amount comes from a data argument (asserted).
286 unsigned getPositionalArgIndex() const {
287 assert(hasDataArgument());
/// Query and set the dot prefix flag, which getStart() and
/// getConstantLength() take into account.
291 bool usesDotPrefix() const { return UsesDotPrefix; }
292 void setUsesDotPrefix() { UsesDotPrefix = true; }
// Stored as a one-bit bit-field.
299 bool UsesPositionalArg : 1;
/// Common state of a format specifier: field width, conversion specifier,
/// and positional-argument bookkeeping.
304 class FormatSpecifier {
307 OptionalAmount FieldWidth;
308 ConversionSpecifier CS;
309 /// Positional arguments, an IEEE extension:
310 /// IEEE Std 1003.1, 2004 Edition
311 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
312 bool UsesPositionalArg;
/// Creates a specifier whose conversion specifier is built for printf or
/// scanf according to \p isPrintf; positional use is off and the argument
/// index is 0.
315 FormatSpecifier(bool isPrintf)
316 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
318 void setLengthModifier(LengthModifier lm) {
/// Marks this specifier as using a positional argument.
322 void setUsesPositionalArg() { UsesPositionalArg = true; }
324 void setArgIndex(unsigned i) {
328 unsigned getArgIndex() const {
332 unsigned getPositionalArgIndex() const {
336 const LengthModifier &getLengthModifier() const {
340 const OptionalAmount &getFieldWidth() const {
344 void setFieldWidth(const OptionalAmount &Amt) {
348 bool usesPositionalArg() const { return UsesPositionalArg; }
// Validity and standards-conformance checks (defined out of line).
350 bool hasValidLengthModifier() const;
352 bool hasStandardLengthModifier() const;
354 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
356 bool hasStandardLengthConversionCombination() const;
359 } // end analyze_format_string namespace
361 //===----------------------------------------------------------------------===//
362 /// Pieces specific to fprintf format strings.
364 namespace analyze_printf {
/// ConversionSpecifier specialization for printf; it always passes
/// isPrintf == true to the base class.
366 class PrintfConversionSpecifier :
367 public analyze_format_string::ConversionSpecifier {
/// Creates an invalid printf specifier with a null position.
369 PrintfConversionSpecifier()
370 : ConversionSpecifier(true, 0, InvalidSpecifier) {}
/// Creates a printf specifier of kind \p k at position \p pos.
372 PrintfConversionSpecifier(const char *pos, Kind k)
373 : ConversionSpecifier(true, pos, k) {}
// Each test checks 'kind' against an inclusive [Beg, End] range.
375 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
376 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
377 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
378 kind <= DoubleArgEnd; }
379 unsigned getLength() const {
380 // Conversion specifiers are currently represented only by
381 // single characters, but we stay flexible.
/// Type test: a ConversionSpecifier is a printf specifier exactly when it
/// reports isPrintfKind().
385 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
386 return CS->isPrintfKind();
390 using analyze_format_string::ArgTypeResult;
391 using analyze_format_string::LengthModifier;
392 using analyze_format_string::OptionalAmount;
393 using analyze_format_string::OptionalFlag;
/// A printf format specifier: the common FormatSpecifier state plus the
/// printf flags and the precision.
395 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
// One OptionalFlag per printf flag; the trailing comment shows the flag
// character.
396 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
397 OptionalFlag IsLeftJustified; // '-'
398 OptionalFlag HasPlusPrefix; // '+'
399 OptionalFlag HasSpacePrefix; // ' '
400 OptionalFlag HasAlternativeForm; // '#'
401 OptionalFlag HasLeadingZeroes; // '0'
402 OptionalAmount Precision;
// Initializes the base class for printf and gives each flag its textual
// representation.
405 FormatSpecifier(/* isPrintf = */ true),
406 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
407 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
409 static PrintfSpecifier Parse(const char *beg, const char *end);
411 // Methods for incrementally constructing the PrintfSpecifier.
412 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
// Each flag setter below turns its flag on and then stores the position
// that was passed in.
415 void setHasThousandsGrouping(const char *position) {
416 HasThousandsGrouping = true;
417 HasThousandsGrouping.setPosition(position);
419 void setIsLeftJustified(const char *position) {
420 IsLeftJustified = true;
421 IsLeftJustified.setPosition(position);
423 void setHasPlusPrefix(const char *position) {
424 HasPlusPrefix = true;
425 HasPlusPrefix.setPosition(position);
427 void setHasSpacePrefix(const char *position) {
428 HasSpacePrefix = true;
429 HasSpacePrefix.setPosition(position);
431 void setHasAlternativeForm(const char *position) {
432 HasAlternativeForm = true;
433 HasAlternativeForm.setPosition(position);
435 void setHasLeadingZeros(const char *position) {
436 HasLeadingZeroes = true;
437 HasLeadingZeroes.setPosition(position);
439 void setUsesPositionalArg() { UsesPositionalArg = true; }
441 // Methods for querying the format specifier.
// Views the stored conversion specifier as a PrintfConversionSpecifier.
443 const PrintfConversionSpecifier &getConversionSpecifier() const {
444 return cast<PrintfConversionSpecifier>(CS);
// Marks the stored precision as using the '.' prefix.
447 void setPrecision(const OptionalAmount &Amt) {
449 Precision.setUsesDotPrefix();
452 const OptionalAmount &getPrecision() const {
// Delegates to the conversion specifier.
456 bool consumesDataArgument() const {
457 return getConversionSpecifier().consumesDataArgument();
460 /// \brief Returns the builtin type that a data argument
461 /// paired with this format specifier should have. This method
462 /// will return null if the format specifier does not have
463 /// a matching data argument or the matching argument matches
464 /// more than one type.
465 ArgTypeResult getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
// Read-only access to the individual flags.
467 const OptionalFlag &hasThousandsGrouping() const {
468 return HasThousandsGrouping;
470 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
471 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
472 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
473 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
474 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
475 bool usesPositionalArg() const { return UsesPositionalArg; }
477 /// Changes the specifier and length according to a QualType, retaining any
478 /// flags or options. Returns true on success, or false when a conversion
479 /// was not successful.
480 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
/// Writes a textual form of the specifier to \p os (defined out of line).
483 void toString(raw_ostream &os) const;
485 // Validation methods - to check if any element results in undefined behavior
486 bool hasValidPlusPrefix() const;
487 bool hasValidAlternativeForm() const;
488 bool hasValidLeadingZeros() const;
489 bool hasValidSpacePrefix() const;
490 bool hasValidLeftJustified() const;
491 bool hasValidThousandsGroupingPrefix() const;
493 bool hasValidPrecision() const;
494 bool hasValidFieldWidth() const;
496 } // end analyze_printf namespace
498 //===----------------------------------------------------------------------===//
499 /// Pieces specific to fscanf format strings.
501 namespace analyze_scanf {
/// ConversionSpecifier specialization for scanf; it always passes
/// isPrintf == false to the base class.
503 class ScanfConversionSpecifier :
504 public analyze_format_string::ConversionSpecifier {
/// Creates an invalid scanf specifier with a null position.
506 ScanfConversionSpecifier()
507 : ConversionSpecifier(false, 0, InvalidSpecifier) {}
/// Creates a scanf specifier of kind \p k at position \p pos.
509 ScanfConversionSpecifier(const char *pos, Kind k)
510 : ConversionSpecifier(false, pos, k) {}
/// Records the end of the scan list.
512 void setEndScanList(const char *pos) { EndScanList = pos; }
/// Type test: a ConversionSpecifier is a scanf specifier exactly when it
/// does not report isPrintfKind().
514 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
515 return !CS->isPrintfKind();
519 using analyze_format_string::ArgTypeResult;
520 using analyze_format_string::LengthModifier;
521 using analyze_format_string::OptionalAmount;
522 using analyze_format_string::OptionalFlag;
/// Argument-type description for scanf directives. It has its own Kind
/// enumeration, including PtrToArgTypeResultTy for results built from an
/// ArgTypeResult.
524 class ScanfArgTypeResult : public ArgTypeResult {
526 enum Kind { UnknownTy, InvalidTy, CStrTy, WCStrTy, PtrToArgTypeResultTy };
531 QualType getRepresentativeType(ASTContext &C) const;
/// Creates a result of the given kind (UnknownTy by default) with an
/// optional name.
533 ScanfArgTypeResult(Kind k = UnknownTy, const char* n = 0) : K(k), Name(n) {}
/// Wraps \p a in a result of kind PtrToArgTypeResultTy with an optional
/// name.
534 ScanfArgTypeResult(ArgTypeResult a, const char *n = 0)
535 : K(PtrToArgTypeResultTy), A(a), Name(n) {
/// Returns a result whose kind is InvalidTy.
539 static ScanfArgTypeResult Invalid() { return ScanfArgTypeResult(InvalidTy); }
/// True for every kind except InvalidTy.
541 bool isValid() const { return K != InvalidTy; }
543 bool matchesType(ASTContext& C, QualType argTy) const;
545 std::string getRepresentativeTypeName(ASTContext& C) const;
/// A scanf format specifier: the common FormatSpecifier state plus the
/// assignment-suppression flag.
548 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
549 OptionalFlag SuppressAssignment; // '*'
// Initializes the base class for scanf and gives the flag its textual
// representation.
552 FormatSpecifier(/* isPrintf = */ false),
553 SuppressAssignment("*") {}
/// Turns assignment suppression on and stores the given position.
555 void setSuppressAssignment(const char *position) {
556 SuppressAssignment = true;
557 SuppressAssignment.setPosition(position);
560 const OptionalFlag &getSuppressAssignment() const {
561 return SuppressAssignment;
564 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
// Views the stored conversion specifier as a ScanfConversionSpecifier.
568 const ScanfConversionSpecifier &getConversionSpecifier() const {
569 return cast<ScanfConversionSpecifier>(CS);
// A data argument is consumed only when the conversion specifier consumes
// one and assignment is not suppressed.
572 bool consumesDataArgument() const {
573 return CS.consumesDataArgument() && !SuppressAssignment;
576 ScanfArgTypeResult getArgType(ASTContext &Ctx) const;
578 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx);
/// Writes a textual form of the specifier to \p os (defined out of line).
580 void toString(raw_ostream &os) const;
582 static ScanfSpecifier Parse(const char *beg, const char *end);
585 } // end analyze_scanf namespace
587 //===----------------------------------------------------------------------===//
588 // Parsing and processing of format strings (both fprintf and fscanf).
590 namespace analyze_format_string {
/// Names the two contexts a position can belong to, with explicit values:
/// FieldWidthPos (0) and PrecisionPos (1).
592 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
/// Callback interface with virtual Handle* hooks. Every void hook has an
/// empty default body, so subclasses override only the hooks they need.
594 class FormatStringHandler {
596 FormatStringHandler() {}
// Virtual destructor, defined out of line.
597 virtual ~FormatStringHandler();
599 virtual void HandleNullChar(const char *nullCharacter) {}
601 virtual void HandlePosition(const char *startPos, unsigned posLen) {}
// The PositionContext argument says whether the position belongs to a
// field width or a precision.
603 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
604 PositionContext p) {}
606 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
608 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
609 unsigned specifierLen) {}
611 // Printf-specific handlers.
// Each hook receives the specifier together with the start and length of
// its text.
613 virtual bool HandleInvalidPrintfConversionSpecifier(
614 const analyze_printf::PrintfSpecifier &FS,
615 const char *startSpecifier,
616 unsigned specifierLen) {
620 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
621 const char *startSpecifier,
622 unsigned specifierLen) {
626 // Scanf-specific handlers.
// Same parameter layout as the printf hooks, with a ScanfSpecifier.
628 virtual bool HandleInvalidScanfConversionSpecifier(
629 const analyze_scanf::ScanfSpecifier &FS,
630 const char *startSpecifier,
631 unsigned specifierLen) {
635 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
636 const char *startSpecifier,
637 unsigned specifierLen) {
641 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
/// Entry point for printf-style format strings (defined out of line).
/// Takes the handler \p H, the string bounds \p beg and \p end, and the
/// language options \p LO.
// NOTE(review): presumably walks [beg, end) and reports to H; the meaning
// of the bool result is not visible here - confirm against the definition.
644 bool ParsePrintfString(FormatStringHandler &H,
645 const char *beg, const char *end, const LangOptions &LO);
/// Entry point for scanf-style format strings (defined out of line).
/// Takes the handler \p H, the string bounds \p beg and \p end, and the
/// language options \p LO.
// NOTE(review): presumably walks [beg, end) and reports to H; the meaning
// of the bool result is not visible here - confirm against the definition.
647 bool ParseScanfString(FormatStringHandler &H,
648 const char *beg, const char *end, const LangOptions &LO);
650 } // end analyze_format_string namespace
651 } // end clang namespace