1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// The structure of format strings for fprintf is described in C99 7.19.6.1.
// The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
26 //===----------------------------------------------------------------------===//
27 /// Common components of both fprintf and fscanf format strings.
28 namespace analyze_format_string {
30 /// Class representing optional flags with location and representation
34 OptionalFlag(const char *Representation)
35 : representation(Representation), flag(false) {}
36 bool isSet() { return flag; }
37 void set() { flag = true; }
38 void clear() { flag = false; }
39 void setPosition(const char *position) {
41 this->position = position;
43 const char *getPosition() const {
47 const char *toString() const { return representation; }
49 // Overloaded operators for bool like qualities
50 operator bool() const { return flag; }
51 OptionalFlag& operator=(const bool &rhs) {
53 return *this; // Return a reference to myself.
56 const char *representation;
61 /// Represents the length modifier in a format string in scanf/printf.
62 class LengthModifier {
69 AsLongLong, // 'll', 'q' (BSD, deprecated)
74 AsWideChar = AsLong // for '%ls', only makes sense for printf
78 : Position(0), kind(None) {}
79 LengthModifier(const char *pos, Kind k)
80 : Position(pos), kind(k) {}
82 const char *getStart() const {
86 unsigned getLength() const {
98 Kind getKind() const { return kind; }
99 void setKind(Kind k) { kind = k; }
101 const char *toString() const;
104 const char *Position;
108 class ConversionSpecifier {
111 InvalidSpecifier = 0,
112 // C99 conversion specifiers.
116 IntArgBeg = cArg, IntArgEnd = iArg,
122 UIntArgBeg = oArg, UIntArgEnd = XArg,
132 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
141 // ** Printf-specific **
143 // Objective-C specific specifiers.
145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
147 // FreeBSD specific specifiers
152 // GlibC specific specifiers.
155 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
157 // ** Scanf-specific **
159 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
162 ConversionSpecifier(bool isPrintf)
163 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
165 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
166 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
168 const char *getStart() const {
172 StringRef getCharacters() const {
173 return StringRef(getStart(), getLength());
176 bool consumesDataArgument() const {
187 Kind getKind() const { return kind; }
188 void setKind(Kind k) { kind = k; }
189 unsigned getLength() const {
190 return EndScanList ? EndScanList - Position : 1;
193 const char *toString() const;
195 bool isPrintfKind() const { return IsPrintf; }
199 const char *Position;
200 const char *EndScanList;
204 class ArgTypeResult {
/// Classification of an argument-type result.
enum Kind {
  UnknownTy,     ///< Default kind when none is given.
  InvalidTy,     ///< Result is invalid; isValid() tests against this.
  SpecificTy,    ///< A concrete QualType is stored.
  ObjCPointerTy, ///< Tested by matchesAnyObjCObjectRef().
  CPointerTy,
  CStrTy,
  WCStrTy,
  WIntTy
};
211 ArgTypeResult(bool) : K(InvalidTy) {}
213 ArgTypeResult(Kind k = UnknownTy) : K(k) {}
214 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
215 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
217 static ArgTypeResult Invalid() { return ArgTypeResult(true); }
219 bool isValid() const { return K != InvalidTy; }
221 const QualType *getSpecificType() const {
222 return K == SpecificTy ? &T : 0;
225 bool matchesType(ASTContext &C, QualType argTy) const;
227 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
229 QualType getRepresentativeType(ASTContext &C) const;
232 class OptionalAmount {
/// How an optional amount was given.
enum HowSpecified {
  NotSpecified, ///< Default state of a valid amount.
  Constant,     ///< Required by the constant-amount accessors.
  Arg,          ///< hasDataArgument() is true for this value.
  Invalid       ///< isInvalid() is true for this value.
};
236 OptionalAmount(HowSpecified howSpecified,
238 const char *amountStart,
239 unsigned amountLength,
240 bool usesPositionalArg)
241 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
242 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
244 OptionalAmount(bool valid = true)
245 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
246 UsesPositionalArg(0), UsesDotPrefix(0) {}
248 bool isInvalid() const {
249 return hs == Invalid;
252 HowSpecified getHowSpecified() const { return hs; }
253 void setHowSpecified(HowSpecified h) { hs = h; }
255 bool hasDataArgument() const { return hs == Arg; }
257 unsigned getArgIndex() const {
258 assert(hasDataArgument());
262 unsigned getConstantAmount() const {
263 assert(hs == Constant);
267 const char *getStart() const {
268 // We include the . character if it is given.
269 return start - UsesDotPrefix;
272 unsigned getConstantLength() const {
273 assert(hs == Constant);
274 return length + UsesDotPrefix;
277 ArgTypeResult getArgType(ASTContext &Ctx) const;
279 void toString(raw_ostream &os) const;
281 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
282 unsigned getPositionalArgIndex() const {
283 assert(hasDataArgument());
287 bool usesDotPrefix() const { return UsesDotPrefix; }
288 void setUsesDotPrefix() { UsesDotPrefix = true; }
295 bool UsesPositionalArg : 1;
300 class FormatSpecifier {
303 OptionalAmount FieldWidth;
304 ConversionSpecifier CS;
305 /// Positional arguments, an IEEE extension:
306 /// IEEE Std 1003.1, 2004 Edition
307 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
308 bool UsesPositionalArg;
311 FormatSpecifier(bool isPrintf)
312 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
314 void setLengthModifier(LengthModifier lm) {
318 void setUsesPositionalArg() { UsesPositionalArg = true; }
320 void setArgIndex(unsigned i) {
324 unsigned getArgIndex() const {
328 unsigned getPositionalArgIndex() const {
332 const LengthModifier &getLengthModifier() const {
336 const OptionalAmount &getFieldWidth() const {
340 void setFieldWidth(const OptionalAmount &Amt) {
344 bool usesPositionalArg() const { return UsesPositionalArg; }
346 bool hasValidLengthModifier() const;
349 } // end analyze_format_string namespace
351 //===----------------------------------------------------------------------===//
352 /// Pieces specific to fprintf format strings.
354 namespace analyze_printf {
356 class PrintfConversionSpecifier :
357 public analyze_format_string::ConversionSpecifier {
359 PrintfConversionSpecifier()
360 : ConversionSpecifier(true, 0, InvalidSpecifier) {}
362 PrintfConversionSpecifier(const char *pos, Kind k)
363 : ConversionSpecifier(true, pos, k) {}
365 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
366 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
367 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
368 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
369 kind <= DoubleArgBeg; }
370 unsigned getLength() const {
// Conversion specifiers are currently represented only by
// single characters, but we remain flexible.
376 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
377 return CS->isPrintfKind();
381 using analyze_format_string::ArgTypeResult;
382 using analyze_format_string::LengthModifier;
383 using analyze_format_string::OptionalAmount;
384 using analyze_format_string::OptionalFlag;
386 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
387 OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
388 OptionalFlag IsLeftJustified; // '-'
389 OptionalFlag HasPlusPrefix; // '+'
390 OptionalFlag HasSpacePrefix; // ' '
391 OptionalFlag HasAlternativeForm; // '#'
392 OptionalFlag HasLeadingZeroes; // '0'
393 OptionalAmount Precision;
396 FormatSpecifier(/* isPrintf = */ true),
397 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
398 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
400 static PrintfSpecifier Parse(const char *beg, const char *end);
402 // Methods for incrementally constructing the PrintfSpecifier.
403 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
406 void setHasThousandsGrouping(const char *position) {
407 HasThousandsGrouping = true;
408 HasThousandsGrouping.setPosition(position);
410 void setIsLeftJustified(const char *position) {
411 IsLeftJustified = true;
412 IsLeftJustified.setPosition(position);
414 void setHasPlusPrefix(const char *position) {
415 HasPlusPrefix = true;
416 HasPlusPrefix.setPosition(position);
418 void setHasSpacePrefix(const char *position) {
419 HasSpacePrefix = true;
420 HasSpacePrefix.setPosition(position);
422 void setHasAlternativeForm(const char *position) {
423 HasAlternativeForm = true;
424 HasAlternativeForm.setPosition(position);
426 void setHasLeadingZeros(const char *position) {
427 HasLeadingZeroes = true;
428 HasLeadingZeroes.setPosition(position);
430 void setUsesPositionalArg() { UsesPositionalArg = true; }
432 // Methods for querying the format specifier.
434 const PrintfConversionSpecifier &getConversionSpecifier() const {
435 return cast<PrintfConversionSpecifier>(CS);
438 void setPrecision(const OptionalAmount &Amt) {
440 Precision.setUsesDotPrefix();
443 const OptionalAmount &getPrecision() const {
447 bool consumesDataArgument() const {
448 return getConversionSpecifier().consumesDataArgument();
451 /// \brief Returns the builtin type that a data argument
452 /// paired with this format specifier should have. This method
453 /// will return null if the format specifier does not have
454 /// a matching data argument or the matching argument matches
455 /// more than one type.
456 ArgTypeResult getArgType(ASTContext &Ctx) const;
458 const OptionalFlag &hasThousandsGrouping() const {
459 return HasThousandsGrouping;
461 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
462 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
463 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
464 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
465 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
466 bool usesPositionalArg() const { return UsesPositionalArg; }
468 /// Changes the specifier and length according to a QualType, retaining any
469 /// flags or options. Returns true on success, or false when a conversion
470 /// was not successful.
471 bool fixType(QualType QT);
473 void toString(raw_ostream &os) const;
475 // Validation methods - to check if any element results in undefined behavior
476 bool hasValidPlusPrefix() const;
477 bool hasValidAlternativeForm() const;
478 bool hasValidLeadingZeros() const;
479 bool hasValidSpacePrefix() const;
480 bool hasValidLeftJustified() const;
481 bool hasValidThousandsGroupingPrefix() const;
483 bool hasValidPrecision() const;
484 bool hasValidFieldWidth() const;
486 } // end analyze_printf namespace
488 //===----------------------------------------------------------------------===//
489 /// Pieces specific to fscanf format strings.
491 namespace analyze_scanf {
493 class ScanfConversionSpecifier :
494 public analyze_format_string::ConversionSpecifier {
496 ScanfConversionSpecifier()
497 : ConversionSpecifier(false, 0, InvalidSpecifier) {}
499 ScanfConversionSpecifier(const char *pos, Kind k)
500 : ConversionSpecifier(false, pos, k) {}
502 void setEndScanList(const char *pos) { EndScanList = pos; }
504 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
505 return !CS->isPrintfKind();
509 using analyze_format_string::LengthModifier;
510 using analyze_format_string::OptionalAmount;
511 using analyze_format_string::OptionalFlag;
513 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
514 OptionalFlag SuppressAssignment; // '*'
517 FormatSpecifier(/* isPrintf = */ false),
518 SuppressAssignment("*") {}
520 void setSuppressAssignment(const char *position) {
521 SuppressAssignment = true;
522 SuppressAssignment.setPosition(position);
525 const OptionalFlag &getSuppressAssignment() const {
526 return SuppressAssignment;
529 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
533 const ScanfConversionSpecifier &getConversionSpecifier() const {
534 return cast<ScanfConversionSpecifier>(CS);
537 bool consumesDataArgument() const {
538 return CS.consumesDataArgument() && !SuppressAssignment;
541 static ScanfSpecifier Parse(const char *beg, const char *end);
544 } // end analyze_scanf namespace
546 //===----------------------------------------------------------------------===//
547 // Parsing and processing of format strings (both fprintf and fscanf).
549 namespace analyze_format_string {
/// Identifies which part of a format specifier a position refers to.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
553 class FormatStringHandler {
555 FormatStringHandler() {}
556 virtual ~FormatStringHandler();
558 virtual void HandleNullChar(const char *nullCharacter) {}
560 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
561 PositionContext p) {}
563 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
565 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
566 unsigned specifierLen) {}
568 // Printf-specific handlers.
570 virtual bool HandleInvalidPrintfConversionSpecifier(
571 const analyze_printf::PrintfSpecifier &FS,
572 const char *startSpecifier,
573 unsigned specifierLen) {
577 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
578 const char *startSpecifier,
579 unsigned specifierLen) {
583 // Scanf-specific handlers.
585 virtual bool HandleInvalidScanfConversionSpecifier(
586 const analyze_scanf::ScanfSpecifier &FS,
587 const char *startSpecifier,
588 unsigned specifierLen) {
592 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
593 const char *startSpecifier,
594 unsigned specifierLen) {
598 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
601 bool ParsePrintfString(FormatStringHandler &H,
602 const char *beg, const char *end,
603 bool FormatExtensions);
605 bool ParseScanfString(FormatStringHandler &H,
606 const char *beg, const char *end);
608 } // end analyze_format_string namespace
609 } // end clang namespace