1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// The structure of format strings for fprintf is described in C99 7.19.6.1.
// The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
26 //===----------------------------------------------------------------------===//
27 /// Common components of both fprintf and fscanf format strings.
28 namespace analyze_format_string {
30 /// Class representing optional flags with location and representation
34 OptionalFlag(const char *Representation)
35 : representation(Representation), flag(false) {}
36 bool isSet() { return flag; }
37 void set() { flag = true; }
38 void clear() { flag = false; }
39 void setPosition(const char *position) {
41 this->position = position;
43 const char *getPosition() const {
47 const char *toString() const { return representation; }
49 // Overloaded operators for bool like qualities
50 operator bool() const { return flag; }
51 OptionalFlag& operator=(const bool &rhs) {
53 return *this; // Return a reference to myself.
56 const char *representation;
61 /// Represents the length modifier in a format string in scanf/printf.
62 class LengthModifier {
69 AsLongLong, // 'll', 'q' (BSD, deprecated)
74 AsWideChar = AsLong // for '%ls', only makes sense for printf
78 : Position(0), kind(None) {}
79 LengthModifier(const char *pos, Kind k)
80 : Position(pos), kind(k) {}
82 const char *getStart() const {
86 unsigned getLength() const {
98 Kind getKind() const { return kind; }
99 void setKind(Kind k) { kind = k; }
101 const char *toString() const;
104 const char *Position;
108 class ConversionSpecifier {
111 InvalidSpecifier = 0,
112 // C99 conversion specifiers.
116 IntArgBeg = cArg, IntArgEnd = iArg,
122 UIntArgBeg = oArg, UIntArgEnd = XArg,
132 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
141 // ** Printf-specific **
143 // Objective-C specific specifiers.
145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
147 // FreeBSD specific specifiers
151 // GlibC specific specifiers.
154 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
156 // ** Scanf-specific **
158 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
161 ConversionSpecifier(bool isPrintf)
162 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
164 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
165 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
167 const char *getStart() const {
171 llvm::StringRef getCharacters() const {
172 return llvm::StringRef(getStart(), getLength());
175 bool consumesDataArgument() const {
186 Kind getKind() const { return kind; }
187 void setKind(Kind k) { kind = k; }
188 unsigned getLength() const {
189 return EndScanList ? EndScanList - Position : 1;
192 const char *toString() const;
194 bool isPrintfKind() const { return IsPrintf; }
198 const char *Position;
199 const char *EndScanList;
203 class ArgTypeResult {
/// Discriminator stored in an ArgTypeResult.
enum Kind {
  UnknownTy,      // category used by the defaulted constructor argument
  InvalidTy,      // isValid() is false for this category
  SpecificTy,     // the result carries one concrete QualType
  ObjCPointerTy,  // reported by matchesAnyObjCObjectRef()
  CPointerTy,
  CStrTy,
  WCStrTy,
  WIntTy
};
210 ArgTypeResult(bool) : K(InvalidTy) {}
212 ArgTypeResult(Kind k = UnknownTy) : K(k) {}
213 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
214 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
216 static ArgTypeResult Invalid() { return ArgTypeResult(true); }
218 bool isValid() const { return K != InvalidTy; }
220 const QualType *getSpecificType() const {
221 return K == SpecificTy ? &T : 0;
224 bool matchesType(ASTContext &C, QualType argTy) const;
226 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
228 QualType getRepresentativeType(ASTContext &C) const;
231 class OptionalAmount {
/// Describes the way an OptionalAmount was supplied.
enum HowSpecified {
  NotSpecified,  // state produced by OptionalAmount(true)
  Constant,      // required by getConstantAmount() / getConstantLength()
  Arg,           // hasDataArgument() is true for this state
  Invalid        // isInvalid() is true for this state
};
235 OptionalAmount(HowSpecified howSpecified,
237 const char *amountStart,
238 unsigned amountLength,
239 bool usesPositionalArg)
240 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
241 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
243 OptionalAmount(bool valid = true)
244 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
245 UsesPositionalArg(0), UsesDotPrefix(0) {}
247 bool isInvalid() const {
248 return hs == Invalid;
251 HowSpecified getHowSpecified() const { return hs; }
252 void setHowSpecified(HowSpecified h) { hs = h; }
254 bool hasDataArgument() const { return hs == Arg; }
256 unsigned getArgIndex() const {
257 assert(hasDataArgument());
261 unsigned getConstantAmount() const {
262 assert(hs == Constant);
266 const char *getStart() const {
267 // We include the . character if it is given.
268 return start - UsesDotPrefix;
271 unsigned getConstantLength() const {
272 assert(hs == Constant);
273 return length + UsesDotPrefix;
276 ArgTypeResult getArgType(ASTContext &Ctx) const;
278 void toString(llvm::raw_ostream &os) const;
280 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
281 unsigned getPositionalArgIndex() const {
282 assert(hasDataArgument());
286 bool usesDotPrefix() const { return UsesDotPrefix; }
287 void setUsesDotPrefix() { UsesDotPrefix = true; }
294 bool UsesPositionalArg : 1;
299 class FormatSpecifier {
302 OptionalAmount FieldWidth;
303 ConversionSpecifier CS;
304 /// Positional arguments, an IEEE extension:
305 /// IEEE Std 1003.1, 2004 Edition
306 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
307 bool UsesPositionalArg;
310 FormatSpecifier(bool isPrintf)
311 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
313 void setLengthModifier(LengthModifier lm) {
317 void setUsesPositionalArg() { UsesPositionalArg = true; }
319 void setArgIndex(unsigned i) {
323 unsigned getArgIndex() const {
327 unsigned getPositionalArgIndex() const {
331 const LengthModifier &getLengthModifier() const {
335 const OptionalAmount &getFieldWidth() const {
339 void setFieldWidth(const OptionalAmount &Amt) {
343 bool usesPositionalArg() const { return UsesPositionalArg; }
345 bool hasValidLengthModifier() const;
348 } // end analyze_format_string namespace
350 //===----------------------------------------------------------------------===//
351 /// Pieces specific to fprintf format strings.
353 namespace analyze_printf {
355 class PrintfConversionSpecifier :
356 public analyze_format_string::ConversionSpecifier {
358 PrintfConversionSpecifier()
359 : ConversionSpecifier(true, 0, InvalidSpecifier) {}
361 PrintfConversionSpecifier(const char *pos, Kind k)
362 : ConversionSpecifier(true, pos, k) {}
364 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
365 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
366 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
367 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
368 kind <= DoubleArgBeg; }
369 unsigned getLength() const {
// Conversion specifiers are currently represented only by single
// characters, but we stay flexible.
375 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
376 return CS->isPrintfKind();
380 using analyze_format_string::ArgTypeResult;
381 using analyze_format_string::LengthModifier;
382 using analyze_format_string::OptionalAmount;
383 using analyze_format_string::OptionalFlag;
385 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
386 OptionalFlag IsLeftJustified; // '-'
387 OptionalFlag HasPlusPrefix; // '+'
388 OptionalFlag HasSpacePrefix; // ' '
389 OptionalFlag HasAlternativeForm; // '#'
390 OptionalFlag HasLeadingZeroes; // '0'
391 OptionalAmount Precision;
394 FormatSpecifier(/* isPrintf = */ true),
395 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
396 HasAlternativeForm("#"), HasLeadingZeroes("0") {}
398 static PrintfSpecifier Parse(const char *beg, const char *end);
400 // Methods for incrementally constructing the PrintfSpecifier.
401 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
404 void setIsLeftJustified(const char *position) {
405 IsLeftJustified = true;
406 IsLeftJustified.setPosition(position);
408 void setHasPlusPrefix(const char *position) {
409 HasPlusPrefix = true;
410 HasPlusPrefix.setPosition(position);
412 void setHasSpacePrefix(const char *position) {
413 HasSpacePrefix = true;
414 HasSpacePrefix.setPosition(position);
416 void setHasAlternativeForm(const char *position) {
417 HasAlternativeForm = true;
418 HasAlternativeForm.setPosition(position);
420 void setHasLeadingZeros(const char *position) {
421 HasLeadingZeroes = true;
422 HasLeadingZeroes.setPosition(position);
424 void setUsesPositionalArg() { UsesPositionalArg = true; }
426 // Methods for querying the format specifier.
428 const PrintfConversionSpecifier &getConversionSpecifier() const {
429 return cast<PrintfConversionSpecifier>(CS);
432 void setPrecision(const OptionalAmount &Amt) {
434 Precision.setUsesDotPrefix();
437 const OptionalAmount &getPrecision() const {
441 bool consumesDataArgument() const {
442 return getConversionSpecifier().consumesDataArgument();
445 /// \brief Returns the builtin type that a data argument
446 /// paired with this format specifier should have. This method
447 /// will return null if the format specifier does not have
448 /// a matching data argument or the matching argument matches
449 /// more than one type.
450 ArgTypeResult getArgType(ASTContext &Ctx) const;
452 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
453 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
454 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
455 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
456 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
457 bool usesPositionalArg() const { return UsesPositionalArg; }
459 /// Changes the specifier and length according to a QualType, retaining any
460 /// flags or options. Returns true on success, or false when a conversion
461 /// was not successful.
462 bool fixType(QualType QT);
464 void toString(llvm::raw_ostream &os) const;
466 // Validation methods - to check if any element results in undefined behavior
467 bool hasValidPlusPrefix() const;
468 bool hasValidAlternativeForm() const;
469 bool hasValidLeadingZeros() const;
470 bool hasValidSpacePrefix() const;
471 bool hasValidLeftJustified() const;
473 bool hasValidPrecision() const;
474 bool hasValidFieldWidth() const;
476 } // end analyze_printf namespace
478 //===----------------------------------------------------------------------===//
479 /// Pieces specific to fscanf format strings.
481 namespace analyze_scanf {
483 class ScanfConversionSpecifier :
484 public analyze_format_string::ConversionSpecifier {
486 ScanfConversionSpecifier()
487 : ConversionSpecifier(false, 0, InvalidSpecifier) {}
489 ScanfConversionSpecifier(const char *pos, Kind k)
490 : ConversionSpecifier(false, pos, k) {}
492 void setEndScanList(const char *pos) { EndScanList = pos; }
494 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
495 return !CS->isPrintfKind();
499 using analyze_format_string::LengthModifier;
500 using analyze_format_string::OptionalAmount;
501 using analyze_format_string::OptionalFlag;
503 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
504 OptionalFlag SuppressAssignment; // '*'
507 FormatSpecifier(/* isPrintf = */ false),
508 SuppressAssignment("*") {}
510 void setSuppressAssignment(const char *position) {
511 SuppressAssignment = true;
512 SuppressAssignment.setPosition(position);
515 const OptionalFlag &getSuppressAssignment() const {
516 return SuppressAssignment;
519 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
523 const ScanfConversionSpecifier &getConversionSpecifier() const {
524 return cast<ScanfConversionSpecifier>(CS);
527 bool consumesDataArgument() const {
528 return CS.consumesDataArgument() && !SuppressAssignment;
531 static ScanfSpecifier Parse(const char *beg, const char *end);
534 } // end analyze_scanf namespace
536 //===----------------------------------------------------------------------===//
537 // Parsing and processing of format strings (both fprintf and fscanf).
539 namespace analyze_format_string {
/// Identifies which part of a specifier a position belongs to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
543 class FormatStringHandler {
545 FormatStringHandler() {}
546 virtual ~FormatStringHandler();
548 virtual void HandleNullChar(const char *nullCharacter) {}
550 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
551 PositionContext p) {}
553 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
555 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
556 unsigned specifierLen) {}
558 // Printf-specific handlers.
560 virtual bool HandleInvalidPrintfConversionSpecifier(
561 const analyze_printf::PrintfSpecifier &FS,
562 const char *startSpecifier,
563 unsigned specifierLen) {
567 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
568 const char *startSpecifier,
569 unsigned specifierLen) {
573 // Scanf-specific handlers.
575 virtual bool HandleInvalidScanfConversionSpecifier(
576 const analyze_scanf::ScanfSpecifier &FS,
577 const char *startSpecifier,
578 unsigned specifierLen) {
582 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
583 const char *startSpecifier,
584 unsigned specifierLen) {
588 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
591 bool ParsePrintfString(FormatStringHandler &H,
592 const char *beg, const char *end,
593 bool FormatExtensions);
595 bool ParseScanfString(FormatStringHandler &H,
596 const char *beg, const char *end);
598 } // end analyze_format_string namespace
599 } // end clang namespace