1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
// The structure of format strings for fprintf is described in C99 7.19.6.1.
// The structure of format strings for fscanf is described in C99 7.19.6.2.
17 //===----------------------------------------------------------------------===//
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
22 #include "clang/AST/CanonicalType.h"
26 //===----------------------------------------------------------------------===//
27 /// Common components of both fprintf and fscanf format strings.
28 namespace analyze_format_string {
30 /// Class representing optional flags with location and representation
34 OptionalFlag(const char *Representation)
35 : representation(Representation), flag(false) {}
36 bool isSet() { return flag; }
37 void set() { flag = true; }
38 void clear() { flag = false; }
39 void setPosition(const char *position) {
41 this->position = position;
43 const char *getPosition() const {
47 const char *toString() const { return representation; }
// Overloaded operators for bool-like qualities.
50 operator bool() const { return flag; }
51 OptionalFlag& operator=(const bool &rhs) {
53 return *this; // Return a reference to myself.
56 const char *representation;
61 /// Represents the length modifier in a format string in scanf/printf.
62 class LengthModifier {
69 AsLongLong, // 'll', 'q' (BSD, deprecated)
74 AsWideChar = AsLong // for '%ls', only makes sense for printf
78 : Position(0), kind(None) {}
79 LengthModifier(const char *pos, Kind k)
80 : Position(pos), kind(k) {}
82 const char *getStart() const {
86 unsigned getLength() const {
98 Kind getKind() const { return kind; }
99 void setKind(Kind k) { kind = k; }
101 const char *toString() const;
104 const char *Position;
108 class ConversionSpecifier {
111 InvalidSpecifier = 0,
112 // C99 conversion specifiers.
116 IntArgBeg = cArg, IntArgEnd = iArg,
122 UIntArgBeg = oArg, UIntArgEnd = XArg,
132 DoubleArgBeg = fArg, DoubleArgEnd = AArg,
141 // ** Printf-specific **
143 // Objective-C specific specifiers.
145 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
147 // FreeBSD specific specifiers
152 // GlibC specific specifiers.
155 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
157 // ** Scanf-specific **
159 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
162 ConversionSpecifier(bool isPrintf)
163 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
165 ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
166 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
168 const char *getStart() const {
172 llvm::StringRef getCharacters() const {
173 return llvm::StringRef(getStart(), getLength());
176 bool consumesDataArgument() const {
187 Kind getKind() const { return kind; }
188 void setKind(Kind k) { kind = k; }
189 unsigned getLength() const {
190 return EndScanList ? EndScanList - Position : 1;
193 const char *toString() const;
195 bool isPrintfKind() const { return IsPrintf; }
199 const char *Position;
200 const char *EndScanList;
204 class ArgTypeResult {
206 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
207 CStrTy, WCStrTy, WIntTy };
211 ArgTypeResult(bool) : K(InvalidTy) {}
213 ArgTypeResult(Kind k = UnknownTy) : K(k) {}
214 ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
215 ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
217 static ArgTypeResult Invalid() { return ArgTypeResult(true); }
219 bool isValid() const { return K != InvalidTy; }
221 const QualType *getSpecificType() const {
222 return K == SpecificTy ? &T : 0;
225 bool matchesType(ASTContext &C, QualType argTy) const;
227 bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
229 QualType getRepresentativeType(ASTContext &C) const;
232 class OptionalAmount {
234 enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
236 OptionalAmount(HowSpecified howSpecified,
238 const char *amountStart,
239 unsigned amountLength,
240 bool usesPositionalArg)
241 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
242 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
244 OptionalAmount(bool valid = true)
245 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
246 UsesPositionalArg(0), UsesDotPrefix(0) {}
248 bool isInvalid() const {
249 return hs == Invalid;
252 HowSpecified getHowSpecified() const { return hs; }
253 void setHowSpecified(HowSpecified h) { hs = h; }
255 bool hasDataArgument() const { return hs == Arg; }
257 unsigned getArgIndex() const {
258 assert(hasDataArgument());
262 unsigned getConstantAmount() const {
263 assert(hs == Constant);
267 const char *getStart() const {
268 // We include the . character if it is given.
269 return start - UsesDotPrefix;
272 unsigned getConstantLength() const {
273 assert(hs == Constant);
274 return length + UsesDotPrefix;
277 ArgTypeResult getArgType(ASTContext &Ctx) const;
279 void toString(llvm::raw_ostream &os) const;
281 bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
282 unsigned getPositionalArgIndex() const {
283 assert(hasDataArgument());
287 bool usesDotPrefix() const { return UsesDotPrefix; }
288 void setUsesDotPrefix() { UsesDotPrefix = true; }
295 bool UsesPositionalArg : 1;
300 class FormatSpecifier {
303 OptionalAmount FieldWidth;
304 ConversionSpecifier CS;
305 /// Positional arguments, an IEEE extension:
306 /// IEEE Std 1003.1, 2004 Edition
307 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
308 bool UsesPositionalArg;
311 FormatSpecifier(bool isPrintf)
312 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
314 void setLengthModifier(LengthModifier lm) {
318 void setUsesPositionalArg() { UsesPositionalArg = true; }
320 void setArgIndex(unsigned i) {
324 unsigned getArgIndex() const {
328 unsigned getPositionalArgIndex() const {
332 const LengthModifier &getLengthModifier() const {
336 const OptionalAmount &getFieldWidth() const {
340 void setFieldWidth(const OptionalAmount &Amt) {
344 bool usesPositionalArg() const { return UsesPositionalArg; }
346 bool hasValidLengthModifier() const;
349 } // end analyze_format_string namespace
351 //===----------------------------------------------------------------------===//
352 /// Pieces specific to fprintf format strings.
354 namespace analyze_printf {
356 class PrintfConversionSpecifier :
357 public analyze_format_string::ConversionSpecifier {
359 PrintfConversionSpecifier()
360 : ConversionSpecifier(true, 0, InvalidSpecifier) {}
362 PrintfConversionSpecifier(const char *pos, Kind k)
363 : ConversionSpecifier(true, pos, k) {}
365 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
366 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
367 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
368 bool isDoubleArg() const { return kind >= DoubleArgBeg &&
369 kind <= DoubleArgBeg; }
370 unsigned getLength() const {
// Conversion specifiers are currently represented only by
// single characters, but we stay flexible.
376 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
377 return CS->isPrintfKind();
381 using analyze_format_string::ArgTypeResult;
382 using analyze_format_string::LengthModifier;
383 using analyze_format_string::OptionalAmount;
384 using analyze_format_string::OptionalFlag;
386 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
387 OptionalFlag IsLeftJustified; // '-'
388 OptionalFlag HasPlusPrefix; // '+'
389 OptionalFlag HasSpacePrefix; // ' '
390 OptionalFlag HasAlternativeForm; // '#'
391 OptionalFlag HasLeadingZeroes; // '0'
392 OptionalAmount Precision;
395 FormatSpecifier(/* isPrintf = */ true),
396 IsLeftJustified("-"), HasPlusPrefix("+"), HasSpacePrefix(" "),
397 HasAlternativeForm("#"), HasLeadingZeroes("0") {}
399 static PrintfSpecifier Parse(const char *beg, const char *end);
401 // Methods for incrementally constructing the PrintfSpecifier.
402 void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
405 void setIsLeftJustified(const char *position) {
406 IsLeftJustified = true;
407 IsLeftJustified.setPosition(position);
409 void setHasPlusPrefix(const char *position) {
410 HasPlusPrefix = true;
411 HasPlusPrefix.setPosition(position);
413 void setHasSpacePrefix(const char *position) {
414 HasSpacePrefix = true;
415 HasSpacePrefix.setPosition(position);
417 void setHasAlternativeForm(const char *position) {
418 HasAlternativeForm = true;
419 HasAlternativeForm.setPosition(position);
421 void setHasLeadingZeros(const char *position) {
422 HasLeadingZeroes = true;
423 HasLeadingZeroes.setPosition(position);
425 void setUsesPositionalArg() { UsesPositionalArg = true; }
427 // Methods for querying the format specifier.
429 const PrintfConversionSpecifier &getConversionSpecifier() const {
430 return cast<PrintfConversionSpecifier>(CS);
433 void setPrecision(const OptionalAmount &Amt) {
435 Precision.setUsesDotPrefix();
438 const OptionalAmount &getPrecision() const {
442 bool consumesDataArgument() const {
443 return getConversionSpecifier().consumesDataArgument();
446 /// \brief Returns the builtin type that a data argument
447 /// paired with this format specifier should have. This method
448 /// will return null if the format specifier does not have
449 /// a matching data argument or the matching argument matches
450 /// more than one type.
451 ArgTypeResult getArgType(ASTContext &Ctx) const;
453 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
454 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
455 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
456 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
457 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
458 bool usesPositionalArg() const { return UsesPositionalArg; }
460 /// Changes the specifier and length according to a QualType, retaining any
461 /// flags or options. Returns true on success, or false when a conversion
462 /// was not successful.
463 bool fixType(QualType QT);
465 void toString(llvm::raw_ostream &os) const;
467 // Validation methods - to check if any element results in undefined behavior
468 bool hasValidPlusPrefix() const;
469 bool hasValidAlternativeForm() const;
470 bool hasValidLeadingZeros() const;
471 bool hasValidSpacePrefix() const;
472 bool hasValidLeftJustified() const;
474 bool hasValidPrecision() const;
475 bool hasValidFieldWidth() const;
477 } // end analyze_printf namespace
479 //===----------------------------------------------------------------------===//
480 /// Pieces specific to fscanf format strings.
482 namespace analyze_scanf {
484 class ScanfConversionSpecifier :
485 public analyze_format_string::ConversionSpecifier {
487 ScanfConversionSpecifier()
488 : ConversionSpecifier(false, 0, InvalidSpecifier) {}
490 ScanfConversionSpecifier(const char *pos, Kind k)
491 : ConversionSpecifier(false, pos, k) {}
493 void setEndScanList(const char *pos) { EndScanList = pos; }
495 static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
496 return !CS->isPrintfKind();
500 using analyze_format_string::LengthModifier;
501 using analyze_format_string::OptionalAmount;
502 using analyze_format_string::OptionalFlag;
504 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
505 OptionalFlag SuppressAssignment; // '*'
508 FormatSpecifier(/* isPrintf = */ false),
509 SuppressAssignment("*") {}
511 void setSuppressAssignment(const char *position) {
512 SuppressAssignment = true;
513 SuppressAssignment.setPosition(position);
516 const OptionalFlag &getSuppressAssignment() const {
517 return SuppressAssignment;
520 void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
524 const ScanfConversionSpecifier &getConversionSpecifier() const {
525 return cast<ScanfConversionSpecifier>(CS);
528 bool consumesDataArgument() const {
529 return CS.consumesDataArgument() && !SuppressAssignment;
532 static ScanfSpecifier Parse(const char *beg, const char *end);
535 } // end analyze_scanf namespace
537 //===----------------------------------------------------------------------===//
538 // Parsing and processing of format strings (both fprintf and fscanf).
540 namespace analyze_format_string {
/// Identifies which part of a specifier a position refers to; passed to
/// FormatStringHandler::HandleInvalidPosition.
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
544 class FormatStringHandler {
546 FormatStringHandler() {}
547 virtual ~FormatStringHandler();
549 virtual void HandleNullChar(const char *nullCharacter) {}
551 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
552 PositionContext p) {}
554 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
556 virtual void HandleIncompleteSpecifier(const char *startSpecifier,
557 unsigned specifierLen) {}
559 // Printf-specific handlers.
561 virtual bool HandleInvalidPrintfConversionSpecifier(
562 const analyze_printf::PrintfSpecifier &FS,
563 const char *startSpecifier,
564 unsigned specifierLen) {
568 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
569 const char *startSpecifier,
570 unsigned specifierLen) {
574 // Scanf-specific handlers.
576 virtual bool HandleInvalidScanfConversionSpecifier(
577 const analyze_scanf::ScanfSpecifier &FS,
578 const char *startSpecifier,
579 unsigned specifierLen) {
583 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
584 const char *startSpecifier,
585 unsigned specifierLen) {
589 virtual void HandleIncompleteScanList(const char *start, const char *end) {}
592 bool ParsePrintfString(FormatStringHandler &H,
593 const char *beg, const char *end,
594 bool FormatExtensions);
596 bool ParseScanfString(FormatStringHandler &H,
597 const char *beg, const char *end);
599 } // end analyze_format_string namespace
600 } // end clang namespace