1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
// Shorthand for the SpecifierResult instantiation that carries a
// PrintfSpecifier; used below as the return type of ParsePrintfSpecifier.
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision part of a printf conversion and records it on FS.
// Two parsing helpers are used by the visible statements:
// ParseNonPositionAmount, whose result is stored with FS.setPrecision, and
// ParsePositionAmount, which is given the PrecisionPos position kind.
// NOTE(review): the remaining parameter(s), the conditions that choose between
// the two helpers, and the returned values are not visible in this view --
// confirm against the full definition before relying on this summary.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
// This path dereferences argIndex, so it presumably runs only when a running
// argument index was supplied -- TODO confirm the guarding condition.
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
// Otherwise the amount is parsed as a positional amount; errors are reported
// through H by the helper (defined elsewhere).
43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44 analyze_format_string::PrecisionPos);
// Finds the next printf conversion in the input and parses it piece by piece:
// argument position, flags, field width, precision, length modifier and,
// last, the conversion character. Problems are reported through the handler H.
// FormatExtensions enables the FreeBSD-only conversion characters handled in
// the final switch.
// NOTE(review): some parameters, branch conditions and return statements of
// this definition are not visible in this view; the comments below describe
// only the statements that are.
52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
56 bool FormatExtensions) {
58 using namespace clang::analyze_format_string;
59 using namespace clang::analyze_printf;
62 const char *Start = 0;
// Presumably writes the cursor I back into Beg when this function returns;
// UpdateOnReturn is defined elsewhere -- verify there.
63 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
65 // Look for a '%' character that indicates the start of a format specifier.
66 for ( ; I != E ; ++I) {
69 // Detect spurious null characters, which are likely errors.
74 Start = I++; // Record the start of the format specifier.
79 // No format specifier found?
84 // No more characters left?
85 H.HandleIncompleteSpecifier(Start, E - Start);
// Optional positional-argument prefix.
90 if (ParseArgPosition(H, FS, Start, I, E))
94 // No more characters left?
95 H.HandleIncompleteSpecifier(Start, E - Start);
99 // Look for flags (if any).
// Each recognized flag records the position I at which it was seen.
101 for ( ; I != E; ++I) {
103 default: hasMore = false; break;
105 // FIXME: POSIX specific. Always accept?
106 FS.setHasThousandsGrouping(I);
108 case '-': FS.setIsLeftJustified(I); break;
109 case '+': FS.setHasPlusPrefix(I); break;
110 case ' ': FS.setHasSpacePrefix(I); break;
111 case '#': FS.setHasAlternativeForm(I); break;
112 case '0': FS.setHasLeadingZeros(I); break;
119 // No more characters left?
120 H.HandleIncompleteSpecifier(Start, E - Start);
124 // Look for the field width (if any).
// A null argument-index pointer is passed when the specifier already uses a
// positional argument; otherwise the running argIndex is shared.
125 if (ParseFieldWidth(H, FS, Start, I, E,
126 FS.usesPositionalArg() ? 0 : &argIndex))
130 // No more characters left?
131 H.HandleIncompleteSpecifier(Start, E - Start);
135 // Look for the precision (if any).
139 H.HandleIncompleteSpecifier(Start, E - Start);
// Same argument-index convention as for the field width above.
143 if (ParsePrecision(H, FS, Start, I, E,
144 FS.usesPositionalArg() ? 0 : &argIndex))
148 // No more characters left?
149 H.HandleIncompleteSpecifier(Start, E - Start);
154 // Look for the length modifier.
// The incomplete-specifier report fires only when a length modifier was
// parsed and nothing follows it.
155 if (ParseLengthModifier(FS, I, E) && I == E) {
156 // No more characters left?
157 H.HandleIncompleteSpecifier(Start, E - Start);
162 // Detect spurious null characters, which are likely errors.
167 // Finally, look for the conversion specifier.
// The cursor moves past the conversion character; k stays InvalidSpecifier
// unless one of the cases below assigns it.
168 const char *conversionPosition = I++;
169 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
170 switch (*conversionPosition) {
173 // C99: 7.19.6.1 (section 8).
174 case '%': k = ConversionSpecifier::PercentArg; break;
175 case 'A': k = ConversionSpecifier::AArg; break;
176 case 'E': k = ConversionSpecifier::EArg; break;
177 case 'F': k = ConversionSpecifier::FArg; break;
178 case 'G': k = ConversionSpecifier::GArg; break;
179 case 'X': k = ConversionSpecifier::XArg; break;
180 case 'a': k = ConversionSpecifier::aArg; break;
181 case 'c': k = ConversionSpecifier::cArg; break;
182 case 'd': k = ConversionSpecifier::dArg; break;
183 case 'e': k = ConversionSpecifier::eArg; break;
184 case 'f': k = ConversionSpecifier::fArg; break;
185 case 'g': k = ConversionSpecifier::gArg; break;
186 case 'i': k = ConversionSpecifier::iArg; break;
187 case 'n': k = ConversionSpecifier::nArg; break;
188 case 'o': k = ConversionSpecifier::oArg; break;
189 case 'p': k = ConversionSpecifier::pArg; break;
190 case 's': k = ConversionSpecifier::sArg; break;
191 case 'u': k = ConversionSpecifier::uArg; break;
192 case 'x': k = ConversionSpecifier::xArg; break;
194 case 'C': k = ConversionSpecifier::CArg; break;
195 case 'S': k = ConversionSpecifier::SArg; break;
197 case '@': k = ConversionSpecifier::ObjCObjArg; break;
199 case 'm': k = ConversionSpecifier::PrintErrno; break;
200 // FreeBSD format extensions
// These four are honored only when FormatExtensions is set; otherwise k is
// left as InvalidSpecifier. Note that y is mapped onto the existing iArg kind.
201 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
202 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
203 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
204 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
206 PrintfConversionSpecifier CS(conversionPosition, k);
207 FS.setConversionSpecifier(CS);
// Non-positional conversions that consume data take the next running index.
208 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
// NOTE(review): the statement controlled by this test is not visible here;
// per the case comments above, b and D each involve two arguments, so it
// presumably accounts for the second one -- confirm.
211 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
214 if (k == ConversionSpecifier::InvalidSpecifier) {
215 // Assume the conversion takes one argument.
// NOTE(review): a bool is returned from a function declared to return
// PrintfSpecifierResult, so this relies on a converting constructor declared
// elsewhere -- confirm its meaning (stop vs. continue).
216 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
218 return PrintfSpecifierResult(Start, FS);
// Driver for printf format string analysis: repeatedly calls
// ParsePrintfSpecifier and forwards each parsed specifier to
// H.HandlePrintfSpecifier. FormatExtensions is passed on to enable the FreeBSD
// conversion characters.
// NOTE(review): the loop header, the remaining parameters and the return
// statements are not visible in this view.
221 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
224 bool FormatExtensions) {
// Running index of the next non-positional data argument, shared across all
// specifiers in the string.
226 unsigned argIndex = 0;
228 // Keep looking for a format specifier until we have exhausted the string.
230 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
232 // Did a fail-stop error of any kind occur when parsing the specifier?
233 // If so, don't do any more processing.
234 if (FSR.shouldStop())
236 // Did we exhaust the string or encounter an error that
237 // we can recover from?
240 // We have a format specifier. Pass it to the callback.
241 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
// Sanity check: the cursor is expected to have reached the end of the string.
245 assert(I == E && "Format string not exhausted");
249 //===----------------------------------------------------------------------===//
250 // Methods on ConversionSpecifier.
251 //===----------------------------------------------------------------------===//
// Maps a conversion specifier kind to its spelling in a format string.
// Returns a string literal for every kind listed below except
// InvalidSpecifier, for which NULL is returned; callers must be prepared for
// a null pointer.
252 const char *ConversionSpecifier::toString() const {
254 case dArg: return "d";
255 case iArg: return "i";
256 case oArg: return "o";
257 case uArg: return "u";
258 case xArg: return "x";
259 case XArg: return "X";
260 case fArg: return "f";
261 case FArg: return "F";
262 case eArg: return "e";
263 case EArg: return "E";
264 case gArg: return "g";
265 case GArg: return "G";
266 case aArg: return "a";
267 case AArg: return "A";
268 case cArg: return "c";
269 case sArg: return "s";
270 case pArg: return "p";
271 case nArg: return "n";
272 case PercentArg: return "%";
273 case ScanListArg: return "[";
274 case InvalidSpecifier: return NULL;
276 // MacOS X unicode extensions.
277 case CArg: return "C";
278 case SArg: return "S";
280 // Objective-C specific specifiers.
281 case ObjCObjArg: return "@";
283 // FreeBSD specific specifiers.
284 case bArg: return "b";
285 case DArg: return "D";
286 case rArg: return "r";
288 // GlibC specific specifiers.
289 case PrintErrno: return "m";
294 //===----------------------------------------------------------------------===//
295 // Methods on PrintfSpecifier.
296 //===----------------------------------------------------------------------===//
// Computes the argument type expected by this specifier from its conversion
// kind and length modifier. Yields ArgTypeResult::Invalid() for combinations
// rejected below and a default-constructed ArgTypeResult where the type is
// not yet modeled (see the FIXMEs).
// NOTE(review): the conditions guarding the second and third switch are not
// visible in this view; the notes on them are inferred from the returned
// types and should be confirmed.
298 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
299 const PrintfConversionSpecifier &CS = getConversionSpecifier();
// Conversions that consume no data argument have no argument type.
301 if (!CS.consumesDataArgument())
302 return ArgTypeResult::Invalid();
// The c conversion: int with no length modifier, the wide-int result with l.
304 if (CS.getKind() == ConversionSpecifier::cArg)
305 switch (LM.getKind()) {
306 case LengthModifier::None: return Ctx.IntTy;
307 case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
309 return ArgTypeResult::Invalid();
// Presumably the signed-integer conversions: every concrete type returned
// here is signed, apart from the AsSizeT case noted below.
313 switch (LM.getKind()) {
314 case LengthModifier::AsLongDouble:
315 return ArgTypeResult::Invalid();
316 case LengthModifier::None: return Ctx.IntTy;
317 case LengthModifier::AsChar: return Ctx.SignedCharTy;
318 case LengthModifier::AsShort: return Ctx.ShortTy;
319 case LengthModifier::AsLong: return Ctx.LongTy;
320 case LengthModifier::AsLongLong: return Ctx.LongLongTy;
321 case LengthModifier::AsIntMax:
322 // FIXME: Return unknown for now.
323 return ArgTypeResult();
// NOTE(review): getSizeType() is the unsigned size_t type. If this switch is
// the signed branch, the signed counterpart is wanted here, while the
// unsigned branch below returns unknown for AsSizeT -- the two look swapped;
// confirm.
324 case LengthModifier::AsSizeT: return Ctx.getSizeType();
325 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
// Presumably the unsigned-integer conversions: same table with unsigned types.
329 switch (LM.getKind()) {
330 case LengthModifier::AsLongDouble:
331 return ArgTypeResult::Invalid();
332 case LengthModifier::None: return Ctx.UnsignedIntTy;
333 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
334 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
335 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
336 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
337 case LengthModifier::AsIntMax:
338 // FIXME: Return unknown for now.
339 return ArgTypeResult();
340 case LengthModifier::AsSizeT:
341 // FIXME: How to get the corresponding unsigned
342 // version of size_t?
343 return ArgTypeResult();
344 case LengthModifier::AsPtrDiff:
345 // FIXME: How to get the corresponding unsigned
346 // version of ptrdiff_t?
347 return ArgTypeResult();
// Floating-point conversions: the L modifier selects long double.
350 if (CS.isDoubleArg()) {
351 if (LM.getKind() == LengthModifier::AsLongDouble)
352 return Ctx.LongDoubleTy;
// Remaining conversions are dispatched on the conversion kind itself.
356 switch (CS.getKind()) {
357 case ConversionSpecifier::sArg:
358 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
359 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
360 case ConversionSpecifier::SArg:
361 // FIXME: This appears to be Mac OS X specific.
362 return ArgTypeResult::WCStrTy;
// NOTE(review): the statement belonging to CArg is not visible in this view;
// do not read the next label as an intentional fallthrough without checking.
363 case ConversionSpecifier::CArg:
365 case ConversionSpecifier::pArg:
366 return ArgTypeResult::CPointerTy;
371 // FIXME: Handle other cases.
372 return ArgTypeResult();
// Rewrites this specifier (conversion kind, length modifier and flags) so
// that it matches the argument type QT. Pointers to character types become
// string conversions; other builtin types get a length modifier from the
// first switch and a conversion kind from the if/else chain that follows.
// NOTE(review): the return statements are not visible in this view, so the
// meaning of the bool result is not documented here.
375 bool PrintfSpecifier::fixType(QualType QT) {
376 // Handle strings first (char *, wchar_t *)
377 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
378 CS.setKind(ConversionSpecifier::sArg);
380 // Disable irrelevant flags
381 HasAlternativeForm = 0;
382 HasLeadingZeroes = 0;
384 // Set the long length modifier for wide characters
385 if (QT->getPointeeType()->isWideCharType())
386 LM.setKind(LengthModifier::AsWideChar);
391 // We can only work with builtin types.
392 if (!QT->isBuiltinType())
395 // Everything else should be a base type
396 const BuiltinType *BT = QT->getAs<BuiltinType>();
398 // Set length modifier
399 switch (BT->getKind()) {
400 case BuiltinType::Bool:
401 case BuiltinType::WChar_U:
402 case BuiltinType::WChar_S:
403 case BuiltinType::Char16:
404 case BuiltinType::Char32:
405 case BuiltinType::UInt128:
406 case BuiltinType::Int128:
407 // Integral types which are non-trivial to correct.
410 case BuiltinType::Void:
411 case BuiltinType::NullPtr:
412 case BuiltinType::ObjCId:
413 case BuiltinType::ObjCClass:
414 case BuiltinType::ObjCSel:
415 case BuiltinType::Dependent:
416 case BuiltinType::Overload:
417 case BuiltinType::BoundMember:
418 case BuiltinType::UnknownAny:
419 // Misc other stuff which doesn't make sense here.
// int, unsigned int, float and double need no length modifier.
422 case BuiltinType::UInt:
423 case BuiltinType::Int:
424 case BuiltinType::Float:
425 case BuiltinType::Double:
426 LM.setKind(LengthModifier::None);
429 case BuiltinType::Char_U:
430 case BuiltinType::UChar:
431 case BuiltinType::Char_S:
432 case BuiltinType::SChar:
433 LM.setKind(LengthModifier::AsChar);
436 case BuiltinType::Short:
437 case BuiltinType::UShort:
438 LM.setKind(LengthModifier::AsShort);
441 case BuiltinType::Long:
442 case BuiltinType::ULong:
443 LM.setKind(LengthModifier::AsLong);
446 case BuiltinType::LongLong:
447 case BuiltinType::ULongLong:
448 LM.setKind(LengthModifier::AsLongLong);
451 case BuiltinType::LongDouble:
452 LM.setKind(LengthModifier::AsLongDouble);
456 // Set conversion specifier and disable any flags which do not apply to it.
457 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// NOTE(review): the test below selects the c conversion when QT IS a typedef,
// which contradicts the comment above (typedefs should fall through to the
// integer cases). The isa check looks inverted -- confirm intended behavior.
458 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
459 CS.setKind(ConversionSpecifier::cArg);
460 LM.setKind(LengthModifier::None);
461 Precision.setHowSpecified(OptionalAmount::NotSpecified);
462 HasAlternativeForm = 0;
463 HasLeadingZeroes = 0;
466 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
467 else if (QT->isRealFloatingType()) {
468 CS.setKind(ConversionSpecifier::fArg);
470 else if (QT->isSignedIntegerType()) {
471 CS.setKind(ConversionSpecifier::dArg);
472 HasAlternativeForm = 0;
474 else if (QT->isUnsignedIntegerType()) {
475 // Preserve the original formatting, e.g. 'X', 'o'.
476 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg())
477 CS.setKind(ConversionSpecifier::uArg);
478 HasAlternativeForm = 0;
// Reached only for a type that none of the branches above accepted.
482 assert(0 && "Unexpected type");
// Writes this specifier to os in format-string syntax: positional argument
// index (if used), flags, field width, precision, then the conversion
// specifier.
// NOTE(review): the statements that emit the leading percent sign, the length
// modifier and the conversion character are not visible in this view.
488 void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
489 // Whilst some features have no defined order, we are using the order
490 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
// Positional form: the index is followed by a dollar sign.
494 if (usesPositionalArg()) {
495 os << getPositionalArgIndex() << "$";
// Flags, each emitted only when set.
499 if (IsLeftJustified) os << "-";
500 if (HasPlusPrefix) os << "+";
501 if (HasSpacePrefix) os << " ";
502 if (HasAlternativeForm) os << "#";
503 if (HasLeadingZeroes) os << "0";
505 // Minimum field width
506 FieldWidth.toString(os);
// Precision
508 Precision.toString(os);
511 // Conversion specifier
// Checks whether the plus flag is acceptable for the current conversion kind.
// The kinds listed below are the ones the switch singles out.
// NOTE(review): the early-out test and the return statements are not visible
// in this view; presumably the listed kinds are the accepted ones -- confirm.
515 bool PrintfSpecifier::hasValidPlusPrefix() const {
519 // The plus prefix only makes sense for signed conversions
520 switch (CS.getKind()) {
521 case ConversionSpecifier::dArg:
522 case ConversionSpecifier::iArg:
523 case ConversionSpecifier::fArg:
524 case ConversionSpecifier::FArg:
525 case ConversionSpecifier::eArg:
526 case ConversionSpecifier::EArg:
527 case ConversionSpecifier::gArg:
528 case ConversionSpecifier::GArg:
529 case ConversionSpecifier::aArg:
530 case ConversionSpecifier::AArg:
// rArg is the FreeBSD r extension.
531 case ConversionSpecifier::rArg:
// Checks whether the alternative-form flag is acceptable for the current
// conversion kind. The flag is only examined when HasAlternativeForm is set.
// NOTE(review): the return statements are not visible in this view.
539 bool PrintfSpecifier::hasValidAlternativeForm() const {
540 if (!HasAlternativeForm)
543 // Alternate form flag only valid with the oxXaAeEfFgG conversions
// (the list below additionally names rArg, the FreeBSD r extension).
544 switch (CS.getKind()) {
545 case ConversionSpecifier::oArg:
546 case ConversionSpecifier::xArg:
547 case ConversionSpecifier::XArg:
548 case ConversionSpecifier::aArg:
549 case ConversionSpecifier::AArg:
550 case ConversionSpecifier::eArg:
551 case ConversionSpecifier::EArg:
552 case ConversionSpecifier::fArg:
553 case ConversionSpecifier::FArg:
554 case ConversionSpecifier::gArg:
555 case ConversionSpecifier::GArg:
556 case ConversionSpecifier::rArg:
// Checks whether the leading-zeros flag is acceptable for the current
// conversion kind. The flag is only examined when HasLeadingZeroes is set.
// NOTE(review): the return statements are not visible in this view.
564 bool PrintfSpecifier::hasValidLeadingZeros() const {
565 if (!HasLeadingZeroes)
568 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
569 switch (CS.getKind()) {
570 case ConversionSpecifier::dArg:
571 case ConversionSpecifier::iArg:
572 case ConversionSpecifier::oArg:
573 case ConversionSpecifier::uArg:
574 case ConversionSpecifier::xArg:
575 case ConversionSpecifier::XArg:
576 case ConversionSpecifier::aArg:
577 case ConversionSpecifier::AArg:
578 case ConversionSpecifier::eArg:
579 case ConversionSpecifier::EArg:
580 case ConversionSpecifier::fArg:
581 case ConversionSpecifier::FArg:
582 case ConversionSpecifier::gArg:
583 case ConversionSpecifier::GArg:
// Checks whether the space flag is acceptable for the current conversion
// kind.
// NOTE(review): the early-out test and the return statements are not visible
// in this view. Also, rArg appears in the plus-prefix list but not in the
// visible part of this one -- confirm whether that difference is intended.
591 bool PrintfSpecifier::hasValidSpacePrefix() const {
595 // The space prefix only makes sense for signed conversions
596 switch (CS.getKind()) {
597 case ConversionSpecifier::dArg:
598 case ConversionSpecifier::iArg:
599 case ConversionSpecifier::fArg:
600 case ConversionSpecifier::FArg:
601 case ConversionSpecifier::eArg:
602 case ConversionSpecifier::EArg:
603 case ConversionSpecifier::gArg:
604 case ConversionSpecifier::GArg:
605 case ConversionSpecifier::aArg:
606 case ConversionSpecifier::AArg:
// Checks whether the left-justify flag is acceptable for the current
// conversion kind. The flag is only examined when IsLeftJustified is set.
// NOTE(review): the return statements are not visible in this view.
614 bool PrintfSpecifier::hasValidLeftJustified() const {
615 if (!IsLeftJustified)
618 // The left justified flag is valid for all conversions except n
619 switch (CS.getKind()) {
620 case ConversionSpecifier::nArg:
// Checks whether the thousands-grouping flag is acceptable for the current
// conversion kind. The flag is only examined when HasThousandsGrouping is set.
// The switch singles out the d, i, u, f, F, g and G conversions.
// NOTE(review): the return statements are not visible in this view.
628 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
629 if (!HasThousandsGrouping)
632 switch (CS.getKind()) {
633 case ConversionSpecifier::dArg:
634 case ConversionSpecifier::iArg:
635 case ConversionSpecifier::uArg:
636 case ConversionSpecifier::fArg:
637 case ConversionSpecifier::FArg:
638 case ConversionSpecifier::gArg:
639 case ConversionSpecifier::GArg:
// Checks whether a precision is acceptable for the current conversion kind.
// The switch is only reached when a precision was actually specified.
// NOTE(review): the return statements are not visible in this view.
646 bool PrintfSpecifier::hasValidPrecision() const {
647 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
650 // Precision is only valid with the diouxXaAeEfFgGs conversions
651 switch (CS.getKind()) {
652 case ConversionSpecifier::dArg:
653 case ConversionSpecifier::iArg:
654 case ConversionSpecifier::oArg:
655 case ConversionSpecifier::uArg:
656 case ConversionSpecifier::xArg:
657 case ConversionSpecifier::XArg:
658 case ConversionSpecifier::aArg:
659 case ConversionSpecifier::AArg:
660 case ConversionSpecifier::eArg:
661 case ConversionSpecifier::EArg:
662 case ConversionSpecifier::fArg:
663 case ConversionSpecifier::FArg:
664 case ConversionSpecifier::gArg:
665 case ConversionSpecifier::GArg:
666 case ConversionSpecifier::sArg:
// Checks whether a field width is acceptable for the current conversion kind.
// The switch is only reached when a field width was actually specified.
// NOTE(review): this definition continues past the visible lines; its return
// statements are not shown here.
673 bool PrintfSpecifier::hasValidFieldWidth() const {
674 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
677 // The field width is valid for all conversions except n
678 switch (CS.getKind()) {
679 case ConversionSpecifier::nArg: