1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
// Shorthand for SpecifierResult instantiated with PrintfSpecifier; this is the
// type returned by ParsePrintfSpecifier() in this file.
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision part of a printf conversion specification.
//
// Two parsing paths are visible:
//  - a non-positional amount, parsed by ParseNonPositionAmount() using the
//    caller-supplied *argIndex and stored on FS via setPrecision();
//  - a positional amount, parsed by ParsePositionAmount() with the
//    PrecisionPos position kind.
//
// NOTE(review): the condition choosing between the two paths, the handling of
// Amt, and the boolean return values are not visible here -- confirm against
// the complete function before relying on this description.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
43 analyze_format_string::PrecisionPos);
// Parses a single printf conversion specification and returns it as a
// PrintfSpecifierResult.
//
// The visible stages, in order, are:
//   1. scan forward for the '%' that starts a specifier;
//   2. parse an optional argument position (ParseArgPosition);
//   3. parse flags: '-', '+', ' ', '#', '0' and thousands grouping;
//   4. parse the field width (ParseFieldWidth);
//   5. parse the precision (ParsePrecision);
//   6. parse the length modifier (ParseLengthModifier);
//   7. map the conversion character to a ConversionSpecifier::Kind.
// After most stages the handler H is told about a truncated specifier via
// HandleIncompleteSpecifier(Start, E - Start).
//
// FormatExtensions enables the 'b', 'r', 'y' and 'D' conversion characters;
// when it is false those characters leave the kind as InvalidSpecifier.
//
// NOTE(review): several statements of this function (including the early
// returns following each HandleIncompleteSpecifier call) are not visible
// here -- confirm control flow against the complete function.
51 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
55 bool FormatExtensions) {
57 using namespace clang::analyze_format_string;
58 using namespace clang::analyze_printf;
61 const char *Start = 0;
// UpdateBeg is constructed from Beg and the cursor I.
// NOTE(review): presumably it writes I back into Beg when this function
// returns -- confirm against the definition of UpdateOnReturn.
62 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64 // Look for a '%' character that indicates the start of a format specifier.
65 for ( ; I != E ; ++I) {
68 // Detect spurious null characters, which are likely errors.
73 Start = I++; // Record the start of the format specifier.
78 // No format specifier found?
83 // No more characters left?
84 H.HandleIncompleteSpecifier(Start, E - Start);
89 if (ParseArgPosition(H, FS, Start, I, E))
93 // No more characters left?
94 H.HandleIncompleteSpecifier(Start, E - Start);
98 // Look for flags (if any).
100 for ( ; I != E; ++I) {
102 default: hasMore = false; break;
104 // FIXME: POSIX specific. Always accept?
105 FS.setHasThousandsGrouping(I);
107 case '-': FS.setIsLeftJustified(I); break;
108 case '+': FS.setHasPlusPrefix(I); break;
109 case ' ': FS.setHasSpacePrefix(I); break;
110 case '#': FS.setHasAlternativeForm(I); break;
111 case '0': FS.setHasLeadingZeros(I); break;
118 // No more characters left?
119 H.HandleIncompleteSpecifier(Start, E - Start);
123 // Look for the field width (if any).
// When the specifier uses positional arguments a null argIndex pointer is
// passed; otherwise the running argument index is passed by address.
124 if (ParseFieldWidth(H, FS, Start, I, E,
125 FS.usesPositionalArg() ? 0 : &argIndex))
129 // No more characters left?
130 H.HandleIncompleteSpecifier(Start, E - Start);
134 // Look for the precision (if any).
138 H.HandleIncompleteSpecifier(Start, E - Start);
// Same argIndex convention as for the field width above.
142 if (ParsePrecision(H, FS, Start, I, E,
143 FS.usesPositionalArg() ? 0 : &argIndex))
147 // No more characters left?
148 H.HandleIncompleteSpecifier(Start, E - Start);
153 // Look for the length modifier.
154 if (ParseLengthModifier(FS, I, E) && I == E) {
155 // No more characters left?
156 H.HandleIncompleteSpecifier(Start, E - Start);
161 // Detect spurious null characters, which are likely errors.
166 // Finally, look for the conversion specifier.
// conversionPosition points at the conversion character; I is advanced past
// it. k stays InvalidSpecifier unless one of the cases below assigns it.
167 const char *conversionPosition = I++;
168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
169 switch (*conversionPosition) {
172 // C99: 7.19.6.1 (section 8).
173 case '%': k = ConversionSpecifier::PercentArg; break;
174 case 'A': k = ConversionSpecifier::AArg; break;
175 case 'E': k = ConversionSpecifier::EArg; break;
176 case 'F': k = ConversionSpecifier::FArg; break;
177 case 'G': k = ConversionSpecifier::GArg; break;
178 case 'X': k = ConversionSpecifier::XArg; break;
179 case 'a': k = ConversionSpecifier::aArg; break;
180 case 'c': k = ConversionSpecifier::cArg; break;
181 case 'd': k = ConversionSpecifier::dArg; break;
182 case 'e': k = ConversionSpecifier::eArg; break;
183 case 'f': k = ConversionSpecifier::fArg; break;
184 case 'g': k = ConversionSpecifier::gArg; break;
185 case 'i': k = ConversionSpecifier::iArg; break;
186 case 'n': k = ConversionSpecifier::nArg; break;
187 case 'o': k = ConversionSpecifier::oArg; break;
188 case 'p': k = ConversionSpecifier::pArg; break;
189 case 's': k = ConversionSpecifier::sArg; break;
190 case 'u': k = ConversionSpecifier::uArg; break;
191 case 'x': k = ConversionSpecifier::xArg; break;
193 case 'C': k = ConversionSpecifier::CArg; break;
194 case 'S': k = ConversionSpecifier::SArg; break;
196 case '@': k = ConversionSpecifier::ObjCObjArg; break;
198 case 'm': k = ConversionSpecifier::PrintErrno; break;
199 // FreeBSD format extensions
// Each of these is honoured only when FormatExtensions is true. Note that
// 'y' has no kind of its own: it is mapped to iArg, the same kind as 'i'.
200 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
201 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
202 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
203 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
205 PrintfConversionSpecifier CS(conversionPosition, k);
206 FS.setConversionSpecifier(CS);
// A non-positional specifier that consumes a data argument is assigned the
// current sequential argument index, which is then incremented.
207 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
208 FS.setArgIndex(argIndex++);
// NOTE(review): the statement guarded by this condition is not visible here;
// presumably it accounts for the second argument that the 'b' and 'D'
// conversions take (see the comments on their cases above) -- confirm.
210 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
213 if (k == ConversionSpecifier::InvalidSpecifier) {
214 // Assume the conversion takes one argument.
// The handler's boolean result is negated before being returned.
215 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
217 return PrintfSpecifierResult(Start, FS);
// Walks a printf format string, repeatedly calling ParsePrintfSpecifier() and
// passing each successfully parsed specifier to H.HandlePrintfSpecifier().
//
// Sequential argument numbering starts at 0 and is shared across all
// specifiers through argIndex. FormatExtensions is the last parameter of this
// function.
// NOTE(review): the argument list of the ParsePrintfSpecifier() call is cut
// off here; presumably FormatExtensions is forwarded to it -- confirm.
//
// NOTE(review): the loop header, the actions taken when FSR.shouldStop() is
// true or when HandlePrintfSpecifier() returns false, and the final return
// value are not visible here -- confirm against the complete function.
220 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
223 bool FormatExtensions) {
225 unsigned argIndex = 0;
227 // Keep looking for a format specifier until we have exhausted the string.
229 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
231 // Did a fail-stop error of any kind occur when parsing the specifier?
232 // If so, don't do any more processing.
233 if (FSR.shouldStop())
235 // Did we exhaust the string or encounter an error that
236 // we can recover from?
239 // We have a format specifier. Pass it to the callback.
240 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
// Sanity check: the cursor is expected to have reached the end of the string
// by this point.
244 assert(I == E && "Format string not exhausted");
248 //===----------------------------------------------------------------------===//
249 // Methods on ConversionSpecifier.
250 //===----------------------------------------------------------------------===//
// Returns the textual spelling of a conversion specifier kind, e.g. "d" for
// dArg or "%" for PercentArg. InvalidSpecifier yields NULL rather than a
// string, so callers must be prepared for a null result.
// NOTE(review): the switch header is not visible here; presumably it switches
// on this specifier's own kind -- confirm.
251 const char *ConversionSpecifier::toString() const {
253 case dArg: return "d";
254 case iArg: return "i";
255 case oArg: return "o";
256 case uArg: return "u";
257 case xArg: return "x";
258 case XArg: return "X";
259 case fArg: return "f";
260 case FArg: return "F";
261 case eArg: return "e";
262 case EArg: return "E";
263 case gArg: return "g";
264 case GArg: return "G";
265 case aArg: return "a";
266 case AArg: return "A";
267 case cArg: return "c";
268 case sArg: return "s";
269 case pArg: return "p";
270 case nArg: return "n";
271 case PercentArg: return "%";
272 case ScanListArg: return "[";
273 case InvalidSpecifier: return NULL;
275 // MacOS X unicode extensions.
276 case CArg: return "C";
277 case SArg: return "S";
279 // Objective-C specific specifiers.
280 case ObjCObjArg: return "@";
282 // FreeBSD specific specifiers.
283 case bArg: return "b";
284 case DArg: return "D";
285 case rArg: return "r";
287 // GlibC specific specifiers.
288 case PrintErrno: return "m";
293 //===----------------------------------------------------------------------===//
294 // Methods on PrintfSpecifier.
295 //===----------------------------------------------------------------------===//
// Computes the argument type this specifier expects, based on its conversion
// specifier and its length modifier LM, using types obtained from Ctx.
//
// Visible outcomes:
//  - ArgTypeResult::Invalid() when the conversion consumes no data argument,
//    or when the length modifier is not acceptable for the conversion;
//  - a default-constructed ArgTypeResult() for combinations marked FIXME and
//    as the final fallback.
297 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
298 const PrintfConversionSpecifier &CS = getConversionSpecifier();
300 if (!CS.consumesDataArgument())
301 return ArgTypeResult::Invalid();
// %c: int with no length modifier, wint_t with the long modifier.
303 if (CS.getKind() == ConversionSpecifier::cArg)
304 switch (LM.getKind()) {
305 case LengthModifier::None: return Ctx.IntTy;
306 case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
308 return ArgTypeResult::Invalid();
// This switch returns signed integer types from Ctx.
// NOTE(review): the condition guarding it is not visible here -- confirm
// which conversions reach it.
312 switch (LM.getKind()) {
313 case LengthModifier::AsLongDouble:
314 return ArgTypeResult::Invalid();
315 case LengthModifier::None: return Ctx.IntTy;
316 case LengthModifier::AsChar: return Ctx.SignedCharTy;
317 case LengthModifier::AsShort: return Ctx.ShortTy;
318 case LengthModifier::AsLong: return Ctx.LongTy;
319 case LengthModifier::AsLongLong: return Ctx.LongLongTy;
320 case LengthModifier::AsIntMax:
321 // FIXME: Return unknown for now.
322 return ArgTypeResult();
323 case LengthModifier::AsSizeT: return Ctx.getSizeType();
324 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
// This switch returns unsigned integer types from Ctx.
// NOTE(review): the condition guarding it is not visible here -- confirm
// which conversions reach it.
328 switch (LM.getKind()) {
329 case LengthModifier::AsLongDouble:
330 return ArgTypeResult::Invalid();
331 case LengthModifier::None: return Ctx.UnsignedIntTy;
332 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
333 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
334 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
335 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
336 case LengthModifier::AsIntMax:
337 // FIXME: Return unknown for now.
338 return ArgTypeResult();
339 case LengthModifier::AsSizeT:
340 // FIXME: How to get the corresponding unsigned
341 // version of size_t?
342 return ArgTypeResult();
343 case LengthModifier::AsPtrDiff:
344 // FIXME: How to get the corresponding unsigned
345 // version of ptrdiff_t?
346 return ArgTypeResult();
// Floating-point conversions: long double when the long-double length
// modifier is present.
349 if (CS.isDoubleArg()) {
350 if (LM.getKind() == LengthModifier::AsLongDouble)
351 return Ctx.LongDoubleTy;
// Remaining conversions are dispatched on the conversion kind itself.
355 switch (CS.getKind()) {
356 case ConversionSpecifier::sArg:
357 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
358 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
359 case ConversionSpecifier::SArg:
360 // FIXME: This appears to be Mac OS X specific.
361 return ArgTypeResult::WCStrTy;
362 case ConversionSpecifier::CArg:
364 case ConversionSpecifier::pArg:
365 return ArgTypeResult::CPointerTy;
370 // FIXME: Handle other cases.
371 return ArgTypeResult();
// Rewrites this specifier in place (conversion kind, length modifier, flags
// and precision) so that it matches the type QT.
//
// Visible steps:
//  1. A pointer to any character type becomes an 's' conversion, with the
//     alternative-form and leading-zero flags cleared and, for wide
//     characters, the wide-char length modifier set.
//  2. Non-builtin types are not handled.
//  3. The length modifier is chosen from the builtin type's kind.
//  4. The conversion kind is chosen from the type's category (character,
//     floating point, signed integer, unsigned integer).
//
// NOTE(review): the return statements are not visible here, so which cases
// report success or failure must be confirmed against the complete function.
374 bool PrintfSpecifier::fixType(QualType QT) {
375 // Handle strings first (char *, wchar_t *)
376 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
377 CS.setKind(ConversionSpecifier::sArg);
379 // Disable irrelevant flags
380 HasAlternativeForm = 0;
381 HasLeadingZeroes = 0;
383 // Set the long length modifier for wide characters
384 if (QT->getPointeeType()->isWideCharType())
385 LM.setKind(LengthModifier::AsWideChar);
390 // We can only work with builtin types.
391 if (!QT->isBuiltinType())
394 // Everything else should be a base type
395 const BuiltinType *BT = QT->getAs<BuiltinType>();
397 // Set length modifier
398 switch (BT->getKind()) {
399 case BuiltinType::Bool:
400 case BuiltinType::WChar_U:
401 case BuiltinType::WChar_S:
402 case BuiltinType::Char16:
403 case BuiltinType::Char32:
404 case BuiltinType::UInt128:
405 case BuiltinType::Int128:
406 case BuiltinType::Half:
407 // Integral types which are non-trivial to correct.
410 case BuiltinType::Void:
411 case BuiltinType::NullPtr:
412 case BuiltinType::ObjCId:
413 case BuiltinType::ObjCClass:
414 case BuiltinType::ObjCSel:
415 case BuiltinType::Dependent:
416 case BuiltinType::Overload:
417 case BuiltinType::BoundMember:
418 case BuiltinType::UnknownAny:
419 // Misc other stuff which doesn't make sense here.
// int, unsigned int, float and double take no length modifier.
422 case BuiltinType::UInt:
423 case BuiltinType::Int:
424 case BuiltinType::Float:
425 case BuiltinType::Double:
426 LM.setKind(LengthModifier::None);
429 case BuiltinType::Char_U:
430 case BuiltinType::UChar:
431 case BuiltinType::Char_S:
432 case BuiltinType::SChar:
433 LM.setKind(LengthModifier::AsChar);
436 case BuiltinType::Short:
437 case BuiltinType::UShort:
438 LM.setKind(LengthModifier::AsShort);
441 case BuiltinType::Long:
442 case BuiltinType::ULong:
443 LM.setKind(LengthModifier::AsLong);
446 case BuiltinType::LongLong:
447 case BuiltinType::ULongLong:
448 LM.setKind(LengthModifier::AsLongLong);
451 case BuiltinType::LongDouble:
452 LM.setKind(LengthModifier::AsLongDouble);
456 // Set conversion specifier and disable any flags which do not apply to it.
457 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// NOTE(review): as written, this condition selects 'c' only when QT IS a
// TypedefType, which appears to be the opposite of what the comment above
// describes (typedefs falling through to the integer cases). Confirm whether
// the isa<TypedefType> test is meant to be negated.
458 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
459 CS.setKind(ConversionSpecifier::cArg);
460 LM.setKind(LengthModifier::None);
461 Precision.setHowSpecified(OptionalAmount::NotSpecified);
462 HasAlternativeForm = 0;
463 HasLeadingZeroes = 0;
466 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
467 else if (QT->isRealFloatingType()) {
468 CS.setKind(ConversionSpecifier::fArg);
470 else if (QT->isSignedIntegerType()) {
471 CS.setKind(ConversionSpecifier::dArg);
472 HasAlternativeForm = 0;
474 else if (QT->isUnsignedIntegerType()) {
475 // Preserve the original formatting, e.g. 'X', 'o'.
// Only fall back to 'u' when the existing conversion is not already an
// unsigned-integer conversion.
476 if (!cast<PrintfConversionSpecifier>(CS).isUIntArg())
477 CS.setKind(ConversionSpecifier::uArg);
478 HasAlternativeForm = 0;
481 llvm_unreachable("Unexpected type");
// Writes this specifier to os.
//
// The visible output order is: the positional argument index followed by "$"
// (only when a positional argument is used), then the flags in the order
// "-", "+", " ", "#", "0", then the field width, then the precision.
// NOTE(review): the statements that emit the leading '%', the length modifier
// and the conversion specifier are not visible here -- confirm.
487 void PrintfSpecifier::toString(raw_ostream &os) const {
488 // Whilst some features have no defined order, we are using the order
489 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
493 if (usesPositionalArg()) {
494 os << getPositionalArgIndex() << "$";
498 if (IsLeftJustified) os << "-";
499 if (HasPlusPrefix) os << "+";
500 if (HasSpacePrefix) os << " ";
501 if (HasAlternativeForm) os << "#";
502 if (HasLeadingZeroes) os << "0";
504 // Minimum field width
505 FieldWidth.toString(os);
507 Precision.toString(os);
510 // Conversion specifier
// Checks whether the '+' flag makes sense for the current conversion.
// The kinds enumerated below are d, i, f, F, e, E, g, G, a, A and the
// FreeBSD-specific r.
// NOTE(review): the early-out for an unset flag and the return values for
// listed and unlisted kinds are not visible here; presumably the listed
// kinds are the accepted ones -- confirm.
514 bool PrintfSpecifier::hasValidPlusPrefix() const {
518 // The plus prefix only makes sense for signed conversions
519 switch (CS.getKind()) {
520 case ConversionSpecifier::dArg:
521 case ConversionSpecifier::iArg:
522 case ConversionSpecifier::fArg:
523 case ConversionSpecifier::FArg:
524 case ConversionSpecifier::eArg:
525 case ConversionSpecifier::EArg:
526 case ConversionSpecifier::gArg:
527 case ConversionSpecifier::GArg:
528 case ConversionSpecifier::aArg:
529 case ConversionSpecifier::AArg:
530 case ConversionSpecifier::rArg:
// Checks whether the '#' (alternative form) flag makes sense for the current
// conversion. Only consulted further when HasAlternativeForm is set.
// In addition to the o, x, X, a, A, e, E, f, F, g, G kinds named in the
// comment below, the switch also lists the FreeBSD-specific r kind.
// NOTE(review): the return values are not visible here; presumably the
// listed kinds are the accepted ones -- confirm.
538 bool PrintfSpecifier::hasValidAlternativeForm() const {
539 if (!HasAlternativeForm)
542 // Alternate form flag only valid with the oxXaAeEfFgG conversions
543 switch (CS.getKind()) {
544 case ConversionSpecifier::oArg:
545 case ConversionSpecifier::xArg:
546 case ConversionSpecifier::XArg:
547 case ConversionSpecifier::aArg:
548 case ConversionSpecifier::AArg:
549 case ConversionSpecifier::eArg:
550 case ConversionSpecifier::EArg:
551 case ConversionSpecifier::fArg:
552 case ConversionSpecifier::FArg:
553 case ConversionSpecifier::gArg:
554 case ConversionSpecifier::GArg:
555 case ConversionSpecifier::rArg:
// Checks whether the '0' (leading zeroes) flag makes sense for the current
// conversion. Only consulted further when HasLeadingZeroes is set.
// The kinds enumerated below are d, i, o, u, x, X, a, A, e, E, f, F, g, G.
// NOTE(review): the return values are not visible here; presumably the
// listed kinds are the accepted ones -- confirm.
563 bool PrintfSpecifier::hasValidLeadingZeros() const {
564 if (!HasLeadingZeroes)
567 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
568 switch (CS.getKind()) {
569 case ConversionSpecifier::dArg:
570 case ConversionSpecifier::iArg:
571 case ConversionSpecifier::oArg:
572 case ConversionSpecifier::uArg:
573 case ConversionSpecifier::xArg:
574 case ConversionSpecifier::XArg:
575 case ConversionSpecifier::aArg:
576 case ConversionSpecifier::AArg:
577 case ConversionSpecifier::eArg:
578 case ConversionSpecifier::EArg:
579 case ConversionSpecifier::fArg:
580 case ConversionSpecifier::FArg:
581 case ConversionSpecifier::gArg:
582 case ConversionSpecifier::GArg:
// Checks whether the ' ' (space) flag makes sense for the current conversion.
// The kinds enumerated below are d, i, f, F, e, E, g, G, a and A.
// NOTE(review): hasValidPlusPrefix() additionally lists rArg while this
// function does not -- confirm whether the difference is intentional.
// NOTE(review): the early-out for an unset flag and the return values are
// not visible here -- confirm.
590 bool PrintfSpecifier::hasValidSpacePrefix() const {
594 // The space prefix only makes sense for signed conversions
595 switch (CS.getKind()) {
596 case ConversionSpecifier::dArg:
597 case ConversionSpecifier::iArg:
598 case ConversionSpecifier::fArg:
599 case ConversionSpecifier::FArg:
600 case ConversionSpecifier::eArg:
601 case ConversionSpecifier::EArg:
602 case ConversionSpecifier::gArg:
603 case ConversionSpecifier::GArg:
604 case ConversionSpecifier::aArg:
605 case ConversionSpecifier::AArg:
// Checks whether the '-' (left-justify) flag makes sense for the current
// conversion. Only consulted further when IsLeftJustified is set.
// nArg is the only kind singled out by the switch.
// NOTE(review): the return values are not visible here; per the comment
// below, nArg is presumably the one rejected kind -- confirm.
613 bool PrintfSpecifier::hasValidLeftJustified() const {
614 if (!IsLeftJustified)
617 // The left justified flag is valid for all conversions except n
618 switch (CS.getKind()) {
619 case ConversionSpecifier::nArg:
// Checks whether the thousands-grouping flag makes sense for the current
// conversion. Only consulted further when HasThousandsGrouping is set.
// The kinds enumerated below are d, i, u, f, F, g and G.
// NOTE(review): the return values are not visible here; presumably the
// listed kinds are the accepted ones -- confirm.
627 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
628 if (!HasThousandsGrouping)
631 switch (CS.getKind()) {
632 case ConversionSpecifier::dArg:
633 case ConversionSpecifier::iArg:
634 case ConversionSpecifier::uArg:
635 case ConversionSpecifier::fArg:
636 case ConversionSpecifier::FArg:
637 case ConversionSpecifier::gArg:
638 case ConversionSpecifier::GArg:
// Checks whether a precision makes sense for the current conversion.
// The switch is only consulted when a precision was actually specified
// (Precision.getHowSpecified() differs from NotSpecified).
// The kinds enumerated below are d, i, o, u, x, X, a, A, e, E, f, F, g, G
// and s.
// NOTE(review): the return values are not visible here; presumably the
// listed kinds are the accepted ones -- confirm.
645 bool PrintfSpecifier::hasValidPrecision() const {
646 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
649 // Precision is only valid with the diouxXaAeEfFgGs conversions
650 switch (CS.getKind()) {
651 case ConversionSpecifier::dArg:
652 case ConversionSpecifier::iArg:
653 case ConversionSpecifier::oArg:
654 case ConversionSpecifier::uArg:
655 case ConversionSpecifier::xArg:
656 case ConversionSpecifier::XArg:
657 case ConversionSpecifier::aArg:
658 case ConversionSpecifier::AArg:
659 case ConversionSpecifier::eArg:
660 case ConversionSpecifier::EArg:
661 case ConversionSpecifier::fArg:
662 case ConversionSpecifier::FArg:
663 case ConversionSpecifier::gArg:
664 case ConversionSpecifier::GArg:
665 case ConversionSpecifier::sArg:
672 bool PrintfSpecifier::hasValidFieldWidth() const {
673 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
676 // The field width is valid for all conversions except n
677 switch (CS.getKind()) {
678 case ConversionSpecifier::nArg: