1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_printf::PrintfSpecifier;
26 using namespace clang;
// Shorthand for the SpecifierResult specialization carrying a PrintfSpecifier;
// this is the return type of ParsePrintfSpecifier below.
28 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
29 PrintfSpecifierResult;
31 //===----------------------------------------------------------------------===//
32 // Methods for parsing format strings.
33 //===----------------------------------------------------------------------===//
35 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision part of a printf specifier and records it on FS.
// Two forms are visible: a non-positional amount, which reads through
// argIndex, and a positional amount parsed with PrecisionPos.
// NOTE(review): the condition selecting between the two forms and the
// meaning of the bool result cannot be determined from the visible lines;
// confirm against the complete definition.
37 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
38 const char *Start, const char *&Beg, const char *E,
// Non-positional form: argIndex is dereferenced here, so it must be non-null
// on this path.
41 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
// Positional form: parsed via the handler-aware helper.
43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44 analyze_format_string::PrecisionPos);
// Parses a single printf conversion specification out of a format string.
//
// The visible steps, in order: scan for '%', parse an optional argument
// position, flags, field width, precision and length modifier, then map the
// conversion character to a ConversionSpecifier::Kind. Whenever the input
// runs out part-way through, H.HandleIncompleteSpecifier is notified with the
// span from Start to E. On success the result pairs Start with the populated
// PrintfSpecifier (FS).
//
// LO gates the FreeBSD format extensions; Target selects the Darwin-only
// conversion kinds.
52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
56 const LangOptions &LO,
57 const TargetInfo &Target) {
59 using namespace clang::analyze_format_string;
60 using namespace clang::analyze_printf;
63 const char *Start = 0;
// NOTE(review): presumably writes I back into Beg when this function returns
// (going by the helper's name) -- confirm against UpdateOnReturn's definition.
64 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
66 // Look for a '%' character that indicates the start of a format specifier.
67 for ( ; I != E ; ++I) {
70 // Detect spurious null characters, which are likely errors.
75 Start = I++; // Record the start of the format specifier.
80 // No format specifier found?
85 // No more characters left?
86 H.HandleIncompleteSpecifier(Start, E - Start);
91 if (ParseArgPosition(H, FS, Start, I, E))
95 // No more characters left?
96 H.HandleIncompleteSpecifier(Start, E - Start);
100 // Look for flags (if any).
// Each recognized flag records its position (I) on FS; any other character
// clears hasMore.
102 for ( ; I != E; ++I) {
104 default: hasMore = false; break;
106 // FIXME: POSIX specific. Always accept?
107 FS.setHasThousandsGrouping(I);
109 case '-': FS.setIsLeftJustified(I); break;
110 case '+': FS.setHasPlusPrefix(I); break;
111 case ' ': FS.setHasSpacePrefix(I); break;
112 case '#': FS.setHasAlternativeForm(I); break;
113 case '0': FS.setHasLeadingZeros(I); break;
120 // No more characters left?
121 H.HandleIncompleteSpecifier(Start, E - Start);
125 // Look for the field width (if any).
// A null argIndex pointer is passed when the specifier uses positional
// arguments; otherwise the running argument index is shared with the helper.
126 if (ParseFieldWidth(H, FS, Start, I, E,
127 FS.usesPositionalArg() ? 0 : &argIndex))
131 // No more characters left?
132 H.HandleIncompleteSpecifier(Start, E - Start);
136 // Look for the precision (if any).
140 H.HandleIncompleteSpecifier(Start, E - Start);
// Same null-versus-shared argIndex convention as for the field width.
144 if (ParsePrecision(H, FS, Start, I, E,
145 FS.usesPositionalArg() ? 0 : &argIndex))
149 // No more characters left?
150 H.HandleIncompleteSpecifier(Start, E - Start);
155 // Look for the length modifier.
156 if (ParseLengthModifier(FS, I, E, LO) && I == E) {
157 // No more characters left?
158 H.HandleIncompleteSpecifier(Start, E - Start);
163 // Detect spurious null characters, which are likely errors.
168 // Finally, look for the conversion specifier.
// conversionPosition points at the conversion character; I is advanced past
// it. k stays InvalidSpecifier unless a case below assigns a kind.
169 const char *conversionPosition = I++;
170 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
171 switch (*conversionPosition) {
174 // C99: 7.19.6.1 (section 8).
175 case '%': k = ConversionSpecifier::PercentArg; break;
176 case 'A': k = ConversionSpecifier::AArg; break;
177 case 'E': k = ConversionSpecifier::EArg; break;
178 case 'F': k = ConversionSpecifier::FArg; break;
179 case 'G': k = ConversionSpecifier::GArg; break;
180 case 'X': k = ConversionSpecifier::XArg; break;
181 case 'a': k = ConversionSpecifier::aArg; break;
182 case 'c': k = ConversionSpecifier::cArg; break;
183 case 'd': k = ConversionSpecifier::dArg; break;
184 case 'e': k = ConversionSpecifier::eArg; break;
185 case 'f': k = ConversionSpecifier::fArg; break;
186 case 'g': k = ConversionSpecifier::gArg; break;
187 case 'i': k = ConversionSpecifier::iArg; break;
188 case 'n': k = ConversionSpecifier::nArg; break;
189 case 'o': k = ConversionSpecifier::oArg; break;
190 case 'p': k = ConversionSpecifier::pArg; break;
191 case 's': k = ConversionSpecifier::sArg; break;
192 case 'u': k = ConversionSpecifier::uArg; break;
193 case 'x': k = ConversionSpecifier::xArg; break;
195 case 'C': k = ConversionSpecifier::CArg; break;
196 case 'S': k = ConversionSpecifier::SArg; break;
198 case '@': k = ConversionSpecifier::ObjCObjArg; break;
200 case 'm': k = ConversionSpecifier::PrintErrno; break;
201 // FreeBSD format extensions
// These kinds are assigned only when LO.FormatExtensions is set.
203 if (LO.FormatExtensions)
204 k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
207 if (LO.FormatExtensions)
208 k = ConversionSpecifier::FreeBSDrArg;
211 if (LO.FormatExtensions)
212 k = ConversionSpecifier::iArg;
// DArg, OArg and UArg are selected only when the target triple is Darwin;
// for the DArg case the FreeBSD extension is the fallback.
216 if (Target.getTriple().isOSDarwin())
217 k = ConversionSpecifier::DArg;
218 else if (LO.FormatExtensions)
219 k = ConversionSpecifier::FreeBSDDArg; // u_char * followed by char *
222 if (Target.getTriple().isOSDarwin())
223 k = ConversionSpecifier::OArg;
226 if (Target.getTriple().isOSDarwin())
227 k = ConversionSpecifier::UArg;
230 PrintfConversionSpecifier CS(conversionPosition, k);
231 FS.setConversionSpecifier(CS);
// Only non-positional specifiers that consume a data argument take the next
// sequential argument index.
232 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
233 FS.setArgIndex(argIndex++);
// The two FreeBSD kinds annotated above as taking a second (char *) argument
// get extra handling here.
235 if (k == ConversionSpecifier::FreeBSDbArg ||
236 k == ConversionSpecifier::FreeBSDDArg)
239 if (k == ConversionSpecifier::InvalidSpecifier) {
240 // Assume the conversion takes one argument.
241 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
243 return PrintfSpecifierResult(Start, FS);
// Entry point for printf format-string analysis: repeatedly parses specifiers
// with ParsePrintfSpecifier and hands each parsed specifier to
// H.HandlePrintfSpecifier. The argument index starts at 0 and is shared
// across all specifiers of the string. Processing stops early when a
// specifier result reports shouldStop().
// NOTE(review): the returned bool's meaning is not visible in these lines.
246 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
249 const LangOptions &LO,
250 const TargetInfo &Target) {
252 unsigned argIndex = 0;
254 // Keep looking for a format specifier until we have exhausted the string.
256 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
258 // Did a fail-stop error of any kind occur when parsing the specifier?
259 // If so, don't do any more processing.
260 if (FSR.shouldStop())
262 // Did we exhaust the string or encounter an error that
263 // we can recover from?
266 // We have a format specifier. Pass it to the callback.
267 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
// Sanity check: at this point the whole string must have been consumed.
271 assert(I == E && "Format string not exhausted");
275 //===----------------------------------------------------------------------===//
276 // Methods on PrintfSpecifier.
277 //===----------------------------------------------------------------------===//
// Computes the argument type this specifier expects, from the conversion
// specifier kind combined with the length modifier (LM).
//
// Yields ArgType::Invalid() when the conversion consumes no data argument,
// and for the AsAllocate / AsMAllocate length modifiers in the switches
// below.
// NOTE(review): the guards selecting between the switches below, and the
// conditions around the UnsignedShortTy ("unichar") results, are not visible
// here; IsObjCLiteral is presumably involved -- confirm.
279 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
280 bool IsObjCLiteral) const {
281 const PrintfConversionSpecifier &CS = getConversionSpecifier();
283 if (!CS.consumesDataArgument())
284 return ArgType::Invalid();
// cArg: int with no length modifier, wint_t with the 'long' modifier.
286 if (CS.getKind() == ConversionSpecifier::cArg)
287 switch (LM.getKind()) {
288 case LengthModifier::None: return Ctx.IntTy;
289 case LengthModifier::AsLong:
290 return ArgType(ArgType::WIntTy, "wint_t");
292 return ArgType::Invalid();
// Length-modifier mapping whose results are the signed integer types.
296 switch (LM.getKind()) {
297 case LengthModifier::AsLongDouble:
299 return Ctx.LongLongTy;
300 case LengthModifier::None: return Ctx.IntTy;
301 case LengthModifier::AsChar: return ArgType::AnyCharTy;
302 case LengthModifier::AsShort: return Ctx.ShortTy;
303 case LengthModifier::AsLong: return Ctx.LongTy;
304 case LengthModifier::AsLongLong:
305 case LengthModifier::AsQuad:
306 return Ctx.LongLongTy;
307 case LengthModifier::AsIntMax:
308 return ArgType(Ctx.getIntMaxType(), "intmax_t");
309 case LengthModifier::AsSizeT:
310 // FIXME: How to get the corresponding signed version of size_t?
312 case LengthModifier::AsPtrDiff:
313 return ArgType(Ctx.getPointerDiffType(), "ptrdiff_t");
314 case LengthModifier::AsAllocate:
315 case LengthModifier::AsMAllocate:
316 return ArgType::Invalid();
// Length-modifier mapping whose results are the unsigned integer types.
320 switch (LM.getKind()) {
321 case LengthModifier::AsLongDouble:
323 return Ctx.UnsignedLongLongTy;
324 case LengthModifier::None: return Ctx.UnsignedIntTy;
325 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
326 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
327 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
328 case LengthModifier::AsLongLong:
329 case LengthModifier::AsQuad:
330 return Ctx.UnsignedLongLongTy;
331 case LengthModifier::AsIntMax:
332 return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
333 case LengthModifier::AsSizeT:
334 return ArgType(Ctx.getSizeType(), "size_t");
335 case LengthModifier::AsPtrDiff:
336 // FIXME: How to get the corresponding unsigned
337 // version of ptrdiff_t?
339 case LengthModifier::AsAllocate:
340 case LengthModifier::AsMAllocate:
341 return ArgType::Invalid();
// Floating-point conversions: long double when the 'L' style modifier is set.
344 if (CS.isDoubleArg()) {
345 if (LM.getKind() == LengthModifier::AsLongDouble)
346 return Ctx.LongDoubleTy;
// nArg: the expected argument is a pointer to the integer type chosen by the
// length modifier.
350 if (CS.getKind() == ConversionSpecifier::nArg) {
351 switch (LM.getKind()) {
352 case LengthModifier::None:
353 return ArgType::PtrTo(Ctx.IntTy);
354 case LengthModifier::AsChar:
355 return ArgType::PtrTo(Ctx.SignedCharTy);
356 case LengthModifier::AsShort:
357 return ArgType::PtrTo(Ctx.ShortTy);
358 case LengthModifier::AsLong:
359 return ArgType::PtrTo(Ctx.LongTy);
360 case LengthModifier::AsLongLong:
361 case LengthModifier::AsQuad:
362 return ArgType::PtrTo(Ctx.LongLongTy);
363 case LengthModifier::AsIntMax:
364 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
365 case LengthModifier::AsSizeT:
366 return ArgType(); // FIXME: ssize_t
367 case LengthModifier::AsPtrDiff:
368 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
369 case LengthModifier::AsLongDouble:
370 return ArgType(); // FIXME: Is this a known extension?
371 case LengthModifier::AsAllocate:
372 case LengthModifier::AsMAllocate:
373 return ArgType::Invalid();
// Remaining kinds: strings, wide characters, pointers and Objective-C objects.
377 switch (CS.getKind()) {
378 case ConversionSpecifier::sArg:
379 if (LM.getKind() == LengthModifier::AsWideChar) {
381 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
383 return ArgType(ArgType::WCStrTy, "wchar_t *");
385 return ArgType::CStrTy;
386 case ConversionSpecifier::SArg:
388 return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()),
390 return ArgType(ArgType::WCStrTy, "wchar_t *");
391 case ConversionSpecifier::CArg:
393 return ArgType(Ctx.UnsignedShortTy, "unichar");
394 return ArgType(Ctx.WCharTy, "wchar_t");
395 case ConversionSpecifier::pArg:
396 return ArgType::CPointerTy;
397 case ConversionSpecifier::ObjCObjArg:
398 return ArgType::ObjCPointerTy;
403 // FIXME: Handle other cases.
// Rewrites this specifier in place (conversion kind, length modifier, flags
// and precision) so that it suits an argument of type QT.
//
// Visible stages, in order:
//   1. nArg specifiers are never fixed.
//   2. Objective-C retainable types become ObjCObjArg with flags cleared.
//   3. Pointers to character types become sArg (AsWideChar for wide chars).
//   4. Enums are replaced by their underlying integer type.
//   5. The length modifier is chosen from the builtin type kind, then
//      refined for typedef'd names when LangOpt is C99 or C++11.
//   6. If the length modifier is valid, signedness of the conversion is
//      aligned with QT and the result is tested against QT.
//   7. Otherwise the conversion specifier is chosen from QT's category.
// NOTE(review): the return statements are not visible here; presumably the
// bool reports whether a usable fix was produced -- confirm.
407 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
408 ASTContext &Ctx, bool IsObjCLiteral) {
409 // %n is different from other conversion specifiers; don't try to fix it.
410 if (CS.getKind() == ConversionSpecifier::nArg)
413 // Handle Objective-C objects first. Note that while the '%@' specifier will
414 // not warn for structure pointer or void pointer arguments (because that's
415 // how CoreFoundation objects are implemented), we only show a fixit for '%@'
416 // if we know it's an object (block, id, class, or __attribute__((NSObject))).
417 if (QT->isObjCRetainableType()) {
421 CS.setKind(ConversionSpecifier::ObjCObjArg);
423 // Disable irrelevant flags
424 HasThousandsGrouping = false;
425 HasPlusPrefix = false;
426 HasSpacePrefix = false;
427 HasAlternativeForm = false;
428 HasLeadingZeroes = false;
429 Precision.setHowSpecified(OptionalAmount::NotSpecified);
430 LM.setKind(LengthModifier::None);
435 // Handle strings next (char *, wchar_t *)
436 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
437 CS.setKind(ConversionSpecifier::sArg);
439 // Disable irrelevant flags
440 HasAlternativeForm = 0;
441 HasLeadingZeroes = 0;
443 // Set the long length modifier for wide characters
444 if (QT->getPointeeType()->isWideCharType())
445 LM.setKind(LengthModifier::AsWideChar);
447 LM.setKind(LengthModifier::None);
452 // If it's an enum, get its underlying type.
453 if (const EnumType *ETy = QT->getAs<EnumType>())
454 QT = ETy->getDecl()->getIntegerType();
456 // We can only work with builtin types.
457 const BuiltinType *BT = QT->getAs<BuiltinType>();
461 // Set length modifier
462 switch (BT->getKind()) {
463 case BuiltinType::Bool:
464 case BuiltinType::WChar_U:
465 case BuiltinType::WChar_S:
466 case BuiltinType::Char16:
467 case BuiltinType::Char32:
468 case BuiltinType::UInt128:
469 case BuiltinType::Int128:
470 case BuiltinType::Half:
471 // Various types which are non-trivial to correct.
474 #define SIGNED_TYPE(Id, SingletonId)
475 #define UNSIGNED_TYPE(Id, SingletonId)
476 #define FLOATING_TYPE(Id, SingletonId)
477 #define BUILTIN_TYPE(Id, SingletonId) \
478 case BuiltinType::Id:
479 #include "clang/AST/BuiltinTypes.def"
480 // Misc other stuff which doesn't make sense here.
// Types printed without a length modifier.
483 case BuiltinType::UInt:
484 case BuiltinType::Int:
485 case BuiltinType::Float:
486 case BuiltinType::Double:
487 LM.setKind(LengthModifier::None);
490 case BuiltinType::Char_U:
491 case BuiltinType::UChar:
492 case BuiltinType::Char_S:
493 case BuiltinType::SChar:
494 LM.setKind(LengthModifier::AsChar);
497 case BuiltinType::Short:
498 case BuiltinType::UShort:
499 LM.setKind(LengthModifier::AsShort);
502 case BuiltinType::Long:
503 case BuiltinType::ULong:
504 LM.setKind(LengthModifier::AsLong);
507 case BuiltinType::LongLong:
508 case BuiltinType::ULongLong:
509 LM.setKind(LengthModifier::AsLongLong);
512 case BuiltinType::LongDouble:
513 LM.setKind(LengthModifier::AsLongDouble);
517 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
518 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
519 namedTypeToLengthModifier(QT, LM);
521 // If fixing the length modifier was enough, we might be done.
522 if (hasValidLengthModifier(Ctx.getTargetInfo())) {
523 // If we're going to offer a fix anyway, make sure the sign matches.
524 switch (CS.getKind()) {
525 case ConversionSpecifier::uArg:
526 case ConversionSpecifier::UArg:
527 if (QT->isSignedIntegerType())
528 CS.setKind(clang::analyze_format_string::ConversionSpecifier::dArg);
530 case ConversionSpecifier::dArg:
531 case ConversionSpecifier::DArg:
532 case ConversionSpecifier::iArg:
// A signed conversion is kept when the '+' flag is present, even for an
// unsigned argument type.
533 if (QT->isUnsignedIntegerType() && !HasPlusPrefix)
534 CS.setKind(clang::analyze_format_string::ConversionSpecifier::uArg);
537 // Other specifiers do not have signed/unsigned variants.
// Re-derive the expected argument type after the adjustments above and test
// it against QT.
541 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
542 if (ATR.isValid() && ATR.matchesType(Ctx, QT))
546 // Set conversion specifier and disable any flags which do not apply to it.
547 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
548 if (!isa<TypedefType>(QT) && QT->isCharType()) {
549 CS.setKind(ConversionSpecifier::cArg);
550 LM.setKind(LengthModifier::None);
551 Precision.setHowSpecified(OptionalAmount::NotSpecified);
552 HasAlternativeForm = 0;
553 HasLeadingZeroes = 0;
556 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
557 else if (QT->isRealFloatingType()) {
558 CS.setKind(ConversionSpecifier::fArg);
560 else if (QT->isSignedIntegerType()) {
561 CS.setKind(ConversionSpecifier::dArg);
562 HasAlternativeForm = 0;
564 else if (QT->isUnsignedIntegerType()) {
565 CS.setKind(ConversionSpecifier::uArg);
566 HasAlternativeForm = 0;
569 llvm_unreachable("Unexpected type");
// Writes the textual form of this specifier to os. The visible pieces are
// emitted in this order: positional argument index followed by "$", the
// flags ("-", "+", " ", "#", "0"), the field width, then the precision.
575 void PrintfSpecifier::toString(raw_ostream &os) const {
576 // Whilst some features have no defined order, we are using the order
577 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
581 if (usesPositionalArg()) {
582 os << getPositionalArgIndex() << "$";
// Flags, one character each, emitted only when set.
586 if (IsLeftJustified) os << "-";
587 if (HasPlusPrefix) os << "+";
588 if (HasSpacePrefix) os << " ";
589 if (HasAlternativeForm) os << "#";
590 if (HasLeadingZeroes) os << "0";
592 // Minimum field width
593 FieldWidth.toString(os);
595 Precision.toString(os);
598 // Conversion specifier
// Checks the '+' flag against this specifier's conversion kind.
// NOTE(review): presumably the kinds listed below are the ones for which the
// flag is accepted -- the return statements are not visible; confirm.
602 bool PrintfSpecifier::hasValidPlusPrefix() const {
606 // The plus prefix only makes sense for signed conversions
607 switch (CS.getKind()) {
608 case ConversionSpecifier::dArg:
609 case ConversionSpecifier::DArg:
610 case ConversionSpecifier::iArg:
611 case ConversionSpecifier::fArg:
612 case ConversionSpecifier::FArg:
613 case ConversionSpecifier::eArg:
614 case ConversionSpecifier::EArg:
615 case ConversionSpecifier::gArg:
616 case ConversionSpecifier::GArg:
617 case ConversionSpecifier::aArg:
618 case ConversionSpecifier::AArg:
619 case ConversionSpecifier::FreeBSDrArg:
// Checks the '#' (alternative form) flag against this specifier's conversion
// kind. The flag being unset is handled first, before the kind is examined.
// NOTE(review): presumably the kinds listed below are the accepted ones --
// the return statements are not visible; confirm.
627 bool PrintfSpecifier::hasValidAlternativeForm() const {
628 if (!HasAlternativeForm)
631 // Alternate form flag only valid with the oxXaAeEfFgG conversions
632 switch (CS.getKind()) {
633 case ConversionSpecifier::oArg:
634 case ConversionSpecifier::OArg:
635 case ConversionSpecifier::xArg:
636 case ConversionSpecifier::XArg:
637 case ConversionSpecifier::aArg:
638 case ConversionSpecifier::AArg:
639 case ConversionSpecifier::eArg:
640 case ConversionSpecifier::EArg:
641 case ConversionSpecifier::fArg:
642 case ConversionSpecifier::FArg:
643 case ConversionSpecifier::gArg:
644 case ConversionSpecifier::GArg:
645 case ConversionSpecifier::FreeBSDrArg:
// Checks the '0' (leading zeroes) flag against this specifier's conversion
// kind. The flag being unset is handled first, before the kind is examined.
// NOTE(review): presumably the kinds listed below are the accepted ones --
// the return statements are not visible; confirm.
653 bool PrintfSpecifier::hasValidLeadingZeros() const {
654 if (!HasLeadingZeroes)
657 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
658 switch (CS.getKind()) {
659 case ConversionSpecifier::dArg:
660 case ConversionSpecifier::DArg:
661 case ConversionSpecifier::iArg:
662 case ConversionSpecifier::oArg:
663 case ConversionSpecifier::OArg:
664 case ConversionSpecifier::uArg:
665 case ConversionSpecifier::UArg:
666 case ConversionSpecifier::xArg:
667 case ConversionSpecifier::XArg:
668 case ConversionSpecifier::aArg:
669 case ConversionSpecifier::AArg:
670 case ConversionSpecifier::eArg:
671 case ConversionSpecifier::EArg:
672 case ConversionSpecifier::fArg:
673 case ConversionSpecifier::FArg:
674 case ConversionSpecifier::gArg:
675 case ConversionSpecifier::GArg:
// Checks the ' ' (space prefix) flag against this specifier's conversion
// kind.
// NOTE(review): presumably the kinds listed below are the accepted ones --
// the return statements are not visible; confirm.
683 bool PrintfSpecifier::hasValidSpacePrefix() const {
687 // The space prefix only makes sense for signed conversions
688 switch (CS.getKind()) {
689 case ConversionSpecifier::dArg:
690 case ConversionSpecifier::DArg:
691 case ConversionSpecifier::iArg:
692 case ConversionSpecifier::fArg:
693 case ConversionSpecifier::FArg:
694 case ConversionSpecifier::eArg:
695 case ConversionSpecifier::EArg:
696 case ConversionSpecifier::gArg:
697 case ConversionSpecifier::GArg:
698 case ConversionSpecifier::aArg:
699 case ConversionSpecifier::AArg:
// Checks the '-' (left-justified) flag against this specifier's conversion
// kind. The flag being unset is handled first; nArg is the only kind the
// switch singles out.
707 bool PrintfSpecifier::hasValidLeftJustified() const {
708 if (!IsLeftJustified)
711 // The left justified flag is valid for all conversions except n
712 switch (CS.getKind()) {
713 case ConversionSpecifier::nArg:
// Checks the thousands-grouping flag against this specifier's conversion
// kind. The flag being unset is handled first, before the kind is examined.
// NOTE(review): presumably the decimal-integer and f/F/g/G kinds listed below
// are the accepted ones -- the return statements are not visible; confirm.
721 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
722 if (!HasThousandsGrouping)
725 switch (CS.getKind()) {
726 case ConversionSpecifier::dArg:
727 case ConversionSpecifier::DArg:
728 case ConversionSpecifier::iArg:
729 case ConversionSpecifier::uArg:
730 case ConversionSpecifier::UArg:
731 case ConversionSpecifier::fArg:
732 case ConversionSpecifier::FArg:
733 case ConversionSpecifier::gArg:
734 case ConversionSpecifier::GArg:
// Checks whether a precision is meaningful for this specifier's conversion
// kind. An unspecified precision is handled first, before the kind is
// examined.
// NOTE(review): presumably the kinds listed below are the accepted ones --
// the return statements are not visible; confirm.
741 bool PrintfSpecifier::hasValidPrecision() const {
742 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
745 // Precision is only valid with the diouxXaAeEfFgGs conversions
746 switch (CS.getKind()) {
747 case ConversionSpecifier::dArg:
748 case ConversionSpecifier::DArg:
749 case ConversionSpecifier::iArg:
750 case ConversionSpecifier::oArg:
751 case ConversionSpecifier::OArg:
752 case ConversionSpecifier::uArg:
753 case ConversionSpecifier::UArg:
754 case ConversionSpecifier::xArg:
755 case ConversionSpecifier::XArg:
756 case ConversionSpecifier::aArg:
757 case ConversionSpecifier::AArg:
758 case ConversionSpecifier::eArg:
759 case ConversionSpecifier::EArg:
760 case ConversionSpecifier::fArg:
761 case ConversionSpecifier::FArg:
762 case ConversionSpecifier::gArg:
763 case ConversionSpecifier::GArg:
764 case ConversionSpecifier::sArg:
771 bool PrintfSpecifier::hasValidFieldWidth() const {
772 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
775 // The field width is valid for all conversions except n
776 switch (CS.getKind()) {
777 case ConversionSpecifier::nArg: