1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
// Shorthand for the SpecifierResult instantiation over PrintfSpecifier; this
// is the type returned by ParsePrintfSpecifier in this file.
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision of a printf specifier. Two paths are visible: one
// stores the result of ParseNonPositionAmount (which is given *argIndex) as
// the precision of FS; the other obtains an OptionalAmount from
// ParsePositionAmount for analyze_format_string::PrecisionPos.
// NOTE(review): the rest of the signature, the condition selecting between
// the two paths and the return statements are not visible in this excerpt.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
43 analyze_format_string::PrecisionPos);
// Parses one printf conversion specification and returns it as a
// PrintfSpecifierResult. The visible steps are, in order: locate the '%',
// argument position, flags, field width, precision, length modifier and
// finally the conversion character. Whenever the input runs out part-way
// through, the span from Start to E is reported through
// H.HandleIncompleteSpecifier.
// NOTE(review): part of the parameter list and many control-flow lines
// (early returns, closing braces) are not visible in this excerpt.
51 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
55 const LangOptions &LO) {
57 using namespace clang::analyze_format_string;
58 using namespace clang::analyze_printf;
// Start stays null until the beginning of a specifier is recorded below.
61 const char *Start = 0;
// NOTE(review): UpdateBeg presumably copies I back into Beg when this
// function returns -- confirm against UpdateOnReturn.
62 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64 // Look for a '%' character that indicates the start of a format specifier.
65 for ( ; I != E ; ++I) {
68 // Detect spurious null characters, which are likely errors.
73 Start = I++; // Record the start of the format specifier.
78 // No format specifier found?
83 // No more characters left?
84 H.HandleIncompleteSpecifier(Start, E - Start);
// NOTE(review): going by the helper's name this handles a positional argument
// index; what happens when it returns true is not visible here.
89 if (ParseArgPosition(H, FS, Start, I, E))
93 // No more characters left?
94 H.HandleIncompleteSpecifier(Start, E - Start);
98 // Look for flags (if any).
100 for ( ; I != E; ++I) {
102 default: hasMore = false; break;
104 // FIXME: POSIX specific. Always accept?
105 FS.setHasThousandsGrouping(I);
// Each recognized flag is recorded on FS together with its position I.
107 case '-': FS.setIsLeftJustified(I); break;
108 case '+': FS.setHasPlusPrefix(I); break;
109 case ' ': FS.setHasSpacePrefix(I); break;
110 case '#': FS.setHasAlternativeForm(I); break;
111 case '0': FS.setHasLeadingZeros(I); break;
118 // No more characters left?
119 H.HandleIncompleteSpecifier(Start, E - Start);
123 // Look for the field width (if any).
// The running argument index is only handed over for non-positional
// specifiers; a positional specifier passes a null pointer instead.
124 if (ParseFieldWidth(H, FS, Start, I, E,
125 FS.usesPositionalArg() ? 0 : &argIndex))
129 // No more characters left?
130 H.HandleIncompleteSpecifier(Start, E - Start);
134 // Look for the precision (if any).
138 H.HandleIncompleteSpecifier(Start, E - Start);
// Same null-versus-running-index convention as for the field width.
142 if (ParsePrecision(H, FS, Start, I, E,
143 FS.usesPositionalArg() ? 0 : &argIndex))
147 // No more characters left?
148 H.HandleIncompleteSpecifier(Start, E - Start);
153 // Look for the length modifier.
// The incomplete-specifier report below fires only when a length modifier was
// parsed AND the input is exhausted right after it.
154 if (ParseLengthModifier(FS, I, E, LO) && I == E) {
155 // No more characters left?
156 H.HandleIncompleteSpecifier(Start, E - Start);
161 // Detect spurious null characters, which are likely errors.
166 // Finally, look for the conversion specifier.
// Remember where the conversion character sits and step past it.
167 const char *conversionPosition = I++;
// k keeps this InvalidSpecifier value unless one of the cases assigns it.
168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
169 switch (*conversionPosition) {
172 // C99: 7.19.6.1 (section 8).
173 case '%': k = ConversionSpecifier::PercentArg; break;
174 case 'A': k = ConversionSpecifier::AArg; break;
175 case 'E': k = ConversionSpecifier::EArg; break;
176 case 'F': k = ConversionSpecifier::FArg; break;
177 case 'G': k = ConversionSpecifier::GArg; break;
178 case 'X': k = ConversionSpecifier::XArg; break;
179 case 'a': k = ConversionSpecifier::aArg; break;
180 case 'c': k = ConversionSpecifier::cArg; break;
181 case 'd': k = ConversionSpecifier::dArg; break;
182 case 'e': k = ConversionSpecifier::eArg; break;
183 case 'f': k = ConversionSpecifier::fArg; break;
184 case 'g': k = ConversionSpecifier::gArg; break;
185 case 'i': k = ConversionSpecifier::iArg; break;
186 case 'n': k = ConversionSpecifier::nArg; break;
187 case 'o': k = ConversionSpecifier::oArg; break;
188 case 'p': k = ConversionSpecifier::pArg; break;
189 case 's': k = ConversionSpecifier::sArg; break;
190 case 'u': k = ConversionSpecifier::uArg; break;
191 case 'x': k = ConversionSpecifier::xArg; break;
193 case 'C': k = ConversionSpecifier::CArg; break;
194 case 'S': k = ConversionSpecifier::SArg; break;
196 case '@': k = ConversionSpecifier::ObjCObjArg; break;
198 case 'm': k = ConversionSpecifier::PrintErrno; break;
199 // FreeBSD format extensions
// These four characters are only mapped when LO.FormatExtensions is set;
// otherwise the cases leave k untouched. Note that 'y' maps to iArg, the same
// kind as 'i'.
200 case 'b': if (LO.FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
201 case 'r': if (LO.FormatExtensions) k = ConversionSpecifier::rArg; break;
202 case 'y': if (LO.FormatExtensions) k = ConversionSpecifier::iArg; break;
203 case 'D': if (LO.FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
205 PrintfConversionSpecifier CS(conversionPosition, k);
206 FS.setConversionSpecifier(CS);
// Only conversions that consume a data argument and are not positional take
// the next sequential argument index.
207 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
208 FS.setArgIndex(argIndex++);
// NOTE(review): the statement guarded by the bArg/DArg test below is not
// visible in this excerpt.
210 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
213 if (k == ConversionSpecifier::InvalidSpecifier) {
214 // Assume the conversion takes one argument.
// The handler's answer is negated to form the value returned from here.
215 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
// Normal exit: hand back the specifier together with where it started.
217 return PrintfSpecifierResult(Start, FS);
// Drives parsing of a whole printf format string: specifiers are parsed with
// ParsePrintfSpecifier, sharing one running argument index that starts at 0,
// and each successfully parsed specifier is passed to H.HandlePrintfSpecifier.
// NOTE(review): part of the parameter list, the loop header and the return
// statements are not visible in this excerpt.
220 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
223 const LangOptions &LO) {
225 unsigned argIndex = 0;
227 // Keep looking for a format specifier until we have exhausted the string.
229 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
231 // Did a fail-stop error of any kind occur when parsing the specifier?
232 // If so, don't do any more processing.
233 if (FSR.shouldStop())
235 // Did we exhaust the string or encounter an error that
236 // we can recover from?
239 // We have a format specifier. Pass it to the callback.
240 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
// Sanity check: by this point the cursor is expected to sit at the end of the
// format string.
244 assert(I == E && "Format string not exhausted");
248 //===----------------------------------------------------------------------===//
249 // Methods on PrintfSpecifier.
250 //===----------------------------------------------------------------------===//
// Computes the ArgType expected for this specifier's data argument from its
// conversion specifier (CS) and length modifier (LM). Conversions that do not
// consume a data argument yield ArgType::Invalid().
// NOTE(review): several guard conditions and case labels of this function are
// not visible in this excerpt; comments below describe only the visible lines.
252 ArgType PrintfSpecifier::getArgType(ASTContext &Ctx,
253 bool IsObjCLiteral) const {
254 const PrintfConversionSpecifier &CS = getConversionSpecifier();
256 if (!CS.consumesDataArgument())
257 return ArgType::Invalid();
// %c: no length modifier means int, 'l' means wint_t.
259 if (CS.getKind() == ConversionSpecifier::cArg)
260 switch (LM.getKind()) {
261 case LengthModifier::None: return Ctx.IntTy;
262 case LengthModifier::AsLong:
263 return ArgType(ArgType::WIntTy, "wint_t");
265 return ArgType::Invalid();
// Length-modifier table returning signed integer types (the condition guarding
// this switch is not visible here).
269 switch (LM.getKind()) {
270 case LengthModifier::AsLongDouble:
272 return Ctx.LongLongTy;
273 case LengthModifier::None: return Ctx.IntTy;
274 case LengthModifier::AsChar: return ArgType::AnyCharTy;
275 case LengthModifier::AsShort: return Ctx.ShortTy;
276 case LengthModifier::AsLong: return Ctx.LongTy;
277 case LengthModifier::AsLongLong:
278 case LengthModifier::AsQuad:
279 return Ctx.LongLongTy;
280 case LengthModifier::AsIntMax:
281 return ArgType(Ctx.getIntMaxType(), "intmax_t");
282 case LengthModifier::AsSizeT:
283 // FIXME: How to get the corresponding signed version of size_t?
285 case LengthModifier::AsPtrDiff:
286 return ArgType(Ctx.getPointerDiffType(), "ptrdiff_t");
// The allocation modifiers have no meaning here and are rejected.
287 case LengthModifier::AsAllocate:
288 case LengthModifier::AsMAllocate:
289 return ArgType::Invalid();
// Length-modifier table returning unsigned integer types (again, the guarding
// condition is not visible here).
293 switch (LM.getKind()) {
294 case LengthModifier::AsLongDouble:
296 return Ctx.UnsignedLongLongTy;
297 case LengthModifier::None: return Ctx.UnsignedIntTy;
298 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
299 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
300 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
301 case LengthModifier::AsLongLong:
302 case LengthModifier::AsQuad:
303 return Ctx.UnsignedLongLongTy;
304 case LengthModifier::AsIntMax:
305 return ArgType(Ctx.getUIntMaxType(), "uintmax_t");
306 case LengthModifier::AsSizeT:
307 return ArgType(Ctx.getSizeType(), "size_t");
308 case LengthModifier::AsPtrDiff:
309 // FIXME: How to get the corresponding unsigned
310 // version of ptrdiff_t?
312 case LengthModifier::AsAllocate:
313 case LengthModifier::AsMAllocate:
314 return ArgType::Invalid();
// Floating-point conversions: the long-double modifier selects long double.
317 if (CS.isDoubleArg()) {
318 if (LM.getKind() == LengthModifier::AsLongDouble)
319 return Ctx.LongDoubleTy;
// %n: the argument is a pointer to the integer type chosen by the length
// modifier. Modifiers without a known mapping return a default-constructed
// ArgType (see the FIXMEs).
323 if (CS.getKind() == ConversionSpecifier::nArg) {
324 switch (LM.getKind()) {
325 case LengthModifier::None:
326 return ArgType::PtrTo(Ctx.IntTy);
327 case LengthModifier::AsChar:
328 return ArgType::PtrTo(Ctx.SignedCharTy);
329 case LengthModifier::AsShort:
330 return ArgType::PtrTo(Ctx.ShortTy);
331 case LengthModifier::AsLong:
332 return ArgType::PtrTo(Ctx.LongTy);
333 case LengthModifier::AsLongLong:
334 case LengthModifier::AsQuad:
335 return ArgType::PtrTo(Ctx.LongLongTy);
336 case LengthModifier::AsIntMax:
337 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
338 case LengthModifier::AsSizeT:
339 return ArgType(); // FIXME: ssize_t
340 case LengthModifier::AsPtrDiff:
341 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
342 case LengthModifier::AsLongDouble:
343 return ArgType(); // FIXME: Is this a known extension?
344 case LengthModifier::AsAllocate:
345 case LengthModifier::AsMAllocate:
346 return ArgType::Invalid();
// Remaining conversions: strings, wide characters, pointers and Objective-C
// objects.
// NOTE(review): each pair of returns below is presumably selected by a
// condition that is not visible in this excerpt (the first of each pair uses
// unsigned short, the second wchar_t) -- confirm.
350 switch (CS.getKind()) {
351 case ConversionSpecifier::sArg:
352 if (LM.getKind() == LengthModifier::AsWideChar) {
354 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst());
355 return ArgType(ArgType::WCStrTy, "wchar_t *");
357 return ArgType::CStrTy;
358 case ConversionSpecifier::SArg:
360 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst());
361 return ArgType(ArgType::WCStrTy, "wchar_t *");
362 case ConversionSpecifier::CArg:
364 return Ctx.UnsignedShortTy;
365 return ArgType(Ctx.WCharTy, "wchar_t");
366 case ConversionSpecifier::pArg:
367 return ArgType::CPointerTy;
368 case ConversionSpecifier::ObjCObjArg:
369 return ArgType::ObjCPointerTy;
374 // FIXME: Handle other cases.
// Rewrites this specifier so that it suits an argument of type QT: it adjusts
// the conversion kind (CS), the length modifier (LM), the precision and the
// flag members as needed. The visible stages are: Objective-C retainable
// types, character pointers, enum unwrapping, builtin-type length modifiers,
// typedef-named length modifiers, and finally choosing a conversion kind.
// NOTE(review): the return statements and several closing braces are not
// visible in this excerpt, so the meaning of the bool result is not
// documented here.
378 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
379 ASTContext &Ctx, bool IsObjCLiteral) {
380 // %n is different from other conversion specifiers; don't try to fix it.
381 if (CS.getKind() == ConversionSpecifier::nArg)
384 // Handle Objective-C objects first. Note that while the '%@' specifier will
385 // not warn for structure pointer or void pointer arguments (because that's
386 // how CoreFoundation objects are implemented), we only show a fixit for '%@'
387 // if we know it's an object (block, id, class, or __attribute__((NSObject))).
388 if (QT->isObjCRetainableType()) {
392 CS.setKind(ConversionSpecifier::ObjCObjArg);
394 // Disable irrelevant flags
395 HasThousandsGrouping = false;
396 HasPlusPrefix = false;
397 HasSpacePrefix = false;
398 HasAlternativeForm = false;
399 HasLeadingZeroes = false;
400 Precision.setHowSpecified(OptionalAmount::NotSpecified);
401 LM.setKind(LengthModifier::None);
406 // Handle strings next (char *, wchar_t *)
407 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
408 CS.setKind(ConversionSpecifier::sArg);
410 // Disable irrelevant flags
411 HasAlternativeForm = 0;
412 HasLeadingZeroes = 0;
414 // Set the long length modifier for wide characters
415 if (QT->getPointeeType()->isWideCharType())
416 LM.setKind(LengthModifier::AsWideChar);
418 LM.setKind(LengthModifier::None);
423 // If it's an enum, get its underlying type.
424 if (const EnumType *ETy = QT->getAs<EnumType>())
425 QT = ETy->getDecl()->getIntegerType();
427 // We can only work with builtin types.
428 const BuiltinType *BT = QT->getAs<BuiltinType>();
432 // Set length modifier
433 switch (BT->getKind()) {
434 case BuiltinType::Bool:
435 case BuiltinType::WChar_U:
436 case BuiltinType::WChar_S:
437 case BuiltinType::Char16:
438 case BuiltinType::Char32:
439 case BuiltinType::UInt128:
440 case BuiltinType::Int128:
441 case BuiltinType::Half:
442 // Various types which are non-trivial to correct.
// The signed/unsigned/floating macros are defined to expand to nothing, so
// including BuiltinTypes.def emits case labels only through BUILTIN_TYPE.
445 #define SIGNED_TYPE(Id, SingletonId)
446 #define UNSIGNED_TYPE(Id, SingletonId)
447 #define FLOATING_TYPE(Id, SingletonId)
448 #define BUILTIN_TYPE(Id, SingletonId) \
449 case BuiltinType::Id:
450 #include "clang/AST/BuiltinTypes.def"
451 // Misc other stuff which doesn't make sense here.
454 case BuiltinType::UInt:
455 case BuiltinType::Int:
456 case BuiltinType::Float:
457 case BuiltinType::Double:
458 LM.setKind(LengthModifier::None);
461 case BuiltinType::Char_U:
462 case BuiltinType::UChar:
463 case BuiltinType::Char_S:
464 case BuiltinType::SChar:
465 LM.setKind(LengthModifier::AsChar);
468 case BuiltinType::Short:
469 case BuiltinType::UShort:
470 LM.setKind(LengthModifier::AsShort);
473 case BuiltinType::Long:
474 case BuiltinType::ULong:
475 LM.setKind(LengthModifier::AsLong);
478 case BuiltinType::LongLong:
479 case BuiltinType::ULongLong:
480 LM.setKind(LengthModifier::AsLongLong);
483 case BuiltinType::LongDouble:
484 LM.setKind(LengthModifier::AsLongDouble);
488 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
489 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
490 namedTypeToLengthModifier(QT, LM);
492 // If fixing the length modifier was enough, we are done.
493 const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral);
494 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
497 // Set conversion specifier and disable any flags which do not apply to it.
498 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// NOTE(review): the comment above says typedefs to char should fall through
// to int, yet the visible condition selects %c only when QT *is* a
// TypedefType -- the test may be missing a negation; confirm.
499 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
500 CS.setKind(ConversionSpecifier::cArg);
501 LM.setKind(LengthModifier::None);
502 Precision.setHowSpecified(OptionalAmount::NotSpecified);
503 HasAlternativeForm = 0;
504 HasLeadingZeroes = 0;
507 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
508 else if (QT->isRealFloatingType()) {
509 CS.setKind(ConversionSpecifier::fArg);
511 else if (QT->isSignedIntegerType()) {
512 CS.setKind(ConversionSpecifier::dArg);
513 HasAlternativeForm = 0;
515 else if (QT->isUnsignedIntegerType()) {
516 CS.setKind(ConversionSpecifier::uArg);
517 HasAlternativeForm = 0;
// Reaching this point means QT matched none of the categories tested above.
520 llvm_unreachable("Unexpected type");
// Writes the textual form of this specifier to os. Visible pieces, in output
// order: the positional argument index followed by "$" (when one is used),
// the flag characters, the field width and the precision.
// NOTE(review): the lines emitting the leading '%', the remaining flags, the
// length modifier and the conversion character are not visible in this
// excerpt.
526 void PrintfSpecifier::toString(raw_ostream &os) const {
527 // Whilst some features have no defined order, we are using the order
528 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
532 if (usesPositionalArg()) {
533 os << getPositionalArgIndex() << "$";
// Flags: each set flag member contributes its single character.
537 if (IsLeftJustified) os << "-";
538 if (HasPlusPrefix) os << "+";
539 if (HasSpacePrefix) os << " ";
540 if (HasAlternativeForm) os << "#";
541 if (HasLeadingZeroes) os << "0";
543 // Minimum field width
544 FieldWidth.toString(os);
// Precision
546 Precision.toString(os);
549 // Conversion specifier
// Checks whether the '+' flag is acceptable for this specifier's conversion
// kind. The switch lists d, i, f, F, e, E, g, G, a, A and r.
// NOTE(review): the early exit and the return statements are not visible in
// this excerpt; presumably the listed kinds are the accepted ones -- confirm.
553 bool PrintfSpecifier::hasValidPlusPrefix() const {
557 // The plus prefix only makes sense for signed conversions
558 switch (CS.getKind()) {
559 case ConversionSpecifier::dArg:
560 case ConversionSpecifier::iArg:
561 case ConversionSpecifier::fArg:
562 case ConversionSpecifier::FArg:
563 case ConversionSpecifier::eArg:
564 case ConversionSpecifier::EArg:
565 case ConversionSpecifier::gArg:
566 case ConversionSpecifier::GArg:
567 case ConversionSpecifier::aArg:
568 case ConversionSpecifier::AArg:
569 case ConversionSpecifier::rArg:
// Checks whether the '#' flag is acceptable for this specifier's conversion
// kind. The check only matters when HasAlternativeForm is set; the switch
// lists o, x, X, a, A, e, E, f, F, g, G and additionally r.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed kinds are the accepted ones -- confirm.
577 bool PrintfSpecifier::hasValidAlternativeForm() const {
578 if (!HasAlternativeForm)
581 // Alternate form flag only valid with the oxXaAeEfFgG conversions
582 switch (CS.getKind()) {
583 case ConversionSpecifier::oArg:
584 case ConversionSpecifier::xArg:
585 case ConversionSpecifier::XArg:
586 case ConversionSpecifier::aArg:
587 case ConversionSpecifier::AArg:
588 case ConversionSpecifier::eArg:
589 case ConversionSpecifier::EArg:
590 case ConversionSpecifier::fArg:
591 case ConversionSpecifier::FArg:
592 case ConversionSpecifier::gArg:
593 case ConversionSpecifier::GArg:
594 case ConversionSpecifier::rArg:
// Checks whether the '0' flag is acceptable for this specifier's conversion
// kind. The check only matters when HasLeadingZeroes is set; the switch lists
// d, i, o, u, x, X, a, A, e, E, f, F, g and G.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed kinds are the accepted ones -- confirm.
602 bool PrintfSpecifier::hasValidLeadingZeros() const {
603 if (!HasLeadingZeroes)
606 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
607 switch (CS.getKind()) {
608 case ConversionSpecifier::dArg:
609 case ConversionSpecifier::iArg:
610 case ConversionSpecifier::oArg:
611 case ConversionSpecifier::uArg:
612 case ConversionSpecifier::xArg:
613 case ConversionSpecifier::XArg:
614 case ConversionSpecifier::aArg:
615 case ConversionSpecifier::AArg:
616 case ConversionSpecifier::eArg:
617 case ConversionSpecifier::EArg:
618 case ConversionSpecifier::fArg:
619 case ConversionSpecifier::FArg:
620 case ConversionSpecifier::gArg:
621 case ConversionSpecifier::GArg:
// Checks whether the ' ' (space) flag is acceptable for this specifier's
// conversion kind. The switch lists d, i, f, F, e, E, g, G, a and A.
// NOTE(review): the early exit and the return statements are not visible in
// this excerpt; presumably the listed kinds are the accepted ones -- confirm.
629 bool PrintfSpecifier::hasValidSpacePrefix() const {
633 // The space prefix only makes sense for signed conversions
634 switch (CS.getKind()) {
635 case ConversionSpecifier::dArg:
636 case ConversionSpecifier::iArg:
637 case ConversionSpecifier::fArg:
638 case ConversionSpecifier::FArg:
639 case ConversionSpecifier::eArg:
640 case ConversionSpecifier::EArg:
641 case ConversionSpecifier::gArg:
642 case ConversionSpecifier::GArg:
643 case ConversionSpecifier::aArg:
644 case ConversionSpecifier::AArg:
// Checks whether the '-' flag is acceptable for this specifier's conversion
// kind. The check only matters when IsLeftJustified is set, and the switch
// singles out the n conversion.
// NOTE(review): the return statements are not visible in this excerpt.
652 bool PrintfSpecifier::hasValidLeftJustified() const {
653 if (!IsLeftJustified)
656 // The left justified flag is valid for all conversions except n
657 switch (CS.getKind()) {
658 case ConversionSpecifier::nArg:
// Checks whether the thousands-grouping flag is acceptable for this
// specifier's conversion kind. The check only matters when
// HasThousandsGrouping is set; the switch lists d, i, u, f, F, g and G.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed kinds are the accepted ones -- confirm.
666 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
667 if (!HasThousandsGrouping)
670 switch (CS.getKind()) {
671 case ConversionSpecifier::dArg:
672 case ConversionSpecifier::iArg:
673 case ConversionSpecifier::uArg:
674 case ConversionSpecifier::fArg:
675 case ConversionSpecifier::FArg:
676 case ConversionSpecifier::gArg:
677 case ConversionSpecifier::GArg:
// Checks whether a precision is acceptable for this specifier's conversion
// kind. The check only matters when a precision was specified; the switch
// lists d, i, o, u, x, X, a, A, e, E, f, F, g, G and s.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed kinds are the accepted ones -- confirm.
684 bool PrintfSpecifier::hasValidPrecision() const {
685 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
688 // Precision is only valid with the diouxXaAeEfFgGs conversions
689 switch (CS.getKind()) {
690 case ConversionSpecifier::dArg:
691 case ConversionSpecifier::iArg:
692 case ConversionSpecifier::oArg:
693 case ConversionSpecifier::uArg:
694 case ConversionSpecifier::xArg:
695 case ConversionSpecifier::XArg:
696 case ConversionSpecifier::aArg:
697 case ConversionSpecifier::AArg:
698 case ConversionSpecifier::eArg:
699 case ConversionSpecifier::EArg:
700 case ConversionSpecifier::fArg:
701 case ConversionSpecifier::FArg:
702 case ConversionSpecifier::gArg:
703 case ConversionSpecifier::GArg:
704 case ConversionSpecifier::sArg:
711 bool PrintfSpecifier::hasValidFieldWidth() const {
712 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
715 // The field width is valid for all conversions except n
716 switch (CS.getKind()) {
717 case ConversionSpecifier::nArg: