1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision portion of a printf conversion specification and stores
// it on FS. Two forms are visible below: a non-positional amount, parsed with
// ParseNonPositionAmount and the running argument index, and a positional
// amount, parsed with ParsePositionAmount at PrecisionPos.
// NOTE(review): the branch conditions and the meaning of the bool result are
// not visible in this excerpt -- confirm against the full definition.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
// Non-positional form: *argIndex is handed to the amount parser.
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
// Positional form: the handler H and the specifier start are passed along to
// the position parser.
42 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
43 analyze_format_string::PrecisionPos);
// Parses one printf conversion specification. The visible steps are: scan for
// the percent sign that starts a specifier, then handle the argument position,
// flags, field width, precision, length modifier and finally the conversion
// character. A specifier cut short by the end of the string is reported via
// H.HandleIncompleteSpecifier; an unrecognized conversion character is
// reported via H.HandleInvalidPrintfConversionSpecifier. On success the result
// pairs the start pointer of the specifier with the parsed PrintfSpecifier.
51 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
55 const LangOptions &LO) {
57 using namespace clang::analyze_format_string;
58 using namespace clang::analyze_printf;
// Start is null until the beginning of a specifier has been recorded.
61 const char *Start = 0;
// NOTE(review): UpdateBeg is built from Beg and I; presumably it copies I back
// into Beg when this function returns -- confirm against UpdateOnReturn.
62 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
64 // Look for a '%' character that indicates the start of a format specifier.
65 for ( ; I != E ; ++I) {
68 // Detect spurious null characters, which are likely errors.
73 Start = I++; // Record the start of the format specifier.
78 // No format specifier found?
83 // No more characters left?
84 H.HandleIncompleteSpecifier(Start, E - Start);
89 if (ParseArgPosition(H, FS, Start, I, E))
93 // No more characters left?
94 H.HandleIncompleteSpecifier(Start, E - Start);
98 // Look for flags (if any).
// Each recognized flag setter receives I, the position of the flag character;
// any other character sets hasMore to false.
100 for ( ; I != E; ++I) {
102 default: hasMore = false; break;
104 // FIXME: POSIX specific. Always accept?
105 FS.setHasThousandsGrouping(I);
107 case '-': FS.setIsLeftJustified(I); break;
108 case '+': FS.setHasPlusPrefix(I); break;
109 case ' ': FS.setHasSpacePrefix(I); break;
110 case '#': FS.setHasAlternativeForm(I); break;
111 case '0': FS.setHasLeadingZeros(I); break;
118 // No more characters left?
119 H.HandleIncompleteSpecifier(Start, E - Start);
123 // Look for the field width (if any).
// A null argument-index pointer is passed when the specifier uses positional
// arguments; otherwise the running index is passed by address.
124 if (ParseFieldWidth(H, FS, Start, I, E,
125 FS.usesPositionalArg() ? 0 : &argIndex))
129 // No more characters left?
130 H.HandleIncompleteSpecifier(Start, E - Start);
134 // Look for the precision (if any).
138 H.HandleIncompleteSpecifier(Start, E - Start);
// Same argument-index convention as for the field width above.
142 if (ParsePrecision(H, FS, Start, I, E,
143 FS.usesPositionalArg() ? 0 : &argIndex))
147 // No more characters left?
148 H.HandleIncompleteSpecifier(Start, E - Start);
153 // Look for the length modifier.
// The incomplete-specifier report fires only when a length modifier was parsed
// and doing so reached the end of the string.
154 if (ParseLengthModifier(FS, I, E, LO) && I == E) {
155 // No more characters left?
156 H.HandleIncompleteSpecifier(Start, E - Start);
161 // Detect spurious null characters, which are likely errors.
166 // Finally, look for the conversion specifier.
// conversionPosition points at the conversion character; I moves past it.
167 const char *conversionPosition = I++;
// k starts as InvalidSpecifier and is overwritten by the matching case below.
168 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
169 switch (*conversionPosition) {
172 // C99: 7.19.6.1 (section 8).
173 case '%': k = ConversionSpecifier::PercentArg; break;
174 case 'A': k = ConversionSpecifier::AArg; break;
175 case 'E': k = ConversionSpecifier::EArg; break;
176 case 'F': k = ConversionSpecifier::FArg; break;
177 case 'G': k = ConversionSpecifier::GArg; break;
178 case 'X': k = ConversionSpecifier::XArg; break;
179 case 'a': k = ConversionSpecifier::aArg; break;
180 case 'c': k = ConversionSpecifier::cArg; break;
181 case 'd': k = ConversionSpecifier::dArg; break;
182 case 'e': k = ConversionSpecifier::eArg; break;
183 case 'f': k = ConversionSpecifier::fArg; break;
184 case 'g': k = ConversionSpecifier::gArg; break;
185 case 'i': k = ConversionSpecifier::iArg; break;
186 case 'n': k = ConversionSpecifier::nArg; break;
187 case 'o': k = ConversionSpecifier::oArg; break;
188 case 'p': k = ConversionSpecifier::pArg; break;
189 case 's': k = ConversionSpecifier::sArg; break;
190 case 'u': k = ConversionSpecifier::uArg; break;
191 case 'x': k = ConversionSpecifier::xArg; break;
193 case 'C': k = ConversionSpecifier::CArg; break;
194 case 'S': k = ConversionSpecifier::SArg; break;
196 case '@': k = ConversionSpecifier::ObjCObjArg; break;
198 case 'm': k = ConversionSpecifier::PrintErrno; break;
199 // FreeBSD format extensions
// These four are recognized only when LO.FormatExtensions is set; otherwise k
// keeps its initial value. Note that y is mapped onto iArg.
200 case 'b': if (LO.FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
201 case 'r': if (LO.FormatExtensions) k = ConversionSpecifier::rArg; break;
202 case 'y': if (LO.FormatExtensions) k = ConversionSpecifier::iArg; break;
203 case 'D': if (LO.FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
205 PrintfConversionSpecifier CS(conversionPosition, k);
206 FS.setConversionSpecifier(CS);
// A non-positional specifier that consumes a data argument takes the current
// argument index, which is then incremented.
207 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
208 FS.setArgIndex(argIndex++);
// NOTE(review): the statement guarded by this condition is not visible in this
// excerpt; presumably the b and D conversions account for a second argument,
// as the trailing comments on their case labels suggest -- confirm.
210 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
213 if (k == ConversionSpecifier::InvalidSpecifier) {
214 // Assume the conversion takes one argument.
// The negated handler result becomes the return value.
215 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
217 return PrintfSpecifierResult(Start, FS);
// Entry point for scanning a printf format string. Specifiers are parsed with
// ParsePrintfSpecifier, sharing one argument index that starts at 0, and each
// successfully parsed specifier is passed to H.HandlePrintfSpecifier. The
// trailing assertion checks that the scan position reached the end (I == E).
// NOTE(review): the loop structure and the returned values are not visible in
// this excerpt -- confirm the meaning of the bool result against the full
// definition.
220 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
223 const LangOptions &LO) {
225 unsigned argIndex = 0;
227 // Keep looking for a format specifier until we have exhausted the string.
229 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
231 // Did a fail-stop error of any kind occur when parsing the specifier?
232 // If so, don't do any more processing.
233 if (FSR.shouldStop())
235 // Did we exhaust the string or encounter an error that
236 // we can recover from?
239 // We have a format specifier. Pass it to the callback.
240 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
244 assert(I == E && "Format string not exhausted");
248 //===----------------------------------------------------------------------===//
249 // Methods on PrintfSpecifier.
250 //===----------------------------------------------------------------------===//
// Returns the argument type this specifier expects, chosen from the conversion
// specifier kind and the length modifier. A specifier that consumes no data
// argument yields ArgTypeResult::Invalid(); combinations with no visible
// mapping end at the default-constructed ArgTypeResult on the last line.
252 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx,
253 bool IsObjCLiteral) const {
254 const PrintfConversionSpecifier &CS = getConversionSpecifier();
256 if (!CS.consumesDataArgument())
257 return ArgTypeResult::Invalid();
// The c conversion: int with no length modifier, wint_t with the long modifier.
259 if (CS.getKind() == ConversionSpecifier::cArg)
260 switch (LM.getKind()) {
261 case LengthModifier::None: return Ctx.IntTy;
262 case LengthModifier::AsLong:
263 return ArgTypeResult(ArgTypeResult::WIntTy, "wint_t");
265 return ArgTypeResult::Invalid();
// Signed integer types selected by length modifier.
// NOTE(review): the condition guarding this switch is not visible in this
// excerpt -- confirm which conversions reach it.
269 switch (LM.getKind()) {
// AsLongDouble is mapped to long long here.
270 case LengthModifier::AsLongDouble:
272 return Ctx.LongLongTy;
273 case LengthModifier::None: return Ctx.IntTy;
274 case LengthModifier::AsChar: return ArgTypeResult::AnyCharTy;
275 case LengthModifier::AsShort: return Ctx.ShortTy;
276 case LengthModifier::AsLong: return Ctx.LongTy;
277 case LengthModifier::AsLongLong:
278 case LengthModifier::AsQuad:
279 return Ctx.LongLongTy;
280 case LengthModifier::AsIntMax:
281 return ArgTypeResult(Ctx.getIntMaxType(), "intmax_t");
282 case LengthModifier::AsSizeT:
283 // FIXME: How to get the corresponding signed version of size_t?
284 return ArgTypeResult();
285 case LengthModifier::AsPtrDiff:
286 return ArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t");
287 case LengthModifier::AsAllocate:
288 case LengthModifier::AsMAllocate:
289 return ArgTypeResult::Invalid();
// Unsigned counterpart of the switch above.
// NOTE(review): its guarding condition is likewise not visible here.
293 switch (LM.getKind()) {
// AsLongDouble is mapped to unsigned long long here.
294 case LengthModifier::AsLongDouble:
296 return Ctx.UnsignedLongLongTy;
297 case LengthModifier::None: return Ctx.UnsignedIntTy;
298 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
299 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
300 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
301 case LengthModifier::AsLongLong:
302 case LengthModifier::AsQuad:
303 return Ctx.UnsignedLongLongTy;
304 case LengthModifier::AsIntMax:
305 return ArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t");
306 case LengthModifier::AsSizeT:
307 return ArgTypeResult(Ctx.getSizeType(), "size_t");
308 case LengthModifier::AsPtrDiff:
309 // FIXME: How to get the corresponding unsigned
310 // version of ptrdiff_t?
311 return ArgTypeResult();
312 case LengthModifier::AsAllocate:
313 case LengthModifier::AsMAllocate:
314 return ArgTypeResult::Invalid();
// Floating-point conversions: long double when that length modifier is set.
317 if (CS.isDoubleArg()) {
318 if (LM.getKind() == LengthModifier::AsLongDouble)
319 return Ctx.LongDoubleTy;
// Remaining conversions: strings, wide characters, pointers and ObjC objects.
// NOTE(review): in the s, S and C cases two return statements appear back to
// back; the condition selecting the first (the unsigned short variant) is not
// visible in this excerpt -- presumably it depends on IsObjCLiteral; confirm.
323 switch (CS.getKind()) {
324 case ConversionSpecifier::sArg:
325 if (LM.getKind() == LengthModifier::AsWideChar) {
327 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst());
328 return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *");
330 return ArgTypeResult::CStrTy;
331 case ConversionSpecifier::SArg:
333 return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst());
334 return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *");
335 case ConversionSpecifier::CArg:
337 return Ctx.UnsignedShortTy;
338 return ArgTypeResult(Ctx.WCharTy, "wchar_t");
339 case ConversionSpecifier::pArg:
340 return ArgTypeResult::CPointerTy;
341 case ConversionSpecifier::ObjCObjArg:
342 return ArgTypeResult::ObjCPointerTy;
347 // FIXME: Handle other cases.
348 return ArgTypeResult();
// Adjusts this specifier so that it suits an argument of type QT: the length
// modifier, the conversion specifier and any flags that stop applying are
// updated in place. Character pointers are handled first, then builtin types
// pick a length modifier, then selected typedef names override it, and finally
// the conversion specifier is chosen from the kind of type.
// NOTE(review): the return statements are not visible in this excerpt, so the
// meaning of the bool result should be confirmed against the full definition.
351 bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
352 ASTContext &Ctx, bool IsObjCLiteral) {
353 // Handle strings first (char *, wchar_t *)
354 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
355 CS.setKind(ConversionSpecifier::sArg);
357 // Disable irrelevant flags
358 HasAlternativeForm = 0;
359 HasLeadingZeroes = 0;
361 // Set the long length modifier for wide characters
362 if (QT->getPointeeType()->isWideCharType())
363 LM.setKind(LengthModifier::AsWideChar);
365 LM.setKind(LengthModifier::None);
370 // We can only work with builtin types.
371 const BuiltinType *BT = QT->getAs<BuiltinType>();
375 // Set length modifier
376 switch (BT->getKind()) {
377 case BuiltinType::Bool:
378 case BuiltinType::WChar_U:
379 case BuiltinType::WChar_S:
380 case BuiltinType::Char16:
381 case BuiltinType::Char32:
382 case BuiltinType::UInt128:
383 case BuiltinType::Int128:
384 case BuiltinType::Half:
385 // Various types which are non-trivial to correct.
// The three macros below are defined empty and BUILTIN_TYPE expands to a case
// label; presumably this generates labels only for the builtin kinds that are
// neither signed, unsigned nor floating -- confirm against BuiltinTypes.def.
388 #define SIGNED_TYPE(Id, SingletonId)
389 #define UNSIGNED_TYPE(Id, SingletonId)
390 #define FLOATING_TYPE(Id, SingletonId)
391 #define BUILTIN_TYPE(Id, SingletonId) \
392 case BuiltinType::Id:
393 #include "clang/AST/BuiltinTypes.def"
394 // Misc other stuff which doesn't make sense here.
// Types that need no length modifier.
397 case BuiltinType::UInt:
398 case BuiltinType::Int:
399 case BuiltinType::Float:
400 case BuiltinType::Double:
401 LM.setKind(LengthModifier::None);
404 case BuiltinType::Char_U:
405 case BuiltinType::UChar:
406 case BuiltinType::Char_S:
407 case BuiltinType::SChar:
408 LM.setKind(LengthModifier::AsChar);
411 case BuiltinType::Short:
412 case BuiltinType::UShort:
413 LM.setKind(LengthModifier::AsShort);
416 case BuiltinType::Long:
417 case BuiltinType::ULong:
418 LM.setKind(LengthModifier::AsLong);
421 case BuiltinType::LongLong:
422 case BuiltinType::ULongLong:
423 LM.setKind(LengthModifier::AsLongLong);
426 case BuiltinType::LongDouble:
427 LM.setKind(LengthModifier::AsLongDouble);
431 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
// Applied only to typedef types, and only when C99 or C++0x is enabled. The
// typedef is matched by its identifier name.
432 if (isa<TypedefType>(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
433 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
434 if (Identifier->getName() == "size_t") {
435 LM.setKind(LengthModifier::AsSizeT);
436 } else if (Identifier->getName() == "ssize_t") {
437 // Not C99, but common in Unix.
438 LM.setKind(LengthModifier::AsSizeT);
439 } else if (Identifier->getName() == "intmax_t") {
440 LM.setKind(LengthModifier::AsIntMax);
// uintmax_t gets the same AsIntMax modifier as intmax_t.
441 } else if (Identifier->getName() == "uintmax_t") {
442 LM.setKind(LengthModifier::AsIntMax);
443 } else if (Identifier->getName() == "ptrdiff_t") {
444 LM.setKind(LengthModifier::AsPtrDiff);
448 // If fixing the length modifier was enough, we are done.
449 const analyze_printf::ArgTypeResult &ATR = getArgType(Ctx, IsObjCLiteral);
450 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
453 // Set conversion specifier and disable any flags which do not apply to it.
454 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// NOTE(review): the comment above says typedefs to char should fall through to
// the integer branches, yet this condition selects the c conversion only when
// QT is a typedef -- the isa test looks inverted; confirm the intent.
455 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
456 CS.setKind(ConversionSpecifier::cArg);
457 LM.setKind(LengthModifier::None);
458 Precision.setHowSpecified(OptionalAmount::NotSpecified);
459 HasAlternativeForm = 0;
460 HasLeadingZeroes = 0;
463 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
464 else if (QT->isRealFloatingType()) {
465 CS.setKind(ConversionSpecifier::fArg);
// Signed and unsigned integers both clear the alternative-form flag.
467 else if (QT->isSignedIntegerType()) {
468 CS.setKind(ConversionSpecifier::dArg);
469 HasAlternativeForm = 0;
471 else if (QT->isUnsignedIntegerType()) {
472 CS.setKind(ConversionSpecifier::uArg);
473 HasAlternativeForm = 0;
476 llvm_unreachable("Unexpected type");
// Writes this specifier to os in printf syntax. The visible pieces, in order,
// are: the positional argument index followed by a dollar sign (only when
// positional arguments are used), the flag characters that are set, the field
// width and the precision.
// NOTE(review): the statements that emit the remaining parts, including the
// conversion specifier, are not visible in this excerpt.
482 void PrintfSpecifier::toString(raw_ostream &os) const {
483 // Whilst some features have no defined order, we are using the order
484 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
488 if (usesPositionalArg()) {
489 os << getPositionalArgIndex() << "$";
// Flags, one character each.
493 if (IsLeftJustified) os << "-";
494 if (HasPlusPrefix) os << "+";
495 if (HasSpacePrefix) os << " ";
496 if (HasAlternativeForm) os << "#";
497 if (HasLeadingZeroes) os << "0";
499 // Minimum field width
500 FieldWidth.toString(os);
502 Precision.toString(os);
505 // Conversion specifier
// Validity check for the plus-prefix flag against the current conversion
// specifier. The switch enumerates d, i, the floating-point conversions
// (f F e E g G a A) and rArg.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
509 bool PrintfSpecifier::hasValidPlusPrefix() const {
513 // The plus prefix only makes sense for signed conversions
514 switch (CS.getKind()) {
515 case ConversionSpecifier::dArg:
516 case ConversionSpecifier::iArg:
517 case ConversionSpecifier::fArg:
518 case ConversionSpecifier::FArg:
519 case ConversionSpecifier::eArg:
520 case ConversionSpecifier::EArg:
521 case ConversionSpecifier::gArg:
522 case ConversionSpecifier::GArg:
523 case ConversionSpecifier::aArg:
524 case ConversionSpecifier::AArg:
525 case ConversionSpecifier::rArg:
// Validity check for the alternative-form flag against the current conversion
// specifier. The flag is tested first; when it is set, the switch enumerates
// the conversions named in the comment below, plus rArg, which that comment
// does not mention.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
533 bool PrintfSpecifier::hasValidAlternativeForm() const {
534 if (!HasAlternativeForm)
537 // Alternate form flag only valid with the oxXaAeEfFgG conversions
538 switch (CS.getKind()) {
539 case ConversionSpecifier::oArg:
540 case ConversionSpecifier::xArg:
541 case ConversionSpecifier::XArg:
542 case ConversionSpecifier::aArg:
543 case ConversionSpecifier::AArg:
544 case ConversionSpecifier::eArg:
545 case ConversionSpecifier::EArg:
546 case ConversionSpecifier::fArg:
547 case ConversionSpecifier::FArg:
548 case ConversionSpecifier::gArg:
549 case ConversionSpecifier::GArg:
550 case ConversionSpecifier::rArg:
// Validity check for the leading-zeroes flag against the current conversion
// specifier. The flag is tested first; when it is set, the switch enumerates
// exactly the conversions named in the comment below.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
558 bool PrintfSpecifier::hasValidLeadingZeros() const {
559 if (!HasLeadingZeroes)
562 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
563 switch (CS.getKind()) {
564 case ConversionSpecifier::dArg:
565 case ConversionSpecifier::iArg:
566 case ConversionSpecifier::oArg:
567 case ConversionSpecifier::uArg:
568 case ConversionSpecifier::xArg:
569 case ConversionSpecifier::XArg:
570 case ConversionSpecifier::aArg:
571 case ConversionSpecifier::AArg:
572 case ConversionSpecifier::eArg:
573 case ConversionSpecifier::EArg:
574 case ConversionSpecifier::fArg:
575 case ConversionSpecifier::FArg:
576 case ConversionSpecifier::gArg:
577 case ConversionSpecifier::GArg:
// Validity check for the space-prefix flag against the current conversion
// specifier. The switch enumerates d, i and the floating-point conversions
// (f F e E g G a A).
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
585 bool PrintfSpecifier::hasValidSpacePrefix() const {
589 // The space prefix only makes sense for signed conversions
590 switch (CS.getKind()) {
591 case ConversionSpecifier::dArg:
592 case ConversionSpecifier::iArg:
593 case ConversionSpecifier::fArg:
594 case ConversionSpecifier::FArg:
595 case ConversionSpecifier::eArg:
596 case ConversionSpecifier::EArg:
597 case ConversionSpecifier::gArg:
598 case ConversionSpecifier::GArg:
599 case ConversionSpecifier::aArg:
600 case ConversionSpecifier::AArg:
// Validity check for the left-justified flag against the current conversion
// specifier. The flag is tested first; when it is set, the switch singles out
// nArg, the one conversion the comment below names as the exception.
// NOTE(review): the return statements are not visible in this excerpt --
// confirm which result each path produces.
608 bool PrintfSpecifier::hasValidLeftJustified() const {
609 if (!IsLeftJustified)
612 // The left justified flag is valid for all conversions except n
613 switch (CS.getKind()) {
614 case ConversionSpecifier::nArg:
// Validity check for the thousands-grouping flag against the current
// conversion specifier. The flag is tested first; when it is set, the switch
// enumerates d, i, u, f, F, g and G.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
622 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
623 if (!HasThousandsGrouping)
626 switch (CS.getKind()) {
627 case ConversionSpecifier::dArg:
628 case ConversionSpecifier::iArg:
629 case ConversionSpecifier::uArg:
630 case ConversionSpecifier::fArg:
631 case ConversionSpecifier::FArg:
632 case ConversionSpecifier::gArg:
633 case ConversionSpecifier::GArg:
// Validity check for the precision against the current conversion specifier.
// Whether a precision was specified at all is tested first; when one was, the
// switch enumerates exactly the conversions named in the comment below.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the listed conversions are the accepted ones -- confirm.
640 bool PrintfSpecifier::hasValidPrecision() const {
641 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
644 // Precision is only valid with the diouxXaAeEfFgGs conversions
645 switch (CS.getKind()) {
646 case ConversionSpecifier::dArg:
647 case ConversionSpecifier::iArg:
648 case ConversionSpecifier::oArg:
649 case ConversionSpecifier::uArg:
650 case ConversionSpecifier::xArg:
651 case ConversionSpecifier::XArg:
652 case ConversionSpecifier::aArg:
653 case ConversionSpecifier::AArg:
654 case ConversionSpecifier::eArg:
655 case ConversionSpecifier::EArg:
656 case ConversionSpecifier::fArg:
657 case ConversionSpecifier::FArg:
658 case ConversionSpecifier::gArg:
659 case ConversionSpecifier::GArg:
660 case ConversionSpecifier::sArg:
667 bool PrintfSpecifier::hasValidFieldWidth() const {
668 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
671 // The field width is valid for all conversions except n
672 switch (CS.getKind()) {
673 case ConversionSpecifier::nArg: