1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision part of a printf specifier and records it in FS.
// Two paths are visible in this excerpt:
//   * the amount returned by ParseNonPositionAmount(Beg, E, *argIndex) is
//     stored directly as the precision;
//   * an amount is obtained from ParsePositionAmount(), tagged with
//     analyze_format_string::PrecisionPos.
// NOTE(review): the branch condition and the return statements are not
// visible here. ParsePrintfSpecifier passes a null argIndex when FS uses
// positional arguments, so argIndex presumably selects the path -- confirm.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44 analyze_format_string::PrecisionPos);
// Scans forward (up to E) for the next printf conversion specification and
// parses it into a PrintfSpecifier. The stages run in this order: the '%'
// introducer, an optional argument position, flags, field width, precision,
// length modifier, and finally the conversion character.
//
// H.HandleIncompleteSpecifier(Start, E - Start) is invoked at each stage
// where no characters remain, and H.HandleInvalidPrintfConversionSpecifier()
// is invoked when the conversion character is not recognized.
//
// argIndex is post-incremented when the conversion consumes a data argument
// and the specifier does not use positional arguments. FormatExtensions
// gates the FreeBSD-specific conversions 'b', 'r', 'y' and 'D'.
52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
56 bool FormatExtensions) {
58 using namespace clang::analyze_format_string;
59 using namespace clang::analyze_printf;
62 const char *Start = 0;
// NOTE(review): UpdateBeg is built from Beg and I; presumably it copies I
// back into Beg when this function returns -- confirm in
// FormatStringParsing.h.
63 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
65 // Look for a '%' character that indicates the start of a format specifier.
66 for ( ; I != E ; ++I) {
69 // Detect spurious null characters, which are likely errors.
74 Start = I++; // Record the start of the format specifier.
79 // No format specifier found?
84 // No more characters left?
85 H.HandleIncompleteSpecifier(Start, E - Start);
90 if (ParseArgPosition(H, FS, Start, I, E))
94 // No more characters left?
95 H.HandleIncompleteSpecifier(Start, E - Start);
99 // Look for flags (if any).
// Each recognized flag records its position I in FS; any other character
// clears hasMore.
101 for ( ; I != E; ++I) {
103 default: hasMore = false; break;
105 // FIXME: POSIX specific. Always accept?
106 FS.setHasThousandsGrouping(I);
108 case '-': FS.setIsLeftJustified(I); break;
109 case '+': FS.setHasPlusPrefix(I); break;
110 case ' ': FS.setHasSpacePrefix(I); break;
111 case '#': FS.setHasAlternativeForm(I); break;
112 case '0': FS.setHasLeadingZeros(I); break;
119 // No more characters left?
120 H.HandleIncompleteSpecifier(Start, E - Start);
124 // Look for the field width (if any).
// A null argIndex pointer is passed when the specifier uses positional
// arguments; otherwise the callee receives the running argIndex.
125 if (ParseFieldWidth(H, FS, Start, I, E,
126 FS.usesPositionalArg() ? 0 : &argIndex))
130 // No more characters left?
131 H.HandleIncompleteSpecifier(Start, E - Start);
135 // Look for the precision (if any).
139 H.HandleIncompleteSpecifier(Start, E - Start);
// Same null-versus-running argIndex convention as for the field width.
143 if (ParsePrecision(H, FS, Start, I, E,
144 FS.usesPositionalArg() ? 0 : &argIndex))
148 // No more characters left?
149 H.HandleIncompleteSpecifier(Start, E - Start);
154 // Look for the length modifier.
155 if (ParseLengthModifier(FS, I, E) && I == E) {
156 // No more characters left?
157 H.HandleIncompleteSpecifier(Start, E - Start);
162 // Detect spurious null characters, which are likely errors.
167 // Finally, look for the conversion specifier.
// Post-increment: conversionPosition keeps the conversion character while I
// moves one past it.
168 const char *conversionPosition = I++;
// k stays InvalidSpecifier unless one of the cases below assigns it.
169 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
170 switch (*conversionPosition) {
173 // C99: 7.19.6.1 (section 8).
174 case '%': k = ConversionSpecifier::PercentArg; break;
175 case 'A': k = ConversionSpecifier::AArg; break;
176 case 'E': k = ConversionSpecifier::EArg; break;
177 case 'F': k = ConversionSpecifier::FArg; break;
178 case 'G': k = ConversionSpecifier::GArg; break;
179 case 'X': k = ConversionSpecifier::XArg; break;
180 case 'a': k = ConversionSpecifier::aArg; break;
181 case 'c': k = ConversionSpecifier::cArg; break;
182 case 'd': k = ConversionSpecifier::dArg; break;
183 case 'e': k = ConversionSpecifier::eArg; break;
184 case 'f': k = ConversionSpecifier::fArg; break;
185 case 'g': k = ConversionSpecifier::gArg; break;
186 case 'i': k = ConversionSpecifier::iArg; break;
187 case 'n': k = ConversionSpecifier::nArg; break;
188 case 'o': k = ConversionSpecifier::oArg; break;
189 case 'p': k = ConversionSpecifier::pArg; break;
190 case 's': k = ConversionSpecifier::sArg; break;
191 case 'u': k = ConversionSpecifier::uArg; break;
192 case 'x': k = ConversionSpecifier::xArg; break;
194 case 'C': k = ConversionSpecifier::CArg; break;
195 case 'S': k = ConversionSpecifier::SArg; break;
197 case '@': k = ConversionSpecifier::ObjCObjArg; break;
199 case 'm': k = ConversionSpecifier::PrintErrno; break;
200 // FreeBSD format extensions
// These four are only recognized when FormatExtensions is true; otherwise k
// keeps its InvalidSpecifier value.
201 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
202 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
// 'y' is mapped onto the same kind as 'i'.
203 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
204 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
206 PrintfConversionSpecifier CS(conversionPosition, k);
207 FS.setConversionSpecifier(CS);
208 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
// NOTE(review): the statement guarded by this check is outside this excerpt;
// the comments on the 'b' and 'D' cases suggest these conversions take two
// arguments, so it presumably adjusts argIndex -- confirm.
211 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
214 if (k == ConversionSpecifier::InvalidSpecifier) {
215 // Assume the conversion takes one argument.
// The negated handler result is returned, converted to the
// PrintfSpecifierResult return type.
216 return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
218 return PrintfSpecifierResult(Start, FS);
// Drives the parsing of a printf format string. Starting from argument
// index 0, it calls ParsePrintfSpecifier() and hands each successfully
// parsed specifier to H.HandlePrintfSpecifier(). Processing ends early when
// a result reports shouldStop(). The trailing assert checks that the
// cursor I reached E.
// NOTE(review): the loop header, the remaining parameters and the return
// values are outside this excerpt.
221 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
224 bool FormatExtensions) {
226 unsigned argIndex = 0;
228 // Keep looking for a format specifier until we have exhausted the string.
230 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
232 // Did a fail-stop error of any kind occur when parsing the specifier?
233 // If so, don't do any more processing.
234 if (FSR.shouldStop())
236 // Did we exhaust the string or encounter an error that
237 // we can recover from?
240 // We have a format specifier. Pass it to the callback.
241 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
245 assert(I == E && "Format string not exhausted");
249 //===----------------------------------------------------------------------===//
250 // Methods on ConversionSpecifier.
251 //===----------------------------------------------------------------------===//
// Returns the textual spelling of this conversion specifier as a string
// literal (for example "d" for dArg or "%" for PercentArg).
// InvalidSpecifier yields NULL, so the result must be checked before use.
252 const char *ConversionSpecifier::toString() const {
254 case dArg: return "d";
255 case iArg: return "i";
256 case oArg: return "o";
257 case uArg: return "u";
258 case xArg: return "x";
259 case XArg: return "X";
260 case fArg: return "f";
261 case FArg: return "F";
262 case eArg: return "e";
263 case EArg: return "E";
264 case gArg: return "g";
265 case GArg: return "G";
266 case aArg: return "a";
267 case AArg: return "A";
268 case cArg: return "c";
269 case sArg: return "s";
270 case pArg: return "p";
271 case nArg: return "n";
272 case PercentArg: return "%";
273 case ScanListArg: return "[";
274 case InvalidSpecifier: return NULL;
276 // MacOS X unicode extensions.
277 case CArg: return "C";
278 case SArg: return "S";
280 // Objective-C specific specifiers.
281 case ObjCObjArg: return "@";
283 // FreeBSD specific specifiers.
284 case bArg: return "b";
285 case DArg: return "D";
286 case rArg: return "r";
288 // GlibC specific specifiers.
289 case PrintErrno: return "m";
294 //===----------------------------------------------------------------------===//
295 // Methods on PrintfSpecifier.
296 //===----------------------------------------------------------------------===//
// Computes the argument type expected by this specifier from its conversion
// kind and length modifier (LM).
//   * Conversions that consume no data argument yield
//     ArgTypeResult::Invalid().
//   * %c maps to int with no length modifier and to WIntTy with 'l'.
//   * Two length-modifier switches follow, one returning signed and one
//     returning unsigned integer types.
//   * Floating-point conversions with the long-double modifier map to
//     long double.
//   * %s, %S and %p map to C-string, wide-C-string and pointer results.
// A default-constructed ArgTypeResult is returned where no case applies.
// NOTE(review): the conditions guarding the two integer switches are outside
// this excerpt.
298 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
299 const PrintfConversionSpecifier &CS = getConversionSpecifier();
301 if (!CS.consumesDataArgument())
302 return ArgTypeResult::Invalid();
304 if (CS.getKind() == ConversionSpecifier::cArg)
305 switch (LM.getKind()) {
306 case LengthModifier::None: return Ctx.IntTy;
307 case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
309 return ArgTypeResult::Invalid();
// Length-modifier mapping that yields signed integer types.
313 switch (LM.getKind()) {
314 case LengthModifier::AsLongDouble:
315 return ArgTypeResult::Invalid();
316 case LengthModifier::None: return Ctx.IntTy;
317 case LengthModifier::AsChar: return Ctx.SignedCharTy;
318 case LengthModifier::AsShort: return Ctx.ShortTy;
319 case LengthModifier::AsLong: return Ctx.LongTy;
320 case LengthModifier::AsLongLong: return Ctx.LongLongTy;
321 case LengthModifier::AsIntMax:
322 // FIXME: Return unknown for now.
323 return ArgTypeResult();
// NOTE(review): the other cases in this switch return signed types, while
// Ctx.getSizeType() is presumably unsigned -- confirm this is intended.
324 case LengthModifier::AsSizeT: return Ctx.getSizeType();
325 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
// Length-modifier mapping that yields unsigned integer types; the intmax,
// size_t and ptrdiff_t cases return a default-constructed ArgTypeResult, as
// explained by the FIXMEs.
329 switch (LM.getKind()) {
330 case LengthModifier::AsLongDouble:
331 return ArgTypeResult::Invalid();
332 case LengthModifier::None: return Ctx.UnsignedIntTy;
333 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
334 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
335 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
336 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
337 case LengthModifier::AsIntMax:
338 // FIXME: Return unknown for now.
339 return ArgTypeResult();
340 case LengthModifier::AsSizeT:
341 // FIXME: How to get the corresponding unsigned
342 // version of size_t?
343 return ArgTypeResult();
344 case LengthModifier::AsPtrDiff:
345 // FIXME: How to get the corresponding unsigned
346 // version of ptrdiff_t?
347 return ArgTypeResult();
// Floating-point conversions: long double when the long-double modifier is
// present (the result for other modifiers is outside this excerpt).
350 if (CS.isDoubleArg()) {
351 if (LM.getKind() == LengthModifier::AsLongDouble)
352 return Ctx.LongDoubleTy;
// Remaining conversions are dispatched on the conversion kind itself.
356 switch (CS.getKind()) {
357 case ConversionSpecifier::sArg:
358 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
359 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
360 case ConversionSpecifier::SArg:
361 // FIXME: This appears to be Mac OS X specific.
362 return ArgTypeResult::WCStrTy;
363 case ConversionSpecifier::CArg:
365 case ConversionSpecifier::pArg:
366 return ArgTypeResult::CPointerTy;
371 // FIXME: Handle other cases.
372 return ArgTypeResult();
// Rewrites this specifier in place so that it matches the type QT: it picks
// a conversion kind (CS) and a length modifier (LM), and clears flags and
// precision where the chosen conversion does not use them.
//   * A pointer to any character type becomes %s; wide-character pointees
//     additionally get the AsWideChar length modifier.
//   * Otherwise QT is checked for being a builtin type, and the length
//     modifier is derived from the BuiltinType kind.
//   * The conversion kind is then chosen from a chain of type tests
//     (character, floating, pointer, signed integer, unsigned integer).
// NOTE(review): the return statements are outside this excerpt, so the
// meaning of the bool result cannot be confirmed from here.
375 bool PrintfSpecifier::fixType(QualType QT) {
376 // Handle strings first (char *, wchar_t *)
377 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
378 CS.setKind(ConversionSpecifier::sArg);
380 // Disable irrelevant flags
381 HasAlternativeForm = 0;
382 HasLeadingZeroes = 0;
384 // Set the long length modifier for wide characters
385 if (QT->getPointeeType()->isWideCharType())
386 LM.setKind(LengthModifier::AsWideChar);
391 // We can only work with builtin types.
392 if (!QT->isBuiltinType())
395 // Everything else should be a base type
396 const BuiltinType *BT = QT->getAs<BuiltinType>();
398 // Set length modifier
399 switch (BT->getKind()) {
401 // The rest of the conversions are either optional or for non-builtin types
402 LM.setKind(LengthModifier::None);
405 case BuiltinType::Char_U:
406 case BuiltinType::UChar:
407 case BuiltinType::Char_S:
408 case BuiltinType::SChar:
409 LM.setKind(LengthModifier::AsChar);
412 case BuiltinType::Short:
413 case BuiltinType::UShort:
414 LM.setKind(LengthModifier::AsShort);
// Wide character kinds share the 'long' length modifier with Long/ULong.
417 case BuiltinType::WChar_S:
418 case BuiltinType::WChar_U:
419 case BuiltinType::Long:
420 case BuiltinType::ULong:
421 LM.setKind(LengthModifier::AsLong);
424 case BuiltinType::LongLong:
425 case BuiltinType::ULongLong:
426 LM.setKind(LengthModifier::AsLongLong);
429 case BuiltinType::LongDouble:
430 LM.setKind(LengthModifier::AsLongDouble);
434 // Set conversion specifier and disable any flags which do not apply to it.
435 // Let typedefs to char fall through to int, as %c is silly for uint8_t.
// NOTE(review): as written, this condition selects %c only when QT *is* a
// TypedefType, which appears to contradict the comment above (typedefs to
// char are supposed to fall through to the integer cases). A negated
// isa<TypedefType> test may have been intended -- confirm.
436 if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
437 CS.setKind(ConversionSpecifier::cArg);
438 LM.setKind(LengthModifier::None);
439 Precision.setHowSpecified(OptionalAmount::NotSpecified);
440 HasAlternativeForm = 0;
441 HasLeadingZeroes = 0;
444 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
445 else if (QT->isRealFloatingType()) {
446 CS.setKind(ConversionSpecifier::fArg);
// NOTE(review): a builtin-type check precedes this chain (its action is not
// visible here); if that check returns early, pointer types may never reach
// this branch -- confirm.
448 else if (QT->isPointerType()) {
449 CS.setKind(ConversionSpecifier::pArg);
450 Precision.setHowSpecified(OptionalAmount::NotSpecified);
451 HasAlternativeForm = 0;
452 HasLeadingZeroes = 0;
455 else if (QT->isSignedIntegerType()) {
456 CS.setKind(ConversionSpecifier::dArg);
457 HasAlternativeForm = 0;
459 else if (QT->isUnsignedIntegerType()) {
460 CS.setKind(ConversionSpecifier::uArg);
461 HasAlternativeForm = 0;
// Serializes this specifier to os. The steps visible here, in order:
//   1. the positional argument index followed by "$", only when positional
//      arguments are in use;
//   2. one character per set flag, in the fixed order '-', '+', ' ', '#',
//      '0';
//   3. the field width, then the precision.
// NOTE(review): emission of the leading '%', the length modifier and the
// conversion character is outside this excerpt.
471 void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
472 // Whilst some features have no defined order, we are using the order
473 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
477 if (usesPositionalArg()) {
478 os << getPositionalArgIndex() << "$";
482 if (IsLeftJustified) os << "-";
483 if (HasPlusPrefix) os << "+";
484 if (HasSpacePrefix) os << " ";
485 if (HasAlternativeForm) os << "#";
486 if (HasLeadingZeroes) os << "0";
488 // Minimum field width
489 FieldWidth.toString(os);
491 Precision.toString(os);
494 // Conversion specifier
// Checks whether the '+' flag is meaningful for the current conversion kind.
// The kinds listed in the switch are d, i, f, F, e, E, g, G, a, A and the
// FreeBSD 'r' conversion.
// NOTE(review): the early-out and the return statements are outside this
// excerpt; presumably the listed kinds yield true -- confirm.
498 bool PrintfSpecifier::hasValidPlusPrefix() const {
502 // The plus prefix only makes sense for signed conversions
503 switch (CS.getKind()) {
504 case ConversionSpecifier::dArg:
505 case ConversionSpecifier::iArg:
506 case ConversionSpecifier::fArg:
507 case ConversionSpecifier::FArg:
508 case ConversionSpecifier::eArg:
509 case ConversionSpecifier::EArg:
510 case ConversionSpecifier::gArg:
511 case ConversionSpecifier::GArg:
512 case ConversionSpecifier::aArg:
513 case ConversionSpecifier::AArg:
514 case ConversionSpecifier::rArg:
// Checks whether the '#' (alternative form) flag is meaningful for the
// current conversion kind. The flag is only examined when
// HasAlternativeForm is set. Besides the o, x, X, a, A, e, E, f, F, g, G
// conversions named in the comment below, the switch also lists the FreeBSD
// 'r' conversion.
// NOTE(review): the return statements are outside this excerpt; presumably
// the listed kinds yield true -- confirm.
522 bool PrintfSpecifier::hasValidAlternativeForm() const {
523 if (!HasAlternativeForm)
526 // Alternate form flag only valid with the oxXaAeEfFgG conversions
527 switch (CS.getKind()) {
528 case ConversionSpecifier::oArg:
529 case ConversionSpecifier::xArg:
530 case ConversionSpecifier::XArg:
531 case ConversionSpecifier::aArg:
532 case ConversionSpecifier::AArg:
533 case ConversionSpecifier::eArg:
534 case ConversionSpecifier::EArg:
535 case ConversionSpecifier::fArg:
536 case ConversionSpecifier::FArg:
537 case ConversionSpecifier::gArg:
538 case ConversionSpecifier::GArg:
539 case ConversionSpecifier::rArg:
// Checks whether the '0' (leading zeroes) flag is meaningful for the current
// conversion kind. The flag is only examined when HasLeadingZeroes is set.
// The kinds listed are d, i, o, u, x, X, a, A, e, E, f, F, g and G.
// NOTE(review): the return statements are outside this excerpt; presumably
// the listed kinds yield true -- confirm.
547 bool PrintfSpecifier::hasValidLeadingZeros() const {
548 if (!HasLeadingZeroes)
551 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
552 switch (CS.getKind()) {
553 case ConversionSpecifier::dArg:
554 case ConversionSpecifier::iArg:
555 case ConversionSpecifier::oArg:
556 case ConversionSpecifier::uArg:
557 case ConversionSpecifier::xArg:
558 case ConversionSpecifier::XArg:
559 case ConversionSpecifier::aArg:
560 case ConversionSpecifier::AArg:
561 case ConversionSpecifier::eArg:
562 case ConversionSpecifier::EArg:
563 case ConversionSpecifier::fArg:
564 case ConversionSpecifier::FArg:
565 case ConversionSpecifier::gArg:
566 case ConversionSpecifier::GArg:
// Checks whether the ' ' (space prefix) flag is meaningful for the current
// conversion kind. The kinds listed are d, i, f, F, e, E, g, G, a and A;
// unlike hasValidPlusPrefix(), the FreeBSD 'r' conversion is not listed.
// NOTE(review): the early-out and the return statements are outside this
// excerpt; presumably the listed kinds yield true -- confirm.
574 bool PrintfSpecifier::hasValidSpacePrefix() const {
578 // The space prefix only makes sense for signed conversions
579 switch (CS.getKind()) {
580 case ConversionSpecifier::dArg:
581 case ConversionSpecifier::iArg:
582 case ConversionSpecifier::fArg:
583 case ConversionSpecifier::FArg:
584 case ConversionSpecifier::eArg:
585 case ConversionSpecifier::EArg:
586 case ConversionSpecifier::gArg:
587 case ConversionSpecifier::GArg:
588 case ConversionSpecifier::aArg:
589 case ConversionSpecifier::AArg:
// Checks whether the '-' (left-justify) flag is meaningful for the current
// conversion kind. The flag is only examined when IsLeftJustified is set,
// and the only kind singled out by the switch is 'n'.
// NOTE(review): the return statements are outside this excerpt; per the
// comment below, 'n' is presumably the one rejected kind -- confirm.
597 bool PrintfSpecifier::hasValidLeftJustified() const {
598 if (!IsLeftJustified)
601 // The left justified flag is valid for all conversions except n
602 switch (CS.getKind()) {
603 case ConversionSpecifier::nArg:
// Checks whether the thousands-grouping flag (the POSIX-specific flag parsed
// via setHasThousandsGrouping) is meaningful for the current conversion
// kind. The flag is only examined when HasThousandsGrouping is set. The
// kinds listed are d, i, u, f, F, g and G.
// NOTE(review): the return statements are outside this excerpt; presumably
// the listed kinds yield true -- confirm.
611 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
612 if (!HasThousandsGrouping)
615 switch (CS.getKind()) {
616 case ConversionSpecifier::dArg:
617 case ConversionSpecifier::iArg:
618 case ConversionSpecifier::uArg:
619 case ConversionSpecifier::fArg:
620 case ConversionSpecifier::FArg:
621 case ConversionSpecifier::gArg:
622 case ConversionSpecifier::GArg:
// Checks whether a precision is meaningful for the current conversion kind.
// The check only proceeds when a precision was actually specified. The
// kinds listed are d, i, o, u, x, X, a, A, e, E, f, F, g, G and s.
// NOTE(review): the return statements are outside this excerpt; presumably
// the listed kinds yield true -- confirm.
629 bool PrintfSpecifier::hasValidPrecision() const {
630 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
633 // Precision is only valid with the diouxXaAeEfFgGs conversions
634 switch (CS.getKind()) {
635 case ConversionSpecifier::dArg:
636 case ConversionSpecifier::iArg:
637 case ConversionSpecifier::oArg:
638 case ConversionSpecifier::uArg:
639 case ConversionSpecifier::xArg:
640 case ConversionSpecifier::XArg:
641 case ConversionSpecifier::aArg:
642 case ConversionSpecifier::AArg:
643 case ConversionSpecifier::eArg:
644 case ConversionSpecifier::EArg:
645 case ConversionSpecifier::fArg:
646 case ConversionSpecifier::FArg:
647 case ConversionSpecifier::gArg:
648 case ConversionSpecifier::GArg:
649 case ConversionSpecifier::sArg:
656 bool PrintfSpecifier::hasValidFieldWidth() const {
657 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
660 // The field width is valid for all conversions except n
661 switch (CS.getKind()) {
662 case ConversionSpecifier::nArg: