1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in printf and friends. The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
25 using namespace clang;
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28 PrintfSpecifierResult;
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
34 using analyze_format_string::ParseNonPositionAmount;
// Parses the precision part of a printf specifier.
// Two paths are visible: a non-positional amount is obtained from
// ParseNonPositionAmount() (which is handed *argIndex) and stored on FS via
// setPrecision(), while a positional amount is parsed with
// ParsePositionAmount() using the PrecisionPos position kind.
// NOTE(review): callers pass a null argIndex when FS.usesPositionalArg();
// presumably that is what selects the positional path -- confirm.
36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37 const char *Start, const char *&Beg, const char *E,
40 FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
43 const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44 analyze_format_string::PrecisionPos);
// Locates and parses the next printf conversion specification.
// The steps below run in this order: argument position, flags, field width,
// precision, length modifier and finally the conversion character.
// H.HandleIncompleteSpecifier(Start, E - Start) is the callback used when the
// string runs out in the middle of a specification.
// FormatExtensions gates the FreeBSD-only conversions 'b', 'r', 'y' and 'D';
// when it is false those characters leave k as InvalidSpecifier.
52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
56 bool FormatExtensions) {
58 using namespace clang::analyze_format_string;
59 using namespace clang::analyze_printf;
62 const char *Start = 0;
// NOTE(review): presumably writes I back into Beg when this function
// returns -- confirm against the definition of UpdateOnReturn.
63 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
65 // Look for a '%' character that indicates the start of a format specifier.
66 for ( ; I != E ; ++I) {
69 // Detect spurious null characters, which are likely errors.
74 Start = I++; // Record the start of the format specifier.
79 // No format specifier found?
84 // No more characters left?
85 H.HandleIncompleteSpecifier(Start, E - Start);
90 if (ParseArgPosition(H, FS, Start, I, E))
94 // No more characters left?
95 H.HandleIncompleteSpecifier(Start, E - Start);
99 // Look for flags (if any).
101 for ( ; I != E; ++I) {
// Any character that is not one of the five flag characters ends the flag
// scan; each flag setter receives the position of the flag character.
103 default: hasMore = false; break;
104 case '-': FS.setIsLeftJustified(I); break;
105 case '+': FS.setHasPlusPrefix(I); break;
106 case ' ': FS.setHasSpacePrefix(I); break;
107 case '#': FS.setHasAlternativeForm(I); break;
108 case '0': FS.setHasLeadingZeros(I); break;
115 // No more characters left?
116 H.HandleIncompleteSpecifier(Start, E - Start);
120 // Look for the field width (if any).
// A null argIndex pointer is passed when the specifier is positional.
121 if (ParseFieldWidth(H, FS, Start, I, E,
122 FS.usesPositionalArg() ? 0 : &argIndex))
126 // No more characters left?
127 H.HandleIncompleteSpecifier(Start, E - Start);
131 // Look for the precision (if any).
135 H.HandleIncompleteSpecifier(Start, E - Start);
139 if (ParsePrecision(H, FS, Start, I, E,
140 FS.usesPositionalArg() ? 0 : &argIndex))
144 // No more characters left?
145 H.HandleIncompleteSpecifier(Start, E - Start);
150 // Look for the length modifier.
151 if (ParseLengthModifier(FS, I, E) && I == E) {
152 // No more characters left?
153 H.HandleIncompleteSpecifier(Start, E - Start);
158 // Detect spurious null characters, which are likely errors.
163 // Finally, look for the conversion specifier.
// conversionPosition keeps the address of the conversion character while I
// moves one past it.
164 const char *conversionPosition = I++;
// k stays InvalidSpecifier unless one of the cases below assigns it.
165 ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
166 switch (*conversionPosition) {
169 // C99: 7.19.6.1 (section 8).
170 case '%': k = ConversionSpecifier::PercentArg; break;
171 case 'A': k = ConversionSpecifier::AArg; break;
172 case 'E': k = ConversionSpecifier::EArg; break;
173 case 'F': k = ConversionSpecifier::FArg; break;
174 case 'G': k = ConversionSpecifier::GArg; break;
175 case 'X': k = ConversionSpecifier::XArg; break;
176 case 'a': k = ConversionSpecifier::aArg; break;
177 case 'c': k = ConversionSpecifier::cArg; break;
178 case 'd': k = ConversionSpecifier::dArg; break;
179 case 'e': k = ConversionSpecifier::eArg; break;
180 case 'f': k = ConversionSpecifier::fArg; break;
181 case 'g': k = ConversionSpecifier::gArg; break;
182 case 'i': k = ConversionSpecifier::iArg; break;
183 case 'n': k = ConversionSpecifier::nArg; break;
184 case 'o': k = ConversionSpecifier::oArg; break;
185 case 'p': k = ConversionSpecifier::pArg; break;
186 case 's': k = ConversionSpecifier::sArg; break;
187 case 'u': k = ConversionSpecifier::uArg; break;
188 case 'x': k = ConversionSpecifier::xArg; break;
189 // Mac OS X (unicode) specific
190 case 'C': k = ConversionSpecifier::CArg; break;
191 case 'S': k = ConversionSpecifier::SArg; break;
193 case '@': k = ConversionSpecifier::ObjCObjArg; break;
195 case 'm': k = ConversionSpecifier::PrintErrno; break;
196 // FreeBSD format extensions
197 case 'b': if (FormatExtensions) k = ConversionSpecifier::bArg; break; /* check for int and then char * */
198 case 'r': if (FormatExtensions) k = ConversionSpecifier::rArg; break;
// 'y' reuses the iArg kind rather than having a kind of its own.
199 case 'y': if (FormatExtensions) k = ConversionSpecifier::iArg; break;
200 case 'D': if (FormatExtensions) k = ConversionSpecifier::DArg; break; /* check for u_char * pointer and a char * string */
202 PrintfConversionSpecifier CS(conversionPosition, k);
203 FS.setConversionSpecifier(CS);
// Only a specifier that consumes a data argument and is not positional takes
// the next sequential argument index; argIndex is advanced afterwards.
204 if (CS.consumesDataArgument() && !FS.usesPositionalArg())
205 FS.setArgIndex(argIndex++);
207 if (k == ConversionSpecifier::bArg || k == ConversionSpecifier::DArg)
210 if (k == ConversionSpecifier::InvalidSpecifier) {
211 // Assume the conversion takes one argument.
// The handler's boolean answer is negated before being returned.
212 return !H.HandleInvalidPrintfConversionSpecifier(FS, Beg, I - Beg);
// Successful parse: the result carries the specifier's start and FS.
214 return PrintfSpecifierResult(Start, FS);
// Entry point for analysing a printf format string.
// Calls ParsePrintfSpecifier() repeatedly with a single argIndex counter that
// starts at 0, and passes each parsed specifier to H.HandlePrintfSpecifier().
// Processing stops as soon as a result reports shouldStop(). The final assert
// checks that the cursor I has reached the end E.
217 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
220 bool FormatExtensions) {
// Running argument index shared by every specifier in this format string.
222 unsigned argIndex = 0;
224 // Keep looking for a format specifier until we have exhausted the string.
226 const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
228 // Did a fail-stop error of any kind occur when parsing the specifier?
229 // If so, don't do any more processing.
230 if (FSR.shouldStop())
232 // Did we exhaust the string or encounter an error that
233 // we can recover from?
236 // We have a format specifier. Pass it to the callback.
237 if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
241 assert(I == E && "Format string not exhausted");
245 //===----------------------------------------------------------------------===//
246 // Methods on ConversionSpecifier.
247 //===----------------------------------------------------------------------===//
// Returns the textual spelling of a conversion specifier kind as a string
// literal (for example "d" for dArg and "[" for ScanListArg).
// InvalidSpecifier has no spelling and yields NULL, so callers must be
// prepared for a null pointer.
248 const char *ConversionSpecifier::toString() const {
250 case dArg: return "d";
251 case iArg: return "i";
252 case oArg: return "o";
253 case uArg: return "u";
254 case xArg: return "x";
255 case XArg: return "X";
256 case fArg: return "f";
257 case FArg: return "F";
258 case eArg: return "e";
259 case EArg: return "E";
260 case gArg: return "g";
261 case GArg: return "G";
262 case aArg: return "a";
263 case AArg: return "A";
264 case cArg: return "c";
265 case sArg: return "s";
266 case pArg: return "p";
267 case nArg: return "n";
268 case PercentArg: return "%";
269 case ScanListArg: return "[";
270 case InvalidSpecifier: return NULL;
272 // MacOS X unicode extensions.
273 case CArg: return "C";
274 case SArg: return "S";
276 // Objective-C specific specifiers.
277 case ObjCObjArg: return "@";
279 // FreeBSD specific specifiers.
280 case bArg: return "b";
281 case DArg: return "D";
282 case rArg: return "r";
284 // GlibC specific specifiers.
285 case PrintErrno: return "m";
290 //===----------------------------------------------------------------------===//
291 // Methods on PrintfSpecifier.
292 //===----------------------------------------------------------------------===//
// Computes the argument type expected by this specifier from its conversion
// kind and length modifier (LM).
// Ctx supplies the concrete AST types (IntTy, LongTy, getSizeType(), ...).
// Returns ArgTypeResult::Invalid() when the conversion consumes no data
// argument or the length modifier is rejected, and a default-constructed
// ArgTypeResult where the FIXMEs below say the type is not yet known.
294 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
295 const PrintfConversionSpecifier &CS = getConversionSpecifier();
297 if (!CS.consumesDataArgument())
298 return ArgTypeResult::Invalid();
// Character conversion: no length modifier maps to IntTy, AsLong to WIntTy.
300 if (CS.getKind() == ConversionSpecifier::cArg)
301 switch (LM.getKind()) {
302 case LengthModifier::None: return Ctx.IntTy;
303 case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
305 return ArgTypeResult::Invalid();
// Length-modifier table that yields the signed integer types.
309 switch (LM.getKind()) {
310 case LengthModifier::AsLongDouble:
311 return ArgTypeResult::Invalid();
312 case LengthModifier::None: return Ctx.IntTy;
313 case LengthModifier::AsChar: return Ctx.SignedCharTy;
314 case LengthModifier::AsShort: return Ctx.ShortTy;
315 case LengthModifier::AsLong: return Ctx.LongTy;
316 case LengthModifier::AsLongLong: return Ctx.LongLongTy;
317 case LengthModifier::AsIntMax:
318 // FIXME: Return unknown for now.
319 return ArgTypeResult();
// NOTE(review): size_t is an unsigned type, yet it is returned here alongside
// the signed types -- confirm this is the intended match for AsSizeT.
320 case LengthModifier::AsSizeT: return Ctx.getSizeType();
321 case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
// Length-modifier table that yields the unsigned integer types.
325 switch (LM.getKind()) {
326 case LengthModifier::AsLongDouble:
327 return ArgTypeResult::Invalid();
328 case LengthModifier::None: return Ctx.UnsignedIntTy;
329 case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
330 case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
331 case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
332 case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
333 case LengthModifier::AsIntMax:
334 // FIXME: Return unknown for now.
335 return ArgTypeResult();
336 case LengthModifier::AsSizeT:
337 // FIXME: How to get the corresponding unsigned
338 // version of size_t?
339 return ArgTypeResult();
340 case LengthModifier::AsPtrDiff:
341 // FIXME: How to get the corresponding unsigned
342 // version of ptrdiff_t?
343 return ArgTypeResult();
// Floating-point conversions: AsLongDouble selects LongDoubleTy.
346 if (CS.isDoubleArg()) {
347 if (LM.getKind() == LengthModifier::AsLongDouble)
348 return Ctx.LongDoubleTy;
// Remaining conversions are decided by kind: strings and pointers.
352 switch (CS.getKind()) {
353 case ConversionSpecifier::sArg:
// sArg is a wide C string only with the AsWideChar length modifier.
354 return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
355 ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
356 case ConversionSpecifier::SArg:
357 // FIXME: This appears to be Mac OS X specific.
358 return ArgTypeResult::WCStrTy;
359 case ConversionSpecifier::CArg:
361 case ConversionSpecifier::pArg:
362 return ArgTypeResult::CPointerTy;
367 // FIXME: Handle other cases.
368 return ArgTypeResult();
// Rewrites this specifier's conversion kind, length modifier and flags so
// that they suit the type QT.
// Pointers to any character type are treated as strings (sArg) first. After
// that only builtin types are considered: the length modifier is derived from
// the builtin kind and the conversion kind from the type's category, with
// flags that do not apply to the chosen conversion being cleared.
371 bool PrintfSpecifier::fixType(QualType QT) {
372 // Handle strings first (char *, wchar_t *)
373 if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
374 CS.setKind(ConversionSpecifier::sArg);
376 // Disable irrelevant flags
377 HasAlternativeForm = 0;
378 HasLeadingZeroes = 0;
380 // Set the long length modifier for wide characters
381 if (QT->getPointeeType()->isWideCharType())
382 LM.setKind(LengthModifier::AsWideChar);
387 // We can only work with builtin types.
388 if (!QT->isBuiltinType())
391 // Everything else should be a base type
392 const BuiltinType *BT = QT->getAs<BuiltinType>();
394 // Set length modifier
395 switch (BT->getKind()) {
397 // The rest of the conversions are either optional or for non-builtin types
398 LM.setKind(LengthModifier::None);
// WChar shares the AsLong length modifier with Long and ULong.
401 case BuiltinType::WChar:
402 case BuiltinType::Long:
403 case BuiltinType::ULong:
404 LM.setKind(LengthModifier::AsLong);
407 case BuiltinType::LongLong:
408 case BuiltinType::ULongLong:
409 LM.setKind(LengthModifier::AsLongLong);
412 case BuiltinType::LongDouble:
413 LM.setKind(LengthModifier::AsLongDouble);
417 // Set conversion specifier and disable any flags which do not apply to it.
// Character types: cArg, with precision, '#' and '0' all cleared.
418 if (QT->isAnyCharacterType()) {
419 CS.setKind(ConversionSpecifier::cArg);
420 Precision.setHowSpecified(OptionalAmount::NotSpecified);
421 HasAlternativeForm = 0;
422 HasLeadingZeroes = 0;
425 // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
426 else if (QT->isRealFloatingType()) {
427 CS.setKind(ConversionSpecifier::fArg);
// NOTE(review): this branch comes after the isBuiltinType() check above --
// confirm that a pointer type can actually reach it.
429 else if (QT->isPointerType()) {
430 CS.setKind(ConversionSpecifier::pArg);
431 Precision.setHowSpecified(OptionalAmount::NotSpecified);
432 HasAlternativeForm = 0;
433 HasLeadingZeroes = 0;
// Signed and unsigned integers become dArg / uArg; only '#' is cleared.
436 else if (QT->isSignedIntegerType()) {
437 CS.setKind(ConversionSpecifier::dArg);
438 HasAlternativeForm = 0;
440 else if (QT->isUnsignedIntegerType()) {
441 CS.setKind(ConversionSpecifier::uArg);
442 HasAlternativeForm = 0;
// Writes this specifier to os piece by piece: the positional argument index
// followed by "$" (only when positional), the flag characters in the fixed
// order - + space # 0, then the field width and the precision.
452 void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
453 // Whilst some features have no defined order, we are using the order
454 // appearing in the C99 standard (ISO/IEC 9899:1999 (E) §7.19.6.1)
458 if (usesPositionalArg()) {
459 os << getPositionalArgIndex() << "$";
// Each flag is emitted only if it is set on this specifier.
463 if (IsLeftJustified) os << "-";
464 if (HasPlusPrefix) os << "+";
465 if (HasSpacePrefix) os << " ";
466 if (HasAlternativeForm) os << "#";
467 if (HasLeadingZeroes) os << "0";
469 // Minimum field width
470 FieldWidth.toString(os);
472 Precision.toString(os);
475 // Conversion specifier
// Checks the '+' flag against the conversion kind.
// The switch singles out the signed integer conversions (d, i), the
// floating-point conversions (f, F, e, E, g, G, a, A) and the FreeBSD rArg.
479 bool PrintfSpecifier::hasValidPlusPrefix() const {
483 // The plus prefix only makes sense for signed conversions
484 switch (CS.getKind()) {
485 case ConversionSpecifier::dArg:
486 case ConversionSpecifier::iArg:
487 case ConversionSpecifier::fArg:
488 case ConversionSpecifier::FArg:
489 case ConversionSpecifier::eArg:
490 case ConversionSpecifier::EArg:
491 case ConversionSpecifier::gArg:
492 case ConversionSpecifier::GArg:
493 case ConversionSpecifier::aArg:
494 case ConversionSpecifier::AArg:
495 case ConversionSpecifier::rArg:
// Checks the '#' (alternative form) flag against the conversion kind.
// The check only matters when HasAlternativeForm is set. The switch singles
// out o, x, the floating-point conversions and the FreeBSD rArg.
// NOTE(review): C99 7.19.6.1 also defines '#' for the X conversion, but XArg
// does not appear in the case list below -- confirm whether that is intended.
503 bool PrintfSpecifier::hasValidAlternativeForm() const {
504 if (!HasAlternativeForm)
507 // Alternate form flag only valid with the oxaAeEfFgG conversions
508 switch (CS.getKind()) {
509 case ConversionSpecifier::oArg:
510 case ConversionSpecifier::xArg:
511 case ConversionSpecifier::aArg:
512 case ConversionSpecifier::AArg:
513 case ConversionSpecifier::eArg:
514 case ConversionSpecifier::EArg:
515 case ConversionSpecifier::fArg:
516 case ConversionSpecifier::FArg:
517 case ConversionSpecifier::gArg:
518 case ConversionSpecifier::GArg:
519 case ConversionSpecifier::rArg:
// Checks the '0' (leading zeroes) flag against the conversion kind.
// The check only matters when HasLeadingZeroes is set. The switch singles
// out every integer conversion (d, i, o, u, x, X) and every floating-point
// conversion (a, A, e, E, f, F, g, G).
527 bool PrintfSpecifier::hasValidLeadingZeros() const {
528 if (!HasLeadingZeroes)
531 // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
532 switch (CS.getKind()) {
533 case ConversionSpecifier::dArg:
534 case ConversionSpecifier::iArg:
535 case ConversionSpecifier::oArg:
536 case ConversionSpecifier::uArg:
537 case ConversionSpecifier::xArg:
538 case ConversionSpecifier::XArg:
539 case ConversionSpecifier::aArg:
540 case ConversionSpecifier::AArg:
541 case ConversionSpecifier::eArg:
542 case ConversionSpecifier::EArg:
543 case ConversionSpecifier::fArg:
544 case ConversionSpecifier::FArg:
545 case ConversionSpecifier::gArg:
546 case ConversionSpecifier::GArg:
// Checks the ' ' (space prefix) flag against the conversion kind.
// The switch singles out the signed integer conversions (d, i) and the
// floating-point conversions (f, F, e, E, g, G, a, A).
554 bool PrintfSpecifier::hasValidSpacePrefix() const {
558 // The space prefix only makes sense for signed conversions
559 switch (CS.getKind()) {
560 case ConversionSpecifier::dArg:
561 case ConversionSpecifier::iArg:
562 case ConversionSpecifier::fArg:
563 case ConversionSpecifier::FArg:
564 case ConversionSpecifier::eArg:
565 case ConversionSpecifier::EArg:
566 case ConversionSpecifier::gArg:
567 case ConversionSpecifier::GArg:
568 case ConversionSpecifier::aArg:
569 case ConversionSpecifier::AArg:
// Checks the '-' (left-justify) flag against the conversion kind.
// The check only matters when IsLeftJustified is set; nArg is the one
// conversion kind the switch treats specially.
577 bool PrintfSpecifier::hasValidLeftJustified() const {
578 if (!IsLeftJustified)
581 // The left justified flag is valid for all conversions except n
582 switch (CS.getKind()) {
583 case ConversionSpecifier::nArg:
// Checks an explicitly given precision against the conversion kind.
// The check only matters when Precision was specified. The switch singles
// out the integer conversions (d, i, o, u, x, X), the floating-point
// conversions (a, A, e, E, f, F, g, G) and the string conversion s.
591 bool PrintfSpecifier::hasValidPrecision() const {
592 if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
595 // Precision is only valid with the diouxXaAeEfFgGs conversions
596 switch (CS.getKind()) {
597 case ConversionSpecifier::dArg:
598 case ConversionSpecifier::iArg:
599 case ConversionSpecifier::oArg:
600 case ConversionSpecifier::uArg:
601 case ConversionSpecifier::xArg:
602 case ConversionSpecifier::XArg:
603 case ConversionSpecifier::aArg:
604 case ConversionSpecifier::AArg:
605 case ConversionSpecifier::eArg:
606 case ConversionSpecifier::EArg:
607 case ConversionSpecifier::fArg:
608 case ConversionSpecifier::FArg:
609 case ConversionSpecifier::gArg:
610 case ConversionSpecifier::GArg:
611 case ConversionSpecifier::sArg:
// Checks an explicitly given field width against the conversion kind.
// The check only matters when FieldWidth was specified; nArg is the one
// conversion kind the switch treats specially.
618 bool PrintfSpecifier::hasValidFieldWidth() const {
619 if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
622 // The field width is valid for all conversions except n
623 switch (CS.getKind()) {
624 case ConversionSpecifier::nArg: