1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
// Parses the scan list that follows a '[' conversion character, looking for
// the closing ']'. Each failure path below reports the unterminated list
// through H.HandleIncompleteScanList().
32 static bool ParseScanList(FormatStringHandler &H,
33 ScanfConversionSpecifier &CS,
34 const char *&Beg, const char *E) {
// 'start' is one character before the current position; it is the range
// start passed to every HandleIncompleteScanList() call below.
// NOTE(review): presumably this is the '[' that opened the list -- confirm.
36 const char *start = I - 1;
// NOTE(review): UpdateOnReturn presumably writes I back into Beg when this
// function exits -- confirm against its definition.
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39 // No more characters?
41 H.HandleIncompleteScanList(start, I);
45 // Special case: ']' is the first character.
48 H.HandleIncompleteScanList(start, I - 1);
53 // Look for a ']' character which denotes the end of the scan list.
56 H.HandleIncompleteScanList(start, I - 1);
// Parses a single scanf conversion specification. The stages visible below
// run in this order: locate '%', optional positional argument, optional '*'
// (assignment suppression), optional constant field width, optional length
// modifier, and finally the conversion character. Whenever the string ends
// in the middle of a specifier, the partial text [Start, E) is reported via
// H.HandleIncompleteSpecifier().
//
// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66 // We can possibly refactor.
67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
71 const LangOptions &LO,
72 const TargetInfo &Target) {
74 using namespace clang::analyze_scanf;
// Start stays null until a '%' has been found (it is assigned in the scan
// loop below).
76 const char *Start = 0;
// NOTE(review): UpdateOnReturn presumably writes I back into Beg when this
// function exits -- confirm against its definition.
77 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
79 // Look for a '%' character that indicates the start of a format specifier.
80 for ( ; I != E ; ++I) {
83 // Detect spurious null characters, which are likely errors.
88 Start = I++; // Record the start of the format specifier.
93 // No format specifier found?
98 // No more characters left?
99 H.HandleIncompleteSpecifier(Start, E - Start);
104 if (ParseArgPosition(H, FS, Start, I, E))
108 // No more characters left?
109 H.HandleIncompleteSpecifier(Start, E - Start);
113 // Look for '*' flag if it is present.
115 FS.setSuppressAssignment(I);
117 H.HandleIncompleteSpecifier(Start, E - Start);
122 // Look for the field width (if any). Unlike printf, this is either
123 // a fixed integer or isn't present.
124 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
125 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
// Any amount that was specified is expected to be a constant here.
126 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
127 FS.setFieldWidth(Amt);
130 // No more characters left?
131 H.HandleIncompleteSpecifier(Start, E - Start);
136 // Look for the length modifier.
// The specifier is incomplete only if a length modifier was parsed AND that
// left I at the end of the string.
137 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
138 // No more characters left?
139 H.HandleIncompleteSpecifier(Start, E - Start);
143 // Detect spurious null characters, which are likely errors.
149 // Finally, look for the conversion specifier.
// conversionPosition is the conversion character itself; I is advanced past it.
150 const char *conversionPosition = I++;
// k keeps this InvalidSpecifier value unless one of the cases below matches.
151 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
152 switch (*conversionPosition) {
155 case '%': k = ConversionSpecifier::PercentArg; break;
156 case 'A': k = ConversionSpecifier::AArg; break;
157 case 'E': k = ConversionSpecifier::EArg; break;
158 case 'F': k = ConversionSpecifier::FArg; break;
159 case 'G': k = ConversionSpecifier::GArg; break;
160 case 'X': k = ConversionSpecifier::XArg; break;
161 case 'a': k = ConversionSpecifier::aArg; break;
162 case 'd': k = ConversionSpecifier::dArg; break;
163 case 'e': k = ConversionSpecifier::eArg; break;
164 case 'f': k = ConversionSpecifier::fArg; break;
165 case 'g': k = ConversionSpecifier::gArg; break;
166 case 'i': k = ConversionSpecifier::iArg; break;
167 case 'n': k = ConversionSpecifier::nArg; break;
168 case 'c': k = ConversionSpecifier::cArg; break;
169 case 'C': k = ConversionSpecifier::CArg; break;
170 case 'S': k = ConversionSpecifier::SArg; break;
171 case '[': k = ConversionSpecifier::ScanListArg; break;
172 case 'u': k = ConversionSpecifier::uArg; break;
173 case 'x': k = ConversionSpecifier::xArg; break;
174 case 'o': k = ConversionSpecifier::oArg; break;
175 case 's': k = ConversionSpecifier::sArg; break;
176 case 'p': k = ConversionSpecifier::pArg; break;
// DArg, OArg and UArg are assigned only under the Darwin target check.
180 if (Target.getTriple().isOSDarwin())
181 k = ConversionSpecifier::DArg;
184 if (Target.getTriple().isOSDarwin())
185 k = ConversionSpecifier::OArg;
188 if (Target.getTriple().isOSDarwin())
189 k = ConversionSpecifier::UArg;
192 ScanfConversionSpecifier CS(conversionPosition, k);
// A '[' conversion is followed by a scan list, which is parsed separately.
193 if (k == ScanfConversionSpecifier::ScanListArg) {
194 if (ParseScanList(H, CS, I, E))
197 FS.setConversionSpecifier(CS);
// Assign the next implicit argument index only when the conversion stores a
// value, assignment is not suppressed, and no positional index was given.
198 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
199 && !FS.usesPositionalArg())
200 FS.setArgIndex(argIndex++);
202 // FIXME: '%' and '*' don't make sense. Issue a warning.
203 // FIXME: 'ConsumedSoFar' and '*' don't make sense.
205 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
206 // Assume the conversion takes one argument.
// The handler's boolean answer is negated to form the result.
// NOTE(review): presumably a true result means "stop parsing" -- confirm
// against SpecifierResult's bool constructor.
207 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
// Success: the result carries the specifier's start and the parsed FS.
209 return ScanfSpecifierResult(Start, FS);
// Returns the argument type this specifier expects, selected first by the
// conversion kind and then by the length modifier LM. Conversions that do not
// consume a data argument yield ArgType::Invalid(). Most results are built
// with ArgType::PtrTo(); the exceptions are noted at the %n cases.
212 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
213 const ScanfConversionSpecifier &CS = getConversionSpecifier();
215 if (!CS.consumesDataArgument())
216 return ArgType::Invalid();
218 switch(CS.getKind()) {
// Signed integer conversions (dArg, DArg, iArg).
220 case ConversionSpecifier::dArg:
221 case ConversionSpecifier::DArg:
222 case ConversionSpecifier::iArg:
223 switch (LM.getKind()) {
224 case LengthModifier::None:
225 return ArgType::PtrTo(Ctx.IntTy);
226 case LengthModifier::AsChar:
// Any character type is accepted here (contrast with the unsigned group,
// which requires unsigned char).
227 return ArgType::PtrTo(ArgType::AnyCharTy);
228 case LengthModifier::AsShort:
229 return ArgType::PtrTo(Ctx.ShortTy);
230 case LengthModifier::AsLong:
231 return ArgType::PtrTo(Ctx.LongTy);
232 case LengthModifier::AsLongLong:
233 case LengthModifier::AsQuad:
234 return ArgType::PtrTo(Ctx.LongLongTy);
235 case LengthModifier::AsInt64:
236 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
237 case LengthModifier::AsIntMax:
238 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
239 case LengthModifier::AsSizeT:
242 case LengthModifier::AsPtrDiff:
243 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
244 case LengthModifier::AsLongDouble:
// The long-double modifier on an integer conversion maps to long long.
246 return ArgType::PtrTo(Ctx.LongLongTy);
// These modifiers are rejected for signed integer conversions.
247 case LengthModifier::AsAllocate:
248 case LengthModifier::AsMAllocate:
249 case LengthModifier::AsInt32:
250 case LengthModifier::AsInt3264:
251 return ArgType::Invalid();
// Unsigned integer conversions (oArg, OArg, uArg, UArg, xArg, XArg).
255 case ConversionSpecifier::oArg:
256 case ConversionSpecifier::OArg:
257 case ConversionSpecifier::uArg:
258 case ConversionSpecifier::UArg:
259 case ConversionSpecifier::xArg:
260 case ConversionSpecifier::XArg:
261 switch (LM.getKind()) {
262 case LengthModifier::None:
263 return ArgType::PtrTo(Ctx.UnsignedIntTy);
264 case LengthModifier::AsChar:
265 return ArgType::PtrTo(Ctx.UnsignedCharTy);
266 case LengthModifier::AsShort:
267 return ArgType::PtrTo(Ctx.UnsignedShortTy);
268 case LengthModifier::AsLong:
269 return ArgType::PtrTo(Ctx.UnsignedLongTy);
270 case LengthModifier::AsLongLong:
271 case LengthModifier::AsQuad:
272 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
273 case LengthModifier::AsInt64:
274 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
275 case LengthModifier::AsIntMax:
276 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
277 case LengthModifier::AsSizeT:
278 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
279 case LengthModifier::AsPtrDiff:
280 // FIXME: Unsigned version of ptrdiff_t?
282 case LengthModifier::AsLongDouble:
// The long-double modifier on an unsigned conversion maps to unsigned long long.
284 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
// These modifiers are rejected for unsigned integer conversions.
285 case LengthModifier::AsAllocate:
286 case LengthModifier::AsMAllocate:
287 case LengthModifier::AsInt32:
288 case LengthModifier::AsInt3264:
289 return ArgType::Invalid();
// Floating-point conversions: no modifier -> float, AsLong -> double,
// AsLongDouble -> long double.
293 case ConversionSpecifier::aArg:
294 case ConversionSpecifier::AArg:
295 case ConversionSpecifier::eArg:
296 case ConversionSpecifier::EArg:
297 case ConversionSpecifier::fArg:
298 case ConversionSpecifier::FArg:
299 case ConversionSpecifier::gArg:
300 case ConversionSpecifier::GArg:
301 switch (LM.getKind()) {
302 case LengthModifier::None:
303 return ArgType::PtrTo(Ctx.FloatTy);
304 case LengthModifier::AsLong:
305 return ArgType::PtrTo(Ctx.DoubleTy);
306 case LengthModifier::AsLongDouble:
307 return ArgType::PtrTo(Ctx.LongDoubleTy);
309 return ArgType::Invalid();
312 // Char, string and scanlist.
313 case ConversionSpecifier::cArg:
314 case ConversionSpecifier::sArg:
315 case ConversionSpecifier::ScanListArg:
316 switch (LM.getKind()) {
317 case LengthModifier::None:
318 return ArgType::PtrTo(ArgType::AnyCharTy);
319 case LengthModifier::AsLong:
320 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
// With an allocate modifier the argument is a pointer to a C string pointer.
321 case LengthModifier::AsAllocate:
322 case LengthModifier::AsMAllocate:
323 return ArgType::PtrTo(ArgType::CStrTy);
325 return ArgType::Invalid();
// Wide-character forms (CArg, SArg): wchar_t without a modifier.
327 case ConversionSpecifier::CArg:
328 case ConversionSpecifier::SArg:
329 // FIXME: Mac OS X specific?
330 switch (LM.getKind()) {
331 case LengthModifier::None:
332 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
333 case LengthModifier::AsAllocate:
334 case LengthModifier::AsMAllocate:
335 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
337 return ArgType::Invalid();
// pArg: pointer to a C pointer, with no length-modifier dispatch.
341 case ConversionSpecifier::pArg:
342 return ArgType::PtrTo(ArgType::CPointerTy);
// nArg. Unlike the d/i group above, AsChar maps to signed char specifically,
// and AsSizeT / AsLongDouble return a default-constructed ArgType rather
// than a PtrTo() result.
345 case ConversionSpecifier::nArg:
346 switch (LM.getKind()) {
347 case LengthModifier::None:
348 return ArgType::PtrTo(Ctx.IntTy);
349 case LengthModifier::AsChar:
350 return ArgType::PtrTo(Ctx.SignedCharTy);
351 case LengthModifier::AsShort:
352 return ArgType::PtrTo(Ctx.ShortTy);
353 case LengthModifier::AsLong:
354 return ArgType::PtrTo(Ctx.LongTy);
355 case LengthModifier::AsLongLong:
356 case LengthModifier::AsQuad:
357 return ArgType::PtrTo(Ctx.LongLongTy);
358 case LengthModifier::AsInt64:
359 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
360 case LengthModifier::AsIntMax:
361 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
362 case LengthModifier::AsSizeT:
363 return ArgType(); // FIXME: ssize_t
364 case LengthModifier::AsPtrDiff:
365 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
366 case LengthModifier::AsLongDouble:
367 return ArgType(); // FIXME: Is this a known extension?
368 case LengthModifier::AsAllocate:
369 case LengthModifier::AsMAllocate:
370 case LengthModifier::AsInt32:
371 case LengthModifier::AsInt3264:
372 return ArgType::Invalid();
// Tries to rewrite this specifier -- its length modifier LM and conversion
// specifier CS -- so that it matches an argument of type QT. Only pointer
// arguments are considered, and %n specifiers are left untouched.
// NOTE(review): presumably returns true when a usable fix was produced --
// confirm against the return statements.
382 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
// scanf arguments are pointers; anything else is not handled here.
384 if (!QT->isPointerType())
387 // %n is different from other conversion specifiers; don't try to fix it.
388 if (CS.getKind() == ConversionSpecifier::nArg)
// PT is the pointed-to type; the rest of the function classifies PT.
391 QualType PT = QT->getPointeeType();
393 // If it's an enum, get its underlying type.
// NOTE(review): this inspects and rewrites QT, which the guard above tests
// for being a pointer type, while everything below works on the pointee PT.
// PT looks like the intended operand here -- confirm.
394 if (const EnumType *ETy = QT->getAs<EnumType>())
395 QT = ETy->getDecl()->getIntegerType();
397 const BuiltinType *BT = PT->getAs<BuiltinType>();
401 // Pointer to a character.
// Character pointees become %s; wide characters additionally get the
// wide-char length modifier, all others get none.
402 if (PT->isAnyCharacterType()) {
403 CS.setKind(ConversionSpecifier::sArg);
404 if (PT->isWideCharType())
405 LM.setKind(LengthModifier::AsWideChar);
407 LM.setKind(LengthModifier::None);
411 // Figure out the length modifier.
412 switch (BT->getKind()) {
// int, unsigned int and float take no length modifier.
414 case BuiltinType::UInt:
415 case BuiltinType::Int:
416 case BuiltinType::Float:
417 LM.setKind(LengthModifier::None);
// All char-sized builtin kinds use the char modifier.
421 case BuiltinType::Char_U:
422 case BuiltinType::UChar:
423 case BuiltinType::Char_S:
424 case BuiltinType::SChar:
425 LM.setKind(LengthModifier::AsChar);
429 case BuiltinType::Short:
430 case BuiltinType::UShort:
431 LM.setKind(LengthModifier::AsShort);
// double shares the 'long' modifier with long / unsigned long.
435 case BuiltinType::Long:
436 case BuiltinType::ULong:
437 case BuiltinType::Double:
438 LM.setKind(LengthModifier::AsLong);
442 case BuiltinType::LongLong:
443 case BuiltinType::ULongLong:
444 LM.setKind(LengthModifier::AsLongLong);
448 case BuiltinType::LongDouble:
449 LM.setKind(LengthModifier::AsLongDouble);
457 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
// Applied only to typedef'd pointees, and only in C99 or C++11 mode.
458 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
459 namedTypeToLengthModifier(PT, LM);
461 // If fixing the length modifier was enough, we are done.
// "Enough" means the modifier is valid for the target and the resulting
// argument type matches QT.
462 if (hasValidLengthModifier(Ctx.getTargetInfo())) {
463 const analyze_scanf::ArgType &AT = getArgType(Ctx);
464 if (AT.isValid() && AT.matchesType(Ctx, QT))
468 // Figure out the conversion specifier.
// floating point -> fArg, signed integer -> dArg, unsigned integer -> uArg;
// any other pointee type is treated as unreachable.
469 if (PT->isRealFloatingType())
470 CS.setKind(ConversionSpecifier::fArg);
471 else if (PT->isSignedIntegerType())
472 CS.setKind(ConversionSpecifier::dArg);
473 else if (PT->isUnsignedIntegerType())
474 CS.setKind(ConversionSpecifier::uArg);
476 llvm_unreachable("Unexpected type");
// Writes the textual form of this specifier to os. The pieces visible here
// are the positional index followed by '$', the assignment-suppression
// check, and the field width.
481 void ScanfSpecifier::toString(raw_ostream &os) const {
// Positional form: "<index>$".
484 if (usesPositionalArg())
485 os << getPositionalArgIndex() << "$";
486 if (SuppressAssignment)
489 FieldWidth.toString(os);
// Entry point for scanf format strings: parses one specifier at a time with
// ParseScanfSpecifier() and hands each parsed specifier to
// H.HandleScanfSpecifier().
494 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
497 const LangOptions &LO,
498 const TargetInfo &Target) {
// Running argument index, passed to ParseScanfSpecifier for every specifier;
// that function assigns and advances it via FS.setArgIndex(argIndex++).
500 unsigned argIndex = 0;
502 // Keep looking for a format specifier until we have exhausted the string.
504 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
506 // Did a fail-stop error of any kind occur when parsing the specifier?
507 // If so, don't do any more processing.
508 if (FSR.shouldStop())
510 // Did we exhaust the string or encounter an error that
511 // we can recover from?
514 // We have a format specifier. Pass it to the callback.
// The callback receives the specifier, its start, and its length in
// characters (I - start).
515 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
516 I - FSR.getStart())) {
// When this point is reached the whole string must have been consumed.
520 assert(I == E && "Format string not exhausted");