1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfArgTypeResult;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
// Result type for parsing a single scanf specifier: SpecifierResult
// instantiated for ScanfSpecifier.
// NOTE(review): the alias name is on a line not shown in this excerpt;
// presumably it is ScanfSpecifierResult, the name used by the parsing
// functions further down -- confirm against the full file.
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
// Parses the scan list ("[...]") portion of a scanf conversion.
//
//   H    - handler that receives the HandleIncompleteScanList diagnostics.
//   CS   - conversion specifier being built for this scan list.
//   Beg  - in/out cursor into the format string (passed by reference).
//   E    - end of the format string.
//
// The caller uses the bool result directly as an `if` condition.
// NOTE(review): several lines of this function (the declaration of `I`, the
// conditions guarding each diagnostic, and every return statement) are not
// present in this excerpt, so the exact meaning of the return value must be
// confirmed against the full file.
32 static bool ParseScanList(FormatStringHandler &H,
33 ScanfConversionSpecifier &CS,
34 const char *&Beg, const char *E) {
// `start` is one character before the current cursor; it is the start
// position passed to every diagnostic below (presumably the '[' itself --
// TODO confirm).
36 const char *start = I - 1;
// Ties the caller's cursor `Beg` to the local cursor `I`; going by the helper's
// name, `Beg` is presumably updated from `I` when the function returns.
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39 // No more characters?
41 H.HandleIncompleteScanList(start, I);
45 // Special case: ']' is the first character.
48 H.HandleIncompleteScanList(start, I - 1);
53 // Look for a ']' character which denotes the end of the scan list.
56 H.HandleIncompleteScanList(start, I - 1);
65 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
66 // We can possibly refactor.
// Parses one scanf conversion specification out of the format string and
// returns it as a ScanfSpecifierResult.
//
// Visible steps, in order:
//   1. scan forward to the '%' that starts a specifier,
//   2. parse an optional argument position (ParseArgPosition),
//   3. record the '*' assignment-suppression flag,
//   4. parse an optional constant field width,
//   5. parse an optional length modifier,
//   6. map the conversion character to a ConversionSpecifier kind,
//   7. for '[', parse the scan list,
//   8. assign the next argument index when the conversion consumes a data
//      argument, is not suppressed and is not positional.
// Running out of characters at any step is reported through
// H.HandleIncompleteSpecifier(Start, E - Start).
//
// NOTE(review): the parameter lines between `H` and `LO`, the declarations of
// `I` and `FS`, and most conditions/returns are missing from this excerpt;
// the comments below describe only what the remaining lines show.
67 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
71 const LangOptions &LO) {
73 using namespace clang::analyze_scanf;
// Start stays null until a '%' has been seen.
75 const char *Start = 0;
// Ties the caller's cursor `Beg` to the local cursor `I` (presumably written
// back on return, going by the helper's name).
76 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
78 // Look for a '%' character that indicates the start of a format specifier.
79 for ( ; I != E ; ++I) {
82 // Detect spurious null characters, which are likely errors.
87 Start = I++; // Record the start of the format specifier.
92 // No format specifier found?
97 // No more characters left?
98 H.HandleIncompleteSpecifier(Start, E - Start);
103 if (ParseArgPosition(H, FS, Start, I, E))
107 // No more characters left?
108 H.HandleIncompleteSpecifier(Start, E - Start);
112 // Look for '*' flag if it is present.
114 FS.setSuppressAssignment(I);
116 H.HandleIncompleteSpecifier(Start, E - Start);
121 // Look for the field width (if any). Unlike printf, this is either
122 // a fixed integer or isn't present.
123 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
124 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
// A specified width must be a literal constant here.
125 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
126 FS.setFieldWidth(Amt);
129 // No more characters left?
130 H.HandleIncompleteSpecifier(Start, E - Start);
135 // Look for the length modifier.
// Only an error when a length modifier was parsed AND it ended the string.
136 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
137 // No more characters left?
138 H.HandleIncompleteSpecifier(Start, E - Start);
142 // Detect spurious null characters, which are likely errors.
148 // Finally, look for the conversion specifier.
// conversionPosition points at the conversion character; I moves past it.
149 const char *conversionPosition = I++;
// k stays InvalidSpecifier unless one of the cases below matches.
150 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
151 switch (*conversionPosition) {
154 case '%': k = ConversionSpecifier::PercentArg; break;
155 case 'A': k = ConversionSpecifier::AArg; break;
156 case 'E': k = ConversionSpecifier::EArg; break;
157 case 'F': k = ConversionSpecifier::FArg; break;
158 case 'G': k = ConversionSpecifier::GArg; break;
159 case 'X': k = ConversionSpecifier::XArg; break;
160 case 'a': k = ConversionSpecifier::aArg; break;
161 case 'd': k = ConversionSpecifier::dArg; break;
162 case 'e': k = ConversionSpecifier::eArg; break;
163 case 'f': k = ConversionSpecifier::fArg; break;
164 case 'g': k = ConversionSpecifier::gArg; break;
165 case 'i': k = ConversionSpecifier::iArg; break;
166 case 'n': k = ConversionSpecifier::nArg; break;
167 case 'c': k = ConversionSpecifier::cArg; break;
168 case 'C': k = ConversionSpecifier::CArg; break;
169 case 'S': k = ConversionSpecifier::SArg; break;
170 case '[': k = ConversionSpecifier::ScanListArg; break;
171 case 'u': k = ConversionSpecifier::uArg; break;
172 case 'x': k = ConversionSpecifier::xArg; break;
173 case 'o': k = ConversionSpecifier::oArg; break;
174 case 's': k = ConversionSpecifier::sArg; break;
175 case 'p': k = ConversionSpecifier::pArg; break;
177 ScanfConversionSpecifier CS(conversionPosition, k);
// A '[' conversion is followed by a scan list that needs its own parsing.
178 if (k == ScanfConversionSpecifier::ScanListArg) {
179 if (ParseScanList(H, CS, I, E))
182 FS.setConversionSpecifier(CS);
// Hand out the next sequential argument index only for conversions that store
// a value: not suppressed with '*' and not using an explicit position.
183 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
184 && !FS.usesPositionalArg())
185 FS.setArgIndex(argIndex++);
187 // FIXME: '%' and '*' don't make sense together. Issue a warning.
188 // FIXME: 'ConsumedSoFar' and '*' don't make sense together.
190 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
191 // Assume the conversion takes one argument.
// The handler's answer is negated before being returned as the result.
192 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
194 return ScanfSpecifierResult(Start, FS);
// Returns the argument type expected by this scanf specifier, chosen from the
// conversion specifier kind and the length modifier (LM).
//
// Returns ScanfArgTypeResult::Invalid() when the conversion does not consume
// a data argument or when the length modifier is one the conversion does not
// accept. Some combinations return a default-constructed ScanfArgTypeResult
// instead; what that state means is defined by ScanfArgTypeResult and is not
// visible in this excerpt.
//
// NOTE(review): the inner `default:` labels, closing braces and a few comment
// lines of this function are missing from this excerpt.
197 ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const {
198 const ScanfConversionSpecifier &CS = getConversionSpecifier();
200 if (!CS.consumesDataArgument())
201 return ScanfArgTypeResult::Invalid();
203 switch(CS.getKind()) {
// Signed integer conversions (%d, %i).
205 case ConversionSpecifier::dArg:
206 case ConversionSpecifier::iArg:
207 switch (LM.getKind()) {
208 case LengthModifier::None: return ArgTypeResult(Ctx.IntTy);
209 case LengthModifier::AsChar:
210 return ArgTypeResult(ArgTypeResult::AnyCharTy);
211 case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy);
212 case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy);
213 case LengthModifier::AsLongLong:
214 case LengthModifier::AsQuad:
215 return ArgTypeResult(Ctx.LongLongTy);
216 case LengthModifier::AsIntMax:
217 return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *");
// No concrete type is produced for the size_t modifier with a signed
// conversion; a default-constructed result is returned instead.
218 case LengthModifier::AsSizeT:
220 return ScanfArgTypeResult();
221 case LengthModifier::AsPtrDiff:
222 return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *");
// NOTE(review): the long-double modifier on an integer conversion is mapped to
// long long -- presumably a GNU extension; confirm.
223 case LengthModifier::AsLongDouble:
225 return ArgTypeResult(Ctx.LongLongTy);
226 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
227 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
// Unsigned integer conversions (%o, %u, %x, %X).
231 case ConversionSpecifier::oArg:
232 case ConversionSpecifier::uArg:
233 case ConversionSpecifier::xArg:
234 case ConversionSpecifier::XArg:
235 switch (LM.getKind()) {
236 case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy);
237 case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy);
238 case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy);
239 case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy);
240 case LengthModifier::AsLongLong:
241 case LengthModifier::AsQuad:
242 return ArgTypeResult(Ctx.UnsignedLongLongTy);
243 case LengthModifier::AsIntMax:
244 return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *");
245 case LengthModifier::AsSizeT:
246 return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *");
247 case LengthModifier::AsPtrDiff:
248 // FIXME: Unsigned version of ptrdiff_t?
249 return ScanfArgTypeResult();
// NOTE(review): as in the signed case, the long-double modifier maps to a
// long long type here (unsigned) -- presumably a GNU extension; confirm.
250 case LengthModifier::AsLongDouble:
252 return ArgTypeResult(Ctx.UnsignedLongLongTy);
253 case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid();
254 case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid();
// Floating-point conversions (%a, %A, %e, %E, %f, %F, %g, %G).
258 case ConversionSpecifier::aArg:
259 case ConversionSpecifier::AArg:
260 case ConversionSpecifier::eArg:
261 case ConversionSpecifier::EArg:
262 case ConversionSpecifier::fArg:
263 case ConversionSpecifier::FArg:
264 case ConversionSpecifier::gArg:
265 case ConversionSpecifier::GArg:
266 switch (LM.getKind()) {
267 case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy);
268 case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy);
269 case LengthModifier::AsLongDouble:
270 return ArgTypeResult(Ctx.LongDoubleTy);
// Presumably the `default:` arm (its label is not shown): any other length
// modifier is rejected.
272 return ScanfArgTypeResult::Invalid();
275 // Char, string and scanlist.
276 case ConversionSpecifier::cArg:
277 case ConversionSpecifier::sArg:
278 case ConversionSpecifier::ScanListArg:
279 switch (LM.getKind()) {
280 case LengthModifier::None: return ScanfArgTypeResult::CStrTy;
281 case LengthModifier::AsLong:
282 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
// The allocating modifiers wrap the C-string ArgTypeResult rather than using
// ScanfArgTypeResult::CStrTy directly (presumably yielding a pointer to a C
// string -- TODO confirm against the ScanfArgTypeResult constructors).
283 case LengthModifier::AsAllocate:
284 case LengthModifier::AsMAllocate:
285 return ScanfArgTypeResult(ArgTypeResult::CStrTy);
// Presumably the `default:` arm (label not shown).
287 return ScanfArgTypeResult::Invalid();
// Wide-character conversions (%C, %S).
289 case ConversionSpecifier::CArg:
290 case ConversionSpecifier::SArg:
291 // FIXME: Mac OS X specific?
292 switch (LM.getKind()) {
293 case LengthModifier::None:
294 return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *");
295 case LengthModifier::AsAllocate:
296 case LengthModifier::AsMAllocate:
297 return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **");
// Presumably the `default:` arm (label not shown).
299 return ScanfArgTypeResult::Invalid();
// %p: wraps the generic C pointer ArgTypeResult.
303 case ConversionSpecifier::pArg:
304 return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy));
// Fallback for anything not handled above: default-constructed result.
310 return ScanfArgTypeResult();
// Rewrites this specifier's length modifier (LM) and conversion specifier (CS)
// so that they fit an argument of type QT.
//
// Visible steps:
//   1. QT must be a pointer type; the pointee PT drives everything else.
//   2. Pointers to character types become %s, with a wide-char or empty
//      length modifier.
//   3. Otherwise the length modifier is picked from the pointee's builtin kind.
//   4. Under C99 / C++0x, typedef names size_t, ssize_t, intmax_t, uintmax_t
//      and ptrdiff_t override it with their dedicated modifiers.
//   5. If the existing conversion now matches QT, nothing more is changed.
//   6. Otherwise the conversion becomes f, d or u by the pointee's category.
//
// NOTE(review): the end of the signature (it must supply `Ctx`, which is used
// below), every return statement and some guards are missing from this
// excerpt, so the meaning of the bool result is not documented here.
313 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
// scanf arguments are destinations, so only pointer types can be handled.
315 if (!QT->isPointerType())
318 QualType PT = QT->getPointeeType();
// NOTE(review): BT is dereferenced in the switch below; a null check
// presumably sits on the lines omitted from this excerpt -- confirm.
319 const BuiltinType *BT = PT->getAs<BuiltinType>();
323 // Pointer to a character.
324 if (PT->isAnyCharacterType()) {
325 CS.setKind(ConversionSpecifier::sArg);
326 if (PT->isWideCharType())
327 LM.setKind(LengthModifier::AsWideChar);
329 LM.setKind(LengthModifier::None);
333 // Figure out the length modifier.
334 switch (BT->getKind()) {
// int, unsigned int and float take no length modifier.
336 case BuiltinType::UInt:
337 case BuiltinType::Int:
338 case BuiltinType::Float:
339 LM.setKind(LengthModifier::None);
343 case BuiltinType::Char_U:
344 case BuiltinType::UChar:
345 case BuiltinType::Char_S:
346 case BuiltinType::SChar:
347 LM.setKind(LengthModifier::AsChar);
351 case BuiltinType::Short:
352 case BuiltinType::UShort:
353 LM.setKind(LengthModifier::AsShort);
// double shares the AsLong modifier with long / unsigned long.
357 case BuiltinType::Long:
358 case BuiltinType::ULong:
359 case BuiltinType::Double:
360 LM.setKind(LengthModifier::AsLong);
364 case BuiltinType::LongLong:
365 case BuiltinType::ULongLong:
366 LM.setKind(LengthModifier::AsLongLong);
370 case BuiltinType::LongDouble:
371 LM.setKind(LengthModifier::AsLongDouble);
379 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
380 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) {
// NOTE(review): the typedef test above is on the pointee PT, but the
// identifier is looked up on the pointer type QT -- confirm that
// getBaseTypeIdentifier() looks through the pointer. The result is also
// dereferenced on the next line without a null check.
381 const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier();
382 if (Identifier->getName() == "size_t") {
383 LM.setKind(LengthModifier::AsSizeT);
384 } else if (Identifier->getName() == "ssize_t") {
385 // Not C99, but common in Unix.
386 LM.setKind(LengthModifier::AsSizeT);
387 } else if (Identifier->getName() == "intmax_t") {
388 LM.setKind(LengthModifier::AsIntMax);
// uintmax_t shares the intmax_t length modifier.
389 } else if (Identifier->getName() == "uintmax_t") {
390 LM.setKind(LengthModifier::AsIntMax);
391 } else if (Identifier->getName() == "ptrdiff_t") {
392 LM.setKind(LengthModifier::AsPtrDiff);
396 // If fixing the length modifier was enough, we are done.
397 const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx);
398 if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT))
401 // Figure out the conversion specifier.
402 if (PT->isRealFloatingType())
403 CS.setKind(ConversionSpecifier::fArg);
404 else if (PT->isSignedIntegerType())
405 CS.setKind(ConversionSpecifier::dArg);
406 else if (PT->isUnsignedIntegerType())
407 CS.setKind(ConversionSpecifier::uArg);
// Pointee is neither floating-point nor a signed/unsigned integer.
409 llvm_unreachable("Unexpected type");
// Writes this specifier back out to `os` in format-string syntax.
// Visible pieces: the positional argument index followed by "$" when the
// specifier is positional, something conditional on SuppressAssignment, and
// the field width.
// NOTE(review): the lines that emit the leading '%', the '*' flag, the length
// modifier and the conversion character are not part of this excerpt.
414 void ScanfSpecifier::toString(raw_ostream &os) const {
417 if (usesPositionalArg())
418 os << getPositionalArgIndex() << "$";
419 if (SuppressAssignment)
422 FieldWidth.toString(os);
// Entry point: walks a scanf format string, parsing one specifier at a time
// with ParseScanfSpecifier and passing each parsed specifier to
// H.HandleScanfSpecifier together with its start position and length.
//
// argIndex starts at 0 and is passed to ParseScanfSpecifier on every call;
// that function increments it for each specifier that takes the next
// sequential argument.
//
// NOTE(review): the cursor parameters (`I`, `E`), the loop header and every
// return statement are missing from this excerpt, so the meaning of the bool
// result is not documented here.
427 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
430 const LangOptions &LO) {
432 unsigned argIndex = 0;
434 // Keep looking for a format specifier until we have exhausted the string.
436 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
438 // Did a fail-stop error of any kind occur when parsing the specifier?
439 // If so, don't do any more processing.
440 if (FSR.shouldStop())
442 // Did we exhaust the string or encounter an error that
443 // we can recover from?
446 // We have a format specifier. Pass it to the callback.
447 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
448 I - FSR.getStart())) {
// Once the parsing loop is finished the whole string must have been consumed.
452 assert(I == E && "Format string not exhausted");
// Tests whether the argument type `argTy` is acceptable for this expected
// scanf argument type.
//
// Visible behaviour per kind:
//   - the C-string and wide-C-string kinds defer to the matching
//     ArgTypeResult,
//   - PtrToArgTypeResultTy views argTy as a pointer and matches its pointee
//     against the wrapped result `A`.
// Must not be called on an invalid result (that path is marked unreachable).
//
// NOTE(review): the switch header, most case labels and the handling of a
// non-pointer argTy (PT == null) are missing from this excerpt.
456 bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const {
459 llvm_unreachable("ArgTypeResult must be valid");
463 return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy);
465 return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy);
466 case PtrToArgTypeResultTy: {
467 const PointerType *PT = argTy->getAs<PointerType>();
470 return A.matchesType(C, PT->getPointeeType());
// Reached only if the kind matched none of the cases.
474 llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
// Produces a concrete QualType that stands for this expected argument type.
//
// Visible results: pointer to char, pointer to the wide character type, or,
// for PtrToArgTypeResultTy, a pointer to the representative type of the
// wrapped result `A`. An invalid result has no representative type (that path
// is marked unreachable).
//
// NOTE(review): the switch header and most case labels are missing from this
// excerpt, so which kind yields which of the first two pointer types is
// inferred from line order only.
477 QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const {
480 llvm_unreachable("No representative type for Invalid ArgTypeResult");
484 return C.getPointerType(C.CharTy);
486 return C.getPointerType(C.getWCharType());
487 case PtrToArgTypeResultTy:
488 return C.getPointerType(A.getRepresentativeType(C));
// Reached only if the kind matched none of the cases.
491 llvm_unreachable("Invalid ScanfArgTypeResult Kind!");
// Builds a quoted, human-readable name for the expected argument type.
//
// Two forms are produced:
//   'S'                  - just the spelled representative type, or
//   'Name' (aka 'S')     - the stored Name followed by the spelled type.
// NOTE(review): the condition choosing between the two forms is on a line
// missing from this excerpt; presumably the short form is used when no Name
// was supplied -- confirm.
494 std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const {
495 std::string S = getRepresentativeType(C).getAsString();
497 return std::string("'") + S + "'";
498 return std::string("'") + Name + "' (aka '" + S + "')";