1 //= ScanfFormatString.cpp - Analysis of scanf format strings --*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
13 //===----------------------------------------------------------------------===//
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfConversionSpecifier;
24 using clang::analyze_scanf::ScanfSpecifier;
25 using clang::UpdateOnReturn;
26 using namespace clang;
28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
// Parses the scan list of a '[' conversion, looking for the ']' that ends it.
// Whenever the input is exhausted before the list is complete, the problem is
// reported through H.HandleIncompleteScanList.
// NOTE(review): several lines of this function (including its return
// statements) are not visible in this listing; confirm the meaning of the
// bool result against the full source.
31 static bool ParseScanList(FormatStringHandler &H,
32 ScanfConversionSpecifier &CS,
33 const char *&Beg, const char *E) {
// 'start' is one character before the current position I; presumably this is
// the '[' that introduced the scan list -- TODO confirm.
35 const char *start = I - 1;
// Ties Beg to I; presumably I is written back to Beg on every return path
// (judging by the helper's name) -- TODO confirm.
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 // No more characters?
40 H.HandleIncompleteScanList(start, I);
44 // Special case: ']' is the first character.
47 H.HandleIncompleteScanList(start, I - 1);
52 // Look for a ']' character which denotes the end of the scan list.
55 H.HandleIncompleteScanList(start, I - 1);
64 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
65 // We can possibly refactor.
// Parses a single scanf conversion specification. The visible steps, in
// order, are: find the introducing '%', parse an optional argument position,
// the '*' assignment-suppression flag, an optional constant field width, an
// optional length modifier, and finally the conversion character.
// Running out of input part-way through is reported via
// H.HandleIncompleteSpecifier(Start, E - Start). On success the result is
// built from the specifier's start position and the populated FS.
// NOTE(review): parts of the signature and several statements are not visible
// in this listing; confirm details against the full source.
66 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
70 const LangOptions &LO) {
72 using namespace clang::analyze_scanf;
// Start stays null until a '%' has been found.
74 const char *Start = 0;
// Ties Beg to I; presumably I is written back to Beg on return -- TODO confirm.
75 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
77 // Look for a '%' character that indicates the start of a format specifier.
78 for ( ; I != E ; ++I) {
81 // Detect spurious null characters, which are likely errors.
86 Start = I++; // Record the start of the format specifier.
91 // No format specifier found?
96 // No more characters left?
97 H.HandleIncompleteSpecifier(Start, E - Start);
102 if (ParseArgPosition(H, FS, Start, I, E))
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
111 // Look for '*' flag if it is present.
113 FS.setSuppressAssignment(I);
115 H.HandleIncompleteSpecifier(Start, E - Start);
120 // Look for the field width (if any). Unlike printf, this is either
121 // a fixed integer or isn't present.
122 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
123 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
// A specified scanf width must be a literal constant.
124 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
125 FS.setFieldWidth(Amt);
128 // No more characters left?
129 H.HandleIncompleteSpecifier(Start, E - Start);
134 // Look for the length modifier.
135 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
136 // No more characters left?
137 H.HandleIncompleteSpecifier(Start, E - Start);
141 // Detect spurious null characters, which are likely errors.
147 // Finally, look for the conversion specifier.
// Remember where the conversion character sits and step past it.
148 const char *conversionPosition = I++;
// Any character not matched by a case below keeps the InvalidSpecifier kind.
149 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
150 switch (*conversionPosition) {
153 case '%': k = ConversionSpecifier::PercentArg; break;
154 case 'A': k = ConversionSpecifier::AArg; break;
155 case 'E': k = ConversionSpecifier::EArg; break;
156 case 'F': k = ConversionSpecifier::FArg; break;
157 case 'G': k = ConversionSpecifier::GArg; break;
158 case 'X': k = ConversionSpecifier::XArg; break;
159 case 'a': k = ConversionSpecifier::aArg; break;
160 case 'd': k = ConversionSpecifier::dArg; break;
161 case 'e': k = ConversionSpecifier::eArg; break;
162 case 'f': k = ConversionSpecifier::fArg; break;
163 case 'g': k = ConversionSpecifier::gArg; break;
164 case 'i': k = ConversionSpecifier::iArg; break;
165 case 'n': k = ConversionSpecifier::nArg; break;
166 case 'c': k = ConversionSpecifier::cArg; break;
167 case 'C': k = ConversionSpecifier::CArg; break;
168 case 'S': k = ConversionSpecifier::SArg; break;
169 case '[': k = ConversionSpecifier::ScanListArg; break;
170 case 'u': k = ConversionSpecifier::uArg; break;
171 case 'x': k = ConversionSpecifier::xArg; break;
172 case 'o': k = ConversionSpecifier::oArg; break;
173 case 's': k = ConversionSpecifier::sArg; break;
174 case 'p': k = ConversionSpecifier::pArg; break;
176 ScanfConversionSpecifier CS(conversionPosition, k);
// A '[' conversion is followed by a scan list that must be parsed as well.
177 if (k == ScanfConversionSpecifier::ScanListArg) {
178 if (ParseScanList(H, CS, I, E))
181 FS.setConversionSpecifier(CS);
// Only conversions that consume a data argument, are not suppressed with '*',
// and do not use a positional argument take the next sequential index.
182 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
183 && !FS.usesPositionalArg())
184 FS.setArgIndex(argIndex++);
186 // FIXME: '%' and '*' don't make sense. Issue a warning.
187 // FIXME: 'ConsumedSoFar' and '*' don't make sense.
189 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
190 // Assume the conversion takes one argument.
// The handler is told about the invalid conversion; the result is built from
// the negation of what the handler returns.
191 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
193 return ScanfSpecifierResult(Start, FS);
// Returns the argument type this scanf specifier expects, selected from the
// conversion specifier kind and the length modifier. In the visible arms,
// valid combinations yield pointer types (ArgType::PtrTo(...)). A conversion
// that does not consume a data argument yields ArgType::Invalid().
// NOTE(review): a few switch arms are truncated in this listing (their return
// statements are not visible); confirm them against the full source.
196 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
197 const ScanfConversionSpecifier &CS = getConversionSpecifier();
199 if (!CS.consumesDataArgument())
200 return ArgType::Invalid();
202 switch(CS.getKind()) {
// Signed integer conversions: the length modifier picks the pointee width.
204 case ConversionSpecifier::dArg:
205 case ConversionSpecifier::iArg:
206 switch (LM.getKind()) {
207 case LengthModifier::None:
208 return ArgType::PtrTo(Ctx.IntTy);
209 case LengthModifier::AsChar:
210 return ArgType::PtrTo(ArgType::AnyCharTy);
211 case LengthModifier::AsShort:
212 return ArgType::PtrTo(Ctx.ShortTy);
213 case LengthModifier::AsLong:
214 return ArgType::PtrTo(Ctx.LongTy);
215 case LengthModifier::AsLongLong:
216 case LengthModifier::AsQuad:
217 return ArgType::PtrTo(Ctx.LongLongTy);
218 case LengthModifier::AsIntMax:
219 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
220 case LengthModifier::AsSizeT:
223 case LengthModifier::AsPtrDiff:
224 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
225 case LengthModifier::AsLongDouble:
// With an integer conversion this modifier is mapped to long long.
227 return ArgType::PtrTo(Ctx.LongLongTy);
228 case LengthModifier::AsAllocate:
229 return ArgType::Invalid();
230 case LengthModifier::AsMAllocate:
231 return ArgType::Invalid();
// Unsigned integer conversions: same layout as above with unsigned pointees.
235 case ConversionSpecifier::oArg:
236 case ConversionSpecifier::uArg:
237 case ConversionSpecifier::xArg:
238 case ConversionSpecifier::XArg:
239 switch (LM.getKind()) {
240 case LengthModifier::None:
241 return ArgType::PtrTo(Ctx.UnsignedIntTy);
242 case LengthModifier::AsChar:
243 return ArgType::PtrTo(Ctx.UnsignedCharTy);
244 case LengthModifier::AsShort:
245 return ArgType::PtrTo(Ctx.UnsignedShortTy);
246 case LengthModifier::AsLong:
247 return ArgType::PtrTo(Ctx.UnsignedLongTy);
248 case LengthModifier::AsLongLong:
249 case LengthModifier::AsQuad:
250 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
251 case LengthModifier::AsIntMax:
252 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
253 case LengthModifier::AsSizeT:
254 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
255 case LengthModifier::AsPtrDiff:
256 // FIXME: Unsigned version of ptrdiff_t?
258 case LengthModifier::AsLongDouble:
// With an integer conversion this modifier is mapped to unsigned long long.
260 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
261 case LengthModifier::AsAllocate:
262 return ArgType::Invalid();
263 case LengthModifier::AsMAllocate:
264 return ArgType::Invalid();
// Floating-point conversions: float by default, double with the AsLong
// modifier, long double with AsLongDouble; anything else is invalid.
268 case ConversionSpecifier::aArg:
269 case ConversionSpecifier::AArg:
270 case ConversionSpecifier::eArg:
271 case ConversionSpecifier::EArg:
272 case ConversionSpecifier::fArg:
273 case ConversionSpecifier::FArg:
274 case ConversionSpecifier::gArg:
275 case ConversionSpecifier::GArg:
276 switch (LM.getKind()) {
277 case LengthModifier::None:
278 return ArgType::PtrTo(Ctx.FloatTy);
279 case LengthModifier::AsLong:
280 return ArgType::PtrTo(Ctx.DoubleTy);
281 case LengthModifier::AsLongDouble:
282 return ArgType::PtrTo(Ctx.LongDoubleTy);
284 return ArgType::Invalid();
287 // Char, string and scanlist.
288 case ConversionSpecifier::cArg:
289 case ConversionSpecifier::sArg:
290 case ConversionSpecifier::ScanListArg:
291 switch (LM.getKind()) {
292 case LengthModifier::None:
293 return ArgType::PtrTo(ArgType::AnyCharTy);
294 case LengthModifier::AsLong:
295 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
// The allocating modifiers expect a pointer to a C string pointer.
296 case LengthModifier::AsAllocate:
297 case LengthModifier::AsMAllocate:
298 return ArgType::PtrTo(ArgType::CStrTy);
300 return ArgType::Invalid();
// Upper-case C/S: wide-character variants of the arm above.
302 case ConversionSpecifier::CArg:
303 case ConversionSpecifier::SArg:
304 // FIXME: Mac OS X specific?
305 switch (LM.getKind()) {
306 case LengthModifier::None:
307 return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t"));
308 case LengthModifier::AsAllocate:
309 case LengthModifier::AsMAllocate:
310 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
312 return ArgType::Invalid();
316 case ConversionSpecifier::pArg:
317 return ArgType::PtrTo(ArgType::CPointerTy);
// %n: the pointee type follows the length modifier. Two arms currently
// return a default-constructed ArgType (see the FIXMEs).
320 case ConversionSpecifier::nArg:
321 switch (LM.getKind()) {
322 case LengthModifier::None:
323 return ArgType::PtrTo(Ctx.IntTy);
324 case LengthModifier::AsChar:
325 return ArgType::PtrTo(Ctx.SignedCharTy);
326 case LengthModifier::AsShort:
327 return ArgType::PtrTo(Ctx.ShortTy);
328 case LengthModifier::AsLong:
329 return ArgType::PtrTo(Ctx.LongTy);
330 case LengthModifier::AsLongLong:
331 case LengthModifier::AsQuad:
332 return ArgType::PtrTo(Ctx.LongLongTy);
333 case LengthModifier::AsIntMax:
334 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
335 case LengthModifier::AsSizeT:
336 return ArgType(); // FIXME: ssize_t
337 case LengthModifier::AsPtrDiff:
338 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
339 case LengthModifier::AsLongDouble:
340 return ArgType(); // FIXME: Is this a known extension?
341 case LengthModifier::AsAllocate:
342 case LengthModifier::AsMAllocate:
343 return ArgType::Invalid();
// Adjusts this specifier's length modifier and conversion specifier so that
// they describe an argument of type QT. QT is expected to be a pointer type;
// the decisions are driven by its pointee type PT:
//   * character pointees become an 's' conversion (wide or narrow),
//   * otherwise the builtin kind of PT selects the length modifier,
//   * typedef'd pointees may select a dedicated C99 length modifier,
//   * finally the conversion is chosen from floating / signed / unsigned.
// %n specifiers are never rewritten.
// NOTE(review): part of the signature and the return statements are not
// visible in this listing; confirm the exact bool contract in the full source.
353 bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt,
355 if (!QT->isPointerType())
358 // %n is different from other conversion specifiers; don't try to fix it.
359 if (CS.getKind() == ConversionSpecifier::nArg)
362 QualType PT = QT->getPointeeType();
364 // If it's an enum, get its underlying type.
// The look-through must inspect the pointee: QT itself is checked to be a
// pointer type above, so testing QT for EnumType could not match and the enum
// case never reached the builtin-type logic below, which works on PT.
365 if (const EnumType *ETy = PT->getAs<EnumType>())
366 PT = ETy->getDecl()->getIntegerType();
368 const BuiltinType *BT = PT->getAs<BuiltinType>();
372 // Pointer to a character.
373 if (PT->isAnyCharacterType()) {
374 CS.setKind(ConversionSpecifier::sArg);
375 if (PT->isWideCharType())
376 LM.setKind(LengthModifier::AsWideChar);
378 LM.setKind(LengthModifier::None);
382 // Figure out the length modifier.
383 switch (BT->getKind()) {
// Types that need no length modifier.
385 case BuiltinType::UInt:
386 case BuiltinType::Int:
387 case BuiltinType::Float:
388 LM.setKind(LengthModifier::None);
392 case BuiltinType::Char_U:
393 case BuiltinType::UChar:
394 case BuiltinType::Char_S:
395 case BuiltinType::SChar:
396 LM.setKind(LengthModifier::AsChar);
400 case BuiltinType::Short:
401 case BuiltinType::UShort:
402 LM.setKind(LengthModifier::AsShort);
// double shares the AsLong modifier with long / unsigned long.
406 case BuiltinType::Long:
407 case BuiltinType::ULong:
408 case BuiltinType::Double:
409 LM.setKind(LengthModifier::AsLong);
413 case BuiltinType::LongLong:
414 case BuiltinType::ULongLong:
415 LM.setKind(LengthModifier::AsLongLong);
419 case BuiltinType::LongDouble:
420 LM.setKind(LengthModifier::AsLongDouble);
428 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
429 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x))
430 namedTypeToLengthModifier(PT, LM);
432 // If fixing the length modifier was enough, we are done.
433 const analyze_scanf::ArgType &AT = getArgType(Ctx);
434 if (hasValidLengthModifier() && AT.isValid() && AT.matchesType(Ctx, QT))
437 // Figure out the conversion specifier.
438 if (PT->isRealFloatingType())
439 CS.setKind(ConversionSpecifier::fArg);
440 else if (PT->isSignedIntegerType())
441 CS.setKind(ConversionSpecifier::dArg);
442 else if (PT->isUnsignedIntegerType())
443 CS.setKind(ConversionSpecifier::uArg);
445 llvm_unreachable("Unexpected type");
// Writes a textual form of this specifier to os. The visible pieces are the
// positional argument index followed by "$" (only when a positional argument
// is used), handling for the assignment-suppression flag, and the field width.
// NOTE(review): some statements are not visible in this listing; confirm the
// full output format against the complete source.
450 void ScanfSpecifier::toString(raw_ostream &os) const {
453 if (usesPositionalArg())
454 os << getPositionalArgIndex() << "$";
455 if (SuppressAssignment)
458 FieldWidth.toString(os);
// Entry point for scanf format strings: parses specifiers one after another
// with ParseScanfSpecifier and hands each parsed specifier to
// H.HandleScanfSpecifier together with its start position and length.
// NOTE(review): part of the signature, the loop header and the return
// statements are not visible in this listing; confirm the meaning of the bool
// result against the full source.
463 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
466 const LangOptions &LO) {
// Running index handed to ParseScanfSpecifier, which assigns it to
// non-positional specifiers that consume an argument.
468 unsigned argIndex = 0;
470 // Keep looking for a format specifier until we have exhausted the string.
472 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
474 // Did a fail-stop error of any kind occur when parsing the specifier?
475 // If so, don't do any more processing.
476 if (FSR.shouldStop())
478 // Did we exhaust the string or encounter an error that
479 // we can recover from?
482 // We have a format specifier. Pass it to the callback.
483 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
484 I - FSR.getStart())) {
// Once parsing is finished the whole string must have been consumed.
488 assert(I == E && "Format string not exhausted");