1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
13 //===----------------------------------------------------------------------===//
15 #include "clang/AST/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
19 using clang::analyze_format_string::ArgType;
20 using clang::analyze_format_string::FormatStringHandler;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::ConversionSpecifier;
24 using clang::analyze_scanf::ScanfConversionSpecifier;
25 using clang::analyze_scanf::ScanfSpecifier;
26 using clang::UpdateOnReturn;
27 using namespace clang;
29 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
// Parses the body of a scan-list conversion (the bracketed character set).
// What the visible code shows:
//  - `start` is anchored one character before the cursor I, i.e. at the
//    character that precedes the scan-list body.
//  - UpdateBeg ties Beg to I (presumably writing I back into Beg when the
//    function returns, as the helper name suggests - TODO confirm).
//  - A closing bracket appearing first, or right after a leading caret, is
//    treated as a special case before the general search for the terminator.
//  - Every failure path reports the unterminated list through
//    H.HandleIncompleteScanList.
// H   - handler that receives the incomplete-scan-list callback.
// CS  - conversion specifier being built for this scan list.
// Beg - in/out cursor into the format string; E is the end of the string.
// NOTE(review): the declaration of I, the guarding conditions and the return
// statements are not visible in this excerpt, so the meaning of the bool
// result must be confirmed against the full file.
32 static bool ParseScanList(FormatStringHandler &H,
33 ScanfConversionSpecifier &CS,
34 const char *&Beg, const char *E) {
36 const char *start = I - 1;
37 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39 // No more characters?
41 H.HandleIncompleteScanList(start, I);
45 // Special case: ']' is the first character.
// The reported range ends at I - 1 here and in the cases below.
48 H.HandleIncompleteScanList(start, I - 1);
53 // Special case: "^]" are the first characters.
// The I + 1 != E test guards the read of I[1].
54 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
57 H.HandleIncompleteScanList(start, I - 1);
62 // Look for a ']' character which denotes the end of the scan list.
65 H.HandleIncompleteScanList(start, I - 1);
// Parses a single scanf conversion specification from the format string.
// Steps visible in this excerpt, in order:
//   1. scan forward for the percent character that starts a specifier;
//   2. parse an optional argument position (ParseArgPosition);
//   3. record the star flag that suppresses assignment;
//   4. parse an optional field width, which must be a constant;
//   5. parse an optional length modifier;
//   6. map the conversion character to a ConversionSpecifier kind.
// Truncated specifiers are reported through H.HandleIncompleteSpecifier and
// unknown conversion characters through
// H.HandleInvalidScanfConversionSpecifier.
//
// H      - handler that receives the diagnostic callbacks.
// LO     - language options, forwarded to ParseLengthModifier.
// Target - target info; its triple decides whether the DArg, OArg and UArg
//          kinds are selected (Darwin only).
//
// NOTE(review): part of the signature (Beg, E, argIndex), the declarations of
// I and FS, and several conditions and return statements are not visible in
// this excerpt; confirm the details against the full file.
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
76 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
80 const LangOptions &LO,
81 const TargetInfo &Target) {
82 using namespace clang::analyze_format_string;
83 using namespace clang::analyze_scanf;
85 const char *Start = nullptr;
86 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
88 // Look for a '%' character that indicates the start of a format specifier.
89 for ( ; I != E ; ++I) {
92 // Detect spurious null characters, which are likely errors.
97 Start = I++; // Record the start of the format specifier.
102 // No format specifier found?
107 // No more characters left?
108 H.HandleIncompleteSpecifier(Start, E - Start);
113 if (ParseArgPosition(H, FS, Start, I, E))
117 // No more characters left?
118 H.HandleIncompleteSpecifier(Start, E - Start);
122 // Look for '*' flag if it is present.
124 FS.setSuppressAssignment(I);
126 H.HandleIncompleteSpecifier(Start, E - Start);
131 // Look for the field width (if any). Unlike printf, this is either
132 // a fixed integer or isn't present.
133 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
134 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
135 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136 FS.setFieldWidth(Amt);
139 // No more characters left?
140 H.HandleIncompleteSpecifier(Start, E - Start);
145 // Look for the length modifier.
// The incomplete-specifier report below only fires when a length modifier
// was parsed and it was the last thing in the string (I == E).
146 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147 // No more characters left?
148 H.HandleIncompleteSpecifier(Start, E - Start);
152 // Detect spurious null characters, which are likely errors.
158 // Finally, look for the conversion specifier.
// The conversion character is consumed here; conversionPosition keeps
// pointing at it while I moves past it.
159 const char *conversionPosition = I++;
// k starts out as InvalidSpecifier and is only overwritten for characters
// recognised by the switch below.
160 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161 switch (*conversionPosition) {
164 case '%': k = ConversionSpecifier::PercentArg; break;
165 case 'A': k = ConversionSpecifier::AArg; break;
166 case 'E': k = ConversionSpecifier::EArg; break;
167 case 'F': k = ConversionSpecifier::FArg; break;
168 case 'G': k = ConversionSpecifier::GArg; break;
169 case 'X': k = ConversionSpecifier::XArg; break;
170 case 'a': k = ConversionSpecifier::aArg; break;
171 case 'd': k = ConversionSpecifier::dArg; break;
172 case 'e': k = ConversionSpecifier::eArg; break;
173 case 'f': k = ConversionSpecifier::fArg; break;
174 case 'g': k = ConversionSpecifier::gArg; break;
175 case 'i': k = ConversionSpecifier::iArg; break;
176 case 'n': k = ConversionSpecifier::nArg; break;
177 case 'c': k = ConversionSpecifier::cArg; break;
178 case 'C': k = ConversionSpecifier::CArg; break;
179 case 'S': k = ConversionSpecifier::SArg; break;
180 case '[': k = ConversionSpecifier::ScanListArg; break;
181 case 'u': k = ConversionSpecifier::uArg; break;
182 case 'x': k = ConversionSpecifier::xArg; break;
183 case 'o': k = ConversionSpecifier::oArg; break;
184 case 's': k = ConversionSpecifier::sArg; break;
185 case 'p': k = ConversionSpecifier::pArg; break;
// The next three kinds are assigned only when the target triple is a Darwin
// OS; on other targets k is not assigned by these statements.
// NOTE(review): the matching case labels are not visible in this excerpt.
189 if (Target.getTriple().isOSDarwin())
190 k = ConversionSpecifier::DArg;
193 if (Target.getTriple().isOSDarwin())
194 k = ConversionSpecifier::OArg;
197 if (Target.getTriple().isOSDarwin())
198 k = ConversionSpecifier::UArg;
201 ScanfConversionSpecifier CS(conversionPosition, k);
// A scan list needs its bracketed body parsed before the specifier is stored.
202 if (k == ScanfConversionSpecifier::ScanListArg) {
203 if (ParseScanList(H, CS, I, E))
206 FS.setConversionSpecifier(CS);
// Only conversions that consume a data argument, are not suppressed and do
// not use an explicit position take the next sequential argument index.
207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208 && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
211 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
214 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
// Len is the distance from Beg to the current cursor. It is handed to
// ParseUTF8InvalidSpecifier, which presumably may adjust it (TODO confirm)
// before the end of the specifier is set to Beg + Len.
215 unsigned Len = I - Beg;
216 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217 CS.setEndScanList(Beg + Len);
218 FS.setConversionSpecifier(CS);
220 // Assume the conversion takes one argument.
221 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
223 return ScanfSpecifierResult(Start, FS);
// Returns the ArgType that the argument matching this specifier is expected
// to have. The result is selected by the conversion specifier kind and then
// by the length modifier LM: supported combinations yield a pointer type
// built with ArgType::PtrTo, unsupported combinations yield
// ArgType::Invalid(), and a specifier that does not consume a data argument
// is always Invalid.
// Ctx supplies the builtin and target-dependent types (intmax_t, size_t,
// ptrdiff_t, wchar_t and so on).
// NOTE(review): the declaration of LM, the default labels and the closing
// braces are not visible in this excerpt.
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227 const ScanfConversionSpecifier &CS = getConversionSpecifier();
229 if (!CS.consumesDataArgument())
230 return ArgType::Invalid();
232 switch(CS.getKind()) {
// Signed integer conversions: dArg, DArg and iArg.
234 case ConversionSpecifier::dArg:
235 case ConversionSpecifier::DArg:
236 case ConversionSpecifier::iArg:
237 switch (LM.getKind()) {
238 case LengthModifier::None:
239 return ArgType::PtrTo(Ctx.IntTy);
240 case LengthModifier::AsChar:
241 return ArgType::PtrTo(ArgType::AnyCharTy);
242 case LengthModifier::AsShort:
243 return ArgType::PtrTo(Ctx.ShortTy);
244 case LengthModifier::AsLong:
245 return ArgType::PtrTo(Ctx.LongTy);
246 case LengthModifier::AsLongLong:
247 case LengthModifier::AsQuad:
248 return ArgType::PtrTo(Ctx.LongLongTy);
249 case LengthModifier::AsInt64:
250 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
251 case LengthModifier::AsIntMax:
252 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
// For signed conversions the size_t modifier maps to the signed counterpart
// of size_t, reported under the name ssize_t.
253 case LengthModifier::AsSizeT:
254 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
255 case LengthModifier::AsPtrDiff:
256 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
// NOTE(review): the long double modifier on an integer conversion is mapped
// to long long here - presumably a library extension; confirm.
257 case LengthModifier::AsLongDouble:
259 return ArgType::PtrTo(Ctx.LongLongTy);
260 case LengthModifier::AsAllocate:
261 case LengthModifier::AsMAllocate:
262 case LengthModifier::AsInt32:
263 case LengthModifier::AsInt3264:
264 case LengthModifier::AsWide:
265 return ArgType::Invalid();
267 llvm_unreachable("Unsupported LenghtModifier Type");
// Unsigned integer conversions: oArg, OArg, uArg, UArg, xArg and XArg.
// The mapping mirrors the signed group with the unsigned types.
270 case ConversionSpecifier::oArg:
271 case ConversionSpecifier::OArg:
272 case ConversionSpecifier::uArg:
273 case ConversionSpecifier::UArg:
274 case ConversionSpecifier::xArg:
275 case ConversionSpecifier::XArg:
276 switch (LM.getKind()) {
277 case LengthModifier::None:
278 return ArgType::PtrTo(Ctx.UnsignedIntTy);
279 case LengthModifier::AsChar:
280 return ArgType::PtrTo(Ctx.UnsignedCharTy);
281 case LengthModifier::AsShort:
282 return ArgType::PtrTo(Ctx.UnsignedShortTy);
283 case LengthModifier::AsLong:
284 return ArgType::PtrTo(Ctx.UnsignedLongTy);
285 case LengthModifier::AsLongLong:
286 case LengthModifier::AsQuad:
287 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288 case LengthModifier::AsInt64:
289 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290 case LengthModifier::AsIntMax:
291 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292 case LengthModifier::AsSizeT:
293 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294 case LengthModifier::AsPtrDiff:
295 return ArgType::PtrTo(
296 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
// NOTE(review): as in the signed group, the long double modifier is mapped
// to the (unsigned) long long type - presumably an extension; confirm.
297 case LengthModifier::AsLongDouble:
299 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300 case LengthModifier::AsAllocate:
301 case LengthModifier::AsMAllocate:
302 case LengthModifier::AsInt32:
303 case LengthModifier::AsInt3264:
304 case LengthModifier::AsWide:
305 return ArgType::Invalid();
307 llvm_unreachable("Unsupported LenghtModifier Type");
// Floating-point conversions: no modifier means float, the long modifier
// means double and the long double modifier means long double.
310 case ConversionSpecifier::aArg:
311 case ConversionSpecifier::AArg:
312 case ConversionSpecifier::eArg:
313 case ConversionSpecifier::EArg:
314 case ConversionSpecifier::fArg:
315 case ConversionSpecifier::FArg:
316 case ConversionSpecifier::gArg:
317 case ConversionSpecifier::GArg:
318 switch (LM.getKind()) {
319 case LengthModifier::None:
320 return ArgType::PtrTo(Ctx.FloatTy);
321 case LengthModifier::AsLong:
322 return ArgType::PtrTo(Ctx.DoubleTy);
323 case LengthModifier::AsLongDouble:
324 return ArgType::PtrTo(Ctx.LongDoubleTy);
326 return ArgType::Invalid();
329 // Char, string and scanlist.
330 case ConversionSpecifier::cArg:
331 case ConversionSpecifier::sArg:
332 case ConversionSpecifier::ScanListArg:
333 switch (LM.getKind()) {
334 case LengthModifier::None:
335 return ArgType::PtrTo(ArgType::AnyCharTy);
336 case LengthModifier::AsLong:
337 case LengthModifier::AsWide:
338 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
// With an allocating modifier the expected argument is a pointer to a
// C string pointer rather than a pointer to a character.
339 case LengthModifier::AsAllocate:
340 case LengthModifier::AsMAllocate:
341 return ArgType::PtrTo(ArgType::CStrTy);
// The short modifier is accepted only when the target uses the MSVC runtime.
342 case LengthModifier::AsShort:
343 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
344 return ArgType::PtrTo(ArgType::AnyCharTy);
347 return ArgType::Invalid();
// Upper-case character and string conversions: wide characters by default.
349 case ConversionSpecifier::CArg:
350 case ConversionSpecifier::SArg:
351 // FIXME: Mac OS X specific?
352 switch (LM.getKind()) {
353 case LengthModifier::None:
354 case LengthModifier::AsWide:
355 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
356 case LengthModifier::AsAllocate:
357 case LengthModifier::AsMAllocate:
358 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
// As above, the short modifier is accepted only for the MSVC runtime and
// then selects narrow characters.
359 case LengthModifier::AsShort:
360 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
361 return ArgType::PtrTo(ArgType::AnyCharTy);
364 return ArgType::Invalid();
// Pointer conversion: the argument is a pointer to a C pointer type.
368 case ConversionSpecifier::pArg:
369 return ArgType::PtrTo(ArgType::CPointerTy);
// Character-count conversion (nArg): same table as the signed integers,
// except that the char modifier maps to signed char and the long double
// modifier yields a default-constructed ArgType.
372 case ConversionSpecifier::nArg:
373 switch (LM.getKind()) {
374 case LengthModifier::None:
375 return ArgType::PtrTo(Ctx.IntTy);
376 case LengthModifier::AsChar:
377 return ArgType::PtrTo(Ctx.SignedCharTy);
378 case LengthModifier::AsShort:
379 return ArgType::PtrTo(Ctx.ShortTy);
380 case LengthModifier::AsLong:
381 return ArgType::PtrTo(Ctx.LongTy);
382 case LengthModifier::AsLongLong:
383 case LengthModifier::AsQuad:
384 return ArgType::PtrTo(Ctx.LongLongTy);
385 case LengthModifier::AsInt64:
386 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
387 case LengthModifier::AsIntMax:
388 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
389 case LengthModifier::AsSizeT:
390 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
391 case LengthModifier::AsPtrDiff:
392 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
393 case LengthModifier::AsLongDouble:
394 return ArgType(); // FIXME: Is this a known extension?
395 case LengthModifier::AsAllocate:
396 case LengthModifier::AsMAllocate:
397 case LengthModifier::AsInt32:
398 case LengthModifier::AsInt3264:
399 case LengthModifier::AsWide:
400 return ArgType::Invalid();
// Tries to rewrite this specifier so that it matches an argument of type QT.
// Behaviour visible in this excerpt:
//  - an nArg conversion is never rewritten;
//  - QT has to be a pointer type, and its pointee PT drives the fix;
//  - an enum pointee is replaced by its underlying integer type, while
//    incomplete enums are left alone;
//  - a character pointee switches the conversion to sArg and, when RawQT is a
//    constant array, derives a field width from the array size;
//  - otherwise the length modifier is picked from the builtin kind, or from a
//    typedef name in C99 and C++11 modes, and if that alone does not make the
//    specifier match, the conversion kind is picked from the floating, signed
//    or unsigned classification of PT.
// QT      - type of the argument being matched.
// RawQT   - in the visible code only used for the constant-array lookup.
// LangOpt - language options; gates the typedef-based length modifiers.
// NOTE(review): part of the signature (Ctx is used but its declaration is not
// shown), the return statements and some else branches are not visible in
// this excerpt; the meaning of the bool result must be confirmed.
410 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
411 const LangOptions &LangOpt,
414 // %n is different from other conversion specifiers; don't try to fix it.
415 if (CS.getKind() == ConversionSpecifier::nArg)
418 if (!QT->isPointerType())
421 QualType PT = QT->getPointeeType();
423 // If it's an enum, get its underlying type.
424 if (const EnumType *ETy = PT->getAs<EnumType>()) {
425 // Don't try to fix incomplete enums.
426 if (!ETy->getDecl()->isComplete())
428 PT = ETy->getDecl()->getIntegerType();
431 const BuiltinType *BT = PT->getAs<BuiltinType>();
435 // Pointer to a character.
436 if (PT->isAnyCharacterType()) {
437 CS.setKind(ConversionSpecifier::sArg);
438 if (PT->isWideCharType())
439 LM.setKind(LengthModifier::AsWideChar);
441 LM.setKind(LengthModifier::None);
443 // If we know the target array length, we can use it as a field width.
444 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
445 if (CAT->getSizeModifier() == ArrayType::Normal)
// The width is the array size minus one - presumably to leave room for the
// terminating null character (TODO confirm).
446 FieldWidth = OptionalAmount(OptionalAmount::Constant,
447 CAT->getSize().getZExtValue() - 1,
454 // Figure out the length modifier.
455 switch (BT->getKind()) {
// int, unsigned int and float need no length modifier.
457 case BuiltinType::UInt:
458 case BuiltinType::Int:
459 case BuiltinType::Float:
460 LM.setKind(LengthModifier::None);
464 case BuiltinType::Char_U:
465 case BuiltinType::UChar:
466 case BuiltinType::Char_S:
467 case BuiltinType::SChar:
468 LM.setKind(LengthModifier::AsChar);
472 case BuiltinType::Short:
473 case BuiltinType::UShort:
474 LM.setKind(LengthModifier::AsShort);
// double shares the long modifier with long and unsigned long.
478 case BuiltinType::Long:
479 case BuiltinType::ULong:
480 case BuiltinType::Double:
481 LM.setKind(LengthModifier::AsLong);
485 case BuiltinType::LongLong:
486 case BuiltinType::ULongLong:
487 LM.setKind(LengthModifier::AsLongLong);
491 case BuiltinType::LongDouble:
492 LM.setKind(LengthModifier::AsLongDouble);
500 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
501 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
502 namedTypeToLengthModifier(PT, LM);
504 // If fixing the length modifier was enough, we are done.
505 if (hasValidLengthModifier(Ctx.getTargetInfo())) {
506 const analyze_scanf::ArgType &AT = getArgType(Ctx);
507 if (AT.isValid() && AT.matchesType(Ctx, QT))
511 // Figure out the conversion specifier.
// Floating types map to fArg, signed integers to dArg and unsigned integers
// to uArg; any other pointee type is treated as unreachable.
512 if (PT->isRealFloatingType())
513 CS.setKind(ConversionSpecifier::fArg);
514 else if (PT->isSignedIntegerType())
515 CS.setKind(ConversionSpecifier::dArg);
516 else if (PT->isUnsignedIntegerType())
517 CS.setKind(ConversionSpecifier::uArg);
519 llvm_unreachable("Unexpected type");
// Writes the textual form of this specifier to os. The statements visible in
// this excerpt emit the positional argument index followed by a dollar sign
// when a positional argument is used, test the SuppressAssignment flag, and
// append the field width through FieldWidth.toString.
// NOTE(review): the remaining output statements (including what is written
// for SuppressAssignment) are not visible in this excerpt.
524 void ScanfSpecifier::toString(raw_ostream &os) const {
527 if (usesPositionalArg())
528 os << getPositionalArgIndex() << "$";
529 if (SuppressAssignment)
532 FieldWidth.toString(os);
// Entry point for scanf format strings: repeatedly calls ParseScanfSpecifier
// and hands each successfully parsed specifier to H.HandleScanfSpecifier,
// together with its start pointer and its length (I - FSR.getStart()).
// Sequential argument numbering starts at 0 and is threaded through the
// parser via argIndex. Parsing stops as soon as a specifier result reports
// shouldStop; the final assert expects the cursor I to have reached E.
// H      - handler receiving specifier and diagnostic callbacks.
// LO     - language options, passed on to the specifier parser.
// Target - target info, passed on to the specifier parser.
// NOTE(review): the loop header, the return statements and part of the
// signature are not visible in this excerpt, so the meaning of the bool
// result must be confirmed against the full file.
537 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
540 const LangOptions &LO,
541 const TargetInfo &Target) {
543 unsigned argIndex = 0;
545 // Keep looking for a format specifier until we have exhausted the string.
547 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
549 // Did a fail-stop error of any kind occur when parsing the specifier?
550 // If so, don't do any more processing.
551 if (FSR.shouldStop())
553 // Did we exhaust the string or encounter an error that
554 // we can recover from?
557 // We have a format specifier. Pass it to the callback.
558 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
559 I - FSR.getStart())) {
563 assert(I == E && "Format string not exhausted");