1 //== ScanfFormatString.cpp - Analysis of scanf format strings --*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Handling of format strings in scanf and friends. The structure of format
10 // strings for fscanf() is described in C99 7.19.6.2.
12 //===----------------------------------------------------------------------===//
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfConversionSpecifier;
24 using clang::analyze_scanf::ScanfSpecifier;
25 using clang::UpdateOnReturn;
26 using namespace clang;
28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
// Parses the scan list of a scanf conversion into CS, reading characters up
// to E. Every malformed or truncated list seen below is reported to H through
// HandleIncompleteScanList(). Beg is tied to the local cursor I through
// UpdateOnReturn (presumably I is written back to Beg when the function
// returns; confirm against the UpdateOnReturn definition).
31 static bool ParseScanList(FormatStringHandler &H,
32 ScanfConversionSpecifier &CS,
33 const char *&Beg, const char *E) {
// start is one character before the cursor; presumably the character that
// opened the scan list. TODO confirm against the caller.
35 const char *start = I - 1;
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 // No more characters?
40 H.HandleIncompleteScanList(start, I);
44 // Special case: ']' is the first character.
47 H.HandleIncompleteScanList(start, I - 1);
52 // Special case: "^]" are the first characters.
// The I + 1 != E test guards the read of I[1].
53 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
56 H.HandleIncompleteScanList(start, I - 1);
61 // Look for a ']' character which denotes the end of the scan list.
64 H.HandleIncompleteScanList(start, I - 1);
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
//
// Scans forward for the next '%' and parses one scanf format specifier into a
// ScanfSpecifier. The pieces are handled in this order: argument position,
// '*' assignment suppression, field width, length modifier, conversion
// character. Each point where the string can run out reports
// H.HandleIncompleteSpecifier(Start, E - Start). On success the result pairs
// Start (the position recorded at the '%') with the parsed specifier.
75 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
79 const LangOptions &LO,
80 const TargetInfo &Target) {
81 using namespace clang::analyze_format_string;
82 using namespace clang::analyze_scanf;
84 const char *Start = nullptr;
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
91 // Detect spurious null characters, which are likely errors.
96 Start = I++; // Record the start of the format specifier.
101 // No format specifier found?
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
112 if (ParseArgPosition(H, FS, Start, I, E))
116 // No more characters left?
117 H.HandleIncompleteSpecifier(Start, E - Start);
121 // Look for '*' flag if it is present.
123 FS.setSuppressAssignment(I);
125 H.HandleIncompleteSpecifier(Start, E - Start);
130 // Look for the field width (if any). Unlike printf, this is either
131 // a fixed integer or isn't present.
132 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135 FS.setFieldWidth(Amt);
138 // No more characters left?
139 H.HandleIncompleteSpecifier(Start, E - Start);
144 // Look for the length modifier.
145 if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146 // No more characters left?
147 H.HandleIncompleteSpecifier(Start, E - Start);
151 // Detect spurious null characters, which are likely errors.
157 // Finally, look for the conversion specifier.
// The cursor moves past the conversion character; k stays InvalidSpecifier
// unless one of the cases below recognises it.
158 const char *conversionPosition = I++;
159 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160 switch (*conversionPosition) {
163 case '%': k = ConversionSpecifier::PercentArg; break;
164 case 'A': k = ConversionSpecifier::AArg; break;
165 case 'E': k = ConversionSpecifier::EArg; break;
166 case 'F': k = ConversionSpecifier::FArg; break;
167 case 'G': k = ConversionSpecifier::GArg; break;
168 case 'X': k = ConversionSpecifier::XArg; break;
169 case 'a': k = ConversionSpecifier::aArg; break;
170 case 'd': k = ConversionSpecifier::dArg; break;
171 case 'e': k = ConversionSpecifier::eArg; break;
172 case 'f': k = ConversionSpecifier::fArg; break;
173 case 'g': k = ConversionSpecifier::gArg; break;
174 case 'i': k = ConversionSpecifier::iArg; break;
175 case 'n': k = ConversionSpecifier::nArg; break;
176 case 'c': k = ConversionSpecifier::cArg; break;
177 case 'C': k = ConversionSpecifier::CArg; break;
178 case 'S': k = ConversionSpecifier::SArg; break;
179 case '[': k = ConversionSpecifier::ScanListArg; break;
180 case 'u': k = ConversionSpecifier::uArg; break;
181 case 'x': k = ConversionSpecifier::xArg; break;
182 case 'o': k = ConversionSpecifier::oArg; break;
183 case 's': k = ConversionSpecifier::sArg; break;
184 case 'p': k = ConversionSpecifier::pArg; break;
// The DArg, OArg and UArg kinds are selected only when the target triple is
// Darwin; on other targets k is left as it was.
188 if (Target.getTriple().isOSDarwin())
189 k = ConversionSpecifier::DArg;
192 if (Target.getTriple().isOSDarwin())
193 k = ConversionSpecifier::OArg;
196 if (Target.getTriple().isOSDarwin())
197 k = ConversionSpecifier::UArg;
200 ScanfConversionSpecifier CS(conversionPosition, k);
// A scan list has a body of its own that must be parsed before CS is stored.
201 if (k == ScanfConversionSpecifier::ScanListArg) {
202 if (ParseScanList(H, CS, I, E))
205 FS.setConversionSpecifier(CS);
// Only specifiers that consume a data argument, are not assignment-suppressed
// and are not positional take the next sequential argument index.
206 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
207 && !FS.usesPositionalArg())
208 FS.setArgIndex(argIndex++);
210 // FIXME: '%' and '*' don't make sense together. Issue a warning.
211 // FIXME: 'ConsumedSoFar' and '*' don't make sense together.
213 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
// Len starts as the distance from Beg to the cursor and is passed to
// ParseUTF8InvalidSpecifier, which may adjust it (it is reused below).
214 unsigned Len = I - Beg;
215 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216 CS.setEndScanList(Beg + Len);
217 FS.setConversionSpecifier(CS);
219 // Assume the conversion takes one argument.
220 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222 return ScanfSpecifierResult(Start, FS);
// Returns the argument type expected for this specifier, chosen from the
// conversion specifier kind and the length modifier LM. Most results are
// wrapped in ArgType::PtrTo(...). ArgType::Invalid() is returned when the
// conversion consumes no data argument, and for the length modifiers listed as
// invalid under each conversion group.
225 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226 const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 if (!CS.consumesDataArgument())
229 return ArgType::Invalid();
231 switch(CS.getKind()) {
// Signed integer conversions.
233 case ConversionSpecifier::dArg:
234 case ConversionSpecifier::DArg:
235 case ConversionSpecifier::iArg:
236 switch (LM.getKind()) {
237 case LengthModifier::None:
238 return ArgType::PtrTo(Ctx.IntTy);
239 case LengthModifier::AsChar:
240 return ArgType::PtrTo(ArgType::AnyCharTy);
241 case LengthModifier::AsShort:
242 return ArgType::PtrTo(Ctx.ShortTy);
243 case LengthModifier::AsLong:
244 return ArgType::PtrTo(Ctx.LongTy);
245 case LengthModifier::AsLongLong:
246 case LengthModifier::AsQuad:
247 return ArgType::PtrTo(Ctx.LongLongTy);
248 case LengthModifier::AsInt64:
249 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
250 case LengthModifier::AsIntMax:
251 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
// For signed conversions the size_t modifier uses the signed size type.
252 case LengthModifier::AsSizeT:
253 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
254 case LengthModifier::AsPtrDiff:
255 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
// NOTE(review): AsLongDouble with an integer conversion maps to long long
// here, presumably as an accepted extension. Confirm.
256 case LengthModifier::AsLongDouble:
258 return ArgType::PtrTo(Ctx.LongLongTy);
259 case LengthModifier::AsAllocate:
260 case LengthModifier::AsMAllocate:
261 case LengthModifier::AsInt32:
262 case LengthModifier::AsInt3264:
263 case LengthModifier::AsWide:
264 case LengthModifier::AsShortLong:
265 return ArgType::Invalid();
267 llvm_unreachable("Unsupported LengthModifier Type");
// Unsigned integer conversions.
270 case ConversionSpecifier::oArg:
271 case ConversionSpecifier::OArg:
272 case ConversionSpecifier::uArg:
273 case ConversionSpecifier::UArg:
274 case ConversionSpecifier::xArg:
275 case ConversionSpecifier::XArg:
276 switch (LM.getKind()) {
277 case LengthModifier::None:
278 return ArgType::PtrTo(Ctx.UnsignedIntTy);
279 case LengthModifier::AsChar:
280 return ArgType::PtrTo(Ctx.UnsignedCharTy);
281 case LengthModifier::AsShort:
282 return ArgType::PtrTo(Ctx.UnsignedShortTy);
283 case LengthModifier::AsLong:
284 return ArgType::PtrTo(Ctx.UnsignedLongTy);
285 case LengthModifier::AsLongLong:
286 case LengthModifier::AsQuad:
287 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288 case LengthModifier::AsInt64:
289 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290 case LengthModifier::AsIntMax:
291 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292 case LengthModifier::AsSizeT:
293 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294 case LengthModifier::AsPtrDiff:
295 return ArgType::PtrTo(
296 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
// NOTE(review): as in the signed group, AsLongDouble maps to the long long
// variant, presumably as an accepted extension. Confirm.
297 case LengthModifier::AsLongDouble:
299 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300 case LengthModifier::AsAllocate:
301 case LengthModifier::AsMAllocate:
302 case LengthModifier::AsInt32:
303 case LengthModifier::AsInt3264:
304 case LengthModifier::AsWide:
305 case LengthModifier::AsShortLong:
306 return ArgType::Invalid();
308 llvm_unreachable("Unsupported LengthModifier Type");
// Floating-point conversions: only None, AsLong and AsLongDouble map to a
// type in the visible cases.
311 case ConversionSpecifier::aArg:
312 case ConversionSpecifier::AArg:
313 case ConversionSpecifier::eArg:
314 case ConversionSpecifier::EArg:
315 case ConversionSpecifier::fArg:
316 case ConversionSpecifier::FArg:
317 case ConversionSpecifier::gArg:
318 case ConversionSpecifier::GArg:
319 switch (LM.getKind()) {
320 case LengthModifier::None:
321 return ArgType::PtrTo(Ctx.FloatTy);
322 case LengthModifier::AsLong:
323 return ArgType::PtrTo(Ctx.DoubleTy);
324 case LengthModifier::AsLongDouble:
325 return ArgType::PtrTo(Ctx.LongDoubleTy);
327 return ArgType::Invalid();
330 // Char, string and scanlist.
331 case ConversionSpecifier::cArg:
332 case ConversionSpecifier::sArg:
333 case ConversionSpecifier::ScanListArg:
334 switch (LM.getKind()) {
335 case LengthModifier::None:
336 return ArgType::PtrTo(ArgType::AnyCharTy);
337 case LengthModifier::AsLong:
338 case LengthModifier::AsWide:
339 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
// The allocating modifiers expect a pointer to a C-string pointer.
340 case LengthModifier::AsAllocate:
341 case LengthModifier::AsMAllocate:
342 return ArgType::PtrTo(ArgType::CStrTy);
// With AsShort, an MSVCRT target yields a pointer to any character type.
343 case LengthModifier::AsShort:
344 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345 return ArgType::PtrTo(ArgType::AnyCharTy);
348 return ArgType::Invalid();
// C and S conversions: wide characters when no modifier (or AsWide) is given.
350 case ConversionSpecifier::CArg:
351 case ConversionSpecifier::SArg:
352 // FIXME: Mac OS X specific?
353 switch (LM.getKind()) {
354 case LengthModifier::None:
355 case LengthModifier::AsWide:
356 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
357 case LengthModifier::AsAllocate:
358 case LengthModifier::AsMAllocate:
359 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
// With AsShort, an MSVCRT target yields a pointer to any character type.
360 case LengthModifier::AsShort:
361 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362 return ArgType::PtrTo(ArgType::AnyCharTy);
365 return ArgType::Invalid();
// Pointer conversion: the length modifier is not consulted.
369 case ConversionSpecifier::pArg:
370 return ArgType::PtrTo(ArgType::CPointerTy);
// %n conversion: same integer mapping as the signed group, except that AsChar
// requires signed char and AsLongDouble returns a default-constructed ArgType.
373 case ConversionSpecifier::nArg:
374 switch (LM.getKind()) {
375 case LengthModifier::None:
376 return ArgType::PtrTo(Ctx.IntTy);
377 case LengthModifier::AsChar:
378 return ArgType::PtrTo(Ctx.SignedCharTy);
379 case LengthModifier::AsShort:
380 return ArgType::PtrTo(Ctx.ShortTy);
381 case LengthModifier::AsLong:
382 return ArgType::PtrTo(Ctx.LongTy);
383 case LengthModifier::AsLongLong:
384 case LengthModifier::AsQuad:
385 return ArgType::PtrTo(Ctx.LongLongTy);
386 case LengthModifier::AsInt64:
387 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388 case LengthModifier::AsIntMax:
389 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390 case LengthModifier::AsSizeT:
391 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392 case LengthModifier::AsPtrDiff:
393 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394 case LengthModifier::AsLongDouble:
395 return ArgType(); // FIXME: Is this a known extension?
396 case LengthModifier::AsAllocate:
397 case LengthModifier::AsMAllocate:
398 case LengthModifier::AsInt32:
399 case LengthModifier::AsInt3264:
400 case LengthModifier::AsWide:
401 case LengthModifier::AsShortLong:
402 return ArgType::Invalid();
// Tries to rewrite this specifier so that it matches the argument type QT,
// which must be a pointer type. It works in stages: character pointees become
// an 's' conversion (with a field width when RawQT is a constant-size array);
// otherwise the length modifier is derived from the pointee's builtin kind or
// typedef name, and the conversion kind is changed only if that was not
// enough. %n specifiers are left alone.
412 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413 const LangOptions &LangOpt,
416 // %n is different from other conversion specifiers; don't try to fix it.
417 if (CS.getKind() == ConversionSpecifier::nArg)
420 if (!QT->isPointerType())
423 QualType PT = QT->getPointeeType();
425 // If it's an enum, get its underlying type.
426 if (const EnumType *ETy = PT->getAs<EnumType>()) {
427 // Don't try to fix incomplete enums.
428 if (!ETy->getDecl()->isComplete())
430 PT = ETy->getDecl()->getIntegerType();
433 const BuiltinType *BT = PT->getAs<BuiltinType>();
437 // Pointer to a character.
438 if (PT->isAnyCharacterType()) {
439 CS.setKind(ConversionSpecifier::sArg);
440 if (PT->isWideCharType())
441 LM.setKind(LengthModifier::AsWideChar);
443 LM.setKind(LengthModifier::None);
445 // If we know the target array length, we can use it as a field width.
446 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447 if (CAT->getSizeModifier() == ArrayType::Normal)
// The width is the array size minus one, presumably to leave room for the
// terminating null character. TODO confirm.
448 FieldWidth = OptionalAmount(OptionalAmount::Constant,
449 CAT->getSize().getZExtValue() - 1,
456 // Figure out the length modifier.
457 switch (BT->getKind()) {
459 case BuiltinType::UInt:
460 case BuiltinType::Int:
461 case BuiltinType::Float:
462 LM.setKind(LengthModifier::None);
466 case BuiltinType::Char_U:
467 case BuiltinType::UChar:
468 case BuiltinType::Char_S:
469 case BuiltinType::SChar:
470 LM.setKind(LengthModifier::AsChar);
474 case BuiltinType::Short:
475 case BuiltinType::UShort:
476 LM.setKind(LengthModifier::AsShort);
// Double shares AsLong with the long integer types.
480 case BuiltinType::Long:
481 case BuiltinType::ULong:
482 case BuiltinType::Double:
483 LM.setKind(LengthModifier::AsLong);
487 case BuiltinType::LongLong:
488 case BuiltinType::ULongLong:
489 LM.setKind(LengthModifier::AsLongLong);
493 case BuiltinType::LongDouble:
494 LM.setKind(LengthModifier::AsLongDouble);
502 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
// Applied only to typedef'd pointees, and only in C99 or C++11 mode.
503 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504 namedTypeToLengthModifier(PT, LM);
506 // If fixing the length modifier was enough, we are done.
507 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508 const analyze_scanf::ArgType &AT = getArgType(Ctx);
509 if (AT.isValid() && AT.matchesType(Ctx, QT))
513 // Figure out the conversion specifier.
// Floating-point pointees get 'f', signed integers 'd', unsigned integers 'u';
// any other pointee type is treated as unreachable.
514 if (PT->isRealFloatingType())
515 CS.setKind(ConversionSpecifier::fArg);
516 else if (PT->isSignedIntegerType())
517 CS.setKind(ConversionSpecifier::dArg);
518 else if (PT->isUnsignedIntegerType())
519 CS.setKind(ConversionSpecifier::uArg);
521 llvm_unreachable("Unexpected type");
// Writes the textual form of this specifier to os. In the visible statements,
// a positional specifier emits its argument index followed by "$", the
// SuppressAssignment flag is checked next, and the field width is printed
// through FieldWidth.toString().
526 void ScanfSpecifier::toString(raw_ostream &os) const {
529 if (usesPositionalArg())
530 os << getPositionalArgIndex() << "$";
531 if (SuppressAssignment)
534 FieldWidth.toString(os);
// Entry point for analysing a scanf format string. It repeatedly calls
// ParseScanfSpecifier and passes each parsed specifier to
// H.HandleScanfSpecifier, stopping early on a fail-stop parse error or when
// the handler returns false. argIndex starts at 0 and is shared across calls
// so that non-positional specifiers are numbered sequentially.
539 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
542 const LangOptions &LO,
543 const TargetInfo &Target) {
545 unsigned argIndex = 0;
547 // Keep looking for a format specifier until we have exhausted the string.
549 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
551 // Did a fail-stop error of any kind occur when parsing the specifier?
552 // If so, don't do any more processing.
553 if (FSR.shouldStop())
555 // Did we exhaust the string or encounter an error that
556 // we can recover from?
559 // We have a format specifier. Pass it to the callback.
// The length passed is the distance from the specifier's start to the cursor.
560 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561 I - FSR.getStart())) {
// Every normal exit from the loop must have consumed the whole string.
565 assert(I == E && "Format string not exhausted");