1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Scanf/printf implementation for use in *Sanitizer interceptors.
11 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
12 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
13 // with a few common GNU extensions.
15 //===----------------------------------------------------------------------===//
// Parses the decimal number at p: stores internal_atoll(p) in *out (converted
// to int by the assignment), then scans over the run of ASCII digits.
// NOTE(review): the loop body and the return are not visible in this excerpt;
// callers assign the result back to their parse cursor.
19 static const char *parse_number(const char *p, int *out) {
20 *out = internal_atoll(p);
21 while (*p >= '0' && *p <= '9')
// Attempts to read a leading decimal number at p as a parameter index for
// *out. Callers pass &dir->argIdx or &dir->precisionIdx and continue parsing
// from the returned pointer.
26 static const char *maybe_parse_param_index(const char *p, int *out) {
// Only a directive that starts with a digit can carry an index.
28 if (*p >= '0' && *p <= '9') {
// Parse speculatively through q so that p itself is not advanced yet.
// NOTE(review): the declaration of `number` and the test that accepts or
// rejects it are not visible in this excerpt (presumably a check for a
// trailing '$') -- confirm.
30 const char *q = parse_number(p, &number);
38 // Otherwise, do not change p. This will be re-parsed later as the field
// Returns true when internal_strchr(s, c) gives a non-null result; the "!!"
// collapses the returned pointer to a bool.
// NOTE(review): if internal_strchr follows strchr semantics, c == '\0' would
// match the terminator of s -- confirm whether callers rely on that.
43 static bool char_is_one_of(char c, const char *s) {
44 return !!internal_strchr(s, c);
// Recognises an optional length modifier at p and records it in ll, which
// the size helpers later inspect as ll[0] / ll[1] (they test for a second
// 'h' or 'l').
// NOTE(review): the branch bodies and the return are not visible in this
// excerpt; callers assign the result back to their parse cursor.
47 static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
// Modifiers handled by this first branch: j, z, t, L and q.
48 if (char_is_one_of(*p, "jztLq")) {
// 'h' has its own branch.
51 } else if (*p == 'h') {
// 'l' has its own branch.
58 } else if (*p == 'l') {
69 // Returns true if the character is an integer conversion specifier.
// The accepted set is d, i, o, u, x, X and also n, so "%n" goes through the
// same length-modifier sizing as the integer conversions.
70 static bool format_is_integer_conv(char c) {
71 return char_is_one_of(c, "diouxXn");
74 // Returns true if the character is a floating-point conversion specifier.
// The accepted set is a, A, e, E, f, F, g and G.
75 static bool format_is_float_conv(char c) {
76 return char_is_one_of(c, "aAeEfFgG");
79 // Returns string output character size for string-like conversions,
80 // or 0 if the conversion is invalid.
// `convSpecifier` is the conversion character; `lengthModifier` is the
// two-character modifier buffer stored in the parsed directive.
81 static int format_get_char_size(char convSpecifier,
82 const char lengthModifier[2]) {
// 'C' and 'S' are sized as wchar_t without consulting the length modifier.
83 if (char_is_one_of(convSpecifier, "CS")) {
84 return sizeof(wchar_t);
// For 'c', 's' and '[' the modifier decides the character width.
87 if (char_is_one_of(convSpecifier, "cs[")) {
// Exactly "l" (one 'l', nothing after it) selects wchar_t.
88 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
89 return sizeof(wchar_t);
// No modifier at all.
// NOTE(review): the value returned by this branch and the remaining paths
// are not visible in this excerpt.
90 else if (lengthModifier[0] == '\0')
// Special (non-size) results used by the value-size helpers in this file.
// NOTE(review): the enumerator names and values are not visible in this
// excerpt. Other code in the file refers to FSS_WCSLEN, FSS_STRLEN and
// FSS_INVALID, which presumably correspond to the three comments below, in
// that order -- confirm.
97 enum FormatStoreSize {
98 // Store size not known in advance; can be calculated as wcslen() of the
99 // destination buffer.
101 // Store size not known in advance; can be calculated as strlen() of the
102 // destination buffer.
104 // Invalid conversion specifier.
108 // Returns the memory size of a format directive (if >0), or a value of
// (the rest of the sentence above is not present in this excerpt; callers
// compare the result against FSS_INVALID, FSS_STRLEN and FSS_WCSLEN).
// `promote_float` selects the size used for an unmodified floating-point
// conversion: sizeof(double) when true, sizeof(float) when false. The scanf
// helper passes false and the printf helper passes true.
// NOTE(review): the case labels of both switches are not visible in this
// excerpt, so the modifier that selects each return is only certain where
// the return expression itself tests lengthModifier[1].
110 static int format_get_value_size(char convSpecifier,
111 const char lengthModifier[2],
112 bool promote_float) {
// Integer conversions (including 'n'): size depends on the length modifier.
113 if (format_is_integer_conv(convSpecifier)) {
114 switch (lengthModifier[0]) {
// A second 'h' ("hh") means char, otherwise short.
116 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
// A second 'l' ("ll") means long long, otherwise long.
118 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
120 return sizeof(long long);
122 return sizeof(long long);
124 return sizeof(INTMAX_T);
126 return sizeof(SIZE_T);
128 return sizeof(PTRDIFF_T);
// Floating-point conversions.
136 if (format_is_float_conv(convSpecifier)) {
137 switch (lengthModifier[0]) {
140 return sizeof(long double);
// NOTE(review): the else-arm of this conditional is on a line that is not
// visible in this excerpt.
142 return lengthModifier[1] == 'l' ? sizeof(long double)
145 // Printf promotes floats to doubles but scanf does not
146 return promote_float ? sizeof(double) : sizeof(float);
// Pointer conversion: any length modifier takes the guarded path.
// NOTE(review): the statement guarded by the modifier test is not visible.
152 if (convSpecifier == 'p') {
153 if (lengthModifier[0] != 0)
155 return sizeof(void *);
// One parsed scanf conversion directive, filled in by scanf_parse_next.
// NOTE(review): further members used elsewhere in the file (fieldWidth,
// convSpecifier, maybeGnuMalloc, begin, end) are declared on lines that are
// not visible in this excerpt.
161 struct ScanfDirective {
162 int argIdx; // argument index, or -1 if not specified ("%n$")
166 bool suppressed; // suppress assignment ("*")
167 bool allocate; // allocate space ("m")
// Length modifier characters as recorded by maybe_parse_length_modifier.
168 char lengthModifier[2];
173 // Parse scanf format string. If a valid directive is encountered, it is
174 // returned in dir. This function returns the pointer to the first
175 // unprocessed character, or 0 in case of error.
176 // In case of the end-of-string, a pointer to the closing \0 is returned.
// `allowGnuMalloc` enables recognition of the ambiguous GNU "%as"-style
// syntax handled near the end of the function.
// NOTE(review): several statements (the search for '%', the tests guarding
// the `suppressed` / `allocate` assignments, early returns) are on lines
// that are not visible in this excerpt.
177 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
178 ScanfDirective *dir) {
// Clear the whole directive before filling it in.
179 internal_memset(dir, 0, sizeof(*dir));
// Optional positional index; parsing continues from the returned pointer.
198 p = maybe_parse_param_index(p, &dir->argIdx);
202 dir->suppressed = true;
// Optional field width, present only when a digit follows.
206 if (*p >= '0' && *p <= '9') {
207 p = parse_number(p, &dir->fieldWidth);
209 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
214 dir->allocate = true;
// Optional length modifier.
218 p = maybe_parse_length_modifier(p, dir->lengthModifier);
219 // Conversion specifier.
220 dir->convSpecifier = *p++;
221 // Consume %[...] expression.
222 if (dir->convSpecifier == '[') {
// Scan forward to the closing ']' or to the end of the string.
227 while (*p && *p != ']')
230 return nullptr; // unexpected end of string
231 // Consume the closing ']'.
234 // This is unfortunately ambiguous between old GNU extension
235 // of %as, %aS and %a[...] and newer POSIX %a followed by
236 // letters s, S or [.
// Only a bare "%a" (no length modifier) can be the GNU form.
237 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
238 !dir->lengthModifier[0]) {
239 if (*p == 's' || *p == 'S') {
240 dir->maybeGnuMalloc = true;
242 } else if (*p == '[') {
243 // Watch for %a[h-j%d], if % appears in the
244 // [...] range, then we need to give up, we don't know
245 // if scanf will parse it as POSIX %a [h-j %d ] or
246 // GNU allocation of string with range dh-j plus %.
// Look ahead with q so p is only advanced once the range is accepted.
247 const char *q = p + 1;
252 while (*q && *q != ']' && *q != '%')
// Unterminated range or an embedded '%': the give-up case described above.
// NOTE(review): the statement executed for this case is not visible here.
254 if (*q == 0 || *q == '%')
256 p = q + 1; // Consume the closing ']'.
257 dir->maybeGnuMalloc = true;
// Computes how many bytes scanf stores through the argument of `dir`.
// Returns either a byte count or one of the FSS_* sentinels.
// NOTE(review): the conditions guarding the first two statements and a few
// returns are on lines that are not visible in this excerpt.
266 static int scanf_get_value_size(ScanfDirective *dir) {
// Conversions other than c, C, s, S and [ take the guarded path; the
// visible result of this section is the size of a char * slot.
268 if (!char_is_one_of(dir->convSpecifier, "cCsS["))
270 return sizeof(char *);
// Possible GNU "%as"-style directive, as flagged by scanf_parse_next.
273 if (dir->maybeGnuMalloc) {
// The flag is only meaningful for a bare 'a' conversion.
274 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
276 // This is ambiguous, so check the smaller size of char * (if it is
277 // a GNU extension of %as, %aS or %a[...]) and float (if it is
278 // POSIX %a followed by s, S or [ letters).
279 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
// Character and string conversions.
282 if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
// s, S and [ get one extra character of room (see the width formula
// below); c and C do not.
283 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
285 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
// fieldWidth == 0 is the value left by the memset in scanf_parse_next,
// i.e. no width was parsed.
288 if (dir->fieldWidth == 0) {
// NOTE(review): the return taken when no terminator is needed is not visible.
289 if (!needsTerminator)
// Unbounded string: the caller measures the destination buffer afterwards.
291 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
// Bounded: width characters plus one more when needsTerminator is true.
293 return (dir->fieldWidth + needsTerminator) * charSize;
// Everything else is sized like a scalar; scanf does not promote float.
296 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
299 // Common part of *scanf interceptors.
300 // Process format string and va_list, and report all store ranges.
301 // Stops when "consuming" n_inputs input items.
// `ctx` is forwarded unchanged to the COMMON_INTERCEPTOR_* range macros;
// `allowGnuMalloc` is forwarded to scanf_parse_next.
// NOTE(review): the loop header, the declaration of `dir` and several
// branch bodies are on lines that are not visible in this excerpt.
302 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
303 const char *format, va_list aq) {
// At least one input item must be requested.
304 CHECK_GT(n_inputs, 0);
305 const char *p = format;
// The format string itself is read in full, terminator included.
307 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
311 p = scanf_parse_next(p, allowGnuMalloc, &dir);
314 if (dir.convSpecifier == 0) {
315 // This can only happen at the end of the format string.
319 // Here the directive is valid. Do what it says.
// A directive with an explicit argument index takes this branch.
// NOTE(review): the branch body is not visible in this excerpt.
320 if (dir.argIdx != -1) {
326 int size = scanf_get_value_size(&dir);
327 if (size == FSS_INVALID) {
// NOTE(review): assuming Report is printf-like, the format string below
// contains only "%s", and "%.*s\n" is passed as an argument instead of being
// part of the format. The offending directive text would then never be
// printed. The printf counterpart in this file keeps "%.*s" inside the
// format string. Also, dir.end - dir.begin is a pointer difference where
// "%.*s" conventionally expects an int -- confirm and fix.
328 Report("%s: WARNING: unexpected format specifier in scanf interceptor: ",
329 SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin);
// The destination is fetched from the va_list as a generic pointer.
332 void *argp = va_arg(aq, void *);
// NOTE(review): the statement guarded by this test is not visible here.
333 if (dir.convSpecifier != 'n')
// Sentinel sizes are resolved by measuring the destination buffer.
337 if (size == FSS_STRLEN) {
// String length plus the terminating NUL.
338 size = internal_strlen((const char *)argp) + 1;
339 } else if (size == FSS_WCSLEN) {
340 // FIXME: actually use wcslen() to calculate it.
// Report the range starting at argp as written.
343 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
347 #if SANITIZER_INTERCEPT_PRINTF
// One parsed printf conversion directive, filled in by printf_parse_next.
// NOTE(review): further members used elsewhere in the file (fieldWidth,
// fieldPrecision, starredWidth, convSpecifier, begin, end) are declared on
// lines that are not visible in this excerpt.
349 struct PrintfDirective {
352 int argIdx; // width argument index, or -1 if not specified ("%*n$")
353 int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
// Set by maybe_parse_number_or_star for the precision field; printf_common
// consumes an extra int argument when it is true.
357 bool starredPrecision;
// Length modifier characters as recorded by maybe_parse_length_modifier.
358 char lengthModifier[2];
// Parses a decimal number into *out only when p starts with a digit;
// otherwise neither p nor *out is modified by the visible code.
// NOTE(review): the return statement is not visible in this excerpt.
362 static const char *maybe_parse_number(const char *p, int *out) {
363 if (*p >= '0' && *p <= '9')
364 p = parse_number(p, out);
// Parses either a number or a "*" placeholder at p.
// NOTE(review): the rest of the signature and the "*" handling are on lines
// that are not visible in this excerpt. Callers pass a bool pointer such as
// &dir->starredPrecision as the third argument.
368 static const char *maybe_parse_number_or_star(const char *p, int *out,
// Numeric case: delegate to maybe_parse_number.
375 p = maybe_parse_number(p, out);
380 // Parse printf format string. Same as scanf_parse_next.
// Fills `dir` with the next directive found at p.
// NOTE(review): several statements (the search for '%', the width index
// handling, the '.' test before the precision, the return) are on lines
// that are not visible in this excerpt.
381 static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
// Clear the whole directive before filling it in.
382 internal_memset(dir, 0, sizeof(*dir));
// -1 marks "no positional precision index".
384 dir->precisionIdx = -1;
// NOTE(review): this leading index parse writes precisionIdx, and the
// starred-precision code further down writes the same field again. Check
// whether this one was meant to target argIdx.
402 p = maybe_parse_param_index(p, &dir->precisionIdx);
// Skip over flag characters.
405 while (char_is_one_of(*p, "'-+ #0")) {
// Field width: a number or "*".
409 p = maybe_parse_number_or_star(p, &dir->fieldWidth,
416 // Actual precision is optional (surprise!)
417 p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
418 &dir->starredPrecision);
// A "*" precision may carry its own positional index.
422 if (dir->starredPrecision) {
423 p = maybe_parse_param_index(p, &dir->precisionIdx);
// Optional length modifier.
428 p = maybe_parse_length_modifier(p, dir->lengthModifier);
429 // Conversion specifier.
430 dir->convSpecifier = *p++;
// Computes the size of the argument consumed by `dir`, or an FSS_* sentinel
// for strings whose length must be measured by the caller.
// NOTE(review): the charSize declaration, its validity check and the return
// for 'c' / 'C' are on lines that are not visible in this excerpt.
437 static int printf_get_value_size(PrintfDirective *dir) {
// Character and string conversions.
438 if (char_is_one_of(dir->convSpecifier, "cCsS")) {
440 format_get_char_size(dir->convSpecifier, dir->lengthModifier);
// Strings: narrow ones are measured with strlen, wide ones with wcslen.
443 if (char_is_one_of(dir->convSpecifier, "sS")) {
444 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
// Everything else is a scalar; printf promotes float to double.
449 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
// Consumes one scalar argument from the va_list pointed to by `aq`.
// Floating-point conversions are fetched as double or long double; a
// floating-point size outside the handled set, or any other unhandled size,
// is reported with a warning.
// NOTE(review): the size tests and the integer va_arg cases are on lines
// that are not visible in this excerpt.
// No comments are placed inside the macro body because every line ends in a
// continuation backslash.
452 #define SKIP_SCALAR_ARG(aq, convSpecifier, size) \
454 if (format_is_float_conv(convSpecifier)) { \
457 va_arg(*aq, double); \
460 va_arg(*aq, long double); \
463 va_arg(*aq, long double); \
466 Report("WARNING: unexpected floating-point arg size" \
467 " in printf interceptor: %d\n", size); \
481 Report("WARNING: unexpected arg size" \
482 " in printf interceptor: %d\n", size); \
488 // Common part of *printf interceptors.
489 // Process format string and va_list, and report all load ranges.
// `ctx` is forwarded unchanged to the COMMON_INTERCEPTOR_* range macros.
// NOTE(review): the loop header, the declaration of `dir` and several
// branch bodies are on lines that are not visible in this excerpt.
490 static void printf_common(void *ctx, const char *format, va_list aq) {
// The format string itself is read in full, terminator included.
491 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
493 const char *p = format;
497 p = printf_parse_next(p, &dir);
500 if (dir.convSpecifier == 0) {
501 // This can only happen at the end of the format string.
505 // Here the directive is valid. Do what it says.
// A directive with any explicit positional index takes this branch.
// NOTE(review): the branch body is not visible in this excerpt.
506 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
// A "*" width consumes one int argument.
510 if (dir.starredWidth) {
512 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
// A "*" precision consumes one int argument as well.
514 if (dir.starredPrecision) {
516 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
518 // %m does not require an argument: strlen(errno).
519 if (dir.convSpecifier == 'm')
521 int size = printf_get_value_size(&dir);
522 if (size == FSS_INVALID) {
// Function-local static backing the "reported once per process" wording.
// NOTE(review): dir.end - dir.begin below is a pointer difference where
// "%.*s" conventionally expects an int -- confirm how Report handles it.
523 static int ReportedOnce;
526 "%s: WARNING: unexpected format specifier in printf "
527 "interceptor: %.*s (reported once per process)\n",
528 SanitizerToolName, dir.end - dir.begin, dir.begin);
// %n is the one conversion that writes through its argument.
531 if (dir.convSpecifier == 'n') {
532 void *argp = va_arg(aq, void *);
533 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
535 } else if (size == FSS_STRLEN) {
// A null string pointer is skipped without reporting any range.
536 if (void *argp = va_arg(aq, void *)) {
537 if (dir.starredPrecision) {
538 // FIXME: properly support starred precision for strings.
// NOTE(review): fieldPrecision is 0 both when no precision was given and,
// presumably, for an explicit ".0". Both cases skip this branch -- confirm
// that measuring the whole string is intended for ".0".
540 } else if (dir.fieldPrecision > 0) {
541 // Won't read more than "precision" symbols.
542 size = internal_strnlen((const char *)argp, dir.fieldPrecision);
// The terminator was found within the precision, so count it as read too.
543 if (size < dir.fieldPrecision) size++;
545 // Whole string will be accessed.
546 size = internal_strlen((const char *)argp) + 1;
548 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
550 } else if (size == FSS_WCSLEN) {
// As above, a null pointer is skipped.
551 if (void *argp = va_arg(aq, void *)) {
552 // FIXME: Properly support wide-character strings (via wcsrtombs).
554 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
557 // Skip non-pointer args
558 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
563 #endif // SANITIZER_INTERCEPT_PRINTF