]>
Commit | Line | Data |
---|---|---|
7d752f28 | 1 | //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// |
2 | // | |
3 | // This file is distributed under the University of Illinois Open Source | |
4 | // License. See LICENSE.TXT for details. | |
5 | // | |
6 | //===----------------------------------------------------------------------===// | |
7 | // | |
8 | // Scanf/printf implementation for use in *Sanitizer interceptors. | |
9 | // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html | |
10 | // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html | |
11 | // with a few common GNU extensions. | |
12 | // | |
13 | //===----------------------------------------------------------------------===// | |
5645a48f | 14 | |
7d752f28 | 15 | #include <stdarg.h> |
16 | ||
17 | static const char *parse_number(const char *p, int *out) { | |
18 | *out = internal_atoll(p); | |
19 | while (*p >= '0' && *p <= '9') | |
20 | ++p; | |
21 | return p; | |
22 | } | |
23 | ||
24 | static const char *maybe_parse_param_index(const char *p, int *out) { | |
25 | // n$ | |
26 | if (*p >= '0' && *p <= '9') { | |
27 | int number; | |
28 | const char *q = parse_number(p, &number); | |
29 | CHECK(q); | |
30 | if (*q == '$') { | |
31 | *out = number; | |
32 | p = q + 1; | |
33 | } | |
34 | } | |
35 | ||
36 | // Otherwise, do not change p. This will be re-parsed later as the field | |
37 | // width. | |
38 | return p; | |
39 | } | |
40 | ||
41 | static bool char_is_one_of(char c, const char *s) { | |
42 | return !!internal_strchr(s, c); | |
43 | } | |
44 | ||
// Parses an optional length modifier at p.
//
// Recognized modifiers are the single characters j, z, t, L, q and the
// one- or two-character forms h, hh, l, ll. The characters consumed are
// written to ll[0] (and ll[1] for the doubled forms); entries that are not
// written are left untouched, so the caller is expected to pass a zeroed
// array. Returns a pointer just past the modifier, or p itself when there is
// no modifier. The terminating '\0' is never consumed.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p;
      ++p;
      break;
    case 'h':
    case 'l': {
      // 'h' and 'l' may be doubled ("hh", "ll").
      const char modifier = *p;
      ll[0] = modifier;
      ++p;
      if (*p == modifier) {
        ll[1] = modifier;
        ++p;
      }
      break;
    }
    default:
      // Not a length modifier (this includes the end of the string).
      break;
  }
  return p;
}
66 | ||
67 | // Returns true if the character is an integer conversion specifier. | |
68 | static bool format_is_integer_conv(char c) { | |
69 | return char_is_one_of(c, "diouxXn"); | |
70 | } | |
71 | ||
72 | // Returns true if the character is an floating point conversion specifier. | |
73 | static bool format_is_float_conv(char c) { | |
74 | return char_is_one_of(c, "aAeEfFgG"); | |
75 | } | |
76 | ||
77 | // Returns string output character size for string-like conversions, | |
78 | // or 0 if the conversion is invalid. | |
79 | static int format_get_char_size(char convSpecifier, | |
80 | const char lengthModifier[2]) { | |
81 | if (char_is_one_of(convSpecifier, "CS")) { | |
82 | return sizeof(wchar_t); | |
83 | } | |
84 | ||
85 | if (char_is_one_of(convSpecifier, "cs[")) { | |
86 | if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') | |
87 | return sizeof(wchar_t); | |
88 | else if (lengthModifier[0] == '\0') | |
89 | return sizeof(char); | |
90 | } | |
91 | ||
92 | return 0; | |
93 | } | |
94 | ||
// Special (non-positive) results of the *_get_value_size helpers. Positive
// results are plain byte counts.
enum FormatStoreSize {
  // The conversion specifier is invalid.
  FSS_INVALID = 0,
  // The size is not known up front; it is the strlen() of the destination
  // buffer.
  FSS_STRLEN = -1,
  // The size is not known up front; it is the wcslen() of the destination
  // buffer.
  FSS_WCSLEN = -2
};
105 | ||
106 | // Returns the memory size of a format directive (if >0), or a value of | |
107 | // FormatStoreSize. | |
108 | static int format_get_value_size(char convSpecifier, | |
109 | const char lengthModifier[2], | |
110 | bool promote_float) { | |
111 | if (format_is_integer_conv(convSpecifier)) { | |
112 | switch (lengthModifier[0]) { | |
113 | case 'h': | |
114 | return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); | |
115 | case 'l': | |
116 | return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); | |
117 | case 'q': | |
118 | return sizeof(long long); | |
119 | case 'L': | |
120 | return sizeof(long long); | |
121 | case 'j': | |
122 | return sizeof(INTMAX_T); | |
123 | case 'z': | |
124 | return sizeof(SIZE_T); | |
125 | case 't': | |
126 | return sizeof(PTRDIFF_T); | |
127 | case 0: | |
128 | return sizeof(int); | |
129 | default: | |
130 | return FSS_INVALID; | |
131 | } | |
132 | } | |
133 | ||
134 | if (format_is_float_conv(convSpecifier)) { | |
135 | switch (lengthModifier[0]) { | |
136 | case 'L': | |
137 | case 'q': | |
138 | return sizeof(long double); | |
139 | case 'l': | |
140 | return lengthModifier[1] == 'l' ? sizeof(long double) | |
141 | : sizeof(double); | |
142 | case 0: | |
143 | // Printf promotes floats to doubles but scanf does not | |
144 | return promote_float ? sizeof(double) : sizeof(float); | |
145 | default: | |
146 | return FSS_INVALID; | |
147 | } | |
148 | } | |
149 | ||
150 | if (convSpecifier == 'p') { | |
151 | if (lengthModifier[0] != 0) | |
152 | return FSS_INVALID; | |
153 | return sizeof(void *); | |
154 | } | |
155 | ||
156 | return FSS_INVALID; | |
157 | } | |
158 | ||
// Description of one parsed scanf conversion directive.
struct ScanfDirective {
  // Argument index from "%n$", or -1 if not specified.
  int argIdx;
  // Maximum field width; 0 when the directive has none.
  int fieldWidth;
  // The '%' that starts the directive.
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // Assignment suppressed ("*").
  bool suppressed;
  // Destination is allocated by scanf ("m").
  bool allocate;
  // Length modifier characters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was found.
  char convSpecifier;
  // Set when the directive could be the old GNU %as / %aS / %a[...] form.
  bool maybeGnuMalloc;
};
170 | ||
171 | // Parse scanf format string. If a valid directive in encountered, it is | |
172 | // returned in dir. This function returns the pointer to the first | |
173 | // unprocessed character, or 0 in case of error. | |
174 | // In case of the end-of-string, a pointer to the closing \0 is returned. | |
175 | static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, | |
176 | ScanfDirective *dir) { | |
177 | internal_memset(dir, 0, sizeof(*dir)); | |
178 | dir->argIdx = -1; | |
179 | ||
180 | while (*p) { | |
181 | if (*p != '%') { | |
182 | ++p; | |
183 | continue; | |
184 | } | |
185 | dir->begin = p; | |
186 | ++p; | |
187 | // %% | |
188 | if (*p == '%') { | |
189 | ++p; | |
190 | continue; | |
191 | } | |
192 | if (*p == '\0') { | |
5645a48f | 193 | return nullptr; |
7d752f28 | 194 | } |
195 | // %n$ | |
196 | p = maybe_parse_param_index(p, &dir->argIdx); | |
197 | CHECK(p); | |
198 | // * | |
199 | if (*p == '*') { | |
200 | dir->suppressed = true; | |
201 | ++p; | |
202 | } | |
203 | // Field width | |
204 | if (*p >= '0' && *p <= '9') { | |
205 | p = parse_number(p, &dir->fieldWidth); | |
206 | CHECK(p); | |
207 | if (dir->fieldWidth <= 0) // Width if at all must be non-zero | |
5645a48f | 208 | return nullptr; |
7d752f28 | 209 | } |
210 | // m | |
211 | if (*p == 'm') { | |
212 | dir->allocate = true; | |
213 | ++p; | |
214 | } | |
215 | // Length modifier. | |
216 | p = maybe_parse_length_modifier(p, dir->lengthModifier); | |
217 | // Conversion specifier. | |
218 | dir->convSpecifier = *p++; | |
219 | // Consume %[...] expression. | |
220 | if (dir->convSpecifier == '[') { | |
221 | if (*p == '^') | |
222 | ++p; | |
223 | if (*p == ']') | |
224 | ++p; | |
225 | while (*p && *p != ']') | |
226 | ++p; | |
227 | if (*p == 0) | |
5645a48f | 228 | return nullptr; // unexpected end of string |
229 | // Consume the closing ']'. | |
7d752f28 | 230 | ++p; |
231 | } | |
232 | // This is unfortunately ambiguous between old GNU extension | |
233 | // of %as, %aS and %a[...] and newer POSIX %a followed by | |
234 | // letters s, S or [. | |
235 | if (allowGnuMalloc && dir->convSpecifier == 'a' && | |
236 | !dir->lengthModifier[0]) { | |
237 | if (*p == 's' || *p == 'S') { | |
238 | dir->maybeGnuMalloc = true; | |
239 | ++p; | |
240 | } else if (*p == '[') { | |
241 | // Watch for %a[h-j%d], if % appears in the | |
242 | // [...] range, then we need to give up, we don't know | |
243 | // if scanf will parse it as POSIX %a [h-j %d ] or | |
244 | // GNU allocation of string with range dh-j plus %. | |
245 | const char *q = p + 1; | |
246 | if (*q == '^') | |
247 | ++q; | |
248 | if (*q == ']') | |
249 | ++q; | |
250 | while (*q && *q != ']' && *q != '%') | |
251 | ++q; | |
252 | if (*q == 0 || *q == '%') | |
5645a48f | 253 | return nullptr; |
7d752f28 | 254 | p = q + 1; // Consume the closing ']'. |
255 | dir->maybeGnuMalloc = true; | |
256 | } | |
257 | } | |
258 | dir->end = p; | |
259 | break; | |
260 | } | |
261 | return p; | |
262 | } | |
263 | ||
264 | static int scanf_get_value_size(ScanfDirective *dir) { | |
265 | if (dir->allocate) { | |
266 | if (!char_is_one_of(dir->convSpecifier, "cCsS[")) | |
267 | return FSS_INVALID; | |
268 | return sizeof(char *); | |
269 | } | |
270 | ||
271 | if (dir->maybeGnuMalloc) { | |
272 | if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) | |
273 | return FSS_INVALID; | |
274 | // This is ambiguous, so check the smaller size of char * (if it is | |
275 | // a GNU extension of %as, %aS or %a[...]) and float (if it is | |
276 | // POSIX %a followed by s, S or [ letters). | |
277 | return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); | |
278 | } | |
279 | ||
280 | if (char_is_one_of(dir->convSpecifier, "cCsS[")) { | |
281 | bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); | |
282 | unsigned charSize = | |
283 | format_get_char_size(dir->convSpecifier, dir->lengthModifier); | |
284 | if (charSize == 0) | |
285 | return FSS_INVALID; | |
286 | if (dir->fieldWidth == 0) { | |
287 | if (!needsTerminator) | |
288 | return charSize; | |
289 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; | |
290 | } | |
291 | return (dir->fieldWidth + needsTerminator) * charSize; | |
292 | } | |
293 | ||
294 | return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); | |
295 | } | |
296 | ||
297 | // Common part of *scanf interceptors. | |
298 | // Process format string and va_list, and report all store ranges. | |
299 | // Stops when "consuming" n_inputs input items. | |
300 | static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, | |
301 | const char *format, va_list aq) { | |
302 | CHECK_GT(n_inputs, 0); | |
303 | const char *p = format; | |
304 | ||
305 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); | |
306 | ||
307 | while (*p) { | |
308 | ScanfDirective dir; | |
309 | p = scanf_parse_next(p, allowGnuMalloc, &dir); | |
310 | if (!p) | |
311 | break; | |
312 | if (dir.convSpecifier == 0) { | |
313 | // This can only happen at the end of the format string. | |
314 | CHECK_EQ(*p, 0); | |
315 | break; | |
316 | } | |
317 | // Here the directive is valid. Do what it says. | |
318 | if (dir.argIdx != -1) { | |
319 | // Unsupported. | |
320 | break; | |
321 | } | |
322 | if (dir.suppressed) | |
323 | continue; | |
324 | int size = scanf_get_value_size(&dir); | |
325 | if (size == FSS_INVALID) { | |
36093749 | 326 | Report("%s: WARNING: unexpected format specifier in scanf interceptor: ", |
327 | SanitizerToolName, "%.*s\n", dir.end - dir.begin, dir.begin); | |
7d752f28 | 328 | break; |
329 | } | |
330 | void *argp = va_arg(aq, void *); | |
331 | if (dir.convSpecifier != 'n') | |
332 | --n_inputs; | |
333 | if (n_inputs < 0) | |
334 | break; | |
335 | if (size == FSS_STRLEN) { | |
336 | size = internal_strlen((const char *)argp) + 1; | |
337 | } else if (size == FSS_WCSLEN) { | |
338 | // FIXME: actually use wcslen() to calculate it. | |
339 | size = 0; | |
340 | } | |
341 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); | |
342 | } | |
343 | } | |
344 | ||
345 | #if SANITIZER_INTERCEPT_PRINTF | |
346 | ||
// Description of one parsed printf conversion directive.
struct PrintfDirective {
  // Literal field width; 0 when absent or given as '*'.
  int fieldWidth;
  // Literal precision; 0 when absent or given as '*'.
  int fieldPrecision;
  // Explicit argument position ("n$" syntax), or -1 if not specified.
  int argIdx;
  // Explicit position of the precision argument (".*n$"), or -1 if not
  // specified.
  int precisionIdx;
  // The '%' that starts the directive.
  const char *begin;
  // One past the last character of the directive.
  const char *end;
  // Field width is taken from an argument ("*").
  bool starredWidth;
  // Precision is taken from an argument (".*").
  bool starredPrecision;
  // Length modifier characters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was found.
  char convSpecifier;
};
359 | ||
360 | static const char *maybe_parse_number(const char *p, int *out) { | |
361 | if (*p >= '0' && *p <= '9') | |
362 | p = parse_number(p, out); | |
363 | return p; | |
364 | } | |
365 | ||
366 | static const char *maybe_parse_number_or_star(const char *p, int *out, | |
367 | bool *star) { | |
368 | if (*p == '*') { | |
369 | *star = true; | |
370 | ++p; | |
371 | } else { | |
372 | *star = false; | |
373 | p = maybe_parse_number(p, out); | |
374 | } | |
375 | return p; | |
376 | } | |
377 | ||
378 | // Parse printf format string. Same as scanf_parse_next. | |
379 | static const char *printf_parse_next(const char *p, PrintfDirective *dir) { | |
380 | internal_memset(dir, 0, sizeof(*dir)); | |
381 | dir->argIdx = -1; | |
382 | dir->precisionIdx = -1; | |
383 | ||
384 | while (*p) { | |
385 | if (*p != '%') { | |
386 | ++p; | |
387 | continue; | |
388 | } | |
389 | dir->begin = p; | |
390 | ++p; | |
391 | // %% | |
392 | if (*p == '%') { | |
393 | ++p; | |
394 | continue; | |
395 | } | |
396 | if (*p == '\0') { | |
5645a48f | 397 | return nullptr; |
7d752f28 | 398 | } |
399 | // %n$ | |
400 | p = maybe_parse_param_index(p, &dir->precisionIdx); | |
401 | CHECK(p); | |
402 | // Flags | |
403 | while (char_is_one_of(*p, "'-+ #0")) { | |
404 | ++p; | |
405 | } | |
406 | // Field width | |
407 | p = maybe_parse_number_or_star(p, &dir->fieldWidth, | |
408 | &dir->starredWidth); | |
409 | if (!p) | |
5645a48f | 410 | return nullptr; |
7d752f28 | 411 | // Precision |
412 | if (*p == '.') { | |
413 | ++p; | |
414 | // Actual precision is optional (surprise!) | |
415 | p = maybe_parse_number_or_star(p, &dir->fieldPrecision, | |
416 | &dir->starredPrecision); | |
417 | if (!p) | |
5645a48f | 418 | return nullptr; |
7d752f28 | 419 | // m$ |
420 | if (dir->starredPrecision) { | |
421 | p = maybe_parse_param_index(p, &dir->precisionIdx); | |
422 | CHECK(p); | |
423 | } | |
424 | } | |
425 | // Length modifier. | |
426 | p = maybe_parse_length_modifier(p, dir->lengthModifier); | |
427 | // Conversion specifier. | |
428 | dir->convSpecifier = *p++; | |
429 | dir->end = p; | |
430 | break; | |
431 | } | |
432 | return p; | |
433 | } | |
434 | ||
435 | static int printf_get_value_size(PrintfDirective *dir) { | |
7d752f28 | 436 | if (char_is_one_of(dir->convSpecifier, "cCsS")) { |
437 | unsigned charSize = | |
438 | format_get_char_size(dir->convSpecifier, dir->lengthModifier); | |
439 | if (charSize == 0) | |
440 | return FSS_INVALID; | |
441 | if (char_is_one_of(dir->convSpecifier, "sS")) { | |
442 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; | |
443 | } | |
444 | return charSize; | |
445 | } | |
446 | ||
447 | return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); | |
448 | } | |
449 | ||
// Consumes one by-value variadic argument from the va_list that aq points to.
//
// convSpecifier selects between floating-point and integer extraction and
// size is the argument size in bytes as computed by the *_get_value_size
// helpers. Integer sizes 1, 2 and 4 are all read as u32, since variadic
// arguments narrower than int are promoted. Floating-point sizes 12 and 16
// are both read as long double.
//
// On an unexpected size a warning is reported and the macro executes
// `return;` in the ENCLOSING function, so it can only be used inside
// functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (format_is_float_conv(convSpecifier)) {                     \
      switch (size) {                                              \
      case 8:                                                      \
        va_arg(*(aq), double);                                     \
        break;                                                     \
      case 12:                                                     \
        va_arg(*(aq), long double);                                \
        break;                                                     \
      case 16:                                                     \
        va_arg(*(aq), long double);                                \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected floating-point arg size"       \
               " in printf interceptor: %d\n",                     \
               static_cast<int>(size));                            \
        return;                                                    \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
      case 1:                                                      \
      case 2:                                                      \
      case 4:                                                      \
        va_arg(*(aq), u32);                                        \
        break;                                                     \
      case 8:                                                      \
        va_arg(*(aq), u64);                                        \
        break;                                                     \
      default:                                                     \
        Report("WARNING: unexpected arg size"                      \
               " in printf interceptor: %d\n",                     \
               static_cast<int>(size));                            \
        return;                                                    \
      }                                                            \
    }                                                              \
  } while (0)
485 | ||
486 | // Common part of *printf interceptors. | |
487 | // Process format string and va_list, and report all load ranges. | |
488 | static void printf_common(void *ctx, const char *format, va_list aq) { | |
489 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); | |
490 | ||
491 | const char *p = format; | |
492 | ||
493 | while (*p) { | |
494 | PrintfDirective dir; | |
495 | p = printf_parse_next(p, &dir); | |
496 | if (!p) | |
497 | break; | |
498 | if (dir.convSpecifier == 0) { | |
499 | // This can only happen at the end of the format string. | |
500 | CHECK_EQ(*p, 0); | |
501 | break; | |
502 | } | |
503 | // Here the directive is valid. Do what it says. | |
504 | if (dir.argIdx != -1 || dir.precisionIdx != -1) { | |
505 | // Unsupported. | |
506 | break; | |
507 | } | |
508 | if (dir.starredWidth) { | |
509 | // Dynamic width | |
510 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); | |
511 | } | |
512 | if (dir.starredPrecision) { | |
513 | // Dynamic precision | |
514 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); | |
515 | } | |
36093749 | 516 | // %m does not require an argument: strlen(errno). |
517 | if (dir.convSpecifier == 'm') | |
518 | continue; | |
7d752f28 | 519 | int size = printf_get_value_size(&dir); |
520 | if (size == FSS_INVALID) { | |
36093749 | 521 | static int ReportedOnce; |
522 | if (!ReportedOnce++) | |
523 | Report( | |
524 | "%s: WARNING: unexpected format specifier in printf " | |
525 | "interceptor: %.*s (reported once per process)\n", | |
526 | SanitizerToolName, dir.end - dir.begin, dir.begin); | |
7d752f28 | 527 | break; |
528 | } | |
529 | if (dir.convSpecifier == 'n') { | |
530 | void *argp = va_arg(aq, void *); | |
531 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); | |
532 | continue; | |
533 | } else if (size == FSS_STRLEN) { | |
534 | if (void *argp = va_arg(aq, void *)) { | |
535 | if (dir.starredPrecision) { | |
536 | // FIXME: properly support starred precision for strings. | |
537 | size = 0; | |
538 | } else if (dir.fieldPrecision > 0) { | |
539 | // Won't read more than "precision" symbols. | |
540 | size = internal_strnlen((const char *)argp, dir.fieldPrecision); | |
541 | if (size < dir.fieldPrecision) size++; | |
542 | } else { | |
543 | // Whole string will be accessed. | |
544 | size = internal_strlen((const char *)argp) + 1; | |
545 | } | |
546 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); | |
547 | } | |
548 | } else if (size == FSS_WCSLEN) { | |
549 | if (void *argp = va_arg(aq, void *)) { | |
550 | // FIXME: Properly support wide-character strings (via wcsrtombs). | |
551 | size = 0; | |
552 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); | |
553 | } | |
554 | } else { | |
555 | // Skip non-pointer args | |
556 | SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); | |
557 | } | |
558 | } | |
559 | } | |
560 | ||
5645a48f | 561 | #endif // SANITIZER_INTERCEPT_PRINTF |