1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Scanf/printf implementation for use in *Sanitizer interceptors. 11 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 12 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html 13 // with a few common GNU extensions. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include <stdarg.h> 18 19 static const char *parse_number(const char *p, int *out) { 20 *out = internal_atoll(p); 21 while (*p >= '0' && *p <= '9') 22 ++p; 23 return p; 24 } 25 26 static const char *maybe_parse_param_index(const char *p, int *out) { 27 // n$ 28 if (*p >= '0' && *p <= '9') { 29 int number; 30 const char *q = parse_number(p, &number); 31 CHECK(q); 32 if (*q == '$') { 33 *out = number; 34 p = q + 1; 35 } 36 } 37 38 // Otherwise, do not change p. This will be re-parsed later as the field 39 // width. 40 return p; 41 } 42 43 static bool char_is_one_of(char c, const char *s) { 44 return !!internal_strchr(s, c); 45 } 46 47 static const char *maybe_parse_length_modifier(const char *p, char ll[2]) { 48 if (char_is_one_of(*p, "jztLq")) { 49 ll[0] = *p; 50 ++p; 51 } else if (*p == 'h') { 52 ll[0] = 'h'; 53 ++p; 54 if (*p == 'h') { 55 ll[1] = 'h'; 56 ++p; 57 } 58 } else if (*p == 'l') { 59 ll[0] = 'l'; 60 ++p; 61 if (*p == 'l') { 62 ll[1] = 'l'; 63 ++p; 64 } 65 } 66 return p; 67 } 68 69 // Returns true if the character is an integer conversion specifier. 70 static bool format_is_integer_conv(char c) { 71 return char_is_one_of(c, "diouxXn"); 72 } 73 74 // Returns true if the character is an floating point conversion specifier. 75 static bool format_is_float_conv(char c) { 76 return char_is_one_of(c, "aAeEfFgG"); 77 } 78 79 // Returns string output character size for string-like conversions, 80 // or 0 if the conversion is invalid. 81 static int format_get_char_size(char convSpecifier, 82 const char lengthModifier[2]) { 83 if (char_is_one_of(convSpecifier, "CS")) { 84 return sizeof(wchar_t); 85 } 86 87 if (char_is_one_of(convSpecifier, "cs[")) { 88 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') 89 return sizeof(wchar_t); 90 else if (lengthModifier[0] == '\0') 91 return sizeof(char); 92 } 93 94 return 0; 95 } 96 97 enum FormatStoreSize { 98 // Store size not known in advance; can be calculated as wcslen() of the 99 // destination buffer. 100 FSS_WCSLEN = -2, 101 // Store size not known in advance; can be calculated as strlen() of the 102 // destination buffer. 103 FSS_STRLEN = -1, 104 // Invalid conversion specifier. 105 FSS_INVALID = 0 106 }; 107 108 // Returns the memory size of a format directive (if >0), or a value of 109 // FormatStoreSize. 110 static int format_get_value_size(char convSpecifier, 111 const char lengthModifier[2], 112 bool promote_float) { 113 if (format_is_integer_conv(convSpecifier)) { 114 switch (lengthModifier[0]) { 115 case 'h': 116 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 117 case 'l': 118 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 119 case 'q': 120 return sizeof(long long); 121 case 'L': 122 return sizeof(long long); 123 case 'j': 124 return sizeof(INTMAX_T); 125 case 'z': 126 return sizeof(SIZE_T); 127 case 't': 128 return sizeof(PTRDIFF_T); 129 case 0: 130 return sizeof(int); 131 default: 132 return FSS_INVALID; 133 } 134 } 135 136 if (format_is_float_conv(convSpecifier)) { 137 switch (lengthModifier[0]) { 138 case 'L': 139 case 'q': 140 return sizeof(long double); 141 case 'l': 142 return lengthModifier[1] == 'l' ? sizeof(long double) 143 : sizeof(double); 144 case 0: 145 // Printf promotes floats to doubles but scanf does not 146 return promote_float ? sizeof(double) : sizeof(float); 147 default: 148 return FSS_INVALID; 149 } 150 } 151 152 if (convSpecifier == 'p') { 153 if (lengthModifier[0] != 0) 154 return FSS_INVALID; 155 return sizeof(void *); 156 } 157 158 return FSS_INVALID; 159 } 160 161 struct ScanfDirective { 162 int argIdx; // argument index, or -1 if not specified ("%n$") 163 int fieldWidth; 164 const char *begin; 165 const char *end; 166 bool suppressed; // suppress assignment ("*") 167 bool allocate; // allocate space ("m") 168 char lengthModifier[2]; 169 char convSpecifier; 170 bool maybeGnuMalloc; 171 }; 172 173 // Parse scanf format string. If a valid directive in encountered, it is 174 // returned in dir. This function returns the pointer to the first 175 // unprocessed character, or 0 in case of error. 176 // In case of the end-of-string, a pointer to the closing \0 is returned. 177 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 178 ScanfDirective *dir) { 179 internal_memset(dir, 0, sizeof(*dir)); 180 dir->argIdx = -1; 181 182 while (*p) { 183 if (*p != '%') { 184 ++p; 185 continue; 186 } 187 dir->begin = p; 188 ++p; 189 // %% 190 if (*p == '%') { 191 ++p; 192 continue; 193 } 194 if (*p == '\0') { 195 return nullptr; 196 } 197 // %n$ 198 p = maybe_parse_param_index(p, &dir->argIdx); 199 CHECK(p); 200 // * 201 if (*p == '*') { 202 dir->suppressed = true; 203 ++p; 204 } 205 // Field width 206 if (*p >= '0' && *p <= '9') { 207 p = parse_number(p, &dir->fieldWidth); 208 CHECK(p); 209 if (dir->fieldWidth <= 0) // Width if at all must be non-zero 210 return nullptr; 211 } 212 // m 213 if (*p == 'm') { 214 dir->allocate = true; 215 ++p; 216 } 217 // Length modifier. 218 p = maybe_parse_length_modifier(p, dir->lengthModifier); 219 // Conversion specifier. 220 dir->convSpecifier = *p++; 221 // Consume %[...] expression. 222 if (dir->convSpecifier == '[') { 223 if (*p == '^') 224 ++p; 225 if (*p == ']') 226 ++p; 227 while (*p && *p != ']') 228 ++p; 229 if (*p == 0) 230 return nullptr; // unexpected end of string 231 // Consume the closing ']'. 232 ++p; 233 } 234 // This is unfortunately ambiguous between old GNU extension 235 // of %as, %aS and %a[...] and newer POSIX %a followed by 236 // letters s, S or [. 237 if (allowGnuMalloc && dir->convSpecifier == 'a' && 238 !dir->lengthModifier[0]) { 239 if (*p == 's' || *p == 'S') { 240 dir->maybeGnuMalloc = true; 241 ++p; 242 } else if (*p == '[') { 243 // Watch for %a[h-j%d], if % appears in the 244 // [...] range, then we need to give up, we don't know 245 // if scanf will parse it as POSIX %a [h-j %d ] or 246 // GNU allocation of string with range dh-j plus %. 247 const char *q = p + 1; 248 if (*q == '^') 249 ++q; 250 if (*q == ']') 251 ++q; 252 while (*q && *q != ']' && *q != '%') 253 ++q; 254 if (*q == 0 || *q == '%') 255 return nullptr; 256 p = q + 1; // Consume the closing ']'. 257 dir->maybeGnuMalloc = true; 258 } 259 } 260 dir->end = p; 261 break; 262 } 263 return p; 264 } 265 266 static int scanf_get_value_size(ScanfDirective *dir) { 267 if (dir->allocate) { 268 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 269 return FSS_INVALID; 270 return sizeof(char *); 271 } 272 273 if (dir->maybeGnuMalloc) { 274 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 275 return FSS_INVALID; 276 // This is ambiguous, so check the smaller size of char * (if it is 277 // a GNU extension of %as, %aS or %a[...]) and float (if it is 278 // POSIX %a followed by s, S or [ letters). 279 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 280 } 281 282 if (char_is_one_of(dir->convSpecifier, "cCsS[")) { 283 bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS["); 284 unsigned charSize = 285 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 286 if (charSize == 0) 287 return FSS_INVALID; 288 if (dir->fieldWidth == 0) { 289 if (!needsTerminator) 290 return charSize; 291 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 292 } 293 return (dir->fieldWidth + needsTerminator) * charSize; 294 } 295 296 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false); 297 } 298 299 // Common part of *scanf interceptors. 300 // Process format string and va_list, and report all store ranges. 301 // Stops when "consuming" n_inputs input items. 302 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 303 const char *format, va_list aq) { 304 CHECK_GT(n_inputs, 0); 305 const char *p = format; 306 307 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 308 309 while (*p) { 310 ScanfDirective dir; 311 p = scanf_parse_next(p, allowGnuMalloc, &dir); 312 if (!p) 313 break; 314 if (dir.convSpecifier == 0) { 315 // This can only happen at the end of the format string. 316 CHECK_EQ(*p, 0); 317 break; 318 } 319 // Here the directive is valid. Do what it says. 320 if (dir.argIdx != -1) { 321 // Unsupported. 322 break; 323 } 324 if (dir.suppressed) 325 continue; 326 int size = scanf_get_value_size(&dir); 327 if (size == FSS_INVALID) { 328 Report("WARNING: unexpected format specifier in scanf interceptor: " 329 "%.*s\n", dir.end - dir.begin, dir.begin); 330 break; 331 } 332 void *argp = va_arg(aq, void *); 333 if (dir.convSpecifier != 'n') 334 --n_inputs; 335 if (n_inputs < 0) 336 break; 337 if (size == FSS_STRLEN) { 338 size = internal_strlen((const char *)argp) + 1; 339 } else if (size == FSS_WCSLEN) { 340 // FIXME: actually use wcslen() to calculate it. 341 size = 0; 342 } 343 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 344 } 345 } 346 347 #if SANITIZER_INTERCEPT_PRINTF 348 349 struct PrintfDirective { 350 int fieldWidth; 351 int fieldPrecision; 352 int argIdx; // width argument index, or -1 if not specified ("%*n$") 353 int precisionIdx; // precision argument index, or -1 if not specified (".*n$") 354 const char *begin; 355 const char *end; 356 bool starredWidth; 357 bool starredPrecision; 358 char lengthModifier[2]; 359 char convSpecifier; 360 }; 361 362 static const char *maybe_parse_number(const char *p, int *out) { 363 if (*p >= '0' && *p <= '9') 364 p = parse_number(p, out); 365 return p; 366 } 367 368 static const char *maybe_parse_number_or_star(const char *p, int *out, 369 bool *star) { 370 if (*p == '*') { 371 *star = true; 372 ++p; 373 } else { 374 *star = false; 375 p = maybe_parse_number(p, out); 376 } 377 return p; 378 } 379 380 // Parse printf format string. Same as scanf_parse_next. 381 static const char *printf_parse_next(const char *p, PrintfDirective *dir) { 382 internal_memset(dir, 0, sizeof(*dir)); 383 dir->argIdx = -1; 384 dir->precisionIdx = -1; 385 386 while (*p) { 387 if (*p != '%') { 388 ++p; 389 continue; 390 } 391 dir->begin = p; 392 ++p; 393 // %% 394 if (*p == '%') { 395 ++p; 396 continue; 397 } 398 if (*p == '\0') { 399 return nullptr; 400 } 401 // %n$ 402 p = maybe_parse_param_index(p, &dir->precisionIdx); 403 CHECK(p); 404 // Flags 405 while (char_is_one_of(*p, "'-+ #0")) { 406 ++p; 407 } 408 // Field width 409 p = maybe_parse_number_or_star(p, &dir->fieldWidth, 410 &dir->starredWidth); 411 if (!p) 412 return nullptr; 413 // Precision 414 if (*p == '.') { 415 ++p; 416 // Actual precision is optional (surprise!) 417 p = maybe_parse_number_or_star(p, &dir->fieldPrecision, 418 &dir->starredPrecision); 419 if (!p) 420 return nullptr; 421 // m$ 422 if (dir->starredPrecision) { 423 p = maybe_parse_param_index(p, &dir->precisionIdx); 424 CHECK(p); 425 } 426 } 427 // Length modifier. 428 p = maybe_parse_length_modifier(p, dir->lengthModifier); 429 // Conversion specifier. 430 dir->convSpecifier = *p++; 431 dir->end = p; 432 break; 433 } 434 return p; 435 } 436 437 static int printf_get_value_size(PrintfDirective *dir) { 438 if (dir->convSpecifier == 'm') { 439 return sizeof(char *); 440 } 441 442 if (char_is_one_of(dir->convSpecifier, "cCsS")) { 443 unsigned charSize = 444 format_get_char_size(dir->convSpecifier, dir->lengthModifier); 445 if (charSize == 0) 446 return FSS_INVALID; 447 if (char_is_one_of(dir->convSpecifier, "sS")) { 448 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; 449 } 450 return charSize; 451 } 452 453 return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true); 454 } 455 456 #define SKIP_SCALAR_ARG(aq, convSpecifier, size) \ 457 do { \ 458 if (format_is_float_conv(convSpecifier)) { \ 459 switch (size) { \ 460 case 8: \ 461 va_arg(*aq, double); \ 462 break; \ 463 case 12: \ 464 va_arg(*aq, long double); \ 465 break; \ 466 case 16: \ 467 va_arg(*aq, long double); \ 468 break; \ 469 default: \ 470 Report("WARNING: unexpected floating-point arg size" \ 471 " in printf interceptor: %d\n", size); \ 472 return; \ 473 } \ 474 } else { \ 475 switch (size) { \ 476 case 1: \ 477 case 2: \ 478 case 4: \ 479 va_arg(*aq, u32); \ 480 break; \ 481 case 8: \ 482 va_arg(*aq, u64); \ 483 break; \ 484 default: \ 485 Report("WARNING: unexpected arg size" \ 486 " in printf interceptor: %d\n", size); \ 487 return; \ 488 } \ 489 } \ 490 } while (0) 491 492 // Common part of *printf interceptors. 493 // Process format string and va_list, and report all load ranges. 494 static void printf_common(void *ctx, const char *format, va_list aq) { 495 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); 496 497 const char *p = format; 498 499 while (*p) { 500 PrintfDirective dir; 501 p = printf_parse_next(p, &dir); 502 if (!p) 503 break; 504 if (dir.convSpecifier == 0) { 505 // This can only happen at the end of the format string. 506 CHECK_EQ(*p, 0); 507 break; 508 } 509 // Here the directive is valid. Do what it says. 510 if (dir.argIdx != -1 || dir.precisionIdx != -1) { 511 // Unsupported. 512 break; 513 } 514 if (dir.starredWidth) { 515 // Dynamic width 516 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 517 } 518 if (dir.starredPrecision) { 519 // Dynamic precision 520 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); 521 } 522 int size = printf_get_value_size(&dir); 523 if (size == FSS_INVALID) { 524 Report("WARNING: unexpected format specifier in printf " 525 "interceptor: %.*s\n", dir.end - dir.begin, dir.begin); 526 break; 527 } 528 if (dir.convSpecifier == 'n') { 529 void *argp = va_arg(aq, void *); 530 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 531 continue; 532 } else if (size == FSS_STRLEN) { 533 if (void *argp = va_arg(aq, void *)) { 534 if (dir.starredPrecision) { 535 // FIXME: properly support starred precision for strings. 536 size = 0; 537 } else if (dir.fieldPrecision > 0) { 538 // Won't read more than "precision" symbols. 539 size = internal_strnlen((const char *)argp, dir.fieldPrecision); 540 if (size < dir.fieldPrecision) size++; 541 } else { 542 // Whole string will be accessed. 543 size = internal_strlen((const char *)argp) + 1; 544 } 545 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 546 } 547 } else if (size == FSS_WCSLEN) { 548 if (void *argp = va_arg(aq, void *)) { 549 // FIXME: Properly support wide-character strings (via wcsrtombs). 550 size = 0; 551 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size); 552 } 553 } else { 554 // Skip non-pointer args 555 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); 556 } 557 } 558 } 559 560 #endif // SANITIZER_INTERCEPT_PRINTF 561