Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Scanf/printf implementation for use in *Sanitizer interceptors.
     11 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
     12 // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
     13 // with a few common GNU extensions.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include <stdarg.h>
     18 
     19 static const char *parse_number(const char *p, int *out) {
     20   *out = internal_atoll(p);
     21   while (*p >= '0' && *p <= '9')
     22     ++p;
     23   return p;
     24 }
     25 
     26 static const char *maybe_parse_param_index(const char *p, int *out) {
     27   // n$
     28   if (*p >= '0' && *p <= '9') {
     29     int number;
     30     const char *q = parse_number(p, &number);
     31     CHECK(q);
     32     if (*q == '$') {
     33       *out = number;
     34       p = q + 1;
     35     }
     36   }
     37 
     38   // Otherwise, do not change p. This will be re-parsed later as the field
     39   // width.
     40   return p;
     41 }
     42 
     43 static bool char_is_one_of(char c, const char *s) {
     44   return !!internal_strchr(s, c);
     45 }
     46 
     47 static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
     48   if (char_is_one_of(*p, "jztLq")) {
     49     ll[0] = *p;
     50     ++p;
     51   } else if (*p == 'h') {
     52     ll[0] = 'h';
     53     ++p;
     54     if (*p == 'h') {
     55       ll[1] = 'h';
     56       ++p;
     57     }
     58   } else if (*p == 'l') {
     59     ll[0] = 'l';
     60     ++p;
     61     if (*p == 'l') {
     62       ll[1] = 'l';
     63       ++p;
     64     }
     65   }
     66   return p;
     67 }
     68 
     69 // Returns true if the character is an integer conversion specifier.
     70 static bool format_is_integer_conv(char c) {
     71   return char_is_one_of(c, "diouxXn");
     72 }
     73 
     74 // Returns true if the character is an floating point conversion specifier.
     75 static bool format_is_float_conv(char c) {
     76   return char_is_one_of(c, "aAeEfFgG");
     77 }
     78 
     79 // Returns string output character size for string-like conversions,
     80 // or 0 if the conversion is invalid.
     81 static int format_get_char_size(char convSpecifier,
     82                                 const char lengthModifier[2]) {
     83   if (char_is_one_of(convSpecifier, "CS")) {
     84     return sizeof(wchar_t);
     85   }
     86 
     87   if (char_is_one_of(convSpecifier, "cs[")) {
     88     if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
     89       return sizeof(wchar_t);
     90     else if (lengthModifier[0] == '\0')
     91       return sizeof(char);
     92   }
     93 
     94   return 0;
     95 }
     96 
     97 enum FormatStoreSize {
     98   // Store size not known in advance; can be calculated as wcslen() of the
     99   // destination buffer.
    100   FSS_WCSLEN = -2,
    101   // Store size not known in advance; can be calculated as strlen() of the
    102   // destination buffer.
    103   FSS_STRLEN = -1,
    104   // Invalid conversion specifier.
    105   FSS_INVALID = 0
    106 };
    107 
    108 // Returns the memory size of a format directive (if >0), or a value of
    109 // FormatStoreSize.
    110 static int format_get_value_size(char convSpecifier,
    111                                  const char lengthModifier[2],
    112                                  bool promote_float) {
    113   if (format_is_integer_conv(convSpecifier)) {
    114     switch (lengthModifier[0]) {
    115     case 'h':
    116       return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
    117     case 'l':
    118       return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
    119     case 'q':
    120       return sizeof(long long);
    121     case 'L':
    122       return sizeof(long long);
    123     case 'j':
    124       return sizeof(INTMAX_T);
    125     case 'z':
    126       return sizeof(SIZE_T);
    127     case 't':
    128       return sizeof(PTRDIFF_T);
    129     case 0:
    130       return sizeof(int);
    131     default:
    132       return FSS_INVALID;
    133     }
    134   }
    135 
    136   if (format_is_float_conv(convSpecifier)) {
    137     switch (lengthModifier[0]) {
    138     case 'L':
    139     case 'q':
    140       return sizeof(long double);
    141     case 'l':
    142       return lengthModifier[1] == 'l' ? sizeof(long double)
    143                                            : sizeof(double);
    144     case 0:
    145       // Printf promotes floats to doubles but scanf does not
    146       return promote_float ? sizeof(double) : sizeof(float);
    147     default:
    148       return FSS_INVALID;
    149     }
    150   }
    151 
    152   if (convSpecifier == 'p') {
    153     if (lengthModifier[0] != 0)
    154       return FSS_INVALID;
    155     return sizeof(void *);
    156   }
    157 
    158   return FSS_INVALID;
    159 }
    160 
    161 struct ScanfDirective {
    162   int argIdx; // argument index, or -1 if not specified ("%n$")
    163   int fieldWidth;
    164   const char *begin;
    165   const char *end;
    166   bool suppressed; // suppress assignment ("*")
    167   bool allocate;   // allocate space ("m")
    168   char lengthModifier[2];
    169   char convSpecifier;
    170   bool maybeGnuMalloc;
    171 };
    172 
    173 // Parse scanf format string. If a valid directive in encountered, it is
    174 // returned in dir. This function returns the pointer to the first
    175 // unprocessed character, or 0 in case of error.
    176 // In case of the end-of-string, a pointer to the closing \0 is returned.
    177 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
    178                                     ScanfDirective *dir) {
    179   internal_memset(dir, 0, sizeof(*dir));
    180   dir->argIdx = -1;
    181 
    182   while (*p) {
    183     if (*p != '%') {
    184       ++p;
    185       continue;
    186     }
    187     dir->begin = p;
    188     ++p;
    189     // %%
    190     if (*p == '%') {
    191       ++p;
    192       continue;
    193     }
    194     if (*p == '\0') {
    195       return nullptr;
    196     }
    197     // %n$
    198     p = maybe_parse_param_index(p, &dir->argIdx);
    199     CHECK(p);
    200     // *
    201     if (*p == '*') {
    202       dir->suppressed = true;
    203       ++p;
    204     }
    205     // Field width
    206     if (*p >= '0' && *p <= '9') {
    207       p = parse_number(p, &dir->fieldWidth);
    208       CHECK(p);
    209       if (dir->fieldWidth <= 0)  // Width if at all must be non-zero
    210         return nullptr;
    211     }
    212     // m
    213     if (*p == 'm') {
    214       dir->allocate = true;
    215       ++p;
    216     }
    217     // Length modifier.
    218     p = maybe_parse_length_modifier(p, dir->lengthModifier);
    219     // Conversion specifier.
    220     dir->convSpecifier = *p++;
    221     // Consume %[...] expression.
    222     if (dir->convSpecifier == '[') {
    223       if (*p == '^')
    224         ++p;
    225       if (*p == ']')
    226         ++p;
    227       while (*p && *p != ']')
    228         ++p;
    229       if (*p == 0)
    230         return nullptr; // unexpected end of string
    231                         // Consume the closing ']'.
    232       ++p;
    233     }
    234     // This is unfortunately ambiguous between old GNU extension
    235     // of %as, %aS and %a[...] and newer POSIX %a followed by
    236     // letters s, S or [.
    237     if (allowGnuMalloc && dir->convSpecifier == 'a' &&
    238         !dir->lengthModifier[0]) {
    239       if (*p == 's' || *p == 'S') {
    240         dir->maybeGnuMalloc = true;
    241         ++p;
    242       } else if (*p == '[') {
    243         // Watch for %a[h-j%d], if % appears in the
    244         // [...] range, then we need to give up, we don't know
    245         // if scanf will parse it as POSIX %a [h-j %d ] or
    246         // GNU allocation of string with range dh-j plus %.
    247         const char *q = p + 1;
    248         if (*q == '^')
    249           ++q;
    250         if (*q == ']')
    251           ++q;
    252         while (*q && *q != ']' && *q != '%')
    253           ++q;
    254         if (*q == 0 || *q == '%')
    255           return nullptr;
    256         p = q + 1; // Consume the closing ']'.
    257         dir->maybeGnuMalloc = true;
    258       }
    259     }
    260     dir->end = p;
    261     break;
    262   }
    263   return p;
    264 }
    265 
    266 static int scanf_get_value_size(ScanfDirective *dir) {
    267   if (dir->allocate) {
    268     if (!char_is_one_of(dir->convSpecifier, "cCsS["))
    269       return FSS_INVALID;
    270     return sizeof(char *);
    271   }
    272 
    273   if (dir->maybeGnuMalloc) {
    274     if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
    275       return FSS_INVALID;
    276     // This is ambiguous, so check the smaller size of char * (if it is
    277     // a GNU extension of %as, %aS or %a[...]) and float (if it is
    278     // POSIX %a followed by s, S or [ letters).
    279     return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
    280   }
    281 
    282   if (char_is_one_of(dir->convSpecifier, "cCsS[")) {
    283     bool needsTerminator = char_is_one_of(dir->convSpecifier, "sS[");
    284     unsigned charSize =
    285         format_get_char_size(dir->convSpecifier, dir->lengthModifier);
    286     if (charSize == 0)
    287       return FSS_INVALID;
    288     if (dir->fieldWidth == 0) {
    289       if (!needsTerminator)
    290         return charSize;
    291       return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
    292     }
    293     return (dir->fieldWidth + needsTerminator) * charSize;
    294   }
    295 
    296   return format_get_value_size(dir->convSpecifier, dir->lengthModifier, false);
    297 }
    298 
    299 // Common part of *scanf interceptors.
    300 // Process format string and va_list, and report all store ranges.
    301 // Stops when "consuming" n_inputs input items.
    302 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
    303                          const char *format, va_list aq) {
    304   CHECK_GT(n_inputs, 0);
    305   const char *p = format;
    306 
    307   COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
    308 
    309   while (*p) {
    310     ScanfDirective dir;
    311     p = scanf_parse_next(p, allowGnuMalloc, &dir);
    312     if (!p)
    313       break;
    314     if (dir.convSpecifier == 0) {
    315       // This can only happen at the end of the format string.
    316       CHECK_EQ(*p, 0);
    317       break;
    318     }
    319     // Here the directive is valid. Do what it says.
    320     if (dir.argIdx != -1) {
    321       // Unsupported.
    322       break;
    323     }
    324     if (dir.suppressed)
    325       continue;
    326     int size = scanf_get_value_size(&dir);
    327     if (size == FSS_INVALID) {
    328       Report("WARNING: unexpected format specifier in scanf interceptor: "
    329         "%.*s\n", dir.end - dir.begin, dir.begin);
    330       break;
    331     }
    332     void *argp = va_arg(aq, void *);
    333     if (dir.convSpecifier != 'n')
    334       --n_inputs;
    335     if (n_inputs < 0)
    336       break;
    337     if (size == FSS_STRLEN) {
    338       size = internal_strlen((const char *)argp) + 1;
    339     } else if (size == FSS_WCSLEN) {
    340       // FIXME: actually use wcslen() to calculate it.
    341       size = 0;
    342     }
    343     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
    344   }
    345 }
    346 
    347 #if SANITIZER_INTERCEPT_PRINTF
    348 
    349 struct PrintfDirective {
    350   int fieldWidth;
    351   int fieldPrecision;
    352   int argIdx; // width argument index, or -1 if not specified ("%*n$")
    353   int precisionIdx; // precision argument index, or -1 if not specified (".*n$")
    354   const char *begin;
    355   const char *end;
    356   bool starredWidth;
    357   bool starredPrecision;
    358   char lengthModifier[2];
    359   char convSpecifier;
    360 };
    361 
    362 static const char *maybe_parse_number(const char *p, int *out) {
    363   if (*p >= '0' && *p <= '9')
    364     p = parse_number(p, out);
    365   return p;
    366 }
    367 
    368 static const char *maybe_parse_number_or_star(const char *p, int *out,
    369                                               bool *star) {
    370   if (*p == '*') {
    371     *star = true;
    372     ++p;
    373   } else {
    374     *star = false;
    375     p = maybe_parse_number(p, out);
    376   }
    377   return p;
    378 }
    379 
    380 // Parse printf format string. Same as scanf_parse_next.
    381 static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
    382   internal_memset(dir, 0, sizeof(*dir));
    383   dir->argIdx = -1;
    384   dir->precisionIdx = -1;
    385 
    386   while (*p) {
    387     if (*p != '%') {
    388       ++p;
    389       continue;
    390     }
    391     dir->begin = p;
    392     ++p;
    393     // %%
    394     if (*p == '%') {
    395       ++p;
    396       continue;
    397     }
    398     if (*p == '\0') {
    399       return nullptr;
    400     }
    401     // %n$
    402     p = maybe_parse_param_index(p, &dir->precisionIdx);
    403     CHECK(p);
    404     // Flags
    405     while (char_is_one_of(*p, "'-+ #0")) {
    406       ++p;
    407     }
    408     // Field width
    409     p = maybe_parse_number_or_star(p, &dir->fieldWidth,
    410                                    &dir->starredWidth);
    411     if (!p)
    412       return nullptr;
    413     // Precision
    414     if (*p == '.') {
    415       ++p;
    416       // Actual precision is optional (surprise!)
    417       p = maybe_parse_number_or_star(p, &dir->fieldPrecision,
    418                                      &dir->starredPrecision);
    419       if (!p)
    420         return nullptr;
    421       // m$
    422       if (dir->starredPrecision) {
    423         p = maybe_parse_param_index(p, &dir->precisionIdx);
    424         CHECK(p);
    425       }
    426     }
    427     // Length modifier.
    428     p = maybe_parse_length_modifier(p, dir->lengthModifier);
    429     // Conversion specifier.
    430     dir->convSpecifier = *p++;
    431     dir->end = p;
    432     break;
    433   }
    434   return p;
    435 }
    436 
    437 static int printf_get_value_size(PrintfDirective *dir) {
    438   if (dir->convSpecifier == 'm') {
    439     return sizeof(char *);
    440   }
    441 
    442   if (char_is_one_of(dir->convSpecifier, "cCsS")) {
    443     unsigned charSize =
    444         format_get_char_size(dir->convSpecifier, dir->lengthModifier);
    445     if (charSize == 0)
    446       return FSS_INVALID;
    447     if (char_is_one_of(dir->convSpecifier, "sS")) {
    448       return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
    449     }
    450     return charSize;
    451   }
    452 
    453   return format_get_value_size(dir->convSpecifier, dir->lengthModifier, true);
    454 }
    455 
    456 #define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
    457   do {                                                             \
    458     if (format_is_float_conv(convSpecifier)) {                     \
    459       switch (size) {                                              \
    460       case 8:                                                      \
    461         va_arg(*aq, double);                                       \
    462         break;                                                     \
    463       case 12:                                                     \
    464         va_arg(*aq, long double);                                  \
    465         break;                                                     \
    466       case 16:                                                     \
    467         va_arg(*aq, long double);                                  \
    468         break;                                                     \
    469       default:                                                     \
    470         Report("WARNING: unexpected floating-point arg size"       \
    471                " in printf interceptor: %d\n", size);              \
    472         return;                                                    \
    473       }                                                            \
    474     } else {                                                       \
    475       switch (size) {                                              \
    476       case 1:                                                      \
    477       case 2:                                                      \
    478       case 4:                                                      \
    479         va_arg(*aq, u32);                                          \
    480         break;                                                     \
    481       case 8:                                                      \
    482         va_arg(*aq, u64);                                          \
    483         break;                                                     \
    484       default:                                                     \
    485         Report("WARNING: unexpected arg size"                      \
    486                " in printf interceptor: %d\n", size);              \
    487         return;                                                    \
    488       }                                                            \
    489     }                                                              \
    490   } while (0)
    491 
    492 // Common part of *printf interceptors.
    493 // Process format string and va_list, and report all load ranges.
    494 static void printf_common(void *ctx, const char *format, va_list aq) {
    495   COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
    496 
    497   const char *p = format;
    498 
    499   while (*p) {
    500     PrintfDirective dir;
    501     p = printf_parse_next(p, &dir);
    502     if (!p)
    503       break;
    504     if (dir.convSpecifier == 0) {
    505       // This can only happen at the end of the format string.
    506       CHECK_EQ(*p, 0);
    507       break;
    508     }
    509     // Here the directive is valid. Do what it says.
    510     if (dir.argIdx != -1 || dir.precisionIdx != -1) {
    511       // Unsupported.
    512       break;
    513     }
    514     if (dir.starredWidth) {
    515       // Dynamic width
    516       SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
    517     }
    518     if (dir.starredPrecision) {
    519       // Dynamic precision
    520       SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
    521     }
    522     int size = printf_get_value_size(&dir);
    523     if (size == FSS_INVALID) {
    524       Report("WARNING: unexpected format specifier in printf "
    525              "interceptor: %.*s\n", dir.end - dir.begin, dir.begin);
    526       break;
    527     }
    528     if (dir.convSpecifier == 'n') {
    529       void *argp = va_arg(aq, void *);
    530       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
    531       continue;
    532     } else if (size == FSS_STRLEN) {
    533       if (void *argp = va_arg(aq, void *)) {
    534         if (dir.starredPrecision) {
    535           // FIXME: properly support starred precision for strings.
    536           size = 0;
    537         } else if (dir.fieldPrecision > 0) {
    538           // Won't read more than "precision" symbols.
    539           size = internal_strnlen((const char *)argp, dir.fieldPrecision);
    540           if (size < dir.fieldPrecision) size++;
    541         } else {
    542           // Whole string will be accessed.
    543           size = internal_strlen((const char *)argp) + 1;
    544         }
    545         COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
    546       }
    547     } else if (size == FSS_WCSLEN) {
    548       if (void *argp = va_arg(aq, void *)) {
    549         // FIXME: Properly support wide-character strings (via wcsrtombs).
    550         size = 0;
    551         COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, size);
    552       }
    553     } else {
    554       // Skip non-pointer args
    555       SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
    556     }
    557   }
    558 }
    559 
    560 #endif // SANITIZER_INTERCEPT_PRINTF
    561