Home | History | Annotate | Download | only in sanitizer_common
      1 //===-- sanitizer_common_interceptors_scanf.inc -----------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Scanf implementation for use in *Sanitizer interceptors.
     11 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
     12 // with a few common GNU extensions.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 #include <stdarg.h>
     16 
     17 struct ScanfDirective {
     18   int argIdx; // argument index, or -1 of not specified ("%n$")
     19   int fieldWidth;
     20   bool suppressed; // suppress assignment ("*")
     21   bool allocate;   // allocate space ("m")
     22   char lengthModifier[2];
     23   char convSpecifier;
     24   bool maybeGnuMalloc;
     25 };
     26 
     27 static const char *parse_number(const char *p, int *out) {
     28   *out = internal_atoll(p);
     29   while (*p >= '0' && *p <= '9')
     30     ++p;
     31   return p;
     32 }
     33 
     34 static bool char_is_one_of(char c, const char *s) {
     35   return !!internal_strchr(s, c);
     36 }
     37 
     38 // Parse scanf format string. If a valid directive in encountered, it is
     39 // returned in dir. This function returns the pointer to the first
     40 // unprocessed character, or 0 in case of error.
     41 // In case of the end-of-string, a pointer to the closing \0 is returned.
     42 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
     43                                     ScanfDirective *dir) {
     44   internal_memset(dir, 0, sizeof(*dir));
     45   dir->argIdx = -1;
     46 
     47   while (*p) {
     48     if (*p != '%') {
     49       ++p;
     50       continue;
     51     }
     52     ++p;
     53     // %%
     54     if (*p == '%') {
     55       ++p;
     56       continue;
     57     }
     58     if (*p == '\0') {
     59       return 0;
     60     }
     61     // %n$
     62     if (*p >= '0' && *p <= '9') {
     63       int number;
     64       const char *q = parse_number(p, &number);
     65       if (*q == '$') {
     66         dir->argIdx = number;
     67         p = q + 1;
     68       }
     69       // Otherwise, do not change p. This will be re-parsed later as the field
     70       // width.
     71     }
     72     // *
     73     if (*p == '*') {
     74       dir->suppressed = true;
     75       ++p;
     76     }
     77     // Field width.
     78     if (*p >= '0' && *p <= '9') {
     79       p = parse_number(p, &dir->fieldWidth);
     80       if (dir->fieldWidth <= 0)
     81         return 0;
     82     }
     83     // m
     84     if (*p == 'm') {
     85       dir->allocate = true;
     86       ++p;
     87     }
     88     // Length modifier.
     89     if (char_is_one_of(*p, "jztLq")) {
     90       dir->lengthModifier[0] = *p;
     91       ++p;
     92     } else if (*p == 'h') {
     93       dir->lengthModifier[0] = 'h';
     94       ++p;
     95       if (*p == 'h') {
     96         dir->lengthModifier[1] = 'h';
     97         ++p;
     98       }
     99     } else if (*p == 'l') {
    100       dir->lengthModifier[0] = 'l';
    101       ++p;
    102       if (*p == 'l') {
    103         dir->lengthModifier[1] = 'l';
    104         ++p;
    105       }
    106     }
    107     // Conversion specifier.
    108     dir->convSpecifier = *p++;
    109     // Consume %[...] expression.
    110     if (dir->convSpecifier == '[') {
    111       if (*p == '^')
    112         ++p;
    113       if (*p == ']')
    114         ++p;
    115       while (*p && *p != ']')
    116         ++p;
    117       if (*p == 0)
    118         return 0; // unexpected end of string
    119                   // Consume the closing ']'.
    120       ++p;
    121     }
    122     // This is unfortunately ambiguous between old GNU extension
    123     // of %as, %aS and %a[...] and newer POSIX %a followed by
    124     // letters s, S or [.
    125     if (allowGnuMalloc && dir->convSpecifier == 'a' &&
    126         !dir->lengthModifier[0]) {
    127       if (*p == 's' || *p == 'S') {
    128         dir->maybeGnuMalloc = true;
    129         ++p;
    130       } else if (*p == '[') {
    131         // Watch for %a[h-j%d], if % appears in the
    132         // [...] range, then we need to give up, we don't know
    133         // if scanf will parse it as POSIX %a [h-j %d ] or
    134         // GNU allocation of string with range dh-j plus %.
    135         const char *q = p + 1;
    136         if (*q == '^')
    137           ++q;
    138         if (*q == ']')
    139           ++q;
    140         while (*q && *q != ']' && *q != '%')
    141           ++q;
    142         if (*q == 0 || *q == '%')
    143           return 0;
    144         p = q + 1; // Consume the closing ']'.
    145         dir->maybeGnuMalloc = true;
    146       }
    147     }
    148     break;
    149   }
    150   return p;
    151 }
    152 
    153 // Returns true if the character is an integer conversion specifier.
    154 static bool scanf_is_integer_conv(char c) {
    155   return char_is_one_of(c, "diouxXn");
    156 }
    157 
    158 // Returns true if the character is an floating point conversion specifier.
    159 static bool scanf_is_float_conv(char c) {
    160   return char_is_one_of(c, "aAeEfFgG");
    161 }
    162 
    163 // Returns string output character size for string-like conversions,
    164 // or 0 if the conversion is invalid.
    165 static int scanf_get_char_size(ScanfDirective *dir) {
    166   if (char_is_one_of(dir->convSpecifier, "CS")) {
    167     // wchar_t
    168     return 0;
    169   }
    170 
    171   if (char_is_one_of(dir->convSpecifier, "cs[")) {
    172     if (dir->lengthModifier[0] == 'l')
    173       // wchar_t
    174       return 0;
    175     else if (dir->lengthModifier[0] == 0)
    176       return sizeof(char);
    177     else
    178       return 0;
    179   }
    180 
    181   return 0;
    182 }
    183 
    184 enum ScanfStoreSize {
    185   // Store size not known in advance; can be calculated as strlen() of the
    186   // destination buffer.
    187   SSS_STRLEN = -1,
    188   // Invalid conversion specifier.
    189   SSS_INVALID = 0
    190 };
    191 
    192 // Returns the store size of a scanf directive (if >0), or a value of
    193 // ScanfStoreSize.
    194 static int scanf_get_store_size(ScanfDirective *dir) {
    195   if (dir->allocate) {
    196     if (!char_is_one_of(dir->convSpecifier, "cCsS["))
    197       return SSS_INVALID;
    198     return sizeof(char *);
    199   }
    200 
    201   if (dir->maybeGnuMalloc) {
    202     if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
    203       return SSS_INVALID;
    204     // This is ambiguous, so check the smaller size of char * (if it is
    205     // a GNU extension of %as, %aS or %a[...]) and float (if it is
    206     // POSIX %a followed by s, S or [ letters).
    207     return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
    208   }
    209 
    210   if (scanf_is_integer_conv(dir->convSpecifier)) {
    211     switch (dir->lengthModifier[0]) {
    212     case 'h':
    213       return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
    214     case 'l':
    215       return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
    216     case 'L':
    217       return sizeof(long long);
    218     case 'j':
    219       return sizeof(INTMAX_T);
    220     case 'z':
    221       return sizeof(SIZE_T);
    222     case 't':
    223       return sizeof(PTRDIFF_T);
    224     case 0:
    225       return sizeof(int);
    226     default:
    227       return SSS_INVALID;
    228     }
    229   }
    230 
    231   if (scanf_is_float_conv(dir->convSpecifier)) {
    232     switch (dir->lengthModifier[0]) {
    233     case 'L':
    234     case 'q':
    235       return sizeof(long double);
    236     case 'l':
    237       return dir->lengthModifier[1] == 'l' ? sizeof(long double)
    238                                            : sizeof(double);
    239     case 0:
    240       return sizeof(float);
    241     default:
    242       return SSS_INVALID;
    243     }
    244   }
    245 
    246   if (char_is_one_of(dir->convSpecifier, "sS[")) {
    247     unsigned charSize = scanf_get_char_size(dir);
    248     if (charSize == 0)
    249       return SSS_INVALID;
    250     if (dir->fieldWidth == 0)
    251       return SSS_STRLEN;
    252     return (dir->fieldWidth + 1) * charSize;
    253   }
    254 
    255   if (char_is_one_of(dir->convSpecifier, "cC")) {
    256     unsigned charSize = scanf_get_char_size(dir);
    257     if (charSize == 0)
    258       return SSS_INVALID;
    259     if (dir->fieldWidth == 0)
    260       return charSize;
    261     return dir->fieldWidth * charSize;
    262   }
    263 
    264   if (dir->convSpecifier == 'p') {
    265     if (dir->lengthModifier[1] != 0)
    266       return SSS_INVALID;
    267     return sizeof(void *);
    268   }
    269 
    270   return SSS_INVALID;
    271 }
    272 
    273 // Common part of *scanf interceptors.
    274 // Process format string and va_list, and report all store ranges.
    275 // Stops when "consuming" n_inputs input items.
    276 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
    277                          const char *format, va_list aq) {
    278   CHECK_GT(n_inputs, 0);
    279   const char *p = format;
    280 
    281   while (*p && n_inputs) {
    282     ScanfDirective dir;
    283     p = scanf_parse_next(p, allowGnuMalloc, &dir);
    284     if (!p)
    285       break;
    286     if (dir.convSpecifier == 0) {
    287       // This can only happen at the end of the format string.
    288       CHECK_EQ(*p, 0);
    289       break;
    290     }
    291     // Here the directive is valid. Do what it says.
    292     if (dir.argIdx != -1) {
    293       // Unsupported.
    294       break;
    295     }
    296     if (dir.suppressed)
    297       continue;
    298     int size = scanf_get_store_size(&dir);
    299     if (size == SSS_INVALID)
    300       break;
    301     void *argp = va_arg(aq, void *);
    302     if (dir.convSpecifier != 'n')
    303       --n_inputs;
    304     if (size == SSS_STRLEN) {
    305       size = internal_strlen((const char *)argp) + 1;
    306     }
    307     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
    308   }
    309 }
    310