1 //===-- sanitizer_common_interceptors_scanf.inc -----------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Scanf implementation for use in *Sanitizer interceptors. 11 // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html 12 // with a few common GNU extensions. 13 // 14 //===----------------------------------------------------------------------===// 15 #include <stdarg.h> 16 17 struct ScanfDirective { 18 int argIdx; // argument index, or -1 of not specified ("%n$") 19 int fieldWidth; 20 bool suppressed; // suppress assignment ("*") 21 bool allocate; // allocate space ("m") 22 char lengthModifier[2]; 23 char convSpecifier; 24 bool maybeGnuMalloc; 25 }; 26 27 static const char *parse_number(const char *p, int *out) { 28 *out = internal_atoll(p); 29 while (*p >= '0' && *p <= '9') 30 ++p; 31 return p; 32 } 33 34 static bool char_is_one_of(char c, const char *s) { 35 return !!internal_strchr(s, c); 36 } 37 38 // Parse scanf format string. If a valid directive in encountered, it is 39 // returned in dir. This function returns the pointer to the first 40 // unprocessed character, or 0 in case of error. 41 // In case of the end-of-string, a pointer to the closing \0 is returned. 42 static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, 43 ScanfDirective *dir) { 44 internal_memset(dir, 0, sizeof(*dir)); 45 dir->argIdx = -1; 46 47 while (*p) { 48 if (*p != '%') { 49 ++p; 50 continue; 51 } 52 ++p; 53 // %% 54 if (*p == '%') { 55 ++p; 56 continue; 57 } 58 if (*p == '\0') { 59 return 0; 60 } 61 // %n$ 62 if (*p >= '0' && *p <= '9') { 63 int number; 64 const char *q = parse_number(p, &number); 65 if (*q == '$') { 66 dir->argIdx = number; 67 p = q + 1; 68 } 69 // Otherwise, do not change p. This will be re-parsed later as the field 70 // width. 71 } 72 // * 73 if (*p == '*') { 74 dir->suppressed = true; 75 ++p; 76 } 77 // Field width. 78 if (*p >= '0' && *p <= '9') { 79 p = parse_number(p, &dir->fieldWidth); 80 if (dir->fieldWidth <= 0) 81 return 0; 82 } 83 // m 84 if (*p == 'm') { 85 dir->allocate = true; 86 ++p; 87 } 88 // Length modifier. 89 if (char_is_one_of(*p, "jztLq")) { 90 dir->lengthModifier[0] = *p; 91 ++p; 92 } else if (*p == 'h') { 93 dir->lengthModifier[0] = 'h'; 94 ++p; 95 if (*p == 'h') { 96 dir->lengthModifier[1] = 'h'; 97 ++p; 98 } 99 } else if (*p == 'l') { 100 dir->lengthModifier[0] = 'l'; 101 ++p; 102 if (*p == 'l') { 103 dir->lengthModifier[1] = 'l'; 104 ++p; 105 } 106 } 107 // Conversion specifier. 108 dir->convSpecifier = *p++; 109 // Consume %[...] expression. 110 if (dir->convSpecifier == '[') { 111 if (*p == '^') 112 ++p; 113 if (*p == ']') 114 ++p; 115 while (*p && *p != ']') 116 ++p; 117 if (*p == 0) 118 return 0; // unexpected end of string 119 // Consume the closing ']'. 120 ++p; 121 } 122 // This is unfortunately ambiguous between old GNU extension 123 // of %as, %aS and %a[...] and newer POSIX %a followed by 124 // letters s, S or [. 125 if (allowGnuMalloc && dir->convSpecifier == 'a' && 126 !dir->lengthModifier[0]) { 127 if (*p == 's' || *p == 'S') { 128 dir->maybeGnuMalloc = true; 129 ++p; 130 } else if (*p == '[') { 131 // Watch for %a[h-j%d], if % appears in the 132 // [...] range, then we need to give up, we don't know 133 // if scanf will parse it as POSIX %a [h-j %d ] or 134 // GNU allocation of string with range dh-j plus %. 135 const char *q = p + 1; 136 if (*q == '^') 137 ++q; 138 if (*q == ']') 139 ++q; 140 while (*q && *q != ']' && *q != '%') 141 ++q; 142 if (*q == 0 || *q == '%') 143 return 0; 144 p = q + 1; // Consume the closing ']'. 145 dir->maybeGnuMalloc = true; 146 } 147 } 148 break; 149 } 150 return p; 151 } 152 153 // Returns true if the character is an integer conversion specifier. 154 static bool scanf_is_integer_conv(char c) { 155 return char_is_one_of(c, "diouxXn"); 156 } 157 158 // Returns true if the character is an floating point conversion specifier. 159 static bool scanf_is_float_conv(char c) { 160 return char_is_one_of(c, "aAeEfFgG"); 161 } 162 163 // Returns string output character size for string-like conversions, 164 // or 0 if the conversion is invalid. 165 static int scanf_get_char_size(ScanfDirective *dir) { 166 if (char_is_one_of(dir->convSpecifier, "CS")) { 167 // wchar_t 168 return 0; 169 } 170 171 if (char_is_one_of(dir->convSpecifier, "cs[")) { 172 if (dir->lengthModifier[0] == 'l') 173 // wchar_t 174 return 0; 175 else if (dir->lengthModifier[0] == 0) 176 return sizeof(char); 177 else 178 return 0; 179 } 180 181 return 0; 182 } 183 184 enum ScanfStoreSize { 185 // Store size not known in advance; can be calculated as strlen() of the 186 // destination buffer. 187 SSS_STRLEN = -1, 188 // Invalid conversion specifier. 189 SSS_INVALID = 0 190 }; 191 192 // Returns the store size of a scanf directive (if >0), or a value of 193 // ScanfStoreSize. 194 static int scanf_get_store_size(ScanfDirective *dir) { 195 if (dir->allocate) { 196 if (!char_is_one_of(dir->convSpecifier, "cCsS[")) 197 return SSS_INVALID; 198 return sizeof(char *); 199 } 200 201 if (dir->maybeGnuMalloc) { 202 if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) 203 return SSS_INVALID; 204 // This is ambiguous, so check the smaller size of char * (if it is 205 // a GNU extension of %as, %aS or %a[...]) and float (if it is 206 // POSIX %a followed by s, S or [ letters). 207 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); 208 } 209 210 if (scanf_is_integer_conv(dir->convSpecifier)) { 211 switch (dir->lengthModifier[0]) { 212 case 'h': 213 return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); 214 case 'l': 215 return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); 216 case 'L': 217 return sizeof(long long); 218 case 'j': 219 return sizeof(INTMAX_T); 220 case 'z': 221 return sizeof(SIZE_T); 222 case 't': 223 return sizeof(PTRDIFF_T); 224 case 0: 225 return sizeof(int); 226 default: 227 return SSS_INVALID; 228 } 229 } 230 231 if (scanf_is_float_conv(dir->convSpecifier)) { 232 switch (dir->lengthModifier[0]) { 233 case 'L': 234 case 'q': 235 return sizeof(long double); 236 case 'l': 237 return dir->lengthModifier[1] == 'l' ? sizeof(long double) 238 : sizeof(double); 239 case 0: 240 return sizeof(float); 241 default: 242 return SSS_INVALID; 243 } 244 } 245 246 if (char_is_one_of(dir->convSpecifier, "sS[")) { 247 unsigned charSize = scanf_get_char_size(dir); 248 if (charSize == 0) 249 return SSS_INVALID; 250 if (dir->fieldWidth == 0) 251 return SSS_STRLEN; 252 return (dir->fieldWidth + 1) * charSize; 253 } 254 255 if (char_is_one_of(dir->convSpecifier, "cC")) { 256 unsigned charSize = scanf_get_char_size(dir); 257 if (charSize == 0) 258 return SSS_INVALID; 259 if (dir->fieldWidth == 0) 260 return charSize; 261 return dir->fieldWidth * charSize; 262 } 263 264 if (dir->convSpecifier == 'p') { 265 if (dir->lengthModifier[1] != 0) 266 return SSS_INVALID; 267 return sizeof(void *); 268 } 269 270 return SSS_INVALID; 271 } 272 273 // Common part of *scanf interceptors. 274 // Process format string and va_list, and report all store ranges. 275 // Stops when "consuming" n_inputs input items. 276 static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, 277 const char *format, va_list aq) { 278 CHECK_GT(n_inputs, 0); 279 const char *p = format; 280 281 while (*p && n_inputs) { 282 ScanfDirective dir; 283 p = scanf_parse_next(p, allowGnuMalloc, &dir); 284 if (!p) 285 break; 286 if (dir.convSpecifier == 0) { 287 // This can only happen at the end of the format string. 288 CHECK_EQ(*p, 0); 289 break; 290 } 291 // Here the directive is valid. Do what it says. 292 if (dir.argIdx != -1) { 293 // Unsupported. 294 break; 295 } 296 if (dir.suppressed) 297 continue; 298 int size = scanf_get_store_size(&dir); 299 if (size == SSS_INVALID) 300 break; 301 void *argp = va_arg(aq, void *); 302 if (dir.convSpecifier != 'n') 303 --n_inputs; 304 if (size == SSS_STRLEN) { 305 size = internal_strlen((const char *)argp) + 1; 306 } 307 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); 308 } 309 } 310