1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2015, Daniel Stenberg, <daniel (at) haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at http://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 23 #include "curl_setup.h" 24 25 #include "curl_fnmatch.h" 26 #include "curl_memory.h" 27 28 /* The last #include file should be: */ 29 #include "memdebug.h" 30 31 #define CURLFNM_CHARSET_LEN (sizeof(char) * 256) 32 #define CURLFNM_CHSET_SIZE (CURLFNM_CHARSET_LEN + 15) 33 34 #define CURLFNM_NEGATE CURLFNM_CHARSET_LEN 35 36 #define CURLFNM_ALNUM (CURLFNM_CHARSET_LEN + 1) 37 #define CURLFNM_DIGIT (CURLFNM_CHARSET_LEN + 2) 38 #define CURLFNM_XDIGIT (CURLFNM_CHARSET_LEN + 3) 39 #define CURLFNM_ALPHA (CURLFNM_CHARSET_LEN + 4) 40 #define CURLFNM_PRINT (CURLFNM_CHARSET_LEN + 5) 41 #define CURLFNM_BLANK (CURLFNM_CHARSET_LEN + 6) 42 #define CURLFNM_LOWER (CURLFNM_CHARSET_LEN + 7) 43 #define CURLFNM_GRAPH (CURLFNM_CHARSET_LEN + 8) 44 #define CURLFNM_SPACE (CURLFNM_CHARSET_LEN + 9) 45 #define CURLFNM_UPPER (CURLFNM_CHARSET_LEN + 10) 46 47 typedef enum { 48 CURLFNM_LOOP_DEFAULT = 0, 49 CURLFNM_LOOP_BACKSLASH 50 } loop_state; 51 52 typedef enum { 53 CURLFNM_SCHS_DEFAULT = 0, 54 CURLFNM_SCHS_MAYRANGE, 55 CURLFNM_SCHS_MAYRANGE2, 56 CURLFNM_SCHS_RIGHTBR, 57 CURLFNM_SCHS_RIGHTBRLEFTBR 58 } setcharset_state; 59 60 typedef enum { 61 CURLFNM_PKW_INIT = 0, 62 CURLFNM_PKW_DDOT 63 } parsekey_state; 64 65 #define SETCHARSET_OK 1 66 #define SETCHARSET_FAIL 0 67 68 static int parsekeyword(unsigned char **pattern, unsigned char *charset) 69 { 70 parsekey_state state = CURLFNM_PKW_INIT; 71 #define KEYLEN 10 72 char keyword[KEYLEN] = { 0 }; 73 int found = FALSE; 74 int i; 75 unsigned char *p = *pattern; 76 for(i = 0; !found; i++) { 77 char c = *p++; 78 if(i >= KEYLEN) 79 return SETCHARSET_FAIL; 80 switch(state) { 81 case CURLFNM_PKW_INIT: 82 if(ISALPHA(c) && ISLOWER(c)) 83 keyword[i] = c; 84 else if(c == ':') 85 state = CURLFNM_PKW_DDOT; 86 else 87 return 0; 88 break; 89 case CURLFNM_PKW_DDOT: 90 if(c == ']') 91 found = TRUE; 92 else 93 return SETCHARSET_FAIL; 94 } 95 } 96 #undef KEYLEN 97 98 *pattern = p; /* move caller's pattern pointer */ 99 if(strcmp(keyword, "digit") == 0) 100 charset[CURLFNM_DIGIT] = 1; 101 else if(strcmp(keyword, "alnum") == 0) 102 charset[CURLFNM_ALNUM] = 1; 103 else if(strcmp(keyword, "alpha") == 0) 104 charset[CURLFNM_ALPHA] = 1; 105 else if(strcmp(keyword, "xdigit") == 0) 106 charset[CURLFNM_XDIGIT] = 1; 107 else if(strcmp(keyword, "print") == 0) 108 charset[CURLFNM_PRINT] = 1; 109 else if(strcmp(keyword, "graph") == 0) 110 charset[CURLFNM_GRAPH] = 1; 111 else if(strcmp(keyword, "space") == 0) 112 charset[CURLFNM_SPACE] = 1; 113 else if(strcmp(keyword, "blank") == 0) 114 charset[CURLFNM_BLANK] = 1; 115 else if(strcmp(keyword, "upper") == 0) 116 charset[CURLFNM_UPPER] = 1; 117 else if(strcmp(keyword, "lower") == 0) 118 charset[CURLFNM_LOWER] = 1; 119 else 120 return SETCHARSET_FAIL; 121 return SETCHARSET_OK; 122 } 123 124 /* returns 1 (true) if pattern is OK, 0 if is bad ("p" is pattern pointer) */ 125 static int setcharset(unsigned char **p, unsigned char *charset) 126 { 127 setcharset_state state = CURLFNM_SCHS_DEFAULT; 128 unsigned char rangestart = 0; 129 unsigned char lastchar = 0; 130 bool something_found = FALSE; 131 unsigned char c; 132 for(;;) { 133 c = **p; 134 switch(state) { 135 case CURLFNM_SCHS_DEFAULT: 136 if(ISALNUM(c)) { /* ASCII value */ 137 rangestart = c; 138 charset[c] = 1; 139 (*p)++; 140 state = CURLFNM_SCHS_MAYRANGE; 141 something_found = TRUE; 142 } 143 else if(c == ']') { 144 if(something_found) 145 return SETCHARSET_OK; 146 else 147 something_found = TRUE; 148 state = CURLFNM_SCHS_RIGHTBR; 149 charset[c] = 1; 150 (*p)++; 151 } 152 else if(c == '[') { 153 char c2 = *((*p)+1); 154 if(c2 == ':') { /* there has to be a keyword */ 155 (*p) += 2; 156 if(parsekeyword(p, charset)) { 157 state = CURLFNM_SCHS_DEFAULT; 158 } 159 else 160 return SETCHARSET_FAIL; 161 } 162 else { 163 charset[c] = 1; 164 (*p)++; 165 } 166 something_found = TRUE; 167 } 168 else if(c == '?' || c == '*') { 169 something_found = TRUE; 170 charset[c] = 1; 171 (*p)++; 172 } 173 else if(c == '^' || c == '!') { 174 if(!something_found) { 175 if(charset[CURLFNM_NEGATE]) { 176 charset[c] = 1; 177 something_found = TRUE; 178 } 179 else 180 charset[CURLFNM_NEGATE] = 1; /* negate charset */ 181 } 182 else 183 charset[c] = 1; 184 (*p)++; 185 } 186 else if(c == '\\') { 187 c = *(++(*p)); 188 if(ISPRINT((c))) { 189 something_found = TRUE; 190 state = CURLFNM_SCHS_MAYRANGE; 191 charset[c] = 1; 192 rangestart = c; 193 (*p)++; 194 } 195 else 196 return SETCHARSET_FAIL; 197 } 198 else if(c == '\0') { 199 return SETCHARSET_FAIL; 200 } 201 else { 202 charset[c] = 1; 203 (*p)++; 204 something_found = TRUE; 205 } 206 break; 207 case CURLFNM_SCHS_MAYRANGE: 208 if(c == '-') { 209 charset[c] = 1; 210 (*p)++; 211 lastchar = '-'; 212 state = CURLFNM_SCHS_MAYRANGE2; 213 } 214 else if(c == '[') { 215 state = CURLFNM_SCHS_DEFAULT; 216 } 217 else if(ISALNUM(c)) { 218 charset[c] = 1; 219 (*p)++; 220 } 221 else if(c == '\\') { 222 c = *(++(*p)); 223 if(ISPRINT(c)) { 224 charset[c] = 1; 225 (*p)++; 226 } 227 else 228 return SETCHARSET_FAIL; 229 } 230 else if(c == ']') { 231 return SETCHARSET_OK; 232 } 233 else 234 return SETCHARSET_FAIL; 235 break; 236 case CURLFNM_SCHS_MAYRANGE2: 237 if(c == '\\') { 238 c = *(++(*p)); 239 if(!ISPRINT(c)) 240 return SETCHARSET_FAIL; 241 } 242 if(c == ']') { 243 return SETCHARSET_OK; 244 } 245 else if(c == '\\') { 246 c = *(++(*p)); 247 if(ISPRINT(c)) { 248 charset[c] = 1; 249 state = CURLFNM_SCHS_DEFAULT; 250 (*p)++; 251 } 252 else 253 return SETCHARSET_FAIL; 254 } 255 if(c >= rangestart) { 256 if((ISLOWER(c) && ISLOWER(rangestart)) || 257 (ISDIGIT(c) && ISDIGIT(rangestart)) || 258 (ISUPPER(c) && ISUPPER(rangestart))) { 259 charset[lastchar] = 0; 260 rangestart++; 261 while(rangestart++ <= c) 262 charset[rangestart-1] = 1; 263 (*p)++; 264 state = CURLFNM_SCHS_DEFAULT; 265 } 266 else 267 return SETCHARSET_FAIL; 268 } 269 break; 270 case CURLFNM_SCHS_RIGHTBR: 271 if(c == '[') { 272 state = CURLFNM_SCHS_RIGHTBRLEFTBR; 273 charset[c] = 1; 274 (*p)++; 275 } 276 else if(c == ']') { 277 return SETCHARSET_OK; 278 } 279 else if(c == '\0') { 280 return SETCHARSET_FAIL; 281 } 282 else if(ISPRINT(c)) { 283 charset[c] = 1; 284 (*p)++; 285 state = CURLFNM_SCHS_DEFAULT; 286 } 287 else 288 /* used 'goto fail' instead of 'return SETCHARSET_FAIL' to avoid a 289 * nonsense warning 'statement not reached' at end of the fnc when 290 * compiling on Solaris */ 291 goto fail; 292 break; 293 case CURLFNM_SCHS_RIGHTBRLEFTBR: 294 if(c == ']') { 295 return SETCHARSET_OK; 296 } 297 else { 298 state = CURLFNM_SCHS_DEFAULT; 299 charset[c] = 1; 300 (*p)++; 301 } 302 break; 303 } 304 } 305 fail: 306 return SETCHARSET_FAIL; 307 } 308 309 static int loop(const unsigned char *pattern, const unsigned char *string) 310 { 311 loop_state state = CURLFNM_LOOP_DEFAULT; 312 unsigned char *p = (unsigned char *)pattern; 313 unsigned char *s = (unsigned char *)string; 314 unsigned char charset[CURLFNM_CHSET_SIZE] = { 0 }; 315 int rc = 0; 316 317 for(;;) { 318 switch(state) { 319 case CURLFNM_LOOP_DEFAULT: 320 if(*p == '*') { 321 while(*(p+1) == '*') /* eliminate multiple stars */ 322 p++; 323 if(*s == '\0' && *(p+1) == '\0') 324 return CURL_FNMATCH_MATCH; 325 rc = loop(p + 1, s); /* *.txt matches .txt <=> .txt matches .txt */ 326 if(rc == CURL_FNMATCH_MATCH) 327 return CURL_FNMATCH_MATCH; 328 if(*s) /* let the star eat up one character */ 329 s++; 330 else 331 return CURL_FNMATCH_NOMATCH; 332 } 333 else if(*p == '?') { 334 if(ISPRINT(*s)) { 335 s++; 336 p++; 337 } 338 else if(*s == '\0') 339 return CURL_FNMATCH_NOMATCH; 340 else 341 return CURL_FNMATCH_FAIL; /* cannot deal with other character */ 342 } 343 else if(*p == '\0') { 344 if(*s == '\0') 345 return CURL_FNMATCH_MATCH; 346 else 347 return CURL_FNMATCH_NOMATCH; 348 } 349 else if(*p == '\\') { 350 state = CURLFNM_LOOP_BACKSLASH; 351 p++; 352 } 353 else if(*p == '[') { 354 unsigned char *pp = p+1; /* cannot handle with pointer to register */ 355 if(setcharset(&pp, charset)) { 356 int found = FALSE; 357 if(charset[(unsigned int)*s]) 358 found = TRUE; 359 else if(charset[CURLFNM_ALNUM]) 360 found = ISALNUM(*s); 361 else if(charset[CURLFNM_ALPHA]) 362 found = ISALPHA(*s); 363 else if(charset[CURLFNM_DIGIT]) 364 found = ISDIGIT(*s); 365 else if(charset[CURLFNM_XDIGIT]) 366 found = ISXDIGIT(*s); 367 else if(charset[CURLFNM_PRINT]) 368 found = ISPRINT(*s); 369 else if(charset[CURLFNM_SPACE]) 370 found = ISSPACE(*s); 371 else if(charset[CURLFNM_UPPER]) 372 found = ISUPPER(*s); 373 else if(charset[CURLFNM_LOWER]) 374 found = ISLOWER(*s); 375 else if(charset[CURLFNM_BLANK]) 376 found = ISBLANK(*s); 377 else if(charset[CURLFNM_GRAPH]) 378 found = ISGRAPH(*s); 379 380 if(charset[CURLFNM_NEGATE]) 381 found = !found; 382 383 if(found) { 384 p = pp+1; 385 s++; 386 memset(charset, 0, CURLFNM_CHSET_SIZE); 387 } 388 else 389 return CURL_FNMATCH_NOMATCH; 390 } 391 else 392 return CURL_FNMATCH_FAIL; 393 } 394 else { 395 if(*p++ != *s++) 396 return CURL_FNMATCH_NOMATCH; 397 } 398 break; 399 case CURLFNM_LOOP_BACKSLASH: 400 if(ISPRINT(*p)) { 401 if(*p++ == *s++) 402 state = CURLFNM_LOOP_DEFAULT; 403 else 404 return CURL_FNMATCH_NOMATCH; 405 } 406 else 407 return CURL_FNMATCH_FAIL; 408 break; 409 } 410 } 411 } 412 413 /* 414 * @unittest: 1307 415 */ 416 int Curl_fnmatch(void *ptr, const char *pattern, const char *string) 417 { 418 (void)ptr; /* the argument is specified by the curl_fnmatch_callback 419 prototype, but not used by Curl_fnmatch() */ 420 if(!pattern || !string) { 421 return CURL_FNMATCH_FAIL; 422 } 423 return loop((unsigned char *)pattern, (unsigned char *)string); 424 } 425