1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel (at) haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 #include "tool_setup.h" 23 24 #define ENABLE_CURLX_PRINTF 25 /* use our own printf() functions */ 26 #include "curlx.h" 27 #include "tool_cfgable.h" 28 #include "tool_doswin.h" 29 #include "tool_urlglob.h" 30 #include "tool_vms.h" 31 32 #include "memdebug.h" /* keep this as LAST include */ 33 34 #define GLOBERROR(string, column, code) \ 35 glob->error = string, glob->pos = column, code 36 37 void glob_cleanup(URLGlob* glob); 38 39 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len) 40 { 41 URLPattern *pat = &glob->pattern[glob->size]; 42 pat->type = UPTSet; 43 pat->content.Set.size = 1; 44 pat->content.Set.ptr_s = 0; 45 pat->globindex = -1; 46 47 pat->content.Set.elements = malloc(sizeof(char*)); 48 49 if(!pat->content.Set.elements) 50 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 51 52 pat->content.Set.elements[0] = malloc(len+1); 53 if(!pat->content.Set.elements[0]) 54 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 55 56 memcpy(pat->content.Set.elements[0], fixed, len); 57 pat->content.Set.elements[0][len] = 0; 58 59 return CURLE_OK; 60 } 61 62 /* multiply 63 * 64 * Multiplies and checks for overflow. 65 */ 66 static int multiply(unsigned long *amount, long with) 67 { 68 unsigned long sum = *amount * with; 69 if(sum/with != *amount) 70 return 1; /* didn't fit, bail out */ 71 *amount = sum; 72 return 0; 73 } 74 75 static CURLcode glob_set(URLGlob *glob, char **patternp, 76 size_t *posp, unsigned long *amount, 77 int globindex) 78 { 79 /* processes a set expression with the point behind the opening '{' 80 ','-separated elements are collected until the next closing '}' 81 */ 82 URLPattern *pat; 83 bool done = FALSE; 84 char *buf = glob->glob_buffer; 85 char *pattern = *patternp; 86 char *opattern = pattern; 87 size_t opos = *posp-1; 88 89 pat = &glob->pattern[glob->size]; 90 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 91 pat->type = UPTSet; 92 pat->content.Set.size = 0; 93 pat->content.Set.ptr_s = 0; 94 pat->content.Set.elements = NULL; 95 pat->globindex = globindex; 96 97 while(!done) { 98 switch (*pattern) { 99 case '\0': /* URL ended while set was still open */ 100 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); 101 102 case '{': 103 case '[': /* no nested expressions at this time */ 104 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); 105 106 case '}': /* set element completed */ 107 if(opattern == pattern) 108 return GLOBERROR("empty string within braces", *posp, 109 CURLE_URL_MALFORMAT); 110 111 /* add 1 to size since it'll be incremented below */ 112 if(multiply(amount, pat->content.Set.size+1)) 113 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); 114 115 /* fall-through */ 116 case ',': 117 118 *buf = '\0'; 119 if(pat->content.Set.elements) { 120 char **new_arr = realloc(pat->content.Set.elements, 121 (pat->content.Set.size + 1) * sizeof(char*)); 122 if(!new_arr) 123 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 124 125 pat->content.Set.elements = new_arr; 126 } 127 else 128 pat->content.Set.elements = malloc(sizeof(char*)); 129 130 if(!pat->content.Set.elements) 131 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 132 133 pat->content.Set.elements[pat->content.Set.size] = 134 strdup(glob->glob_buffer); 135 if(!pat->content.Set.elements[pat->content.Set.size]) 136 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 137 ++pat->content.Set.size; 138 139 if(*pattern == '}') { 140 pattern++; /* pass the closing brace */ 141 done = TRUE; 142 continue; 143 } 144 145 buf = glob->glob_buffer; 146 ++pattern; 147 ++(*posp); 148 break; 149 150 case ']': /* illegal closing bracket */ 151 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); 152 153 case '\\': /* escaped character, skip '\' */ 154 if(pattern[1]) { 155 ++pattern; 156 ++(*posp); 157 } 158 /* intentional fallthrough */ 159 default: 160 *buf++ = *pattern++; /* copy character to set element */ 161 ++(*posp); 162 } 163 } 164 165 *patternp = pattern; /* return with the new position */ 166 return CURLE_OK; 167 } 168 169 static CURLcode glob_range(URLGlob *glob, char **patternp, 170 size_t *posp, unsigned long *amount, 171 int globindex) 172 { 173 /* processes a range expression with the point behind the opening '[' 174 - char range: e.g. "a-z]", "B-Q]" 175 - num range: e.g. "0-9]", "17-2000]" 176 - num range with leading zeros: e.g. "001-999]" 177 expression is checked for well-formedness and collected until the next ']' 178 */ 179 URLPattern *pat; 180 int rc; 181 char *pattern = *patternp; 182 char *c; 183 184 pat = &glob->pattern[glob->size]; 185 pat->globindex = globindex; 186 187 if(ISALPHA(*pattern)) { 188 /* character range detected */ 189 char min_c; 190 char max_c; 191 int step=1; 192 193 pat->type = UPTCharRange; 194 195 rc = sscanf(pattern, "%c-%c", &min_c, &max_c); 196 197 if((rc == 2) && (pattern[3] == ':')) { 198 char *endp; 199 unsigned long lstep; 200 errno = 0; 201 lstep = strtoul(&pattern[4], &endp, 10); 202 if(errno || (*endp != ']')) 203 step = -1; 204 else { 205 pattern = endp+1; 206 step = (int)lstep; 207 if(step > (max_c - min_c)) 208 step = -1; 209 } 210 } 211 else 212 pattern += 4; 213 214 *posp += (pattern - *patternp); 215 216 if((rc != 2) || (min_c >= max_c) || ((max_c - min_c) > ('z' - 'a')) || 217 (step <= 0) ) 218 /* the pattern is not well-formed */ 219 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 220 221 /* if there was a ":[num]" thing, use that as step or else use 1 */ 222 pat->content.CharRange.step = step; 223 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 224 pat->content.CharRange.max_c = max_c; 225 226 if(multiply(amount, (pat->content.CharRange.max_c - 227 pat->content.CharRange.min_c) / 228 pat->content.CharRange.step + 1) ) 229 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 230 } 231 else if(ISDIGIT(*pattern)) { 232 /* numeric range detected */ 233 unsigned long min_n; 234 unsigned long max_n = 0; 235 unsigned long step_n = 0; 236 char *endp; 237 238 pat->type = UPTNumRange; 239 pat->content.NumRange.padlength = 0; 240 241 if(*pattern == '0') { 242 /* leading zero specified, count them! */ 243 c = pattern; 244 while(ISDIGIT(*c)) { 245 c++; 246 ++pat->content.NumRange.padlength; /* padding length is set for all 247 instances of this pattern */ 248 } 249 } 250 251 errno = 0; 252 min_n = strtoul(pattern, &endp, 10); 253 if(errno || (endp == pattern)) 254 endp=NULL; 255 else { 256 if(*endp != '-') 257 endp = NULL; 258 else { 259 pattern = endp+1; 260 errno = 0; 261 max_n = strtoul(pattern, &endp, 10); 262 if(errno || (*endp == ':')) { 263 pattern = endp+1; 264 errno = 0; 265 step_n = strtoul(pattern, &endp, 10); 266 if(errno) 267 /* over/underflow situation */ 268 endp = NULL; 269 } 270 else 271 step_n = 1; 272 if(endp && (*endp == ']')) { 273 pattern= endp+1; 274 } 275 else 276 endp = NULL; 277 } 278 } 279 280 *posp += (pattern - *patternp); 281 282 if(!endp || (min_n > max_n) || (step_n > (max_n - min_n)) || !step_n) 283 /* the pattern is not well-formed */ 284 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 285 286 /* typecasting to ints are fine here since we make sure above that we 287 are within 31 bits */ 288 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 289 pat->content.NumRange.max_n = max_n; 290 pat->content.NumRange.step = step_n; 291 292 if(multiply(amount, (pat->content.NumRange.max_n - 293 pat->content.NumRange.min_n) / 294 pat->content.NumRange.step + 1) ) 295 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 296 } 297 else 298 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); 299 300 *patternp = pattern; 301 return CURLE_OK; 302 } 303 304 static bool peek_ipv6(const char *str, size_t *skip) 305 { 306 /* 307 * Scan for a potential IPv6 literal. 308 * - Valid globs contain a hyphen and <= 1 colon. 309 * - IPv6 literals contain no hyphens and >= 2 colons. 310 */ 311 size_t i = 0; 312 size_t colons = 0; 313 if(str[i++] != '[') { 314 return FALSE; 315 } 316 for(;;) { 317 const char c = str[i++]; 318 if(ISALNUM(c) || c == '.' || c == '%') { 319 /* ok */ 320 } 321 else if(c == ':') { 322 colons++; 323 } 324 else if(c == ']') { 325 *skip = i; 326 return colons >= 2 ? TRUE : FALSE; 327 } 328 else { 329 return FALSE; 330 } 331 } 332 } 333 334 static CURLcode glob_parse(URLGlob *glob, char *pattern, 335 size_t pos, unsigned long *amount) 336 { 337 /* processes a literal string component of a URL 338 special characters '{' and '[' branch to set/range processing functions 339 */ 340 CURLcode res = CURLE_OK; 341 int globindex = 0; /* count "actual" globs */ 342 343 *amount = 1; 344 345 while(*pattern && !res) { 346 char *buf = glob->glob_buffer; 347 size_t sublen = 0; 348 while(*pattern && *pattern != '{') { 349 if(*pattern == '[') { 350 /* Skip over potential IPv6 literals. */ 351 size_t skip; 352 if(peek_ipv6(pattern, &skip)) { 353 memcpy(buf, pattern, skip); 354 buf += skip; 355 pattern += skip; 356 sublen += skip; 357 continue; 358 } 359 break; 360 } 361 if(*pattern == '}' || *pattern == ']') 362 return GLOBERROR("unmatched close brace/bracket", pos, 363 CURLE_URL_MALFORMAT); 364 365 /* only allow \ to escape known "special letters" */ 366 if(*pattern == '\\' && 367 (*(pattern+1) == '{' || *(pattern+1) == '[' || 368 *(pattern+1) == '}' || *(pattern+1) == ']') ) { 369 370 /* escape character, skip '\' */ 371 ++pattern; 372 ++pos; 373 } 374 *buf++ = *pattern++; /* copy character to literal */ 375 ++pos; 376 sublen++; 377 } 378 if(sublen) { 379 /* we got a literal string, add it as a single-item list */ 380 *buf = '\0'; 381 res = glob_fixed(glob, glob->glob_buffer, sublen); 382 } 383 else { 384 switch (*pattern) { 385 case '\0': /* done */ 386 break; 387 388 case '{': 389 /* process set pattern */ 390 pattern++; 391 pos++; 392 res = glob_set(glob, &pattern, &pos, amount, globindex++); 393 break; 394 395 case '[': 396 /* process range pattern */ 397 pattern++; 398 pos++; 399 res = glob_range(glob, &pattern, &pos, amount, globindex++); 400 break; 401 } 402 } 403 404 if(++glob->size >= GLOB_PATTERN_NUM) 405 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); 406 } 407 return res; 408 } 409 410 CURLcode glob_url(URLGlob** glob, char* url, unsigned long *urlnum, 411 FILE *error) 412 { 413 /* 414 * We can deal with any-size, just make a buffer with the same length 415 * as the specified URL! 416 */ 417 URLGlob *glob_expand; 418 unsigned long amount = 0; 419 char *glob_buffer; 420 CURLcode res; 421 422 *glob = NULL; 423 424 glob_buffer = malloc(strlen(url) + 1); 425 if(!glob_buffer) 426 return CURLE_OUT_OF_MEMORY; 427 428 glob_expand = calloc(1, sizeof(URLGlob)); 429 if(!glob_expand) { 430 Curl_safefree(glob_buffer); 431 return CURLE_OUT_OF_MEMORY; 432 } 433 glob_expand->urllen = strlen(url); 434 glob_expand->glob_buffer = glob_buffer; 435 436 res = glob_parse(glob_expand, url, 1, &amount); 437 if(!res) 438 *urlnum = amount; 439 else { 440 if(error && glob_expand->error) { 441 char text[128]; 442 const char *t; 443 if(glob_expand->pos) { 444 snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error, 445 glob_expand->pos); 446 t = text; 447 } 448 else 449 t = glob_expand->error; 450 451 /* send error description to the error-stream */ 452 fprintf(error, "curl: (%d) [globbing] %s\n", res, t); 453 } 454 /* it failed, we cleanup */ 455 glob_cleanup(glob_expand); 456 *urlnum = 1; 457 return res; 458 } 459 460 *glob = glob_expand; 461 return CURLE_OK; 462 } 463 464 void glob_cleanup(URLGlob* glob) 465 { 466 size_t i; 467 int elem; 468 469 for(i = 0; i < glob->size; i++) { 470 if((glob->pattern[i].type == UPTSet) && 471 (glob->pattern[i].content.Set.elements)) { 472 for(elem = glob->pattern[i].content.Set.size - 1; 473 elem >= 0; 474 --elem) { 475 Curl_safefree(glob->pattern[i].content.Set.elements[elem]); 476 } 477 Curl_safefree(glob->pattern[i].content.Set.elements); 478 } 479 } 480 Curl_safefree(glob->glob_buffer); 481 Curl_safefree(glob); 482 } 483 484 CURLcode glob_next_url(char **globbed, URLGlob *glob) 485 { 486 URLPattern *pat; 487 size_t i; 488 size_t len; 489 size_t buflen = glob->urllen + 1; 490 char *buf = glob->glob_buffer; 491 492 *globbed = NULL; 493 494 if(!glob->beenhere) 495 glob->beenhere = 1; 496 else { 497 bool carry = TRUE; 498 499 /* implement a counter over the index ranges of all patterns, starting 500 with the rightmost pattern */ 501 for(i = 0; carry && (i < glob->size); i++) { 502 carry = FALSE; 503 pat = &glob->pattern[glob->size - 1 - i]; 504 switch (pat->type) { 505 case UPTSet: 506 if((pat->content.Set.elements) && 507 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 508 pat->content.Set.ptr_s = 0; 509 carry = TRUE; 510 } 511 break; 512 case UPTCharRange: 513 pat->content.CharRange.ptr_c = 514 (char)(pat->content.CharRange.step + 515 (int)((unsigned char)pat->content.CharRange.ptr_c)); 516 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 517 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 518 carry = TRUE; 519 } 520 break; 521 case UPTNumRange: 522 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 523 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 524 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 525 carry = TRUE; 526 } 527 break; 528 default: 529 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 530 return CURLE_FAILED_INIT; 531 } 532 } 533 if(carry) { /* first pattern ptr has run into overflow, done! */ 534 /* TODO: verify if this should actally return CURLE_OK. */ 535 return CURLE_OK; /* CURLE_OK to match previous behavior */ 536 } 537 } 538 539 for(i = 0; i < glob->size; ++i) { 540 pat = &glob->pattern[i]; 541 switch(pat->type) { 542 case UPTSet: 543 if(pat->content.Set.elements) { 544 len = strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 545 snprintf(buf, buflen, "%s", 546 pat->content.Set.elements[pat->content.Set.ptr_s]); 547 buf += len; 548 buflen -= len; 549 } 550 break; 551 case UPTCharRange: 552 *buf++ = pat->content.CharRange.ptr_c; 553 break; 554 case UPTNumRange: 555 len = snprintf(buf, buflen, "%0*ld", 556 pat->content.NumRange.padlength, 557 pat->content.NumRange.ptr_n); 558 buf += len; 559 buflen -= len; 560 break; 561 default: 562 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 563 return CURLE_FAILED_INIT; 564 } 565 } 566 *buf = '\0'; 567 568 *globbed = strdup(glob->glob_buffer); 569 if(!*globbed) 570 return CURLE_OUT_OF_MEMORY; 571 572 return CURLE_OK; 573 } 574 575 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob) 576 { 577 char *target; 578 size_t allocsize; 579 char numbuf[18]; 580 char *appendthis = NULL; 581 size_t appendlen = 0; 582 size_t stringlen = 0; 583 584 *result = NULL; 585 586 /* We cannot use the glob_buffer for storage here since the filename may 587 * be longer than the URL we use. We allocate a good start size, then 588 * we need to realloc in case of need. 589 */ 590 allocsize = strlen(filename) + 1; /* make it at least one byte to store the 591 trailing zero */ 592 target = malloc(allocsize); 593 if(!target) 594 return CURLE_OUT_OF_MEMORY; 595 596 while(*filename) { 597 if(*filename == '#' && ISDIGIT(filename[1])) { 598 unsigned long i; 599 char *ptr = filename; 600 unsigned long num = strtoul(&filename[1], &filename, 10); 601 URLPattern *pat =NULL; 602 603 if(num < glob->size) { 604 num--; /* make it zero based */ 605 /* find the correct glob entry */ 606 for(i=0; i<glob->size; i++) { 607 if(glob->pattern[i].globindex == (int)num) { 608 pat = &glob->pattern[i]; 609 break; 610 } 611 } 612 } 613 614 if(pat) { 615 switch (pat->type) { 616 case UPTSet: 617 if(pat->content.Set.elements) { 618 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; 619 appendlen = 620 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 621 } 622 break; 623 case UPTCharRange: 624 numbuf[0] = pat->content.CharRange.ptr_c; 625 numbuf[1] = 0; 626 appendthis = numbuf; 627 appendlen = 1; 628 break; 629 case UPTNumRange: 630 snprintf(numbuf, sizeof(numbuf), "%0*d", 631 pat->content.NumRange.padlength, 632 pat->content.NumRange.ptr_n); 633 appendthis = numbuf; 634 appendlen = strlen(numbuf); 635 break; 636 default: 637 fprintf(stderr, "internal error: invalid pattern type (%d)\n", 638 (int)pat->type); 639 Curl_safefree(target); 640 return CURLE_FAILED_INIT; 641 } 642 } 643 else { 644 /* #[num] out of range, use the #[num] in the output */ 645 filename = ptr; 646 appendthis = filename++; 647 appendlen = 1; 648 } 649 } 650 else { 651 appendthis = filename++; 652 appendlen = 1; 653 } 654 if(appendlen + stringlen >= allocsize) { 655 char *newstr; 656 /* we append a single byte to allow for the trailing byte to be appended 657 at the end of this function outside the while() loop */ 658 allocsize = (appendlen + stringlen) * 2; 659 newstr = realloc(target, allocsize + 1); 660 if(!newstr) { 661 Curl_safefree(target); 662 return CURLE_OUT_OF_MEMORY; 663 } 664 target = newstr; 665 } 666 memcpy(&target[stringlen], appendthis, appendlen); 667 stringlen += appendlen; 668 } 669 target[stringlen]= '\0'; 670 671 #if defined(MSDOS) || defined(WIN32) 672 { 673 char *sanitized; 674 SANITIZEcode sc = sanitize_file_name(&sanitized, target, 675 (SANITIZE_ALLOW_PATH | 676 SANITIZE_ALLOW_RESERVED)); 677 Curl_safefree(target); 678 if(sc) 679 return CURLE_URL_MALFORMAT; 680 target = sanitized; 681 } 682 #endif /* MSDOS || WIN32 */ 683 684 *result = target; 685 return CURLE_OK; 686 } 687