1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel (at) haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 #include "tool_setup.h" 23 24 #define ENABLE_CURLX_PRINTF 25 /* use our own printf() functions */ 26 #include "curlx.h" 27 #include "tool_cfgable.h" 28 #include "tool_doswin.h" 29 #include "tool_urlglob.h" 30 #include "tool_vms.h" 31 32 #include "memdebug.h" /* keep this as LAST include */ 33 34 #define GLOBERROR(string, column, code) \ 35 glob->error = string, glob->pos = column, code 36 37 void glob_cleanup(URLGlob* glob); 38 39 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len) 40 { 41 URLPattern *pat = &glob->pattern[glob->size]; 42 pat->type = UPTSet; 43 pat->content.Set.size = 1; 44 pat->content.Set.ptr_s = 0; 45 pat->globindex = -1; 46 47 pat->content.Set.elements = malloc(sizeof(char *)); 48 49 if(!pat->content.Set.elements) 50 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 51 52 pat->content.Set.elements[0] = malloc(len + 1); 53 if(!pat->content.Set.elements[0]) 54 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 55 56 memcpy(pat->content.Set.elements[0], fixed, len); 57 pat->content.Set.elements[0][len] = 0; 58 59 return CURLE_OK; 60 } 61 62 /* multiply 63 * 64 * Multiplies and checks for overflow. 65 */ 66 static int multiply(unsigned long *amount, long with) 67 { 68 unsigned long sum = *amount * with; 69 if(!with) { 70 *amount = 0; 71 return 0; 72 } 73 if(sum/with != *amount) 74 return 1; /* didn't fit, bail out */ 75 *amount = sum; 76 return 0; 77 } 78 79 static CURLcode glob_set(URLGlob *glob, char **patternp, 80 size_t *posp, unsigned long *amount, 81 int globindex) 82 { 83 /* processes a set expression with the point behind the opening '{' 84 ','-separated elements are collected until the next closing '}' 85 */ 86 URLPattern *pat; 87 bool done = FALSE; 88 char *buf = glob->glob_buffer; 89 char *pattern = *patternp; 90 char *opattern = pattern; 91 size_t opos = *posp-1; 92 93 pat = &glob->pattern[glob->size]; 94 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 95 pat->type = UPTSet; 96 pat->content.Set.size = 0; 97 pat->content.Set.ptr_s = 0; 98 pat->content.Set.elements = NULL; 99 pat->globindex = globindex; 100 101 while(!done) { 102 switch (*pattern) { 103 case '\0': /* URL ended while set was still open */ 104 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); 105 106 case '{': 107 case '[': /* no nested expressions at this time */ 108 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); 109 110 case '}': /* set element completed */ 111 if(opattern == pattern) 112 return GLOBERROR("empty string within braces", *posp, 113 CURLE_URL_MALFORMAT); 114 115 /* add 1 to size since it'll be incremented below */ 116 if(multiply(amount, pat->content.Set.size + 1)) 117 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); 118 119 /* fall-through */ 120 case ',': 121 122 *buf = '\0'; 123 if(pat->content.Set.elements) { 124 char **new_arr = realloc(pat->content.Set.elements, 125 (pat->content.Set.size + 1) * sizeof(char *)); 126 if(!new_arr) 127 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 128 129 pat->content.Set.elements = new_arr; 130 } 131 else 132 pat->content.Set.elements = malloc(sizeof(char *)); 133 134 if(!pat->content.Set.elements) 135 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 136 137 pat->content.Set.elements[pat->content.Set.size] = 138 strdup(glob->glob_buffer); 139 if(!pat->content.Set.elements[pat->content.Set.size]) 140 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 141 ++pat->content.Set.size; 142 143 if(*pattern == '}') { 144 pattern++; /* pass the closing brace */ 145 done = TRUE; 146 continue; 147 } 148 149 buf = glob->glob_buffer; 150 ++pattern; 151 ++(*posp); 152 break; 153 154 case ']': /* illegal closing bracket */ 155 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); 156 157 case '\\': /* escaped character, skip '\' */ 158 if(pattern[1]) { 159 ++pattern; 160 ++(*posp); 161 } 162 /* intentional fallthrough */ 163 default: 164 *buf++ = *pattern++; /* copy character to set element */ 165 ++(*posp); 166 } 167 } 168 169 *patternp = pattern; /* return with the new position */ 170 return CURLE_OK; 171 } 172 173 static CURLcode glob_range(URLGlob *glob, char **patternp, 174 size_t *posp, unsigned long *amount, 175 int globindex) 176 { 177 /* processes a range expression with the point behind the opening '[' 178 - char range: e.g. "a-z]", "B-Q]" 179 - num range: e.g. "0-9]", "17-2000]" 180 - num range with leading zeros: e.g. "001-999]" 181 expression is checked for well-formedness and collected until the next ']' 182 */ 183 URLPattern *pat; 184 int rc; 185 char *pattern = *patternp; 186 char *c; 187 188 pat = &glob->pattern[glob->size]; 189 pat->globindex = globindex; 190 191 if(ISALPHA(*pattern)) { 192 /* character range detected */ 193 char min_c; 194 char max_c; 195 char end_c; 196 unsigned long step = 1; 197 198 pat->type = UPTCharRange; 199 200 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c); 201 202 if(rc == 3) { 203 if(end_c == ':') { 204 char *endp; 205 errno = 0; 206 step = strtoul(&pattern[4], &endp, 10); 207 if(errno || &pattern[4] == endp || *endp != ']') 208 step = 0; 209 else 210 pattern = endp + 1; 211 } 212 else if(end_c != ']') 213 /* then this is wrong */ 214 rc = 0; 215 else 216 /* end_c == ']' */ 217 pattern += 4; 218 } 219 220 *posp += (pattern - *patternp); 221 222 if(rc != 3 || !step || step > (unsigned)INT_MAX || 223 (min_c == max_c && step != 1) || 224 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) || 225 (max_c - min_c) > ('z' - 'a')))) 226 /* the pattern is not well-formed */ 227 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 228 229 /* if there was a ":[num]" thing, use that as step or else use 1 */ 230 pat->content.CharRange.step = (int)step; 231 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 232 pat->content.CharRange.max_c = max_c; 233 234 if(multiply(amount, ((pat->content.CharRange.max_c - 235 pat->content.CharRange.min_c) / 236 pat->content.CharRange.step + 1))) 237 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 238 } 239 else if(ISDIGIT(*pattern)) { 240 /* numeric range detected */ 241 unsigned long min_n; 242 unsigned long max_n = 0; 243 unsigned long step_n = 0; 244 char *endp; 245 246 pat->type = UPTNumRange; 247 pat->content.NumRange.padlength = 0; 248 249 if(*pattern == '0') { 250 /* leading zero specified, count them! */ 251 c = pattern; 252 while(ISDIGIT(*c)) { 253 c++; 254 ++pat->content.NumRange.padlength; /* padding length is set for all 255 instances of this pattern */ 256 } 257 } 258 259 errno = 0; 260 min_n = strtoul(pattern, &endp, 10); 261 if(errno || (endp == pattern)) 262 endp = NULL; 263 else { 264 if(*endp != '-') 265 endp = NULL; 266 else { 267 pattern = endp + 1; 268 while(*pattern && ISBLANK(*pattern)) 269 pattern++; 270 if(!ISDIGIT(*pattern)) { 271 endp = NULL; 272 goto fail; 273 } 274 errno = 0; 275 max_n = strtoul(pattern, &endp, 10); 276 if(errno) 277 /* overflow */ 278 endp = NULL; 279 else if(*endp == ':') { 280 pattern = endp + 1; 281 errno = 0; 282 step_n = strtoul(pattern, &endp, 10); 283 if(errno) 284 /* over/underflow situation */ 285 endp = NULL; 286 } 287 else 288 step_n = 1; 289 if(endp && (*endp == ']')) { 290 pattern = endp + 1; 291 } 292 else 293 endp = NULL; 294 } 295 } 296 297 fail: 298 *posp += (pattern - *patternp); 299 300 if(!endp || !step_n || 301 (min_n == max_n && step_n != 1) || 302 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n)))) 303 /* the pattern is not well-formed */ 304 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 305 306 /* typecasting to ints are fine here since we make sure above that we 307 are within 31 bits */ 308 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 309 pat->content.NumRange.max_n = max_n; 310 pat->content.NumRange.step = step_n; 311 312 if(multiply(amount, ((pat->content.NumRange.max_n - 313 pat->content.NumRange.min_n) / 314 pat->content.NumRange.step + 1))) 315 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 316 } 317 else 318 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); 319 320 *patternp = pattern; 321 return CURLE_OK; 322 } 323 324 static bool peek_ipv6(const char *str, size_t *skip) 325 { 326 /* 327 * Scan for a potential IPv6 literal. 328 * - Valid globs contain a hyphen and <= 1 colon. 329 * - IPv6 literals contain no hyphens and >= 2 colons. 330 */ 331 size_t i = 0; 332 size_t colons = 0; 333 if(str[i++] != '[') { 334 return FALSE; 335 } 336 for(;;) { 337 const char c = str[i++]; 338 if(ISALNUM(c) || c == '.' || c == '%') { 339 /* ok */ 340 } 341 else if(c == ':') { 342 colons++; 343 } 344 else if(c == ']') { 345 *skip = i; 346 return colons >= 2 ? TRUE : FALSE; 347 } 348 else { 349 return FALSE; 350 } 351 } 352 } 353 354 static CURLcode glob_parse(URLGlob *glob, char *pattern, 355 size_t pos, unsigned long *amount) 356 { 357 /* processes a literal string component of a URL 358 special characters '{' and '[' branch to set/range processing functions 359 */ 360 CURLcode res = CURLE_OK; 361 int globindex = 0; /* count "actual" globs */ 362 363 *amount = 1; 364 365 while(*pattern && !res) { 366 char *buf = glob->glob_buffer; 367 size_t sublen = 0; 368 while(*pattern && *pattern != '{') { 369 if(*pattern == '[') { 370 /* skip over IPv6 literals and [] */ 371 size_t skip = 0; 372 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']')) 373 skip = 2; 374 if(skip) { 375 memcpy(buf, pattern, skip); 376 buf += skip; 377 pattern += skip; 378 sublen += skip; 379 continue; 380 } 381 break; 382 } 383 if(*pattern == '}' || *pattern == ']') 384 return GLOBERROR("unmatched close brace/bracket", pos, 385 CURLE_URL_MALFORMAT); 386 387 /* only allow \ to escape known "special letters" */ 388 if(*pattern == '\\' && 389 (*(pattern + 1) == '{' || *(pattern + 1) == '[' || 390 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) { 391 392 /* escape character, skip '\' */ 393 ++pattern; 394 ++pos; 395 } 396 *buf++ = *pattern++; /* copy character to literal */ 397 ++pos; 398 sublen++; 399 } 400 if(sublen) { 401 /* we got a literal string, add it as a single-item list */ 402 *buf = '\0'; 403 res = glob_fixed(glob, glob->glob_buffer, sublen); 404 } 405 else { 406 switch (*pattern) { 407 case '\0': /* done */ 408 break; 409 410 case '{': 411 /* process set pattern */ 412 pattern++; 413 pos++; 414 res = glob_set(glob, &pattern, &pos, amount, globindex++); 415 break; 416 417 case '[': 418 /* process range pattern */ 419 pattern++; 420 pos++; 421 res = glob_range(glob, &pattern, &pos, amount, globindex++); 422 break; 423 } 424 } 425 426 if(++glob->size >= GLOB_PATTERN_NUM) 427 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); 428 } 429 return res; 430 } 431 432 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum, 433 FILE *error) 434 { 435 /* 436 * We can deal with any-size, just make a buffer with the same length 437 * as the specified URL! 438 */ 439 URLGlob *glob_expand; 440 unsigned long amount = 0; 441 char *glob_buffer; 442 CURLcode res; 443 444 *glob = NULL; 445 446 glob_buffer = malloc(strlen(url) + 1); 447 if(!glob_buffer) 448 return CURLE_OUT_OF_MEMORY; 449 glob_buffer[0] = 0; 450 451 glob_expand = calloc(1, sizeof(URLGlob)); 452 if(!glob_expand) { 453 Curl_safefree(glob_buffer); 454 return CURLE_OUT_OF_MEMORY; 455 } 456 glob_expand->urllen = strlen(url); 457 glob_expand->glob_buffer = glob_buffer; 458 459 res = glob_parse(glob_expand, url, 1, &amount); 460 if(!res) 461 *urlnum = amount; 462 else { 463 if(error && glob_expand->error) { 464 char text[128]; 465 const char *t; 466 if(glob_expand->pos) { 467 snprintf(text, sizeof(text), "%s in column %zu", glob_expand->error, 468 glob_expand->pos); 469 t = text; 470 } 471 else 472 t = glob_expand->error; 473 474 /* send error description to the error-stream */ 475 fprintf(error, "curl: (%d) [globbing] %s\n", res, t); 476 } 477 /* it failed, we cleanup */ 478 glob_cleanup(glob_expand); 479 *urlnum = 1; 480 return res; 481 } 482 483 *glob = glob_expand; 484 return CURLE_OK; 485 } 486 487 void glob_cleanup(URLGlob* glob) 488 { 489 size_t i; 490 int elem; 491 492 for(i = 0; i < glob->size; i++) { 493 if((glob->pattern[i].type == UPTSet) && 494 (glob->pattern[i].content.Set.elements)) { 495 for(elem = glob->pattern[i].content.Set.size - 1; 496 elem >= 0; 497 --elem) { 498 Curl_safefree(glob->pattern[i].content.Set.elements[elem]); 499 } 500 Curl_safefree(glob->pattern[i].content.Set.elements); 501 } 502 } 503 Curl_safefree(glob->glob_buffer); 504 Curl_safefree(glob); 505 } 506 507 CURLcode glob_next_url(char **globbed, URLGlob *glob) 508 { 509 URLPattern *pat; 510 size_t i; 511 size_t len; 512 size_t buflen = glob->urllen + 1; 513 char *buf = glob->glob_buffer; 514 515 *globbed = NULL; 516 517 if(!glob->beenhere) 518 glob->beenhere = 1; 519 else { 520 bool carry = TRUE; 521 522 /* implement a counter over the index ranges of all patterns, starting 523 with the rightmost pattern */ 524 for(i = 0; carry && (i < glob->size); i++) { 525 carry = FALSE; 526 pat = &glob->pattern[glob->size - 1 - i]; 527 switch(pat->type) { 528 case UPTSet: 529 if((pat->content.Set.elements) && 530 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 531 pat->content.Set.ptr_s = 0; 532 carry = TRUE; 533 } 534 break; 535 case UPTCharRange: 536 pat->content.CharRange.ptr_c = 537 (char)(pat->content.CharRange.step + 538 (int)((unsigned char)pat->content.CharRange.ptr_c)); 539 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 540 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 541 carry = TRUE; 542 } 543 break; 544 case UPTNumRange: 545 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 546 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 547 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 548 carry = TRUE; 549 } 550 break; 551 default: 552 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 553 return CURLE_FAILED_INIT; 554 } 555 } 556 if(carry) { /* first pattern ptr has run into overflow, done! */ 557 /* TODO: verify if this should actally return CURLE_OK. */ 558 return CURLE_OK; /* CURLE_OK to match previous behavior */ 559 } 560 } 561 562 for(i = 0; i < glob->size; ++i) { 563 pat = &glob->pattern[i]; 564 switch(pat->type) { 565 case UPTSet: 566 if(pat->content.Set.elements) { 567 snprintf(buf, buflen, "%s", 568 pat->content.Set.elements[pat->content.Set.ptr_s]); 569 len = strlen(buf); 570 buf += len; 571 buflen -= len; 572 } 573 break; 574 case UPTCharRange: 575 if(buflen) { 576 *buf++ = pat->content.CharRange.ptr_c; 577 *buf = '\0'; 578 buflen--; 579 } 580 break; 581 case UPTNumRange: 582 snprintf(buf, buflen, "%0*ld", 583 pat->content.NumRange.padlength, 584 pat->content.NumRange.ptr_n); 585 len = strlen(buf); 586 buf += len; 587 buflen -= len; 588 break; 589 default: 590 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 591 return CURLE_FAILED_INIT; 592 } 593 } 594 595 *globbed = strdup(glob->glob_buffer); 596 if(!*globbed) 597 return CURLE_OUT_OF_MEMORY; 598 599 return CURLE_OK; 600 } 601 602 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob) 603 { 604 char *target; 605 size_t allocsize; 606 char numbuf[18]; 607 char *appendthis = NULL; 608 size_t appendlen = 0; 609 size_t stringlen = 0; 610 611 *result = NULL; 612 613 /* We cannot use the glob_buffer for storage here since the filename may 614 * be longer than the URL we use. We allocate a good start size, then 615 * we need to realloc in case of need. 616 */ 617 allocsize = strlen(filename) + 1; /* make it at least one byte to store the 618 trailing zero */ 619 target = malloc(allocsize); 620 if(!target) 621 return CURLE_OUT_OF_MEMORY; 622 623 while(*filename) { 624 if(*filename == '#' && ISDIGIT(filename[1])) { 625 unsigned long i; 626 char *ptr = filename; 627 unsigned long num = strtoul(&filename[1], &filename, 10); 628 URLPattern *pat = NULL; 629 630 if(num < glob->size) { 631 num--; /* make it zero based */ 632 /* find the correct glob entry */ 633 for(i = 0; i<glob->size; i++) { 634 if(glob->pattern[i].globindex == (int)num) { 635 pat = &glob->pattern[i]; 636 break; 637 } 638 } 639 } 640 641 if(pat) { 642 switch(pat->type) { 643 case UPTSet: 644 if(pat->content.Set.elements) { 645 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; 646 appendlen = 647 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 648 } 649 break; 650 case UPTCharRange: 651 numbuf[0] = pat->content.CharRange.ptr_c; 652 numbuf[1] = 0; 653 appendthis = numbuf; 654 appendlen = 1; 655 break; 656 case UPTNumRange: 657 snprintf(numbuf, sizeof(numbuf), "%0*lu", 658 pat->content.NumRange.padlength, 659 pat->content.NumRange.ptr_n); 660 appendthis = numbuf; 661 appendlen = strlen(numbuf); 662 break; 663 default: 664 fprintf(stderr, "internal error: invalid pattern type (%d)\n", 665 (int)pat->type); 666 Curl_safefree(target); 667 return CURLE_FAILED_INIT; 668 } 669 } 670 else { 671 /* #[num] out of range, use the #[num] in the output */ 672 filename = ptr; 673 appendthis = filename++; 674 appendlen = 1; 675 } 676 } 677 else { 678 appendthis = filename++; 679 appendlen = 1; 680 } 681 if(appendlen + stringlen >= allocsize) { 682 char *newstr; 683 /* we append a single byte to allow for the trailing byte to be appended 684 at the end of this function outside the while() loop */ 685 allocsize = (appendlen + stringlen) * 2; 686 newstr = realloc(target, allocsize + 1); 687 if(!newstr) { 688 Curl_safefree(target); 689 return CURLE_OUT_OF_MEMORY; 690 } 691 target = newstr; 692 } 693 memcpy(&target[stringlen], appendthis, appendlen); 694 stringlen += appendlen; 695 } 696 target[stringlen]= '\0'; 697 698 #if defined(MSDOS) || defined(WIN32) 699 { 700 char *sanitized; 701 SANITIZEcode sc = sanitize_file_name(&sanitized, target, 702 (SANITIZE_ALLOW_PATH | 703 SANITIZE_ALLOW_RESERVED)); 704 Curl_safefree(target); 705 if(sc) 706 return CURLE_URL_MALFORMAT; 707 target = sanitized; 708 } 709 #endif /* MSDOS || WIN32 */ 710 711 *result = target; 712 return CURLE_OK; 713 } 714