1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel (at) haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 #include "tool_setup.h" 23 24 #define ENABLE_CURLX_PRINTF 25 /* use our own printf() functions */ 26 #include "curlx.h" 27 #include "tool_cfgable.h" 28 #include "tool_doswin.h" 29 #include "tool_urlglob.h" 30 #include "tool_vms.h" 31 32 #include "memdebug.h" /* keep this as LAST include */ 33 34 #define GLOBERROR(string, column, code) \ 35 glob->error = string, glob->pos = column, code 36 37 static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len) 38 { 39 URLPattern *pat = &glob->pattern[glob->size]; 40 pat->type = UPTSet; 41 pat->content.Set.size = 1; 42 pat->content.Set.ptr_s = 0; 43 pat->globindex = -1; 44 45 pat->content.Set.elements = malloc(sizeof(char *)); 46 47 if(!pat->content.Set.elements) 48 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 49 50 pat->content.Set.elements[0] = malloc(len + 1); 51 if(!pat->content.Set.elements[0]) 52 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 53 54 memcpy(pat->content.Set.elements[0], fixed, len); 55 pat->content.Set.elements[0][len] = 0; 56 57 return CURLE_OK; 58 } 59 60 /* multiply 61 * 62 * Multiplies and checks for overflow. 63 */ 64 static int multiply(unsigned long *amount, long with) 65 { 66 unsigned long sum = *amount * with; 67 if(!with) { 68 *amount = 0; 69 return 0; 70 } 71 if(sum/with != *amount) 72 return 1; /* didn't fit, bail out */ 73 *amount = sum; 74 return 0; 75 } 76 77 static CURLcode glob_set(URLGlob *glob, char **patternp, 78 size_t *posp, unsigned long *amount, 79 int globindex) 80 { 81 /* processes a set expression with the point behind the opening '{' 82 ','-separated elements are collected until the next closing '}' 83 */ 84 URLPattern *pat; 85 bool done = FALSE; 86 char *buf = glob->glob_buffer; 87 char *pattern = *patternp; 88 char *opattern = pattern; 89 size_t opos = *posp-1; 90 91 pat = &glob->pattern[glob->size]; 92 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 93 pat->type = UPTSet; 94 pat->content.Set.size = 0; 95 pat->content.Set.ptr_s = 0; 96 pat->content.Set.elements = NULL; 97 pat->globindex = globindex; 98 99 while(!done) { 100 switch (*pattern) { 101 case '\0': /* URL ended while set was still open */ 102 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); 103 104 case '{': 105 case '[': /* no nested expressions at this time */ 106 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); 107 108 case '}': /* set element completed */ 109 if(opattern == pattern) 110 return GLOBERROR("empty string within braces", *posp, 111 CURLE_URL_MALFORMAT); 112 113 /* add 1 to size since it'll be incremented below */ 114 if(multiply(amount, pat->content.Set.size + 1)) 115 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); 116 117 /* FALLTHROUGH */ 118 case ',': 119 120 *buf = '\0'; 121 if(pat->content.Set.elements) { 122 char **new_arr = realloc(pat->content.Set.elements, 123 (pat->content.Set.size + 1) * sizeof(char *)); 124 if(!new_arr) 125 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 126 127 pat->content.Set.elements = new_arr; 128 } 129 else 130 pat->content.Set.elements = malloc(sizeof(char *)); 131 132 if(!pat->content.Set.elements) 133 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 134 135 pat->content.Set.elements[pat->content.Set.size] = 136 strdup(glob->glob_buffer); 137 if(!pat->content.Set.elements[pat->content.Set.size]) 138 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 139 ++pat->content.Set.size; 140 141 if(*pattern == '}') { 142 pattern++; /* pass the closing brace */ 143 done = TRUE; 144 continue; 145 } 146 147 buf = glob->glob_buffer; 148 ++pattern; 149 ++(*posp); 150 break; 151 152 case ']': /* illegal closing bracket */ 153 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); 154 155 case '\\': /* escaped character, skip '\' */ 156 if(pattern[1]) { 157 ++pattern; 158 ++(*posp); 159 } 160 /* FALLTHROUGH */ 161 default: 162 *buf++ = *pattern++; /* copy character to set element */ 163 ++(*posp); 164 } 165 } 166 167 *patternp = pattern; /* return with the new position */ 168 return CURLE_OK; 169 } 170 171 static CURLcode glob_range(URLGlob *glob, char **patternp, 172 size_t *posp, unsigned long *amount, 173 int globindex) 174 { 175 /* processes a range expression with the point behind the opening '[' 176 - char range: e.g. "a-z]", "B-Q]" 177 - num range: e.g. "0-9]", "17-2000]" 178 - num range with leading zeros: e.g. "001-999]" 179 expression is checked for well-formedness and collected until the next ']' 180 */ 181 URLPattern *pat; 182 int rc; 183 char *pattern = *patternp; 184 char *c; 185 186 pat = &glob->pattern[glob->size]; 187 pat->globindex = globindex; 188 189 if(ISALPHA(*pattern)) { 190 /* character range detected */ 191 char min_c; 192 char max_c; 193 char end_c; 194 unsigned long step = 1; 195 196 pat->type = UPTCharRange; 197 198 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c); 199 200 if(rc == 3) { 201 if(end_c == ':') { 202 char *endp; 203 errno = 0; 204 step = strtoul(&pattern[4], &endp, 10); 205 if(errno || &pattern[4] == endp || *endp != ']') 206 step = 0; 207 else 208 pattern = endp + 1; 209 } 210 else if(end_c != ']') 211 /* then this is wrong */ 212 rc = 0; 213 else 214 /* end_c == ']' */ 215 pattern += 4; 216 } 217 218 *posp += (pattern - *patternp); 219 220 if(rc != 3 || !step || step > (unsigned)INT_MAX || 221 (min_c == max_c && step != 1) || 222 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) || 223 (max_c - min_c) > ('z' - 'a')))) 224 /* the pattern is not well-formed */ 225 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 226 227 /* if there was a ":[num]" thing, use that as step or else use 1 */ 228 pat->content.CharRange.step = (int)step; 229 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 230 pat->content.CharRange.max_c = max_c; 231 232 if(multiply(amount, ((pat->content.CharRange.max_c - 233 pat->content.CharRange.min_c) / 234 pat->content.CharRange.step + 1))) 235 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 236 } 237 else if(ISDIGIT(*pattern)) { 238 /* numeric range detected */ 239 unsigned long min_n; 240 unsigned long max_n = 0; 241 unsigned long step_n = 0; 242 char *endp; 243 244 pat->type = UPTNumRange; 245 pat->content.NumRange.padlength = 0; 246 247 if(*pattern == '0') { 248 /* leading zero specified, count them! */ 249 c = pattern; 250 while(ISDIGIT(*c)) { 251 c++; 252 ++pat->content.NumRange.padlength; /* padding length is set for all 253 instances of this pattern */ 254 } 255 } 256 257 errno = 0; 258 min_n = strtoul(pattern, &endp, 10); 259 if(errno || (endp == pattern)) 260 endp = NULL; 261 else { 262 if(*endp != '-') 263 endp = NULL; 264 else { 265 pattern = endp + 1; 266 while(*pattern && ISBLANK(*pattern)) 267 pattern++; 268 if(!ISDIGIT(*pattern)) { 269 endp = NULL; 270 goto fail; 271 } 272 errno = 0; 273 max_n = strtoul(pattern, &endp, 10); 274 if(errno) 275 /* overflow */ 276 endp = NULL; 277 else if(*endp == ':') { 278 pattern = endp + 1; 279 errno = 0; 280 step_n = strtoul(pattern, &endp, 10); 281 if(errno) 282 /* over/underflow situation */ 283 endp = NULL; 284 } 285 else 286 step_n = 1; 287 if(endp && (*endp == ']')) { 288 pattern = endp + 1; 289 } 290 else 291 endp = NULL; 292 } 293 } 294 295 fail: 296 *posp += (pattern - *patternp); 297 298 if(!endp || !step_n || 299 (min_n == max_n && step_n != 1) || 300 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n)))) 301 /* the pattern is not well-formed */ 302 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 303 304 /* typecasting to ints are fine here since we make sure above that we 305 are within 31 bits */ 306 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 307 pat->content.NumRange.max_n = max_n; 308 pat->content.NumRange.step = step_n; 309 310 if(multiply(amount, ((pat->content.NumRange.max_n - 311 pat->content.NumRange.min_n) / 312 pat->content.NumRange.step + 1))) 313 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 314 } 315 else 316 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); 317 318 *patternp = pattern; 319 return CURLE_OK; 320 } 321 322 static bool peek_ipv6(const char *str, size_t *skip) 323 { 324 /* 325 * Scan for a potential IPv6 literal. 326 * - Valid globs contain a hyphen and <= 1 colon. 327 * - IPv6 literals contain no hyphens and >= 2 colons. 328 */ 329 size_t i = 0; 330 size_t colons = 0; 331 if(str[i++] != '[') { 332 return FALSE; 333 } 334 for(;;) { 335 const char c = str[i++]; 336 if(ISALNUM(c) || c == '.' || c == '%') { 337 /* ok */ 338 } 339 else if(c == ':') { 340 colons++; 341 } 342 else if(c == ']') { 343 *skip = i; 344 return colons >= 2 ? TRUE : FALSE; 345 } 346 else { 347 return FALSE; 348 } 349 } 350 } 351 352 static CURLcode glob_parse(URLGlob *glob, char *pattern, 353 size_t pos, unsigned long *amount) 354 { 355 /* processes a literal string component of a URL 356 special characters '{' and '[' branch to set/range processing functions 357 */ 358 CURLcode res = CURLE_OK; 359 int globindex = 0; /* count "actual" globs */ 360 361 *amount = 1; 362 363 while(*pattern && !res) { 364 char *buf = glob->glob_buffer; 365 size_t sublen = 0; 366 while(*pattern && *pattern != '{') { 367 if(*pattern == '[') { 368 /* skip over IPv6 literals and [] */ 369 size_t skip = 0; 370 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']')) 371 skip = 2; 372 if(skip) { 373 memcpy(buf, pattern, skip); 374 buf += skip; 375 pattern += skip; 376 sublen += skip; 377 continue; 378 } 379 break; 380 } 381 if(*pattern == '}' || *pattern == ']') 382 return GLOBERROR("unmatched close brace/bracket", pos, 383 CURLE_URL_MALFORMAT); 384 385 /* only allow \ to escape known "special letters" */ 386 if(*pattern == '\\' && 387 (*(pattern + 1) == '{' || *(pattern + 1) == '[' || 388 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) { 389 390 /* escape character, skip '\' */ 391 ++pattern; 392 ++pos; 393 } 394 *buf++ = *pattern++; /* copy character to literal */ 395 ++pos; 396 sublen++; 397 } 398 if(sublen) { 399 /* we got a literal string, add it as a single-item list */ 400 *buf = '\0'; 401 res = glob_fixed(glob, glob->glob_buffer, sublen); 402 } 403 else { 404 switch (*pattern) { 405 case '\0': /* done */ 406 break; 407 408 case '{': 409 /* process set pattern */ 410 pattern++; 411 pos++; 412 res = glob_set(glob, &pattern, &pos, amount, globindex++); 413 break; 414 415 case '[': 416 /* process range pattern */ 417 pattern++; 418 pos++; 419 res = glob_range(glob, &pattern, &pos, amount, globindex++); 420 break; 421 } 422 } 423 424 if(++glob->size >= GLOB_PATTERN_NUM) 425 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); 426 } 427 return res; 428 } 429 430 CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum, 431 FILE *error) 432 { 433 /* 434 * We can deal with any-size, just make a buffer with the same length 435 * as the specified URL! 436 */ 437 URLGlob *glob_expand; 438 unsigned long amount = 0; 439 char *glob_buffer; 440 CURLcode res; 441 442 *glob = NULL; 443 444 glob_buffer = malloc(strlen(url) + 1); 445 if(!glob_buffer) 446 return CURLE_OUT_OF_MEMORY; 447 glob_buffer[0] = 0; 448 449 glob_expand = calloc(1, sizeof(URLGlob)); 450 if(!glob_expand) { 451 Curl_safefree(glob_buffer); 452 return CURLE_OUT_OF_MEMORY; 453 } 454 glob_expand->urllen = strlen(url); 455 glob_expand->glob_buffer = glob_buffer; 456 457 res = glob_parse(glob_expand, url, 1, &amount); 458 if(!res) 459 *urlnum = amount; 460 else { 461 if(error && glob_expand->error) { 462 char text[512]; 463 const char *t; 464 if(glob_expand->pos) { 465 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^", 466 glob_expand->error, 467 glob_expand->pos, url, glob_expand->pos - 1, " "); 468 t = text; 469 } 470 else 471 t = glob_expand->error; 472 473 /* send error description to the error-stream */ 474 fprintf(error, "curl: (%d) %s\n", res, t); 475 } 476 /* it failed, we cleanup */ 477 glob_cleanup(glob_expand); 478 *urlnum = 1; 479 return res; 480 } 481 482 *glob = glob_expand; 483 return CURLE_OK; 484 } 485 486 void glob_cleanup(URLGlob* glob) 487 { 488 size_t i; 489 int elem; 490 491 for(i = 0; i < glob->size; i++) { 492 if((glob->pattern[i].type == UPTSet) && 493 (glob->pattern[i].content.Set.elements)) { 494 for(elem = glob->pattern[i].content.Set.size - 1; 495 elem >= 0; 496 --elem) { 497 Curl_safefree(glob->pattern[i].content.Set.elements[elem]); 498 } 499 Curl_safefree(glob->pattern[i].content.Set.elements); 500 } 501 } 502 Curl_safefree(glob->glob_buffer); 503 Curl_safefree(glob); 504 } 505 506 CURLcode glob_next_url(char **globbed, URLGlob *glob) 507 { 508 URLPattern *pat; 509 size_t i; 510 size_t len; 511 size_t buflen = glob->urllen + 1; 512 char *buf = glob->glob_buffer; 513 514 *globbed = NULL; 515 516 if(!glob->beenhere) 517 glob->beenhere = 1; 518 else { 519 bool carry = TRUE; 520 521 /* implement a counter over the index ranges of all patterns, starting 522 with the rightmost pattern */ 523 for(i = 0; carry && (i < glob->size); i++) { 524 carry = FALSE; 525 pat = &glob->pattern[glob->size - 1 - i]; 526 switch(pat->type) { 527 case UPTSet: 528 if((pat->content.Set.elements) && 529 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 530 pat->content.Set.ptr_s = 0; 531 carry = TRUE; 532 } 533 break; 534 case UPTCharRange: 535 pat->content.CharRange.ptr_c = 536 (char)(pat->content.CharRange.step + 537 (int)((unsigned char)pat->content.CharRange.ptr_c)); 538 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 539 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 540 carry = TRUE; 541 } 542 break; 543 case UPTNumRange: 544 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 545 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 546 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 547 carry = TRUE; 548 } 549 break; 550 default: 551 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 552 return CURLE_FAILED_INIT; 553 } 554 } 555 if(carry) { /* first pattern ptr has run into overflow, done! */ 556 /* TODO: verify if this should actually return CURLE_OK. */ 557 return CURLE_OK; /* CURLE_OK to match previous behavior */ 558 } 559 } 560 561 for(i = 0; i < glob->size; ++i) { 562 pat = &glob->pattern[i]; 563 switch(pat->type) { 564 case UPTSet: 565 if(pat->content.Set.elements) { 566 msnprintf(buf, buflen, "%s", 567 pat->content.Set.elements[pat->content.Set.ptr_s]); 568 len = strlen(buf); 569 buf += len; 570 buflen -= len; 571 } 572 break; 573 case UPTCharRange: 574 if(buflen) { 575 *buf++ = pat->content.CharRange.ptr_c; 576 *buf = '\0'; 577 buflen--; 578 } 579 break; 580 case UPTNumRange: 581 msnprintf(buf, buflen, "%0*lu", 582 pat->content.NumRange.padlength, 583 pat->content.NumRange.ptr_n); 584 len = strlen(buf); 585 buf += len; 586 buflen -= len; 587 break; 588 default: 589 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 590 return CURLE_FAILED_INIT; 591 } 592 } 593 594 *globbed = strdup(glob->glob_buffer); 595 if(!*globbed) 596 return CURLE_OUT_OF_MEMORY; 597 598 return CURLE_OK; 599 } 600 601 CURLcode glob_match_url(char **result, char *filename, URLGlob *glob) 602 { 603 char *target; 604 size_t allocsize; 605 char numbuf[18]; 606 char *appendthis = (char *)""; 607 size_t appendlen = 0; 608 size_t stringlen = 0; 609 610 *result = NULL; 611 612 /* We cannot use the glob_buffer for storage here since the filename may 613 * be longer than the URL we use. We allocate a good start size, then 614 * we need to realloc in case of need. 615 */ 616 allocsize = strlen(filename) + 1; /* make it at least one byte to store the 617 trailing zero */ 618 target = malloc(allocsize); 619 if(!target) 620 return CURLE_OUT_OF_MEMORY; 621 622 while(*filename) { 623 if(*filename == '#' && ISDIGIT(filename[1])) { 624 char *ptr = filename; 625 unsigned long num = strtoul(&filename[1], &filename, 10); 626 URLPattern *pat = NULL; 627 628 if(num < glob->size) { 629 unsigned long i; 630 num--; /* make it zero based */ 631 /* find the correct glob entry */ 632 for(i = 0; i<glob->size; i++) { 633 if(glob->pattern[i].globindex == (int)num) { 634 pat = &glob->pattern[i]; 635 break; 636 } 637 } 638 } 639 640 if(pat) { 641 switch(pat->type) { 642 case UPTSet: 643 if(pat->content.Set.elements) { 644 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; 645 appendlen = 646 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 647 } 648 break; 649 case UPTCharRange: 650 numbuf[0] = pat->content.CharRange.ptr_c; 651 numbuf[1] = 0; 652 appendthis = numbuf; 653 appendlen = 1; 654 break; 655 case UPTNumRange: 656 msnprintf(numbuf, sizeof(numbuf), "%0*lu", 657 pat->content.NumRange.padlength, 658 pat->content.NumRange.ptr_n); 659 appendthis = numbuf; 660 appendlen = strlen(numbuf); 661 break; 662 default: 663 fprintf(stderr, "internal error: invalid pattern type (%d)\n", 664 (int)pat->type); 665 Curl_safefree(target); 666 return CURLE_FAILED_INIT; 667 } 668 } 669 else { 670 /* #[num] out of range, use the #[num] in the output */ 671 filename = ptr; 672 appendthis = filename++; 673 appendlen = 1; 674 } 675 } 676 else { 677 appendthis = filename++; 678 appendlen = 1; 679 } 680 if(appendlen + stringlen >= allocsize) { 681 char *newstr; 682 /* we append a single byte to allow for the trailing byte to be appended 683 at the end of this function outside the while() loop */ 684 allocsize = (appendlen + stringlen) * 2; 685 newstr = realloc(target, allocsize + 1); 686 if(!newstr) { 687 Curl_safefree(target); 688 return CURLE_OUT_OF_MEMORY; 689 } 690 target = newstr; 691 } 692 memcpy(&target[stringlen], appendthis, appendlen); 693 stringlen += appendlen; 694 } 695 target[stringlen]= '\0'; 696 697 #if defined(MSDOS) || defined(WIN32) 698 { 699 char *sanitized; 700 SANITIZEcode sc = sanitize_file_name(&sanitized, target, 701 (SANITIZE_ALLOW_PATH | 702 SANITIZE_ALLOW_RESERVED)); 703 Curl_safefree(target); 704 if(sc) 705 return CURLE_URL_MALFORMAT; 706 target = sanitized; 707 } 708 #endif /* MSDOS || WIN32 */ 709 710 *result = target; 711 return CURLE_OK; 712 } 713