1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel (at) haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 23 #include "curl_setup.h" 24 25 #include "urldata.h" 26 #include "urlapi-int.h" 27 #include "strcase.h" 28 #include "dotdot.h" 29 #include "url.h" 30 #include "escape.h" 31 #include "curl_ctype.h" 32 33 /* The last 3 #include files should be in this order */ 34 #include "curl_printf.h" 35 #include "curl_memory.h" 36 #include "memdebug.h" 37 38 /* MSDOS/Windows style drive prefix, eg c: in c:foo */ 39 #define STARTS_WITH_DRIVE_PREFIX(str) \ 40 ((('a' <= str[0] && str[0] <= 'z') || \ 41 ('A' <= str[0] && str[0] <= 'Z')) && \ 42 (str[1] == ':')) 43 44 /* MSDOS/Windows style drive prefix, optionally with 45 * a '|' instead of ':', followed by a slash or NUL */ 46 #define STARTS_WITH_URL_DRIVE_PREFIX(str) \ 47 ((('a' <= (str)[0] && (str)[0] <= 'z') || \ 48 ('A' <= (str)[0] && (str)[0] <= 'Z')) && \ 49 ((str)[1] == ':' || (str)[1] == '|') && \ 50 ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) 51 52 /* Internal representation of CURLU. Point to URL-encoded strings. */ 53 struct Curl_URL { 54 char *scheme; 55 char *user; 56 char *password; 57 char *options; /* IMAP only? */ 58 char *host; 59 char *port; 60 char *path; 61 char *query; 62 char *fragment; 63 64 char *scratch; /* temporary scratch area */ 65 long portnum; /* the numerical version */ 66 }; 67 68 #define DEFAULT_SCHEME "https" 69 70 static void free_urlhandle(struct Curl_URL *u) 71 { 72 free(u->scheme); 73 free(u->user); 74 free(u->password); 75 free(u->options); 76 free(u->host); 77 free(u->port); 78 free(u->path); 79 free(u->query); 80 free(u->fragment); 81 free(u->scratch); 82 } 83 84 /* move the full contents of one handle onto another and 85 free the original */ 86 static void mv_urlhandle(struct Curl_URL *from, 87 struct Curl_URL *to) 88 { 89 free_urlhandle(to); 90 *to = *from; 91 free(from); 92 } 93 94 /* 95 * Find the separator at the end of the host name, or the '?' in cases like 96 * http://www.url.com?id=2380 97 */ 98 static const char *find_host_sep(const char *url) 99 { 100 const char *sep; 101 const char *query; 102 103 /* Find the start of the hostname */ 104 sep = strstr(url, "//"); 105 if(!sep) 106 sep = url; 107 else 108 sep += 2; 109 110 query = strchr(sep, '?'); 111 sep = strchr(sep, '/'); 112 113 if(!sep) 114 sep = url + strlen(url); 115 116 if(!query) 117 query = url + strlen(url); 118 119 return sep < query ? sep : query; 120 } 121 122 /* 123 * Decide in an encoding-independent manner whether a character in an 124 * URL must be escaped. The same criterion must be used in strlen_url() 125 * and strcpy_url(). 126 */ 127 static bool urlchar_needs_escaping(int c) 128 { 129 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); 130 } 131 132 /* 133 * strlen_url() returns the length of the given URL if the spaces within the 134 * URL were properly URL encoded. 135 * URL encoding should be skipped for host names, otherwise IDN resolution 136 * will fail. 137 */ 138 static size_t strlen_url(const char *url, bool relative) 139 { 140 const unsigned char *ptr; 141 size_t newlen = 0; 142 bool left = TRUE; /* left side of the ? */ 143 const unsigned char *host_sep = (const unsigned char *) url; 144 145 if(!relative) 146 host_sep = (const unsigned char *) find_host_sep(url); 147 148 for(ptr = (unsigned char *)url; *ptr; ptr++) { 149 150 if(ptr < host_sep) { 151 ++newlen; 152 continue; 153 } 154 155 switch(*ptr) { 156 case '?': 157 left = FALSE; 158 /* FALLTHROUGH */ 159 default: 160 if(urlchar_needs_escaping(*ptr)) 161 newlen += 2; 162 newlen++; 163 break; 164 case ' ': 165 if(left) 166 newlen += 3; 167 else 168 newlen++; 169 break; 170 } 171 } 172 return newlen; 173 } 174 175 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in 176 * the source URL accordingly. 177 * URL encoding should be skipped for host names, otherwise IDN resolution 178 * will fail. 179 */ 180 static void strcpy_url(char *output, const char *url, bool relative) 181 { 182 /* we must add this with whitespace-replacing */ 183 bool left = TRUE; 184 const unsigned char *iptr; 185 char *optr = output; 186 const unsigned char *host_sep = (const unsigned char *) url; 187 188 if(!relative) 189 host_sep = (const unsigned char *) find_host_sep(url); 190 191 for(iptr = (unsigned char *)url; /* read from here */ 192 *iptr; /* until zero byte */ 193 iptr++) { 194 195 if(iptr < host_sep) { 196 *optr++ = *iptr; 197 continue; 198 } 199 200 switch(*iptr) { 201 case '?': 202 left = FALSE; 203 /* FALLTHROUGH */ 204 default: 205 if(urlchar_needs_escaping(*iptr)) { 206 msnprintf(optr, 4, "%%%02x", *iptr); 207 optr += 3; 208 } 209 else 210 *optr++=*iptr; 211 break; 212 case ' ': 213 if(left) { 214 *optr++='%'; /* add a '%' */ 215 *optr++='2'; /* add a '2' */ 216 *optr++='0'; /* add a '0' */ 217 } 218 else 219 *optr++='+'; /* add a '+' here */ 220 break; 221 } 222 } 223 *optr = 0; /* zero terminate output buffer */ 224 225 } 226 227 /* 228 * Returns true if the given URL is absolute (as opposed to relative) within 229 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is 230 * non-NULL. 231 */ 232 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) 233 { 234 size_t i; 235 #ifdef WIN32 236 if(STARTS_WITH_DRIVE_PREFIX(url)) 237 return FALSE; 238 #endif 239 for(i = 0; i < buflen && url[i]; ++i) { 240 char s = url[i]; 241 if((s == ':') && (url[i + 1] == '/')) { 242 if(buf) 243 buf[i] = 0; 244 return TRUE; 245 } 246 /* RFC 3986 3.1 explains: 247 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 248 */ 249 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) { 250 if(buf) 251 buf[i] = (char)TOLOWER(s); 252 } 253 else 254 break; 255 } 256 return FALSE; 257 } 258 259 /* 260 * Concatenate a relative URL to a base URL making it absolute. 261 * URL-encodes any spaces. 262 * The returned pointer must be freed by the caller unless NULL 263 * (returns NULL on out of memory). 264 */ 265 static char *concat_url(const char *base, const char *relurl) 266 { 267 /*** 268 TRY to append this new path to the old URL 269 to the right of the host part. Oh crap, this is doomed to cause 270 problems in the future... 271 */ 272 char *newest; 273 char *protsep; 274 char *pathsep; 275 size_t newlen; 276 bool host_changed = FALSE; 277 278 const char *useurl = relurl; 279 size_t urllen; 280 281 /* we must make our own copy of the URL to play with, as it may 282 point to read-only data */ 283 char *url_clone = strdup(base); 284 285 if(!url_clone) 286 return NULL; /* skip out of this NOW */ 287 288 /* protsep points to the start of the host name */ 289 protsep = strstr(url_clone, "//"); 290 if(!protsep) 291 protsep = url_clone; 292 else 293 protsep += 2; /* pass the slashes */ 294 295 if('/' != relurl[0]) { 296 int level = 0; 297 298 /* First we need to find out if there's a ?-letter in the URL, 299 and cut it and the right-side of that off */ 300 pathsep = strchr(protsep, '?'); 301 if(pathsep) 302 *pathsep = 0; 303 304 /* we have a relative path to append to the last slash if there's one 305 available, or if the new URL is just a query string (starts with a 306 '?') we append the new one at the end of the entire currently worked 307 out URL */ 308 if(useurl[0] != '?') { 309 pathsep = strrchr(protsep, '/'); 310 if(pathsep) 311 *pathsep = 0; 312 } 313 314 /* Check if there's any slash after the host name, and if so, remember 315 that position instead */ 316 pathsep = strchr(protsep, '/'); 317 if(pathsep) 318 protsep = pathsep + 1; 319 else 320 protsep = NULL; 321 322 /* now deal with one "./" or any amount of "../" in the newurl 323 and act accordingly */ 324 325 if((useurl[0] == '.') && (useurl[1] == '/')) 326 useurl += 2; /* just skip the "./" */ 327 328 while((useurl[0] == '.') && 329 (useurl[1] == '.') && 330 (useurl[2] == '/')) { 331 level++; 332 useurl += 3; /* pass the "../" */ 333 } 334 335 if(protsep) { 336 while(level--) { 337 /* cut off one more level from the right of the original URL */ 338 pathsep = strrchr(protsep, '/'); 339 if(pathsep) 340 *pathsep = 0; 341 else { 342 *protsep = 0; 343 break; 344 } 345 } 346 } 347 } 348 else { 349 /* We got a new absolute path for this server */ 350 351 if((relurl[0] == '/') && (relurl[1] == '/')) { 352 /* the new URL starts with //, just keep the protocol part from the 353 original one */ 354 *protsep = 0; 355 useurl = &relurl[2]; /* we keep the slashes from the original, so we 356 skip the new ones */ 357 host_changed = TRUE; 358 } 359 else { 360 /* cut off the original URL from the first slash, or deal with URLs 361 without slash */ 362 pathsep = strchr(protsep, '/'); 363 if(pathsep) { 364 /* When people use badly formatted URLs, such as 365 "http://www.url.com?dir=/home/daniel" we must not use the first 366 slash, if there's a ?-letter before it! */ 367 char *sep = strchr(protsep, '?'); 368 if(sep && (sep < pathsep)) 369 pathsep = sep; 370 *pathsep = 0; 371 } 372 else { 373 /* There was no slash. Now, since we might be operating on a badly 374 formatted URL, such as "http://www.url.com?id=2380" which doesn't 375 use a slash separator as it is supposed to, we need to check for a 376 ?-letter as well! */ 377 pathsep = strchr(protsep, '?'); 378 if(pathsep) 379 *pathsep = 0; 380 } 381 } 382 } 383 384 /* If the new part contains a space, this is a mighty stupid redirect 385 but we still make an effort to do "right". To the left of a '?' 386 letter we replace each space with %20 while it is replaced with '+' 387 on the right side of the '?' letter. 388 */ 389 newlen = strlen_url(useurl, !host_changed); 390 391 urllen = strlen(url_clone); 392 393 newest = malloc(urllen + 1 + /* possible slash */ 394 newlen + 1 /* zero byte */); 395 396 if(!newest) { 397 free(url_clone); /* don't leak this */ 398 return NULL; 399 } 400 401 /* copy over the root url part */ 402 memcpy(newest, url_clone, urllen); 403 404 /* check if we need to append a slash */ 405 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) 406 ; 407 else 408 newest[urllen++]='/'; 409 410 /* then append the new piece on the right side */ 411 strcpy_url(&newest[urllen], useurl, !host_changed); 412 413 free(url_clone); 414 415 return newest; 416 } 417 418 /* 419 * parse_hostname_login() 420 * 421 * Parse the login details (user name, password and options) from the URL and 422 * strip them out of the host name 423 * 424 */ 425 static CURLUcode parse_hostname_login(struct Curl_URL *u, 426 const struct Curl_handler *h, 427 char **hostname, 428 unsigned int flags) 429 { 430 CURLUcode result = CURLUE_OK; 431 CURLcode ccode; 432 char *userp = NULL; 433 char *passwdp = NULL; 434 char *optionsp = NULL; 435 436 /* At this point, we're hoping all the other special cases have 437 * been taken care of, so conn->host.name is at most 438 * [user[:password][;options]]@]hostname 439 * 440 * We need somewhere to put the embedded details, so do that first. 441 */ 442 443 char *ptr = strchr(*hostname, '@'); 444 char *login = *hostname; 445 446 if(!ptr) 447 goto out; 448 449 /* We will now try to extract the 450 * possible login information in a string like: 451 * ftp://user:password@ftp.my.site:8021/README */ 452 *hostname = ++ptr; 453 454 /* We could use the login information in the URL so extract it. Only parse 455 options if the handler says we should. Note that 'h' might be NULL! */ 456 ccode = Curl_parse_login_details(login, ptr - login - 1, 457 &userp, &passwdp, 458 (h && (h->flags & PROTOPT_URLOPTIONS)) ? 459 &optionsp:NULL); 460 if(ccode) { 461 result = CURLUE_MALFORMED_INPUT; 462 goto out; 463 } 464 465 if(userp) { 466 if(flags & CURLU_DISALLOW_USER) { 467 /* Option DISALLOW_USER is set and url contains username. */ 468 result = CURLUE_USER_NOT_ALLOWED; 469 goto out; 470 } 471 472 u->user = userp; 473 } 474 475 if(passwdp) 476 u->password = passwdp; 477 478 if(optionsp) 479 u->options = optionsp; 480 481 return CURLUE_OK; 482 out: 483 484 free(userp); 485 free(passwdp); 486 free(optionsp); 487 488 return result; 489 } 490 491 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname) 492 { 493 char *portptr = NULL; 494 char endbracket; 495 int len; 496 497 /* 498 * Find the end of an IPv6 address, either on the ']' ending bracket or 499 * a percent-encoded zone index. 500 */ 501 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n", 502 &endbracket, &len)) { 503 if(']' == endbracket) 504 portptr = &hostname[len]; 505 else if('%' == endbracket) { 506 int zonelen = len; 507 if(1 == sscanf(hostname + zonelen, "25%*[^]]%c%n", &endbracket, &len)) { 508 if(']' != endbracket) 509 return CURLUE_MALFORMED_INPUT; 510 portptr = &hostname[--zonelen + len + 1]; 511 } 512 else 513 return CURLUE_MALFORMED_INPUT; 514 } 515 else 516 return CURLUE_MALFORMED_INPUT; 517 518 /* this is a RFC2732-style specified IP-address */ 519 if(portptr && *portptr) { 520 if(*portptr != ':') 521 return CURLUE_MALFORMED_INPUT; 522 } 523 else 524 portptr = NULL; 525 } 526 else 527 portptr = strchr(hostname, ':'); 528 529 if(portptr) { 530 char *rest; 531 long port; 532 char portbuf[7]; 533 534 /* Browser behavior adaptation. If there's a colon with no digits after, 535 just cut off the name there which makes us ignore the colon and just 536 use the default port. Firefox, Chrome and Safari all do that. */ 537 if(!portptr[1]) { 538 *portptr = '\0'; 539 return CURLUE_OK; 540 } 541 542 if(!ISDIGIT(portptr[1])) 543 return CURLUE_BAD_PORT_NUMBER; 544 545 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ 546 547 if((port <= 0) || (port > 0xffff)) 548 /* Single unix standard says port numbers are 16 bits long, but we don't 549 treat port zero as OK. */ 550 return CURLUE_BAD_PORT_NUMBER; 551 552 if(rest[0]) 553 return CURLUE_BAD_PORT_NUMBER; 554 555 *portptr++ = '\0'; /* cut off the name there */ 556 *rest = 0; 557 /* generate a new port number string to get rid of leading zeroes etc */ 558 msnprintf(portbuf, sizeof(portbuf), "%ld", port); 559 u->portnum = port; 560 u->port = strdup(portbuf); 561 if(!u->port) 562 return CURLUE_OUT_OF_MEMORY; 563 } 564 565 return CURLUE_OK; 566 } 567 568 /* scan for byte values < 31 or 127 */ 569 static CURLUcode junkscan(char *part) 570 { 571 if(part) { 572 static const char badbytes[]={ 573 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 574 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 575 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 576 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 577 0x7f, 578 0x00 /* zero terminate */ 579 }; 580 size_t n = strlen(part); 581 size_t nfine = strcspn(part, badbytes); 582 if(nfine != n) 583 /* since we don't know which part is scanned, return a generic error 584 code */ 585 return CURLUE_MALFORMED_INPUT; 586 } 587 return CURLUE_OK; 588 } 589 590 static CURLUcode hostname_check(char *hostname, unsigned int flags) 591 { 592 const char *l = NULL; /* accepted characters */ 593 size_t len; 594 size_t hlen = strlen(hostname); 595 (void)flags; 596 597 if(hostname[0] == '[') { 598 hostname++; 599 l = "0123456789abcdefABCDEF::.%"; 600 hlen -= 2; 601 } 602 603 if(l) { 604 /* only valid letters are ok */ 605 len = strspn(hostname, l); 606 if(hlen != len) 607 /* hostname with bad content */ 608 return CURLUE_MALFORMED_INPUT; 609 } 610 else { 611 /* letters from the second string is not ok */ 612 len = strcspn(hostname, " "); 613 if(hlen != len) 614 /* hostname with bad content */ 615 return CURLUE_MALFORMED_INPUT; 616 } 617 return CURLUE_OK; 618 } 619 620 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#')) 621 622 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) 623 { 624 char *path; 625 bool path_alloced = FALSE; 626 char *hostname; 627 char *query = NULL; 628 char *fragment = NULL; 629 CURLUcode result; 630 bool url_has_scheme = FALSE; 631 char schemebuf[MAX_SCHEME_LEN]; 632 char *schemep = NULL; 633 size_t schemelen = 0; 634 size_t urllen; 635 const struct Curl_handler *h = NULL; 636 637 if(!url) 638 return CURLUE_MALFORMED_INPUT; 639 640 /************************************************************* 641 * Parse the URL. 642 ************************************************************/ 643 /* allocate scratch area */ 644 urllen = strlen(url); 645 path = u->scratch = malloc(urllen * 2 + 2); 646 if(!path) 647 return CURLUE_OUT_OF_MEMORY; 648 649 hostname = &path[urllen + 1]; 650 hostname[0] = 0; 651 652 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) { 653 url_has_scheme = TRUE; 654 schemelen = strlen(schemebuf); 655 } 656 657 /* handle the file: scheme */ 658 if(url_has_scheme && strcasecompare(schemebuf, "file")) { 659 /* path has been allocated large enough to hold this */ 660 strcpy(path, &url[5]); 661 662 hostname = NULL; /* no host for file: URLs */ 663 u->scheme = strdup("file"); 664 if(!u->scheme) 665 return CURLUE_OUT_OF_MEMORY; 666 667 /* Extra handling URLs with an authority component (i.e. that start with 668 * "file://") 669 * 670 * We allow omitted hostname (e.g. file:/<path>) -- valid according to 671 * RFC 8089, but not the (current) WHAT-WG URL spec. 672 */ 673 if(path[0] == '/' && path[1] == '/') { 674 /* swallow the two slashes */ 675 char *ptr = &path[2]; 676 677 /* 678 * According to RFC 8089, a file: URL can be reliably dereferenced if: 679 * 680 * o it has no/blank hostname, or 681 * 682 * o the hostname matches "localhost" (case-insensitively), or 683 * 684 * o the hostname is a FQDN that resolves to this machine. 685 * 686 * For brevity, we only consider URLs with empty, "localhost", or 687 * "127.0.0.1" hostnames as local. 688 * 689 * Additionally, there is an exception for URLs with a Windows drive 690 * letter in the authority (which was accidentally omitted from RFC 8089 691 * Appendix E, but believe me, it was meant to be there. --MK) 692 */ 693 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { 694 /* the URL includes a host name, it must match "localhost" or 695 "127.0.0.1" to be valid */ 696 if(!checkprefix("localhost/", ptr) && 697 !checkprefix("127.0.0.1/", ptr)) { 698 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or 699 none */ 700 return CURLUE_MALFORMED_INPUT; 701 } 702 ptr += 9; /* now points to the slash after the host */ 703 } 704 705 path = ptr; 706 } 707 708 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) 709 /* Don't allow Windows drive letters when not in Windows. 710 * This catches both "file:/c:" and "file:c:" */ 711 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || 712 STARTS_WITH_URL_DRIVE_PREFIX(path)) { 713 /* File drive letters are only accepted in MSDOS/Windows */ 714 return CURLUE_MALFORMED_INPUT; 715 } 716 #else 717 /* If the path starts with a slash and a drive letter, ditch the slash */ 718 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { 719 /* This cannot be done with strcpy, as the memory chunks overlap! */ 720 memmove(path, &path[1], strlen(&path[1]) + 1); 721 } 722 #endif 723 724 } 725 else { 726 /* clear path */ 727 const char *p; 728 const char *hostp; 729 size_t len; 730 path[0] = 0; 731 732 if(url_has_scheme) { 733 int i = 0; 734 p = &url[schemelen + 1]; 735 while(p && (*p == '/') && (i < 4)) { 736 p++; 737 i++; 738 } 739 if((i < 1) || (i>3)) 740 /* less than one or more than three slashes */ 741 return CURLUE_MALFORMED_INPUT; 742 743 schemep = schemebuf; 744 if(!Curl_builtin_scheme(schemep) && 745 !(flags & CURLU_NON_SUPPORT_SCHEME)) 746 return CURLUE_UNSUPPORTED_SCHEME; 747 748 if(junkscan(schemep)) 749 return CURLUE_MALFORMED_INPUT; 750 } 751 else { 752 /* no scheme! */ 753 754 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) 755 return CURLUE_MALFORMED_INPUT; 756 if(flags & CURLU_DEFAULT_SCHEME) 757 schemep = (char *) DEFAULT_SCHEME; 758 759 /* 760 * The URL was badly formatted, let's try without scheme specified. 761 */ 762 p = url; 763 } 764 hostp = p; /* host name starts here */ 765 766 while(*p && !HOSTNAME_END(*p)) /* find end of host name */ 767 p++; 768 769 len = p - hostp; 770 if(!len) 771 return CURLUE_MALFORMED_INPUT; 772 773 memcpy(hostname, hostp, len); 774 hostname[len] = 0; 775 776 if((flags & CURLU_GUESS_SCHEME) && !schemep) { 777 /* legacy curl-style guess based on host name */ 778 if(checkprefix("ftp.", hostname)) 779 schemep = (char *)"ftp"; 780 else if(checkprefix("dict.", hostname)) 781 schemep = (char *)"dict"; 782 else if(checkprefix("ldap.", hostname)) 783 schemep = (char *)"ldap"; 784 else if(checkprefix("imap.", hostname)) 785 schemep = (char *)"imap"; 786 else if(checkprefix("smtp.", hostname)) 787 schemep = (char *)"smtp"; 788 else if(checkprefix("pop3.", hostname)) 789 schemep = (char *)"pop3"; 790 else 791 schemep = (char *)"http"; 792 } 793 794 len = strlen(p); 795 memcpy(path, p, len); 796 path[len] = 0; 797 798 u->scheme = strdup(schemep); 799 if(!u->scheme) 800 return CURLUE_OUT_OF_MEMORY; 801 } 802 803 /* if this is a known scheme, get some details */ 804 h = Curl_builtin_scheme(u->scheme); 805 806 if(junkscan(path)) 807 return CURLUE_MALFORMED_INPUT; 808 809 query = strchr(path, '?'); 810 if(query) 811 *query++ = 0; 812 813 fragment = strchr(query?query:path, '#'); 814 if(fragment) 815 *fragment++ = 0; 816 817 if(!path[0]) 818 /* if there's no path set, unset */ 819 path = NULL; 820 else if(!(flags & CURLU_PATH_AS_IS)) { 821 /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ 822 char *newp = Curl_dedotdotify(path); 823 if(!newp) 824 return CURLUE_OUT_OF_MEMORY; 825 826 if(strcmp(newp, path)) { 827 /* if we got a new version */ 828 path = newp; 829 path_alloced = TRUE; 830 } 831 else 832 free(newp); 833 } 834 if(path) { 835 u->path = path_alloced?path:strdup(path); 836 if(!u->path) 837 return CURLUE_OUT_OF_MEMORY; 838 } 839 840 if(hostname) { 841 /* 842 * Parse the login details and strip them out of the host name. 843 */ 844 if(junkscan(hostname)) 845 return CURLUE_MALFORMED_INPUT; 846 847 result = parse_hostname_login(u, h, &hostname, flags); 848 if(result) 849 return result; 850 851 result = Curl_parse_port(u, hostname); 852 if(result) 853 return result; 854 855 result = hostname_check(hostname, flags); 856 if(result) 857 return result; 858 859 u->host = strdup(hostname); 860 if(!u->host) 861 return CURLUE_OUT_OF_MEMORY; 862 } 863 864 if(query) { 865 u->query = strdup(query); 866 if(!u->query) 867 return CURLUE_OUT_OF_MEMORY; 868 } 869 if(fragment && fragment[0]) { 870 u->fragment = strdup(fragment); 871 if(!u->fragment) 872 return CURLUE_OUT_OF_MEMORY; 873 } 874 875 free(u->scratch); 876 u->scratch = NULL; 877 878 return CURLUE_OK; 879 } 880 881 /* 882 * Parse the URL and set the relevant members of the Curl_URL struct. 883 */ 884 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) 885 { 886 CURLUcode result = seturl(url, u, flags); 887 if(result) { 888 free_urlhandle(u); 889 memset(u, 0, sizeof(struct Curl_URL)); 890 } 891 return result; 892 } 893 894 /* 895 */ 896 CURLU *curl_url(void) 897 { 898 return calloc(sizeof(struct Curl_URL), 1); 899 } 900 901 void curl_url_cleanup(CURLU *u) 902 { 903 if(u) { 904 free_urlhandle(u); 905 free(u); 906 } 907 } 908 909 #define DUP(dest, src, name) \ 910 if(src->name) { \ 911 dest->name = strdup(src->name); \ 912 if(!dest->name) \ 913 goto fail; \ 914 } 915 916 CURLU *curl_url_dup(CURLU *in) 917 { 918 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1); 919 if(u) { 920 DUP(u, in, scheme); 921 DUP(u, in, user); 922 DUP(u, in, password); 923 DUP(u, in, options); 924 DUP(u, in, host); 925 DUP(u, in, port); 926 DUP(u, in, path); 927 DUP(u, in, query); 928 DUP(u, in, fragment); 929 u->portnum = in->portnum; 930 } 931 return u; 932 fail: 933 curl_url_cleanup(u); 934 return NULL; 935 } 936 937 CURLUcode curl_url_get(CURLU *u, CURLUPart what, 938 char **part, unsigned int flags) 939 { 940 char *ptr; 941 CURLUcode ifmissing = CURLUE_UNKNOWN_PART; 942 char portbuf[7]; 943 bool urldecode = (flags & CURLU_URLDECODE)?1:0; 944 bool plusdecode = FALSE; 945 (void)flags; 946 if(!u) 947 return CURLUE_BAD_HANDLE; 948 if(!part) 949 return CURLUE_BAD_PARTPOINTER; 950 *part = NULL; 951 952 switch(what) { 953 case CURLUPART_SCHEME: 954 ptr = u->scheme; 955 ifmissing = CURLUE_NO_SCHEME; 956 urldecode = FALSE; /* never for schemes */ 957 break; 958 case CURLUPART_USER: 959 ptr = u->user; 960 ifmissing = CURLUE_NO_USER; 961 break; 962 case CURLUPART_PASSWORD: 963 ptr = u->password; 964 ifmissing = CURLUE_NO_PASSWORD; 965 break; 966 case CURLUPART_OPTIONS: 967 ptr = u->options; 968 ifmissing = CURLUE_NO_OPTIONS; 969 break; 970 case CURLUPART_HOST: 971 ptr = u->host; 972 ifmissing = CURLUE_NO_HOST; 973 break; 974 case CURLUPART_PORT: 975 ptr = u->port; 976 ifmissing = CURLUE_NO_PORT; 977 urldecode = FALSE; /* never for port */ 978 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { 979 /* there's no stored port number, but asked to deliver 980 a default one for the scheme */ 981 const struct Curl_handler *h = 982 Curl_builtin_scheme(u->scheme); 983 if(h) { 984 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); 985 ptr = portbuf; 986 } 987 } 988 else if(ptr && u->scheme) { 989 /* there is a stored port number, but ask to inhibit if 990 it matches the default one for the scheme */ 991 const struct Curl_handler *h = 992 Curl_builtin_scheme(u->scheme); 993 if(h && (h->defport == u->portnum) && 994 (flags & CURLU_NO_DEFAULT_PORT)) 995 ptr = NULL; 996 } 997 break; 998 case CURLUPART_PATH: 999 ptr = u->path; 1000 if(!ptr) { 1001 ptr = u->path = strdup("/"); 1002 if(!u->path) 1003 return CURLUE_OUT_OF_MEMORY; 1004 } 1005 break; 1006 case CURLUPART_QUERY: 1007 ptr = u->query; 1008 ifmissing = CURLUE_NO_QUERY; 1009 plusdecode = urldecode; 1010 break; 1011 case CURLUPART_FRAGMENT: 1012 ptr = u->fragment; 1013 ifmissing = CURLUE_NO_FRAGMENT; 1014 break; 1015 case CURLUPART_URL: { 1016 char *url; 1017 char *scheme; 1018 char *options = u->options; 1019 char *port = u->port; 1020 if(u->scheme && strcasecompare("file", u->scheme)) { 1021 url = aprintf("file://%s%s%s", 1022 u->path, 1023 u->fragment? "#": "", 1024 u->fragment? u->fragment : ""); 1025 } 1026 else if(!u->host) 1027 return CURLUE_NO_HOST; 1028 else { 1029 const struct Curl_handler *h = NULL; 1030 if(u->scheme) 1031 scheme = u->scheme; 1032 else if(flags & CURLU_DEFAULT_SCHEME) 1033 scheme = (char *) DEFAULT_SCHEME; 1034 else 1035 return CURLUE_NO_SCHEME; 1036 1037 if(scheme) { 1038 h = Curl_builtin_scheme(scheme); 1039 if(!port && (flags & CURLU_DEFAULT_PORT)) { 1040 /* there's no stored port number, but asked to deliver 1041 a default one for the scheme */ 1042 if(h) { 1043 msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport); 1044 port = portbuf; 1045 } 1046 } 1047 else if(port) { 1048 /* there is a stored port number, but asked to inhibit if it matches 1049 the default one for the scheme */ 1050 if(h && (h->defport == u->portnum) && 1051 (flags & CURLU_NO_DEFAULT_PORT)) 1052 port = NULL; 1053 } 1054 } 1055 if(h && !(h->flags & PROTOPT_URLOPTIONS)) 1056 options = NULL; 1057 1058 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1059 scheme, 1060 u->user ? u->user : "", 1061 u->password ? ":": "", 1062 u->password ? u->password : "", 1063 options ? ";" : "", 1064 options ? options : "", 1065 (u->user || u->password || options) ? "@": "", 1066 u->host, 1067 port ? ":": "", 1068 port ? port : "", 1069 (u->path && (u->path[0] != '/')) ? "/": "", 1070 u->path ? u->path : "/", 1071 (u->query && u->query[0]) ? "?": "", 1072 (u->query && u->query[0]) ? u->query : "", 1073 u->fragment? "#": "", 1074 u->fragment? u->fragment : ""); 1075 } 1076 if(!url) 1077 return CURLUE_OUT_OF_MEMORY; 1078 *part = url; 1079 return CURLUE_OK; 1080 } 1081 default: 1082 ptr = NULL; 1083 break; 1084 } 1085 if(ptr) { 1086 *part = strdup(ptr); 1087 if(!*part) 1088 return CURLUE_OUT_OF_MEMORY; 1089 if(plusdecode) { 1090 /* convert + to space */ 1091 char *plus; 1092 for(plus = *part; *plus; ++plus) { 1093 if(*plus == '+') 1094 *plus = ' '; 1095 } 1096 } 1097 if(urldecode) { 1098 char *decoded; 1099 size_t dlen; 1100 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE); 1101 free(*part); 1102 if(res) { 1103 *part = NULL; 1104 return CURLUE_URLDECODE; 1105 } 1106 *part = decoded; 1107 } 1108 return CURLUE_OK; 1109 } 1110 else 1111 return ifmissing; 1112 } 1113 1114 CURLUcode curl_url_set(CURLU *u, CURLUPart what, 1115 const char *part, unsigned int flags) 1116 { 1117 char **storep = NULL; 1118 long port = 0; 1119 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; 1120 bool plusencode = FALSE; 1121 bool urlskipslash = FALSE; 1122 bool appendquery = FALSE; 1123 bool equalsencode = FALSE; 1124 1125 if(!u) 1126 return CURLUE_BAD_HANDLE; 1127 if(!part) { 1128 /* setting a part to NULL clears it */ 1129 switch(what) { 1130 case CURLUPART_URL: 1131 break; 1132 case CURLUPART_SCHEME: 1133 storep = &u->scheme; 1134 break; 1135 case CURLUPART_USER: 1136 storep = &u->user; 1137 break; 1138 case CURLUPART_PASSWORD: 1139 storep = &u->password; 1140 break; 1141 case CURLUPART_OPTIONS: 1142 storep = &u->options; 1143 break; 1144 case CURLUPART_HOST: 1145 storep = &u->host; 1146 break; 1147 case CURLUPART_PORT: 1148 storep = &u->port; 1149 break; 1150 case CURLUPART_PATH: 1151 storep = &u->path; 1152 break; 1153 case CURLUPART_QUERY: 1154 storep = &u->query; 1155 break; 1156 case CURLUPART_FRAGMENT: 1157 storep = &u->fragment; 1158 break; 1159 default: 1160 return CURLUE_UNKNOWN_PART; 1161 } 1162 if(storep && *storep) { 1163 free(*storep); 1164 *storep = NULL; 1165 } 1166 return CURLUE_OK; 1167 } 1168 1169 switch(what) { 1170 case CURLUPART_SCHEME: 1171 if(!(flags & CURLU_NON_SUPPORT_SCHEME) && 1172 /* verify that it is a fine scheme */ 1173 !Curl_builtin_scheme(part)) 1174 return CURLUE_UNSUPPORTED_SCHEME; 1175 storep = &u->scheme; 1176 urlencode = FALSE; /* never */ 1177 break; 1178 case CURLUPART_USER: 1179 storep = &u->user; 1180 break; 1181 case CURLUPART_PASSWORD: 1182 storep = &u->password; 1183 break; 1184 case CURLUPART_OPTIONS: 1185 storep = &u->options; 1186 break; 1187 case CURLUPART_HOST: 1188 storep = &u->host; 1189 break; 1190 case CURLUPART_PORT: 1191 urlencode = FALSE; /* never */ 1192 port = strtol(part, NULL, 10); /* Port number must be decimal */ 1193 if((port <= 0) || (port > 0xffff)) 1194 return CURLUE_BAD_PORT_NUMBER; 1195 storep = &u->port; 1196 break; 1197 case CURLUPART_PATH: 1198 urlskipslash = TRUE; 1199 storep = &u->path; 1200 break; 1201 case CURLUPART_QUERY: 1202 plusencode = urlencode; 1203 appendquery = (flags & CURLU_APPENDQUERY)?1:0; 1204 equalsencode = appendquery; 1205 storep = &u->query; 1206 break; 1207 case CURLUPART_FRAGMENT: 1208 storep = &u->fragment; 1209 break; 1210 case CURLUPART_URL: { 1211 /* 1212 * Allow a new URL to replace the existing (if any) contents. 1213 * 1214 * If the existing contents is enough for a URL, allow a relative URL to 1215 * replace it. 1216 */ 1217 CURLUcode result; 1218 char *oldurl; 1219 char *redired_url; 1220 CURLU *handle2; 1221 1222 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) { 1223 handle2 = curl_url(); 1224 if(!handle2) 1225 return CURLUE_OUT_OF_MEMORY; 1226 result = parseurl(part, handle2, flags); 1227 if(!result) 1228 mv_urlhandle(handle2, u); 1229 else 1230 curl_url_cleanup(handle2); 1231 return result; 1232 } 1233 /* extract the full "old" URL to do the redirect on */ 1234 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags); 1235 if(result) { 1236 /* couldn't get the old URL, just use the new! */ 1237 handle2 = curl_url(); 1238 if(!handle2) 1239 return CURLUE_OUT_OF_MEMORY; 1240 result = parseurl(part, handle2, flags); 1241 if(!result) 1242 mv_urlhandle(handle2, u); 1243 else 1244 curl_url_cleanup(handle2); 1245 return result; 1246 } 1247 1248 /* apply the relative part to create a new URL */ 1249 redired_url = concat_url(oldurl, part); 1250 free(oldurl); 1251 if(!redired_url) 1252 return CURLUE_OUT_OF_MEMORY; 1253 1254 /* now parse the new URL */ 1255 handle2 = curl_url(); 1256 if(!handle2) { 1257 free(redired_url); 1258 return CURLUE_OUT_OF_MEMORY; 1259 } 1260 result = parseurl(redired_url, handle2, flags); 1261 free(redired_url); 1262 if(!result) 1263 mv_urlhandle(handle2, u); 1264 else 1265 curl_url_cleanup(handle2); 1266 return result; 1267 } 1268 default: 1269 return CURLUE_UNKNOWN_PART; 1270 } 1271 if(storep) { 1272 const char *newp = part; 1273 size_t nalloc = strlen(part); 1274 1275 if(urlencode) { 1276 const char *i; 1277 char *o; 1278 bool free_part = FALSE; 1279 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */ 1280 if(!enc) 1281 return CURLUE_OUT_OF_MEMORY; 1282 if(plusencode) { 1283 /* space to plus */ 1284 i = part; 1285 for(o = enc; *i; ++o, ++i) 1286 *o = (*i == ' ') ? '+' : *i; 1287 *o = 0; /* zero terminate */ 1288 part = strdup(enc); 1289 if(!part) { 1290 free(enc); 1291 return CURLUE_OUT_OF_MEMORY; 1292 } 1293 free_part = TRUE; 1294 } 1295 for(i = part, o = enc; *i; i++) { 1296 if(Curl_isunreserved(*i) || 1297 ((*i == '/') && urlskipslash) || 1298 ((*i == '=') && equalsencode) || 1299 ((*i == '+') && plusencode)) { 1300 if((*i == '=') && equalsencode) 1301 /* only skip the first equals sign */ 1302 equalsencode = FALSE; 1303 *o = *i; 1304 o++; 1305 } 1306 else { 1307 msnprintf(o, 4, "%%%02x", *i); 1308 o += 3; 1309 } 1310 } 1311 *o = 0; /* zero terminate */ 1312 newp = enc; 1313 if(free_part) 1314 free((char *)part); 1315 } 1316 else { 1317 char *p; 1318 newp = strdup(part); 1319 if(!newp) 1320 return CURLUE_OUT_OF_MEMORY; 1321 p = (char *)newp; 1322 while(*p) { 1323 /* make sure percent encoded are lower case */ 1324 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && 1325 (ISUPPER(p[1]) || ISUPPER(p[2]))) { 1326 p[1] = (char)TOLOWER(p[1]); 1327 p[2] = (char)TOLOWER(p[2]); 1328 p += 3; 1329 } 1330 else 1331 p++; 1332 } 1333 } 1334 1335 if(appendquery) { 1336 /* Append the string onto the old query. Add a '&' separator if none is 1337 present at the end of the exsting query already */ 1338 size_t querylen = u->query ? strlen(u->query) : 0; 1339 bool addamperand = querylen && (u->query[querylen -1] != '&'); 1340 if(querylen) { 1341 size_t newplen = strlen(newp); 1342 char *p = malloc(querylen + addamperand + newplen + 1); 1343 if(!p) { 1344 free((char *)newp); 1345 return CURLUE_OUT_OF_MEMORY; 1346 } 1347 strcpy(p, u->query); /* original query */ 1348 if(addamperand) 1349 p[querylen] = '&'; /* ampersand */ 1350 strcpy(&p[querylen + addamperand], newp); /* new suffix */ 1351 free((char *)newp); 1352 free(*storep); 1353 *storep = p; 1354 return CURLUE_OK; 1355 } 1356 } 1357 1358 free(*storep); 1359 *storep = (char *)newp; 1360 } 1361 /* set after the string, to make it not assigned if the allocation above 1362 fails */ 1363 if(port) 1364 u->portnum = port; 1365 return CURLUE_OK; 1366 } 1367