1 /** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 3986, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel (at) veillard.com 9 */ 10 11 #define IN_LIBXML 12 #include "libxml.h" 13 14 #include <string.h> 15 16 #include <libxml/xmlmemory.h> 17 #include <libxml/uri.h> 18 #include <libxml/globals.h> 19 #include <libxml/xmlerror.h> 20 21 static void xmlCleanURI(xmlURIPtr uri); 22 23 /* 24 * Old rule from 2396 used in legacy handling code 25 * alpha = lowalpha | upalpha 26 */ 27 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 28 29 30 /* 31 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 32 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 33 * "u" | "v" | "w" | "x" | "y" | "z" 34 */ 35 36 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 37 38 /* 39 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 40 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 41 * "U" | "V" | "W" | "X" | "Y" | "Z" 42 */ 43 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 44 45 #ifdef IS_DIGIT 46 #undef IS_DIGIT 47 #endif 48 /* 49 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 50 */ 51 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 52 53 /* 54 * alphanum = alpha | digit 55 */ 56 57 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 58 59 /* 60 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 61 */ 62 63 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 64 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 65 ((x) == '(') || ((x) == ')')) 66 67 /* 68 * unwise = "{" | "}" | "|" | "\" | "^" | "`" 69 */ 70 71 #define IS_UNWISE(p) \ 72 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 73 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 74 ((*(p) == ']')) || ((*(p) == '`'))) 75 /* 76 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 77 * "[" | "]" 78 */ 79 80 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 81 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 82 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 83 ((x) == ']')) 84 85 /* 86 * unreserved = alphanum | mark 87 */ 88 89 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 90 91 /* 92 * Skip to next pointer char, handle escaped sequences 93 */ 94 95 #define NEXT(p) ((*p == '%')? p += 3 : p++) 96 97 /* 98 * Productions from the spec. 99 * 100 * authority = server | reg_name 101 * reg_name = 1*( unreserved | escaped | "$" | "," | 102 * ";" | ":" | "@" | "&" | "=" | "+" ) 103 * 104 * path = [ abs_path | opaque_part ] 105 */ 106 107 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 108 109 /************************************************************************ 110 * * 111 * RFC 3986 parser * 112 * * 113 ************************************************************************/ 114 115 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 116 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 117 ((*(p) >= 'A') && (*(p) <= 'Z'))) 118 #define ISA_HEXDIG(p) \ 119 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 120 ((*(p) >= 'A') && (*(p) <= 'F'))) 121 122 /* 123 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 124 * / "*" / "+" / "," / ";" / "=" 125 */ 126 #define ISA_SUB_DELIM(p) \ 127 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 128 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 129 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 130 ((*(p) == '='))) 131 132 /* 133 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 134 */ 135 #define ISA_GEN_DELIM(p) \ 136 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 137 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 138 ((*(p) == '@'))) 139 140 /* 141 * reserved = gen-delims / sub-delims 142 */ 143 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 144 145 /* 146 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 147 */ 148 #define ISA_UNRESERVED(p) \ 149 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 150 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 151 152 /* 153 * pct-encoded = "%" HEXDIG HEXDIG 154 */ 155 #define ISA_PCT_ENCODED(p) \ 156 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 157 158 /* 159 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 160 */ 161 #define ISA_PCHAR(p) \ 162 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 163 ((*(p) == ':')) || ((*(p) == '@'))) 164 165 /** 166 * xmlParse3986Scheme: 167 * @uri: pointer to an URI structure 168 * @str: pointer to the string to analyze 169 * 170 * Parse an URI scheme 171 * 172 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 173 * 174 * Returns 0 or the error code 175 */ 176 static int 177 xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 178 const char *cur; 179 180 if (str == NULL) 181 return(-1); 182 183 cur = *str; 184 if (!ISA_ALPHA(cur)) 185 return(2); 186 cur++; 187 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 188 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 189 if (uri != NULL) { 190 if (uri->scheme != NULL) xmlFree(uri->scheme); 191 uri->scheme = STRNDUP(*str, cur - *str); 192 } 193 *str = cur; 194 return(0); 195 } 196 197 /** 198 * xmlParse3986Fragment: 199 * @uri: pointer to an URI structure 200 * @str: pointer to the string to analyze 201 * 202 * Parse the query part of an URI 203 * 204 * fragment = *( pchar / "/" / "?" ) 205 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 206 * in the fragment identifier but this is used very broadly for 207 * xpointer scheme selection, so we are allowing it here to not break 208 * for example all the DocBook processing chains. 209 * 210 * Returns 0 or the error code 211 */ 212 static int 213 xmlParse3986Fragment(xmlURIPtr uri, const char **str) 214 { 215 const char *cur; 216 217 if (str == NULL) 218 return (-1); 219 220 cur = *str; 221 222 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 223 (*cur == '[') || (*cur == ']') || 224 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 225 NEXT(cur); 226 if (uri != NULL) { 227 if (uri->fragment != NULL) 228 xmlFree(uri->fragment); 229 if (uri->cleanup & 2) 230 uri->fragment = STRNDUP(*str, cur - *str); 231 else 232 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 233 } 234 *str = cur; 235 return (0); 236 } 237 238 /** 239 * xmlParse3986Query: 240 * @uri: pointer to an URI structure 241 * @str: pointer to the string to analyze 242 * 243 * Parse the query part of an URI 244 * 245 * query = *uric 246 * 247 * Returns 0 or the error code 248 */ 249 static int 250 xmlParse3986Query(xmlURIPtr uri, const char **str) 251 { 252 const char *cur; 253 254 if (str == NULL) 255 return (-1); 256 257 cur = *str; 258 259 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 260 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 261 NEXT(cur); 262 if (uri != NULL) { 263 if (uri->query != NULL) 264 xmlFree(uri->query); 265 if (uri->cleanup & 2) 266 uri->query = STRNDUP(*str, cur - *str); 267 else 268 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 269 270 /* Save the raw bytes of the query as well. 271 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 272 */ 273 if (uri->query_raw != NULL) 274 xmlFree (uri->query_raw); 275 uri->query_raw = STRNDUP (*str, cur - *str); 276 } 277 *str = cur; 278 return (0); 279 } 280 281 /** 282 * xmlParse3986Port: 283 * @uri: pointer to an URI structure 284 * @str: the string to analyze 285 * 286 * Parse a port part and fills in the appropriate fields 287 * of the @uri structure 288 * 289 * port = *DIGIT 290 * 291 * Returns 0 or the error code 292 */ 293 static int 294 xmlParse3986Port(xmlURIPtr uri, const char **str) 295 { 296 const char *cur = *str; 297 298 if (ISA_DIGIT(cur)) { 299 if (uri != NULL) 300 uri->port = 0; 301 while (ISA_DIGIT(cur)) { 302 if (uri != NULL) 303 uri->port = uri->port * 10 + (*cur - '0'); 304 cur++; 305 } 306 *str = cur; 307 return(0); 308 } 309 return(1); 310 } 311 312 /** 313 * xmlParse3986Userinfo: 314 * @uri: pointer to an URI structure 315 * @str: the string to analyze 316 * 317 * Parse an user informations part and fills in the appropriate fields 318 * of the @uri structure 319 * 320 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 321 * 322 * Returns 0 or the error code 323 */ 324 static int 325 xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 326 { 327 const char *cur; 328 329 cur = *str; 330 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 331 ISA_SUB_DELIM(cur) || (*cur == ':')) 332 NEXT(cur); 333 if (*cur == '@') { 334 if (uri != NULL) { 335 if (uri->user != NULL) xmlFree(uri->user); 336 if (uri->cleanup & 2) 337 uri->user = STRNDUP(*str, cur - *str); 338 else 339 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 340 } 341 *str = cur; 342 return(0); 343 } 344 return(1); 345 } 346 347 /** 348 * xmlParse3986DecOctet: 349 * @str: the string to analyze 350 * 351 * dec-octet = DIGIT ; 0-9 352 * / %x31-39 DIGIT ; 10-99 353 * / "1" 2DIGIT ; 100-199 354 * / "2" %x30-34 DIGIT ; 200-249 355 * / "25" %x30-35 ; 250-255 356 * 357 * Skip a dec-octet. 358 * 359 * Returns 0 if found and skipped, 1 otherwise 360 */ 361 static int 362 xmlParse3986DecOctet(const char **str) { 363 const char *cur = *str; 364 365 if (!(ISA_DIGIT(cur))) 366 return(1); 367 if (!ISA_DIGIT(cur+1)) 368 cur++; 369 else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) 370 cur += 2; 371 else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) 372 cur += 3; 373 else if ((*cur == '2') && (*(cur + 1) >= '0') && 374 (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) 375 cur += 3; 376 else if ((*cur == '2') && (*(cur + 1) == '5') && 377 (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) 378 cur += 3; 379 else 380 return(1); 381 *str = cur; 382 return(0); 383 } 384 /** 385 * xmlParse3986Host: 386 * @uri: pointer to an URI structure 387 * @str: the string to analyze 388 * 389 * Parse an host part and fills in the appropriate fields 390 * of the @uri structure 391 * 392 * host = IP-literal / IPv4address / reg-name 393 * IP-literal = "[" ( IPv6address / IPvFuture ) "]" 394 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 395 * reg-name = *( unreserved / pct-encoded / sub-delims ) 396 * 397 * Returns 0 or the error code 398 */ 399 static int 400 xmlParse3986Host(xmlURIPtr uri, const char **str) 401 { 402 const char *cur = *str; 403 const char *host; 404 405 host = cur; 406 /* 407 * IPv6 and future adressing scheme are enclosed between brackets 408 */ 409 if (*cur == '[') { 410 cur++; 411 while ((*cur != ']') && (*cur != 0)) 412 cur++; 413 if (*cur != ']') 414 return(1); 415 cur++; 416 goto found; 417 } 418 /* 419 * try to parse an IPv4 420 */ 421 if (ISA_DIGIT(cur)) { 422 if (xmlParse3986DecOctet(&cur) != 0) 423 goto not_ipv4; 424 if (*cur != '.') 425 goto not_ipv4; 426 cur++; 427 if (xmlParse3986DecOctet(&cur) != 0) 428 goto not_ipv4; 429 if (*cur != '.') 430 goto not_ipv4; 431 if (xmlParse3986DecOctet(&cur) != 0) 432 goto not_ipv4; 433 if (*cur != '.') 434 goto not_ipv4; 435 if (xmlParse3986DecOctet(&cur) != 0) 436 goto not_ipv4; 437 goto found; 438 not_ipv4: 439 cur = *str; 440 } 441 /* 442 * then this should be a hostname which can be empty 443 */ 444 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 445 NEXT(cur); 446 found: 447 if (uri != NULL) { 448 if (uri->authority != NULL) xmlFree(uri->authority); 449 uri->authority = NULL; 450 if (uri->server != NULL) xmlFree(uri->server); 451 if (cur != host) { 452 if (uri->cleanup & 2) 453 uri->server = STRNDUP(host, cur - host); 454 else 455 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 456 } else 457 uri->server = NULL; 458 } 459 *str = cur; 460 return(0); 461 } 462 463 /** 464 * xmlParse3986Authority: 465 * @uri: pointer to an URI structure 466 * @str: the string to analyze 467 * 468 * Parse an authority part and fills in the appropriate fields 469 * of the @uri structure 470 * 471 * authority = [ userinfo "@" ] host [ ":" port ] 472 * 473 * Returns 0 or the error code 474 */ 475 static int 476 xmlParse3986Authority(xmlURIPtr uri, const char **str) 477 { 478 const char *cur; 479 int ret; 480 481 cur = *str; 482 /* 483 * try to parse an userinfo and check for the trailing @ 484 */ 485 ret = xmlParse3986Userinfo(uri, &cur); 486 if ((ret != 0) || (*cur != '@')) 487 cur = *str; 488 else 489 cur++; 490 ret = xmlParse3986Host(uri, &cur); 491 if (ret != 0) return(ret); 492 if (*cur == ':') { 493 cur++; 494 ret = xmlParse3986Port(uri, &cur); 495 if (ret != 0) return(ret); 496 } 497 *str = cur; 498 return(0); 499 } 500 501 /** 502 * xmlParse3986Segment: 503 * @str: the string to analyze 504 * @forbid: an optional forbidden character 505 * @empty: allow an empty segment 506 * 507 * Parse a segment and fills in the appropriate fields 508 * of the @uri structure 509 * 510 * segment = *pchar 511 * segment-nz = 1*pchar 512 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 513 * ; non-zero-length segment without any colon ":" 514 * 515 * Returns 0 or the error code 516 */ 517 static int 518 xmlParse3986Segment(const char **str, char forbid, int empty) 519 { 520 const char *cur; 521 522 cur = *str; 523 if (!ISA_PCHAR(cur)) { 524 if (empty) 525 return(0); 526 return(1); 527 } 528 while (ISA_PCHAR(cur) && (*cur != forbid)) 529 NEXT(cur); 530 *str = cur; 531 return (0); 532 } 533 534 /** 535 * xmlParse3986PathAbEmpty: 536 * @uri: pointer to an URI structure 537 * @str: the string to analyze 538 * 539 * Parse an path absolute or empty and fills in the appropriate fields 540 * of the @uri structure 541 * 542 * path-abempty = *( "/" segment ) 543 * 544 * Returns 0 or the error code 545 */ 546 static int 547 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 548 { 549 const char *cur; 550 int ret; 551 552 cur = *str; 553 554 while (*cur == '/') { 555 cur++; 556 ret = xmlParse3986Segment(&cur, 0, 1); 557 if (ret != 0) return(ret); 558 } 559 if (uri != NULL) { 560 if (uri->path != NULL) xmlFree(uri->path); 561 if (uri->cleanup & 2) 562 uri->path = STRNDUP(*str, cur - *str); 563 else 564 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 565 } 566 *str = cur; 567 return (0); 568 } 569 570 /** 571 * xmlParse3986PathAbsolute: 572 * @uri: pointer to an URI structure 573 * @str: the string to analyze 574 * 575 * Parse an path absolute and fills in the appropriate fields 576 * of the @uri structure 577 * 578 * path-absolute = "/" [ segment-nz *( "/" segment ) ] 579 * 580 * Returns 0 or the error code 581 */ 582 static int 583 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 584 { 585 const char *cur; 586 int ret; 587 588 cur = *str; 589 590 if (*cur != '/') 591 return(1); 592 cur++; 593 ret = xmlParse3986Segment(&cur, 0, 0); 594 if (ret == 0) { 595 while (*cur == '/') { 596 cur++; 597 ret = xmlParse3986Segment(&cur, 0, 1); 598 if (ret != 0) return(ret); 599 } 600 } 601 if (uri != NULL) { 602 if (uri->path != NULL) xmlFree(uri->path); 603 if (uri->cleanup & 2) 604 uri->path = STRNDUP(*str, cur - *str); 605 else 606 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 607 } 608 *str = cur; 609 return (0); 610 } 611 612 /** 613 * xmlParse3986PathRootless: 614 * @uri: pointer to an URI structure 615 * @str: the string to analyze 616 * 617 * Parse an path without root and fills in the appropriate fields 618 * of the @uri structure 619 * 620 * path-rootless = segment-nz *( "/" segment ) 621 * 622 * Returns 0 or the error code 623 */ 624 static int 625 xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 626 { 627 const char *cur; 628 int ret; 629 630 cur = *str; 631 632 ret = xmlParse3986Segment(&cur, 0, 0); 633 if (ret != 0) return(ret); 634 while (*cur == '/') { 635 cur++; 636 ret = xmlParse3986Segment(&cur, 0, 1); 637 if (ret != 0) return(ret); 638 } 639 if (uri != NULL) { 640 if (uri->path != NULL) xmlFree(uri->path); 641 if (uri->cleanup & 2) 642 uri->path = STRNDUP(*str, cur - *str); 643 else 644 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 645 } 646 *str = cur; 647 return (0); 648 } 649 650 /** 651 * xmlParse3986PathNoScheme: 652 * @uri: pointer to an URI structure 653 * @str: the string to analyze 654 * 655 * Parse an path which is not a scheme and fills in the appropriate fields 656 * of the @uri structure 657 * 658 * path-noscheme = segment-nz-nc *( "/" segment ) 659 * 660 * Returns 0 or the error code 661 */ 662 static int 663 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 664 { 665 const char *cur; 666 int ret; 667 668 cur = *str; 669 670 ret = xmlParse3986Segment(&cur, ':', 0); 671 if (ret != 0) return(ret); 672 while (*cur == '/') { 673 cur++; 674 ret = xmlParse3986Segment(&cur, 0, 1); 675 if (ret != 0) return(ret); 676 } 677 if (uri != NULL) { 678 if (uri->path != NULL) xmlFree(uri->path); 679 if (uri->cleanup & 2) 680 uri->path = STRNDUP(*str, cur - *str); 681 else 682 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 683 } 684 *str = cur; 685 return (0); 686 } 687 688 /** 689 * xmlParse3986HierPart: 690 * @uri: pointer to an URI structure 691 * @str: the string to analyze 692 * 693 * Parse an hierarchical part and fills in the appropriate fields 694 * of the @uri structure 695 * 696 * hier-part = "//" authority path-abempty 697 * / path-absolute 698 * / path-rootless 699 * / path-empty 700 * 701 * Returns 0 or the error code 702 */ 703 static int 704 xmlParse3986HierPart(xmlURIPtr uri, const char **str) 705 { 706 const char *cur; 707 int ret; 708 709 cur = *str; 710 711 if ((*cur == '/') && (*(cur + 1) == '/')) { 712 cur += 2; 713 ret = xmlParse3986Authority(uri, &cur); 714 if (ret != 0) return(ret); 715 ret = xmlParse3986PathAbEmpty(uri, &cur); 716 if (ret != 0) return(ret); 717 *str = cur; 718 return(0); 719 } else if (*cur == '/') { 720 ret = xmlParse3986PathAbsolute(uri, &cur); 721 if (ret != 0) return(ret); 722 } else if (ISA_PCHAR(cur)) { 723 ret = xmlParse3986PathRootless(uri, &cur); 724 if (ret != 0) return(ret); 725 } else { 726 /* path-empty is effectively empty */ 727 if (uri != NULL) { 728 if (uri->path != NULL) xmlFree(uri->path); 729 uri->path = NULL; 730 } 731 } 732 *str = cur; 733 return (0); 734 } 735 736 /** 737 * xmlParse3986RelativeRef: 738 * @uri: pointer to an URI structure 739 * @str: the string to analyze 740 * 741 * Parse an URI string and fills in the appropriate fields 742 * of the @uri structure 743 * 744 * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 745 * relative-part = "//" authority path-abempty 746 * / path-absolute 747 * / path-noscheme 748 * / path-empty 749 * 750 * Returns 0 or the error code 751 */ 752 static int 753 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 754 int ret; 755 756 if ((*str == '/') && (*(str + 1) == '/')) { 757 str += 2; 758 ret = xmlParse3986Authority(uri, &str); 759 if (ret != 0) return(ret); 760 ret = xmlParse3986PathAbEmpty(uri, &str); 761 if (ret != 0) return(ret); 762 } else if (*str == '/') { 763 ret = xmlParse3986PathAbsolute(uri, &str); 764 if (ret != 0) return(ret); 765 } else if (ISA_PCHAR(str)) { 766 ret = xmlParse3986PathNoScheme(uri, &str); 767 if (ret != 0) return(ret); 768 } else { 769 /* path-empty is effectively empty */ 770 if (uri != NULL) { 771 if (uri->path != NULL) xmlFree(uri->path); 772 uri->path = NULL; 773 } 774 } 775 776 if (*str == '?') { 777 str++; 778 ret = xmlParse3986Query(uri, &str); 779 if (ret != 0) return(ret); 780 } 781 if (*str == '#') { 782 str++; 783 ret = xmlParse3986Fragment(uri, &str); 784 if (ret != 0) return(ret); 785 } 786 if (*str != 0) { 787 xmlCleanURI(uri); 788 return(1); 789 } 790 return(0); 791 } 792 793 794 /** 795 * xmlParse3986URI: 796 * @uri: pointer to an URI structure 797 * @str: the string to analyze 798 * 799 * Parse an URI string and fills in the appropriate fields 800 * of the @uri structure 801 * 802 * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 803 * 804 * Returns 0 or the error code 805 */ 806 static int 807 xmlParse3986URI(xmlURIPtr uri, const char *str) { 808 int ret; 809 810 ret = xmlParse3986Scheme(uri, &str); 811 if (ret != 0) return(ret); 812 if (*str != ':') { 813 return(1); 814 } 815 str++; 816 ret = xmlParse3986HierPart(uri, &str); 817 if (ret != 0) return(ret); 818 if (*str == '?') { 819 str++; 820 ret = xmlParse3986Query(uri, &str); 821 if (ret != 0) return(ret); 822 } 823 if (*str == '#') { 824 str++; 825 ret = xmlParse3986Fragment(uri, &str); 826 if (ret != 0) return(ret); 827 } 828 if (*str != 0) { 829 xmlCleanURI(uri); 830 return(1); 831 } 832 return(0); 833 } 834 835 /** 836 * xmlParse3986URIReference: 837 * @uri: pointer to an URI structure 838 * @str: the string to analyze 839 * 840 * Parse an URI reference string and fills in the appropriate fields 841 * of the @uri structure 842 * 843 * URI-reference = URI / relative-ref 844 * 845 * Returns 0 or the error code 846 */ 847 static int 848 xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 849 int ret; 850 851 if (str == NULL) 852 return(-1); 853 xmlCleanURI(uri); 854 855 /* 856 * Try first to parse absolute refs, then fallback to relative if 857 * it fails. 858 */ 859 ret = xmlParse3986URI(uri, str); 860 if (ret != 0) { 861 xmlCleanURI(uri); 862 ret = xmlParse3986RelativeRef(uri, str); 863 if (ret != 0) { 864 xmlCleanURI(uri); 865 return(ret); 866 } 867 } 868 return(0); 869 } 870 871 /** 872 * xmlParseURI: 873 * @str: the URI string to analyze 874 * 875 * Parse an URI based on RFC 3986 876 * 877 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 878 * 879 * Returns a newly built xmlURIPtr or NULL in case of error 880 */ 881 xmlURIPtr 882 xmlParseURI(const char *str) { 883 xmlURIPtr uri; 884 int ret; 885 886 if (str == NULL) 887 return(NULL); 888 uri = xmlCreateURI(); 889 if (uri != NULL) { 890 ret = xmlParse3986URIReference(uri, str); 891 if (ret) { 892 xmlFreeURI(uri); 893 return(NULL); 894 } 895 } 896 return(uri); 897 } 898 899 /** 900 * xmlParseURIReference: 901 * @uri: pointer to an URI structure 902 * @str: the string to analyze 903 * 904 * Parse an URI reference string based on RFC 3986 and fills in the 905 * appropriate fields of the @uri structure 906 * 907 * URI-reference = URI / relative-ref 908 * 909 * Returns 0 or the error code 910 */ 911 int 912 xmlParseURIReference(xmlURIPtr uri, const char *str) { 913 return(xmlParse3986URIReference(uri, str)); 914 } 915 916 /** 917 * xmlParseURIRaw: 918 * @str: the URI string to analyze 919 * @raw: if 1 unescaping of URI pieces are disabled 920 * 921 * Parse an URI but allows to keep intact the original fragments. 922 * 923 * URI-reference = URI / relative-ref 924 * 925 * Returns a newly built xmlURIPtr or NULL in case of error 926 */ 927 xmlURIPtr 928 xmlParseURIRaw(const char *str, int raw) { 929 xmlURIPtr uri; 930 int ret; 931 932 if (str == NULL) 933 return(NULL); 934 uri = xmlCreateURI(); 935 if (uri != NULL) { 936 if (raw) { 937 uri->cleanup |= 2; 938 } 939 ret = xmlParseURIReference(uri, str); 940 if (ret) { 941 xmlFreeURI(uri); 942 return(NULL); 943 } 944 } 945 return(uri); 946 } 947 948 /************************************************************************ 949 * * 950 * Generic URI structure functions * 951 * * 952 ************************************************************************/ 953 954 /** 955 * xmlCreateURI: 956 * 957 * Simply creates an empty xmlURI 958 * 959 * Returns the new structure or NULL in case of error 960 */ 961 xmlURIPtr 962 xmlCreateURI(void) { 963 xmlURIPtr ret; 964 965 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 966 if (ret == NULL) { 967 xmlGenericError(xmlGenericErrorContext, 968 "xmlCreateURI: out of memory\n"); 969 return(NULL); 970 } 971 memset(ret, 0, sizeof(xmlURI)); 972 return(ret); 973 } 974 975 /** 976 * xmlSaveUri: 977 * @uri: pointer to an xmlURI 978 * 979 * Save the URI as an escaped string 980 * 981 * Returns a new string (to be deallocated by caller) 982 */ 983 xmlChar * 984 xmlSaveUri(xmlURIPtr uri) { 985 xmlChar *ret = NULL; 986 xmlChar *temp; 987 const char *p; 988 int len; 989 int max; 990 991 if (uri == NULL) return(NULL); 992 993 994 max = 80; 995 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 996 if (ret == NULL) { 997 xmlGenericError(xmlGenericErrorContext, 998 "xmlSaveUri: out of memory\n"); 999 return(NULL); 1000 } 1001 len = 0; 1002 1003 if (uri->scheme != NULL) { 1004 p = uri->scheme; 1005 while (*p != 0) { 1006 if (len >= max) { 1007 max *= 2; 1008 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1009 if (temp == NULL) { 1010 xmlGenericError(xmlGenericErrorContext, 1011 "xmlSaveUri: out of memory\n"); 1012 xmlFree(ret); 1013 return(NULL); 1014 } 1015 ret = temp; 1016 } 1017 ret[len++] = *p++; 1018 } 1019 if (len >= max) { 1020 max *= 2; 1021 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1022 if (temp == NULL) { 1023 xmlGenericError(xmlGenericErrorContext, 1024 "xmlSaveUri: out of memory\n"); 1025 xmlFree(ret); 1026 return(NULL); 1027 } 1028 ret = temp; 1029 } 1030 ret[len++] = ':'; 1031 } 1032 if (uri->opaque != NULL) { 1033 p = uri->opaque; 1034 while (*p != 0) { 1035 if (len + 3 >= max) { 1036 max *= 2; 1037 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1038 if (temp == NULL) { 1039 xmlGenericError(xmlGenericErrorContext, 1040 "xmlSaveUri: out of memory\n"); 1041 xmlFree(ret); 1042 return(NULL); 1043 } 1044 ret = temp; 1045 } 1046 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1047 ret[len++] = *p++; 1048 else { 1049 int val = *(unsigned char *)p++; 1050 int hi = val / 0x10, lo = val % 0x10; 1051 ret[len++] = '%'; 1052 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1053 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1054 } 1055 } 1056 } else { 1057 if (uri->server != NULL) { 1058 if (len + 3 >= max) { 1059 max *= 2; 1060 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1061 if (temp == NULL) { 1062 xmlGenericError(xmlGenericErrorContext, 1063 "xmlSaveUri: out of memory\n"); 1064 xmlFree(ret); 1065 return(NULL); 1066 } 1067 ret = temp; 1068 } 1069 ret[len++] = '/'; 1070 ret[len++] = '/'; 1071 if (uri->user != NULL) { 1072 p = uri->user; 1073 while (*p != 0) { 1074 if (len + 3 >= max) { 1075 max *= 2; 1076 temp = (xmlChar *) xmlRealloc(ret, 1077 (max + 1) * sizeof(xmlChar)); 1078 if (temp == NULL) { 1079 xmlGenericError(xmlGenericErrorContext, 1080 "xmlSaveUri: out of memory\n"); 1081 xmlFree(ret); 1082 return(NULL); 1083 } 1084 ret = temp; 1085 } 1086 if ((IS_UNRESERVED(*(p))) || 1087 ((*(p) == ';')) || ((*(p) == ':')) || 1088 ((*(p) == '&')) || ((*(p) == '=')) || 1089 ((*(p) == '+')) || ((*(p) == '$')) || 1090 ((*(p) == ','))) 1091 ret[len++] = *p++; 1092 else { 1093 int val = *(unsigned char *)p++; 1094 int hi = val / 0x10, lo = val % 0x10; 1095 ret[len++] = '%'; 1096 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1097 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1098 } 1099 } 1100 if (len + 3 >= max) { 1101 max *= 2; 1102 temp = (xmlChar *) xmlRealloc(ret, 1103 (max + 1) * sizeof(xmlChar)); 1104 if (temp == NULL) { 1105 xmlGenericError(xmlGenericErrorContext, 1106 "xmlSaveUri: out of memory\n"); 1107 xmlFree(ret); 1108 return(NULL); 1109 } 1110 ret = temp; 1111 } 1112 ret[len++] = '@'; 1113 } 1114 p = uri->server; 1115 while (*p != 0) { 1116 if (len >= max) { 1117 max *= 2; 1118 temp = (xmlChar *) xmlRealloc(ret, 1119 (max + 1) * sizeof(xmlChar)); 1120 if (temp == NULL) { 1121 xmlGenericError(xmlGenericErrorContext, 1122 "xmlSaveUri: out of memory\n"); 1123 xmlFree(ret); 1124 return(NULL); 1125 } 1126 ret = temp; 1127 } 1128 ret[len++] = *p++; 1129 } 1130 if (uri->port > 0) { 1131 if (len + 10 >= max) { 1132 max *= 2; 1133 temp = (xmlChar *) xmlRealloc(ret, 1134 (max + 1) * sizeof(xmlChar)); 1135 if (temp == NULL) { 1136 xmlGenericError(xmlGenericErrorContext, 1137 "xmlSaveUri: out of memory\n"); 1138 xmlFree(ret); 1139 return(NULL); 1140 } 1141 ret = temp; 1142 } 1143 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1144 } 1145 } else if (uri->authority != NULL) { 1146 if (len + 3 >= max) { 1147 max *= 2; 1148 temp = (xmlChar *) xmlRealloc(ret, 1149 (max + 1) * sizeof(xmlChar)); 1150 if (temp == NULL) { 1151 xmlGenericError(xmlGenericErrorContext, 1152 "xmlSaveUri: out of memory\n"); 1153 xmlFree(ret); 1154 return(NULL); 1155 } 1156 ret = temp; 1157 } 1158 ret[len++] = '/'; 1159 ret[len++] = '/'; 1160 p = uri->authority; 1161 while (*p != 0) { 1162 if (len + 3 >= max) { 1163 max *= 2; 1164 temp = (xmlChar *) xmlRealloc(ret, 1165 (max + 1) * sizeof(xmlChar)); 1166 if (temp == NULL) { 1167 xmlGenericError(xmlGenericErrorContext, 1168 "xmlSaveUri: out of memory\n"); 1169 xmlFree(ret); 1170 return(NULL); 1171 } 1172 ret = temp; 1173 } 1174 if ((IS_UNRESERVED(*(p))) || 1175 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1176 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1177 ((*(p) == '=')) || ((*(p) == '+'))) 1178 ret[len++] = *p++; 1179 else { 1180 int val = *(unsigned char *)p++; 1181 int hi = val / 0x10, lo = val % 0x10; 1182 ret[len++] = '%'; 1183 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1184 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1185 } 1186 } 1187 } else if (uri->scheme != NULL) { 1188 if (len + 3 >= max) { 1189 max *= 2; 1190 temp = (xmlChar *) xmlRealloc(ret, 1191 (max + 1) * sizeof(xmlChar)); 1192 if (temp == NULL) { 1193 xmlGenericError(xmlGenericErrorContext, 1194 "xmlSaveUri: out of memory\n"); 1195 xmlFree(ret); 1196 return(NULL); 1197 } 1198 ret = temp; 1199 } 1200 ret[len++] = '/'; 1201 ret[len++] = '/'; 1202 } 1203 if (uri->path != NULL) { 1204 p = uri->path; 1205 /* 1206 * the colon in file:///d: should not be escaped or 1207 * Windows accesses fail later. 1208 */ 1209 if ((uri->scheme != NULL) && 1210 (p[0] == '/') && 1211 (((p[1] >= 'a') && (p[1] <= 'z')) || 1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1213 (p[2] == ':') && 1214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1215 if (len + 3 >= max) { 1216 max *= 2; 1217 ret = (xmlChar *) xmlRealloc(ret, 1218 (max + 1) * sizeof(xmlChar)); 1219 if (ret == NULL) { 1220 xmlGenericError(xmlGenericErrorContext, 1221 "xmlSaveUri: out of memory\n"); 1222 return(NULL); 1223 } 1224 } 1225 ret[len++] = *p++; 1226 ret[len++] = *p++; 1227 ret[len++] = *p++; 1228 } 1229 while (*p != 0) { 1230 if (len + 3 >= max) { 1231 max *= 2; 1232 temp = (xmlChar *) xmlRealloc(ret, 1233 (max + 1) * sizeof(xmlChar)); 1234 if (temp == NULL) { 1235 xmlGenericError(xmlGenericErrorContext, 1236 "xmlSaveUri: out of memory\n"); 1237 xmlFree(ret); 1238 return(NULL); 1239 } 1240 ret = temp; 1241 } 1242 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1243 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1244 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1245 ((*(p) == ','))) 1246 ret[len++] = *p++; 1247 else { 1248 int val = *(unsigned char *)p++; 1249 int hi = val / 0x10, lo = val % 0x10; 1250 ret[len++] = '%'; 1251 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1252 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1253 } 1254 } 1255 } 1256 if (uri->query_raw != NULL) { 1257 if (len + 1 >= max) { 1258 max *= 2; 1259 temp = (xmlChar *) xmlRealloc(ret, 1260 (max + 1) * sizeof(xmlChar)); 1261 if (temp == NULL) { 1262 xmlGenericError(xmlGenericErrorContext, 1263 "xmlSaveUri: out of memory\n"); 1264 xmlFree(ret); 1265 return(NULL); 1266 } 1267 ret = temp; 1268 } 1269 ret[len++] = '?'; 1270 p = uri->query_raw; 1271 while (*p != 0) { 1272 if (len + 1 >= max) { 1273 max *= 2; 1274 temp = (xmlChar *) xmlRealloc(ret, 1275 (max + 1) * sizeof(xmlChar)); 1276 if (temp == NULL) { 1277 xmlGenericError(xmlGenericErrorContext, 1278 "xmlSaveUri: out of memory\n"); 1279 xmlFree(ret); 1280 return(NULL); 1281 } 1282 ret = temp; 1283 } 1284 ret[len++] = *p++; 1285 } 1286 } else if (uri->query != NULL) { 1287 if (len + 3 >= max) { 1288 max *= 2; 1289 temp = (xmlChar *) xmlRealloc(ret, 1290 (max + 1) * sizeof(xmlChar)); 1291 if (temp == NULL) { 1292 xmlGenericError(xmlGenericErrorContext, 1293 "xmlSaveUri: out of memory\n"); 1294 xmlFree(ret); 1295 return(NULL); 1296 } 1297 ret = temp; 1298 } 1299 ret[len++] = '?'; 1300 p = uri->query; 1301 while (*p != 0) { 1302 if (len + 3 >= max) { 1303 max *= 2; 1304 temp = (xmlChar *) xmlRealloc(ret, 1305 (max + 1) * sizeof(xmlChar)); 1306 if (temp == NULL) { 1307 xmlGenericError(xmlGenericErrorContext, 1308 "xmlSaveUri: out of memory\n"); 1309 xmlFree(ret); 1310 return(NULL); 1311 } 1312 ret = temp; 1313 } 1314 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1315 ret[len++] = *p++; 1316 else { 1317 int val = *(unsigned char *)p++; 1318 int hi = val / 0x10, lo = val % 0x10; 1319 ret[len++] = '%'; 1320 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1321 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1322 } 1323 } 1324 } 1325 } 1326 if (uri->fragment != NULL) { 1327 if (len + 3 >= max) { 1328 max *= 2; 1329 temp = (xmlChar *) xmlRealloc(ret, 1330 (max + 1) * sizeof(xmlChar)); 1331 if (temp == NULL) { 1332 xmlGenericError(xmlGenericErrorContext, 1333 "xmlSaveUri: out of memory\n"); 1334 xmlFree(ret); 1335 return(NULL); 1336 } 1337 ret = temp; 1338 } 1339 ret[len++] = '#'; 1340 p = uri->fragment; 1341 while (*p != 0) { 1342 if (len + 3 >= max) { 1343 max *= 2; 1344 temp = (xmlChar *) xmlRealloc(ret, 1345 (max + 1) * sizeof(xmlChar)); 1346 if (temp == NULL) { 1347 xmlGenericError(xmlGenericErrorContext, 1348 "xmlSaveUri: out of memory\n"); 1349 xmlFree(ret); 1350 return(NULL); 1351 } 1352 ret = temp; 1353 } 1354 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1355 ret[len++] = *p++; 1356 else { 1357 int val = *(unsigned char *)p++; 1358 int hi = val / 0x10, lo = val % 0x10; 1359 ret[len++] = '%'; 1360 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1361 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1362 } 1363 } 1364 } 1365 if (len >= max) { 1366 max *= 2; 1367 temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1368 if (temp == NULL) { 1369 xmlGenericError(xmlGenericErrorContext, 1370 "xmlSaveUri: out of memory\n"); 1371 xmlFree(ret); 1372 return(NULL); 1373 } 1374 ret = temp; 1375 } 1376 ret[len++] = 0; 1377 return(ret); 1378 } 1379 1380 /** 1381 * xmlPrintURI: 1382 * @stream: a FILE* for the output 1383 * @uri: pointer to an xmlURI 1384 * 1385 * Prints the URI in the stream @stream. 1386 */ 1387 void 1388 xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1389 xmlChar *out; 1390 1391 out = xmlSaveUri(uri); 1392 if (out != NULL) { 1393 fprintf(stream, "%s", (char *) out); 1394 xmlFree(out); 1395 } 1396 } 1397 1398 /** 1399 * xmlCleanURI: 1400 * @uri: pointer to an xmlURI 1401 * 1402 * Make sure the xmlURI struct is free of content 1403 */ 1404 static void 1405 xmlCleanURI(xmlURIPtr uri) { 1406 if (uri == NULL) return; 1407 1408 if (uri->scheme != NULL) xmlFree(uri->scheme); 1409 uri->scheme = NULL; 1410 if (uri->server != NULL) xmlFree(uri->server); 1411 uri->server = NULL; 1412 if (uri->user != NULL) xmlFree(uri->user); 1413 uri->user = NULL; 1414 if (uri->path != NULL) xmlFree(uri->path); 1415 uri->path = NULL; 1416 if (uri->fragment != NULL) xmlFree(uri->fragment); 1417 uri->fragment = NULL; 1418 if (uri->opaque != NULL) xmlFree(uri->opaque); 1419 uri->opaque = NULL; 1420 if (uri->authority != NULL) xmlFree(uri->authority); 1421 uri->authority = NULL; 1422 if (uri->query != NULL) xmlFree(uri->query); 1423 uri->query = NULL; 1424 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1425 uri->query_raw = NULL; 1426 } 1427 1428 /** 1429 * xmlFreeURI: 1430 * @uri: pointer to an xmlURI 1431 * 1432 * Free up the xmlURI struct 1433 */ 1434 void 1435 xmlFreeURI(xmlURIPtr uri) { 1436 if (uri == NULL) return; 1437 1438 if (uri->scheme != NULL) xmlFree(uri->scheme); 1439 if (uri->server != NULL) xmlFree(uri->server); 1440 if (uri->user != NULL) xmlFree(uri->user); 1441 if (uri->path != NULL) xmlFree(uri->path); 1442 if (uri->fragment != NULL) xmlFree(uri->fragment); 1443 if (uri->opaque != NULL) xmlFree(uri->opaque); 1444 if (uri->authority != NULL) xmlFree(uri->authority); 1445 if (uri->query != NULL) xmlFree(uri->query); 1446 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1447 xmlFree(uri); 1448 } 1449 1450 /************************************************************************ 1451 * * 1452 * Helper functions * 1453 * * 1454 ************************************************************************/ 1455 1456 /** 1457 * xmlNormalizeURIPath: 1458 * @path: pointer to the path string 1459 * 1460 * Applies the 5 normalization steps to a path string--that is, RFC 2396 1461 * Section 5.2, steps 6.c through 6.g. 1462 * 1463 * Normalization occurs directly on the string, no new allocation is done 1464 * 1465 * Returns 0 or an error code 1466 */ 1467 int 1468 xmlNormalizeURIPath(char *path) { 1469 char *cur, *out; 1470 1471 if (path == NULL) 1472 return(-1); 1473 1474 /* Skip all initial "/" chars. We want to get to the beginning of the 1475 * first non-empty segment. 1476 */ 1477 cur = path; 1478 while (cur[0] == '/') 1479 ++cur; 1480 if (cur[0] == '\0') 1481 return(0); 1482 1483 /* Keep everything we've seen so far. */ 1484 out = cur; 1485 1486 /* 1487 * Analyze each segment in sequence for cases (c) and (d). 1488 */ 1489 while (cur[0] != '\0') { 1490 /* 1491 * c) All occurrences of "./", where "." is a complete path segment, 1492 * are removed from the buffer string. 1493 */ 1494 if ((cur[0] == '.') && (cur[1] == '/')) { 1495 cur += 2; 1496 /* '//' normalization should be done at this point too */ 1497 while (cur[0] == '/') 1498 cur++; 1499 continue; 1500 } 1501 1502 /* 1503 * d) If the buffer string ends with "." as a complete path segment, 1504 * that "." is removed. 1505 */ 1506 if ((cur[0] == '.') && (cur[1] == '\0')) 1507 break; 1508 1509 /* Otherwise keep the segment. */ 1510 while (cur[0] != '/') { 1511 if (cur[0] == '\0') 1512 goto done_cd; 1513 (out++)[0] = (cur++)[0]; 1514 } 1515 /* nomalize // */ 1516 while ((cur[0] == '/') && (cur[1] == '/')) 1517 cur++; 1518 1519 (out++)[0] = (cur++)[0]; 1520 } 1521 done_cd: 1522 out[0] = '\0'; 1523 1524 /* Reset to the beginning of the first segment for the next sequence. */ 1525 cur = path; 1526 while (cur[0] == '/') 1527 ++cur; 1528 if (cur[0] == '\0') 1529 return(0); 1530 1531 /* 1532 * Analyze each segment in sequence for cases (e) and (f). 1533 * 1534 * e) All occurrences of "<segment>/../", where <segment> is a 1535 * complete path segment not equal to "..", are removed from the 1536 * buffer string. Removal of these path segments is performed 1537 * iteratively, removing the leftmost matching pattern on each 1538 * iteration, until no matching pattern remains. 1539 * 1540 * f) If the buffer string ends with "<segment>/..", where <segment> 1541 * is a complete path segment not equal to "..", that 1542 * "<segment>/.." is removed. 1543 * 1544 * To satisfy the "iterative" clause in (e), we need to collapse the 1545 * string every time we find something that needs to be removed. Thus, 1546 * we don't need to keep two pointers into the string: we only need a 1547 * "current position" pointer. 1548 */ 1549 while (1) { 1550 char *segp, *tmp; 1551 1552 /* At the beginning of each iteration of this loop, "cur" points to 1553 * the first character of the segment we want to examine. 1554 */ 1555 1556 /* Find the end of the current segment. */ 1557 segp = cur; 1558 while ((segp[0] != '/') && (segp[0] != '\0')) 1559 ++segp; 1560 1561 /* If this is the last segment, we're done (we need at least two 1562 * segments to meet the criteria for the (e) and (f) cases). 1563 */ 1564 if (segp[0] == '\0') 1565 break; 1566 1567 /* If the first segment is "..", or if the next segment _isn't_ "..", 1568 * keep this segment and try the next one. 1569 */ 1570 ++segp; 1571 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1572 || ((segp[0] != '.') || (segp[1] != '.') 1573 || ((segp[2] != '/') && (segp[2] != '\0')))) { 1574 cur = segp; 1575 continue; 1576 } 1577 1578 /* If we get here, remove this segment and the next one and back up 1579 * to the previous segment (if there is one), to implement the 1580 * "iteratively" clause. It's pretty much impossible to back up 1581 * while maintaining two pointers into the buffer, so just compact 1582 * the whole buffer now. 1583 */ 1584 1585 /* If this is the end of the buffer, we're done. */ 1586 if (segp[2] == '\0') { 1587 cur[0] = '\0'; 1588 break; 1589 } 1590 /* Valgrind complained, strcpy(cur, segp + 3); */ 1591 /* string will overlap, do not use strcpy */ 1592 tmp = cur; 1593 segp += 3; 1594 while ((*tmp++ = *segp++) != 0); 1595 1596 /* If there are no previous segments, then keep going from here. */ 1597 segp = cur; 1598 while ((segp > path) && ((--segp)[0] == '/')) 1599 ; 1600 if (segp == path) 1601 continue; 1602 1603 /* "segp" is pointing to the end of a previous segment; find it's 1604 * start. We need to back up to the previous segment and start 1605 * over with that to handle things like "foo/bar/../..". If we 1606 * don't do this, then on the first pass we'll remove the "bar/..", 1607 * but be pointing at the second ".." so we won't realize we can also 1608 * remove the "foo/..". 1609 */ 1610 cur = segp; 1611 while ((cur > path) && (cur[-1] != '/')) 1612 --cur; 1613 } 1614 out[0] = '\0'; 1615 1616 /* 1617 * g) If the resulting buffer string still begins with one or more 1618 * complete path segments of "..", then the reference is 1619 * considered to be in error. Implementations may handle this 1620 * error by retaining these components in the resolved path (i.e., 1621 * treating them as part of the final URI), by removing them from 1622 * the resolved path (i.e., discarding relative levels above the 1623 * root), or by avoiding traversal of the reference. 1624 * 1625 * We discard them from the final path. 1626 */ 1627 if (path[0] == '/') { 1628 cur = path; 1629 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1630 && ((cur[3] == '/') || (cur[3] == '\0'))) 1631 cur += 3; 1632 1633 if (cur != path) { 1634 out = path; 1635 while (cur[0] != '\0') 1636 (out++)[0] = (cur++)[0]; 1637 out[0] = 0; 1638 } 1639 } 1640 1641 return(0); 1642 } 1643 1644 static int is_hex(char c) { 1645 if (((c >= '0') && (c <= '9')) || 1646 ((c >= 'a') && (c <= 'f')) || 1647 ((c >= 'A') && (c <= 'F'))) 1648 return(1); 1649 return(0); 1650 } 1651 1652 /** 1653 * xmlURIUnescapeString: 1654 * @str: the string to unescape 1655 * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1656 * @target: optional destination buffer 1657 * 1658 * Unescaping routine, but does not check that the string is an URI. The 1659 * output is a direct unsigned char translation of %XX values (no encoding) 1660 * Note that the length of the result can only be smaller or same size as 1661 * the input string. 1662 * 1663 * Returns a copy of the string, but unescaped, will return NULL only in case 1664 * of error 1665 */ 1666 char * 1667 xmlURIUnescapeString(const char *str, int len, char *target) { 1668 char *ret, *out; 1669 const char *in; 1670 1671 if (str == NULL) 1672 return(NULL); 1673 if (len <= 0) len = strlen(str); 1674 if (len < 0) return(NULL); 1675 1676 if (target == NULL) { 1677 ret = (char *) xmlMallocAtomic(len + 1); 1678 if (ret == NULL) { 1679 xmlGenericError(xmlGenericErrorContext, 1680 "xmlURIUnescapeString: out of memory\n"); 1681 return(NULL); 1682 } 1683 } else 1684 ret = target; 1685 in = str; 1686 out = ret; 1687 while(len > 0) { 1688 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1689 in++; 1690 if ((*in >= '0') && (*in <= '9')) 1691 *out = (*in - '0'); 1692 else if ((*in >= 'a') && (*in <= 'f')) 1693 *out = (*in - 'a') + 10; 1694 else if ((*in >= 'A') && (*in <= 'F')) 1695 *out = (*in - 'A') + 10; 1696 in++; 1697 if ((*in >= '0') && (*in <= '9')) 1698 *out = *out * 16 + (*in - '0'); 1699 else if ((*in >= 'a') && (*in <= 'f')) 1700 *out = *out * 16 + (*in - 'a') + 10; 1701 else if ((*in >= 'A') && (*in <= 'F')) 1702 *out = *out * 16 + (*in - 'A') + 10; 1703 in++; 1704 len -= 3; 1705 out++; 1706 } else { 1707 *out++ = *in++; 1708 len--; 1709 } 1710 } 1711 *out = 0; 1712 return(ret); 1713 } 1714 1715 /** 1716 * xmlURIEscapeStr: 1717 * @str: string to escape 1718 * @list: exception list string of chars not to escape 1719 * 1720 * This routine escapes a string to hex, ignoring reserved characters (a-z) 1721 * and the characters in the exception list. 1722 * 1723 * Returns a new escaped string or NULL in case of error. 1724 */ 1725 xmlChar * 1726 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1727 xmlChar *ret, ch; 1728 xmlChar *temp; 1729 const xmlChar *in; 1730 1731 unsigned int len, out; 1732 1733 if (str == NULL) 1734 return(NULL); 1735 if (str[0] == 0) 1736 return(xmlStrdup(str)); 1737 len = xmlStrlen(str); 1738 if (!(len > 0)) return(NULL); 1739 1740 len += 20; 1741 ret = (xmlChar *) xmlMallocAtomic(len); 1742 if (ret == NULL) { 1743 xmlGenericError(xmlGenericErrorContext, 1744 "xmlURIEscapeStr: out of memory\n"); 1745 return(NULL); 1746 } 1747 in = (const xmlChar *) str; 1748 out = 0; 1749 while(*in != 0) { 1750 if (len - out <= 3) { 1751 len += 20; 1752 temp = (xmlChar *) xmlRealloc(ret, len); 1753 if (temp == NULL) { 1754 xmlGenericError(xmlGenericErrorContext, 1755 "xmlURIEscapeStr: out of memory\n"); 1756 xmlFree(ret); 1757 return(NULL); 1758 } 1759 ret = temp; 1760 } 1761 1762 ch = *in; 1763 1764 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1765 unsigned char val; 1766 ret[out++] = '%'; 1767 val = ch >> 4; 1768 if (val <= 9) 1769 ret[out++] = '0' + val; 1770 else 1771 ret[out++] = 'A' + val - 0xA; 1772 val = ch & 0xF; 1773 if (val <= 9) 1774 ret[out++] = '0' + val; 1775 else 1776 ret[out++] = 'A' + val - 0xA; 1777 in++; 1778 } else { 1779 ret[out++] = *in++; 1780 } 1781 1782 } 1783 ret[out] = 0; 1784 return(ret); 1785 } 1786 1787 /** 1788 * xmlURIEscape: 1789 * @str: the string of the URI to escape 1790 * 1791 * Escaping routine, does not do validity checks ! 1792 * It will try to escape the chars needing this, but this is heuristic 1793 * based it's impossible to be sure. 1794 * 1795 * Returns an copy of the string, but escaped 1796 * 1797 * 25 May 2001 1798 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1799 * according to RFC2396. 1800 * - Carl Douglas 1801 */ 1802 xmlChar * 1803 xmlURIEscape(const xmlChar * str) 1804 { 1805 xmlChar *ret, *segment = NULL; 1806 xmlURIPtr uri; 1807 int ret2; 1808 1809 #define NULLCHK(p) if(!p) { \ 1810 xmlGenericError(xmlGenericErrorContext, \ 1811 "xmlURIEscape: out of memory\n"); \ 1812 xmlFreeURI(uri); \ 1813 return NULL; } \ 1814 1815 if (str == NULL) 1816 return (NULL); 1817 1818 uri = xmlCreateURI(); 1819 if (uri != NULL) { 1820 /* 1821 * Allow escaping errors in the unescaped form 1822 */ 1823 uri->cleanup = 1; 1824 ret2 = xmlParseURIReference(uri, (const char *)str); 1825 if (ret2) { 1826 xmlFreeURI(uri); 1827 return (NULL); 1828 } 1829 } 1830 1831 if (!uri) 1832 return NULL; 1833 1834 ret = NULL; 1835 1836 if (uri->scheme) { 1837 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1838 NULLCHK(segment) 1839 ret = xmlStrcat(ret, segment); 1840 ret = xmlStrcat(ret, BAD_CAST ":"); 1841 xmlFree(segment); 1842 } 1843 1844 if (uri->authority) { 1845 segment = 1846 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1847 NULLCHK(segment) 1848 ret = xmlStrcat(ret, BAD_CAST "//"); 1849 ret = xmlStrcat(ret, segment); 1850 xmlFree(segment); 1851 } 1852 1853 if (uri->user) { 1854 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1855 NULLCHK(segment) 1856 ret = xmlStrcat(ret,BAD_CAST "//"); 1857 ret = xmlStrcat(ret, segment); 1858 ret = xmlStrcat(ret, BAD_CAST "@"); 1859 xmlFree(segment); 1860 } 1861 1862 if (uri->server) { 1863 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1864 NULLCHK(segment) 1865 if (uri->user == NULL) 1866 ret = xmlStrcat(ret, BAD_CAST "//"); 1867 ret = xmlStrcat(ret, segment); 1868 xmlFree(segment); 1869 } 1870 1871 if (uri->port) { 1872 xmlChar port[10]; 1873 1874 snprintf((char *) port, 10, "%d", uri->port); 1875 ret = xmlStrcat(ret, BAD_CAST ":"); 1876 ret = xmlStrcat(ret, port); 1877 } 1878 1879 if (uri->path) { 1880 segment = 1881 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1882 NULLCHK(segment) 1883 ret = xmlStrcat(ret, segment); 1884 xmlFree(segment); 1885 } 1886 1887 if (uri->query_raw) { 1888 ret = xmlStrcat(ret, BAD_CAST "?"); 1889 ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1890 } 1891 else if (uri->query) { 1892 segment = 1893 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1894 NULLCHK(segment) 1895 ret = xmlStrcat(ret, BAD_CAST "?"); 1896 ret = xmlStrcat(ret, segment); 1897 xmlFree(segment); 1898 } 1899 1900 if (uri->opaque) { 1901 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1902 NULLCHK(segment) 1903 ret = xmlStrcat(ret, segment); 1904 xmlFree(segment); 1905 } 1906 1907 if (uri->fragment) { 1908 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1909 NULLCHK(segment) 1910 ret = xmlStrcat(ret, BAD_CAST "#"); 1911 ret = xmlStrcat(ret, segment); 1912 xmlFree(segment); 1913 } 1914 1915 xmlFreeURI(uri); 1916 #undef NULLCHK 1917 1918 return (ret); 1919 } 1920 1921 /************************************************************************ 1922 * * 1923 * Public functions * 1924 * * 1925 ************************************************************************/ 1926 1927 /** 1928 * xmlBuildURI: 1929 * @URI: the URI instance found in the document 1930 * @base: the base value 1931 * 1932 * Computes he final URI of the reference done by checking that 1933 * the given URI is valid, and building the final URI using the 1934 * base URI. This is processed according to section 5.2 of the 1935 * RFC 2396 1936 * 1937 * 5.2. Resolving Relative References to Absolute Form 1938 * 1939 * Returns a new URI string (to be freed by the caller) or NULL in case 1940 * of error. 1941 */ 1942 xmlChar * 1943 xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1944 xmlChar *val = NULL; 1945 int ret, len, indx, cur, out; 1946 xmlURIPtr ref = NULL; 1947 xmlURIPtr bas = NULL; 1948 xmlURIPtr res = NULL; 1949 1950 /* 1951 * 1) The URI reference is parsed into the potential four components and 1952 * fragment identifier, as described in Section 4.3. 1953 * 1954 * NOTE that a completely empty URI is treated by modern browsers 1955 * as a reference to "." rather than as a synonym for the current 1956 * URI. Should we do that here? 1957 */ 1958 if (URI == NULL) 1959 ret = -1; 1960 else { 1961 if (*URI) { 1962 ref = xmlCreateURI(); 1963 if (ref == NULL) 1964 goto done; 1965 ret = xmlParseURIReference(ref, (const char *) URI); 1966 } 1967 else 1968 ret = 0; 1969 } 1970 if (ret != 0) 1971 goto done; 1972 if ((ref != NULL) && (ref->scheme != NULL)) { 1973 /* 1974 * The URI is absolute don't modify. 1975 */ 1976 val = xmlStrdup(URI); 1977 goto done; 1978 } 1979 if (base == NULL) 1980 ret = -1; 1981 else { 1982 bas = xmlCreateURI(); 1983 if (bas == NULL) 1984 goto done; 1985 ret = xmlParseURIReference(bas, (const char *) base); 1986 } 1987 if (ret != 0) { 1988 if (ref) 1989 val = xmlSaveUri(ref); 1990 goto done; 1991 } 1992 if (ref == NULL) { 1993 /* 1994 * the base fragment must be ignored 1995 */ 1996 if (bas->fragment != NULL) { 1997 xmlFree(bas->fragment); 1998 bas->fragment = NULL; 1999 } 2000 val = xmlSaveUri(bas); 2001 goto done; 2002 } 2003 2004 /* 2005 * 2) If the path component is empty and the scheme, authority, and 2006 * query components are undefined, then it is a reference to the 2007 * current document and we are done. Otherwise, the reference URI's 2008 * query and fragment components are defined as found (or not found) 2009 * within the URI reference and not inherited from the base URI. 2010 * 2011 * NOTE that in modern browsers, the parsing differs from the above 2012 * in the following aspect: the query component is allowed to be 2013 * defined while still treating this as a reference to the current 2014 * document. 2015 */ 2016 res = xmlCreateURI(); 2017 if (res == NULL) 2018 goto done; 2019 if ((ref->scheme == NULL) && (ref->path == NULL) && 2020 ((ref->authority == NULL) && (ref->server == NULL))) { 2021 if (bas->scheme != NULL) 2022 res->scheme = xmlMemStrdup(bas->scheme); 2023 if (bas->authority != NULL) 2024 res->authority = xmlMemStrdup(bas->authority); 2025 else if (bas->server != NULL) { 2026 res->server = xmlMemStrdup(bas->server); 2027 if (bas->user != NULL) 2028 res->user = xmlMemStrdup(bas->user); 2029 res->port = bas->port; 2030 } 2031 if (bas->path != NULL) 2032 res->path = xmlMemStrdup(bas->path); 2033 if (ref->query_raw != NULL) 2034 res->query_raw = xmlMemStrdup (ref->query_raw); 2035 else if (ref->query != NULL) 2036 res->query = xmlMemStrdup(ref->query); 2037 else if (bas->query_raw != NULL) 2038 res->query_raw = xmlMemStrdup(bas->query_raw); 2039 else if (bas->query != NULL) 2040 res->query = xmlMemStrdup(bas->query); 2041 if (ref->fragment != NULL) 2042 res->fragment = xmlMemStrdup(ref->fragment); 2043 goto step_7; 2044 } 2045 2046 /* 2047 * 3) If the scheme component is defined, indicating that the reference 2048 * starts with a scheme name, then the reference is interpreted as an 2049 * absolute URI and we are done. Otherwise, the reference URI's 2050 * scheme is inherited from the base URI's scheme component. 2051 */ 2052 if (ref->scheme != NULL) { 2053 val = xmlSaveUri(ref); 2054 goto done; 2055 } 2056 if (bas->scheme != NULL) 2057 res->scheme = xmlMemStrdup(bas->scheme); 2058 2059 if (ref->query_raw != NULL) 2060 res->query_raw = xmlMemStrdup(ref->query_raw); 2061 else if (ref->query != NULL) 2062 res->query = xmlMemStrdup(ref->query); 2063 if (ref->fragment != NULL) 2064 res->fragment = xmlMemStrdup(ref->fragment); 2065 2066 /* 2067 * 4) If the authority component is defined, then the reference is a 2068 * network-path and we skip to step 7. Otherwise, the reference 2069 * URI's authority is inherited from the base URI's authority 2070 * component, which will also be undefined if the URI scheme does not 2071 * use an authority component. 2072 */ 2073 if ((ref->authority != NULL) || (ref->server != NULL)) { 2074 if (ref->authority != NULL) 2075 res->authority = xmlMemStrdup(ref->authority); 2076 else { 2077 res->server = xmlMemStrdup(ref->server); 2078 if (ref->user != NULL) 2079 res->user = xmlMemStrdup(ref->user); 2080 res->port = ref->port; 2081 } 2082 if (ref->path != NULL) 2083 res->path = xmlMemStrdup(ref->path); 2084 goto step_7; 2085 } 2086 if (bas->authority != NULL) 2087 res->authority = xmlMemStrdup(bas->authority); 2088 else if (bas->server != NULL) { 2089 res->server = xmlMemStrdup(bas->server); 2090 if (bas->user != NULL) 2091 res->user = xmlMemStrdup(bas->user); 2092 res->port = bas->port; 2093 } 2094 2095 /* 2096 * 5) If the path component begins with a slash character ("/"), then 2097 * the reference is an absolute-path and we skip to step 7. 2098 */ 2099 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2100 res->path = xmlMemStrdup(ref->path); 2101 goto step_7; 2102 } 2103 2104 2105 /* 2106 * 6) If this step is reached, then we are resolving a relative-path 2107 * reference. The relative path needs to be merged with the base 2108 * URI's path. Although there are many ways to do this, we will 2109 * describe a simple method using a separate string buffer. 2110 * 2111 * Allocate a buffer large enough for the result string. 2112 */ 2113 len = 2; /* extra / and 0 */ 2114 if (ref->path != NULL) 2115 len += strlen(ref->path); 2116 if (bas->path != NULL) 2117 len += strlen(bas->path); 2118 res->path = (char *) xmlMallocAtomic(len); 2119 if (res->path == NULL) { 2120 xmlGenericError(xmlGenericErrorContext, 2121 "xmlBuildURI: out of memory\n"); 2122 goto done; 2123 } 2124 res->path[0] = 0; 2125 2126 /* 2127 * a) All but the last segment of the base URI's path component is 2128 * copied to the buffer. In other words, any characters after the 2129 * last (right-most) slash character, if any, are excluded. 2130 */ 2131 cur = 0; 2132 out = 0; 2133 if (bas->path != NULL) { 2134 while (bas->path[cur] != 0) { 2135 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2136 cur++; 2137 if (bas->path[cur] == 0) 2138 break; 2139 2140 cur++; 2141 while (out < cur) { 2142 res->path[out] = bas->path[out]; 2143 out++; 2144 } 2145 } 2146 } 2147 res->path[out] = 0; 2148 2149 /* 2150 * b) The reference's path component is appended to the buffer 2151 * string. 2152 */ 2153 if (ref->path != NULL && ref->path[0] != 0) { 2154 indx = 0; 2155 /* 2156 * Ensure the path includes a '/' 2157 */ 2158 if ((out == 0) && (bas->server != NULL)) 2159 res->path[out++] = '/'; 2160 while (ref->path[indx] != 0) { 2161 res->path[out++] = ref->path[indx++]; 2162 } 2163 } 2164 res->path[out] = 0; 2165 2166 /* 2167 * Steps c) to h) are really path normalization steps 2168 */ 2169 xmlNormalizeURIPath(res->path); 2170 2171 step_7: 2172 2173 /* 2174 * 7) The resulting URI components, including any inherited from the 2175 * base URI, are recombined to give the absolute form of the URI 2176 * reference. 2177 */ 2178 val = xmlSaveUri(res); 2179 2180 done: 2181 if (ref != NULL) 2182 xmlFreeURI(ref); 2183 if (bas != NULL) 2184 xmlFreeURI(bas); 2185 if (res != NULL) 2186 xmlFreeURI(res); 2187 return(val); 2188 } 2189 2190 /** 2191 * xmlBuildRelativeURI: 2192 * @URI: the URI reference under consideration 2193 * @base: the base value 2194 * 2195 * Expresses the URI of the reference in terms relative to the 2196 * base. Some examples of this operation include: 2197 * base = "http://site1.com/docs/book1.html" 2198 * URI input URI returned 2199 * docs/pic1.gif pic1.gif 2200 * docs/img/pic1.gif img/pic1.gif 2201 * img/pic1.gif ../img/pic1.gif 2202 * http://site1.com/docs/pic1.gif pic1.gif 2203 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2204 * 2205 * base = "docs/book1.html" 2206 * URI input URI returned 2207 * docs/pic1.gif pic1.gif 2208 * docs/img/pic1.gif img/pic1.gif 2209 * img/pic1.gif ../img/pic1.gif 2210 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2211 * 2212 * 2213 * Note: if the URI reference is really wierd or complicated, it may be 2214 * worthwhile to first convert it into a "nice" one by calling 2215 * xmlBuildURI (using 'base') before calling this routine, 2216 * since this routine (for reasonable efficiency) assumes URI has 2217 * already been through some validation. 2218 * 2219 * Returns a new URI string (to be freed by the caller) or NULL in case 2220 * error. 2221 */ 2222 xmlChar * 2223 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2224 { 2225 xmlChar *val = NULL; 2226 int ret; 2227 int ix; 2228 int pos = 0; 2229 int nbslash = 0; 2230 int len; 2231 xmlURIPtr ref = NULL; 2232 xmlURIPtr bas = NULL; 2233 xmlChar *bptr, *uptr, *vptr; 2234 int remove_path = 0; 2235 2236 if ((URI == NULL) || (*URI == 0)) 2237 return NULL; 2238 2239 /* 2240 * First parse URI into a standard form 2241 */ 2242 ref = xmlCreateURI (); 2243 if (ref == NULL) 2244 return NULL; 2245 /* If URI not already in "relative" form */ 2246 if (URI[0] != '.') { 2247 ret = xmlParseURIReference (ref, (const char *) URI); 2248 if (ret != 0) 2249 goto done; /* Error in URI, return NULL */ 2250 } else 2251 ref->path = (char *)xmlStrdup(URI); 2252 2253 /* 2254 * Next parse base into the same standard form 2255 */ 2256 if ((base == NULL) || (*base == 0)) { 2257 val = xmlStrdup (URI); 2258 goto done; 2259 } 2260 bas = xmlCreateURI (); 2261 if (bas == NULL) 2262 goto done; 2263 if (base[0] != '.') { 2264 ret = xmlParseURIReference (bas, (const char *) base); 2265 if (ret != 0) 2266 goto done; /* Error in base, return NULL */ 2267 } else 2268 bas->path = (char *)xmlStrdup(base); 2269 2270 /* 2271 * If the scheme / server on the URI differs from the base, 2272 * just return the URI 2273 */ 2274 if ((ref->scheme != NULL) && 2275 ((bas->scheme == NULL) || 2276 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2277 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2278 val = xmlStrdup (URI); 2279 goto done; 2280 } 2281 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2282 val = xmlStrdup(BAD_CAST ""); 2283 goto done; 2284 } 2285 if (bas->path == NULL) { 2286 val = xmlStrdup((xmlChar *)ref->path); 2287 goto done; 2288 } 2289 if (ref->path == NULL) { 2290 ref->path = (char *) "/"; 2291 remove_path = 1; 2292 } 2293 2294 /* 2295 * At this point (at last!) we can compare the two paths 2296 * 2297 * First we take care of the special case where either of the 2298 * two path components may be missing (bug 316224) 2299 */ 2300 if (bas->path == NULL) { 2301 if (ref->path != NULL) { 2302 uptr = (xmlChar *) ref->path; 2303 if (*uptr == '/') 2304 uptr++; 2305 /* exception characters from xmlSaveUri */ 2306 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2307 } 2308 goto done; 2309 } 2310 bptr = (xmlChar *)bas->path; 2311 if (ref->path == NULL) { 2312 for (ix = 0; bptr[ix] != 0; ix++) { 2313 if (bptr[ix] == '/') 2314 nbslash++; 2315 } 2316 uptr = NULL; 2317 len = 1; /* this is for a string terminator only */ 2318 } else { 2319 /* 2320 * Next we compare the two strings and find where they first differ 2321 */ 2322 if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2323 pos += 2; 2324 if ((*bptr == '.') && (bptr[1] == '/')) 2325 bptr += 2; 2326 else if ((*bptr == '/') && (ref->path[pos] != '/')) 2327 bptr++; 2328 while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2329 pos++; 2330 2331 if (bptr[pos] == ref->path[pos]) { 2332 val = xmlStrdup(BAD_CAST ""); 2333 goto done; /* (I can't imagine why anyone would do this) */ 2334 } 2335 2336 /* 2337 * In URI, "back up" to the last '/' encountered. This will be the 2338 * beginning of the "unique" suffix of URI 2339 */ 2340 ix = pos; 2341 if ((ref->path[ix] == '/') && (ix > 0)) 2342 ix--; 2343 else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2344 ix -= 2; 2345 for (; ix > 0; ix--) { 2346 if (ref->path[ix] == '/') 2347 break; 2348 } 2349 if (ix == 0) { 2350 uptr = (xmlChar *)ref->path; 2351 } else { 2352 ix++; 2353 uptr = (xmlChar *)&ref->path[ix]; 2354 } 2355 2356 /* 2357 * In base, count the number of '/' from the differing point 2358 */ 2359 if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2360 for (; bptr[ix] != 0; ix++) { 2361 if (bptr[ix] == '/') 2362 nbslash++; 2363 } 2364 } 2365 len = xmlStrlen (uptr) + 1; 2366 } 2367 2368 if (nbslash == 0) { 2369 if (uptr != NULL) 2370 /* exception characters from xmlSaveUri */ 2371 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2372 goto done; 2373 } 2374 2375 /* 2376 * Allocate just enough space for the returned string - 2377 * length of the remainder of the URI, plus enough space 2378 * for the "../" groups, plus one for the terminator 2379 */ 2380 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2381 if (val == NULL) { 2382 xmlGenericError(xmlGenericErrorContext, 2383 "xmlBuildRelativeURI: out of memory\n"); 2384 goto done; 2385 } 2386 vptr = val; 2387 /* 2388 * Put in as many "../" as needed 2389 */ 2390 for (; nbslash>0; nbslash--) { 2391 *vptr++ = '.'; 2392 *vptr++ = '.'; 2393 *vptr++ = '/'; 2394 } 2395 /* 2396 * Finish up with the end of the URI 2397 */ 2398 if (uptr != NULL) { 2399 if ((vptr > val) && (len > 0) && 2400 (uptr[0] == '/') && (vptr[-1] == '/')) { 2401 memcpy (vptr, uptr + 1, len - 1); 2402 vptr[len - 2] = 0; 2403 } else { 2404 memcpy (vptr, uptr, len); 2405 vptr[len - 1] = 0; 2406 } 2407 } else { 2408 vptr[len - 1] = 0; 2409 } 2410 2411 /* escape the freshly-built path */ 2412 vptr = val; 2413 /* exception characters from xmlSaveUri */ 2414 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2415 xmlFree(vptr); 2416 2417 done: 2418 /* 2419 * Free the working variables 2420 */ 2421 if (remove_path != 0) 2422 ref->path = NULL; 2423 if (ref != NULL) 2424 xmlFreeURI (ref); 2425 if (bas != NULL) 2426 xmlFreeURI (bas); 2427 2428 return val; 2429 } 2430 2431 /** 2432 * xmlCanonicPath: 2433 * @path: the resource locator in a filesystem notation 2434 * 2435 * Constructs a canonic path from the specified path. 2436 * 2437 * Returns a new canonic path, or a duplicate of the path parameter if the 2438 * construction fails. The caller is responsible for freeing the memory occupied 2439 * by the returned string. If there is insufficient memory available, or the 2440 * argument is NULL, the function returns NULL. 2441 */ 2442 #define IS_WINDOWS_PATH(p) \ 2443 ((p != NULL) && \ 2444 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2445 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2446 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2447 xmlChar * 2448 xmlCanonicPath(const xmlChar *path) 2449 { 2450 /* 2451 * For Windows implementations, additional work needs to be done to 2452 * replace backslashes in pathnames with "forward slashes" 2453 */ 2454 #if defined(_WIN32) && !defined(__CYGWIN__) 2455 int len = 0; 2456 int i = 0; 2457 xmlChar *p = NULL; 2458 #endif 2459 xmlURIPtr uri; 2460 xmlChar *ret; 2461 const xmlChar *absuri; 2462 2463 if (path == NULL) 2464 return(NULL); 2465 2466 /* sanitize filename starting with // so it can be used as URI */ 2467 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2468 path++; 2469 2470 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2471 xmlFreeURI(uri); 2472 return xmlStrdup(path); 2473 } 2474 2475 /* Check if this is an "absolute uri" */ 2476 absuri = xmlStrstr(path, BAD_CAST "://"); 2477 if (absuri != NULL) { 2478 int l, j; 2479 unsigned char c; 2480 xmlChar *escURI; 2481 2482 /* 2483 * this looks like an URI where some parts have not been 2484 * escaped leading to a parsing problem. Check that the first 2485 * part matches a protocol. 2486 */ 2487 l = absuri - path; 2488 /* Bypass if first part (part before the '://') is > 20 chars */ 2489 if ((l <= 0) || (l > 20)) 2490 goto path_processing; 2491 /* Bypass if any non-alpha characters are present in first part */ 2492 for (j = 0;j < l;j++) { 2493 c = path[j]; 2494 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2495 goto path_processing; 2496 } 2497 2498 /* Escape all except the characters specified in the supplied path */ 2499 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2500 if (escURI != NULL) { 2501 /* Try parsing the escaped path */ 2502 uri = xmlParseURI((const char *) escURI); 2503 /* If successful, return the escaped string */ 2504 if (uri != NULL) { 2505 xmlFreeURI(uri); 2506 return escURI; 2507 } 2508 } 2509 } 2510 2511 path_processing: 2512 /* For Windows implementations, replace backslashes with 'forward slashes' */ 2513 #if defined(_WIN32) && !defined(__CYGWIN__) 2514 /* 2515 * Create a URI structure 2516 */ 2517 uri = xmlCreateURI(); 2518 if (uri == NULL) { /* Guard against 'out of memory' */ 2519 return(NULL); 2520 } 2521 2522 len = xmlStrlen(path); 2523 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2524 /* make the scheme 'file' */ 2525 uri->scheme = xmlStrdup(BAD_CAST "file"); 2526 /* allocate space for leading '/' + path + string terminator */ 2527 uri->path = xmlMallocAtomic(len + 2); 2528 if (uri->path == NULL) { 2529 xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2530 return(NULL); 2531 } 2532 /* Put in leading '/' plus path */ 2533 uri->path[0] = '/'; 2534 p = uri->path + 1; 2535 strncpy(p, path, len + 1); 2536 } else { 2537 uri->path = xmlStrdup(path); 2538 if (uri->path == NULL) { 2539 xmlFreeURI(uri); 2540 return(NULL); 2541 } 2542 p = uri->path; 2543 } 2544 /* Now change all occurences of '\' to '/' */ 2545 while (*p != '\0') { 2546 if (*p == '\\') 2547 *p = '/'; 2548 p++; 2549 } 2550 2551 if (uri->scheme == NULL) { 2552 ret = xmlStrdup((const xmlChar *) uri->path); 2553 } else { 2554 ret = xmlSaveUri(uri); 2555 } 2556 2557 xmlFreeURI(uri); 2558 #else 2559 ret = xmlStrdup((const xmlChar *) path); 2560 #endif 2561 return(ret); 2562 } 2563 2564 /** 2565 * xmlPathToURI: 2566 * @path: the resource locator in a filesystem notation 2567 * 2568 * Constructs an URI expressing the existing path 2569 * 2570 * Returns a new URI, or a duplicate of the path parameter if the 2571 * construction fails. The caller is responsible for freeing the memory 2572 * occupied by the returned string. If there is insufficient memory available, 2573 * or the argument is NULL, the function returns NULL. 2574 */ 2575 xmlChar * 2576 xmlPathToURI(const xmlChar *path) 2577 { 2578 xmlURIPtr uri; 2579 xmlURI temp; 2580 xmlChar *ret, *cal; 2581 2582 if (path == NULL) 2583 return(NULL); 2584 2585 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2586 xmlFreeURI(uri); 2587 return xmlStrdup(path); 2588 } 2589 cal = xmlCanonicPath(path); 2590 if (cal == NULL) 2591 return(NULL); 2592 #if defined(_WIN32) && !defined(__CYGWIN__) 2593 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2594 If 'cal' is a valid URI allready then we are done here, as continuing would make 2595 it invalid. */ 2596 if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2597 xmlFreeURI(uri); 2598 return cal; 2599 } 2600 /* 'cal' can contain a relative path with backslashes. If that is processed 2601 by xmlSaveURI, they will be escaped and the external entity loader machinery 2602 will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2603 ret = cal; 2604 while (*ret != '\0') { 2605 if (*ret == '\\') 2606 *ret = '/'; 2607 ret++; 2608 } 2609 #endif 2610 memset(&temp, 0, sizeof(temp)); 2611 temp.path = (char *) cal; 2612 ret = xmlSaveUri(&temp); 2613 xmlFree(cal); 2614 return(ret); 2615 } 2616 #define bottom_uri 2617 #include "elfgcchack.h" 2618