Home | History | Annotate | Download | only in libxml2
      1 /**
      2  * uri.c: set of generic URI related routines
      3  *
      4  * Reference: RFCs 3986, 2732 and 2373
      5  *
      6  * See Copyright for the status of this software.
      7  *
      8  * daniel (at) veillard.com
      9  */
     10 
     11 #define IN_LIBXML
     12 #include "libxml.h"
     13 
     14 #include <string.h>
     15 
     16 #include <libxml/xmlmemory.h>
     17 #include <libxml/uri.h>
     18 #include <libxml/globals.h>
     19 #include <libxml/xmlerror.h>
     20 
     21 /**
     22  * MAX_URI_LENGTH:
     23  *
     24  * The definition of the URI regexp in the above RFC has no size limit
     25  * In practice they are usually relativey short except for the
     26  * data URI scheme as defined in RFC 2397. Even for data URI the usual
     27  * maximum size before hitting random practical limits is around 64 KB
     28  * and 4KB is usually a maximum admitted limit for proper operations.
     29  * The value below is more a security limit than anything else and
     30  * really should never be hit by 'normal' operations
     31  * Set to 1 MByte in 2012, this is only enforced on output
     32  */
     33 #define MAX_URI_LENGTH 1024 * 1024
     34 
     35 static void
     36 xmlURIErrMemory(const char *extra)
     37 {
     38     if (extra)
     39         __xmlRaiseError(NULL, NULL, NULL,
     40                         NULL, NULL, XML_FROM_URI,
     41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     42                         extra, NULL, NULL, 0, 0,
     43                         "Memory allocation failed : %s\n", extra);
     44     else
     45         __xmlRaiseError(NULL, NULL, NULL,
     46                         NULL, NULL, XML_FROM_URI,
     47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     48                         NULL, NULL, NULL, 0, 0,
     49                         "Memory allocation failed\n");
     50 }
     51 
     52 static void xmlCleanURI(xmlURIPtr uri);
     53 
     54 /*
     55  * Old rule from 2396 used in legacy handling code
     56  * alpha    = lowalpha | upalpha
     57  */
     58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
     59 
     60 
     61 /*
     62  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
     63  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
     64  *            "u" | "v" | "w" | "x" | "y" | "z"
     65  */
     66 
     67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
     68 
     69 /*
     70  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
     71  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
     72  *           "U" | "V" | "W" | "X" | "Y" | "Z"
     73  */
     74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
     75 
     76 #ifdef IS_DIGIT
     77 #undef IS_DIGIT
     78 #endif
     79 /*
     80  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
     81  */
     82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
     83 
     84 /*
     85  * alphanum = alpha | digit
     86  */
     87 
     88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
     89 
     90 /*
     91  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     92  */
     93 
     94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
     95     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
     96     ((x) == '(') || ((x) == ')'))
     97 
     98 /*
     99  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
    100  */
    101 
    102 #define IS_UNWISE(p)                                                    \
    103       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
    104        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
    105        ((*(p) == ']')) || ((*(p) == '`')))
    106 /*
    107  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
    108  *            "[" | "]"
    109  */
    110 
    111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    112         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    113         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
    114         ((x) == ']'))
    115 
    116 /*
    117  * unreserved = alphanum | mark
    118  */
    119 
    120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
    121 
    122 /*
    123  * Skip to next pointer char, handle escaped sequences
    124  */
    125 
    126 #define NEXT(p) ((*p == '%')? p += 3 : p++)
    127 
    128 /*
    129  * Productions from the spec.
    130  *
    131  *    authority     = server | reg_name
    132  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
    133  *                        ";" | ":" | "@" | "&" | "=" | "+" )
    134  *
    135  * path          = [ abs_path | opaque_part ]
    136  */
    137 
    138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
    139 
    140 /************************************************************************
    141  *									*
    142  *                         RFC 3986 parser				*
    143  *									*
    144  ************************************************************************/
    145 
    146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
    147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
    148                       ((*(p) >= 'A') && (*(p) <= 'Z')))
    149 #define ISA_HEXDIG(p)							\
    150        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
    151         ((*(p) >= 'A') && (*(p) <= 'F')))
    152 
    153 /*
    154  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    155  *                     / "*" / "+" / "," / ";" / "="
    156  */
    157 #define ISA_SUB_DELIM(p)						\
    158       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
    159        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
    160        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
    161        ((*(p) == '=')) || ((*(p) == '\'')))
    162 
    163 /*
    164  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    165  */
    166 #define ISA_GEN_DELIM(p)						\
    167       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
    168        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
    169        ((*(p) == '@')))
    170 
    171 /*
    172  *    reserved      = gen-delims / sub-delims
    173  */
    174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
    175 
    176 /*
    177  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    178  */
    179 #define ISA_UNRESERVED(p)						\
    180       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
    181        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
    182 
    183 /*
    184  *    pct-encoded   = "%" HEXDIG HEXDIG
    185  */
    186 #define ISA_PCT_ENCODED(p)						\
    187      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
    188 
    189 /*
    190  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    191  */
    192 #define ISA_PCHAR(p)							\
    193      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
    194       ((*(p) == ':')) || ((*(p) == '@')))
    195 
    196 /**
    197  * xmlParse3986Scheme:
    198  * @uri:  pointer to an URI structure
    199  * @str:  pointer to the string to analyze
    200  *
    201  * Parse an URI scheme
    202  *
    203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    204  *
    205  * Returns 0 or the error code
    206  */
    207 static int
    208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
    209     const char *cur;
    210 
    211     if (str == NULL)
    212 	return(-1);
    213 
    214     cur = *str;
    215     if (!ISA_ALPHA(cur))
    216 	return(2);
    217     cur++;
    218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
    219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
    220     if (uri != NULL) {
    221 	if (uri->scheme != NULL) xmlFree(uri->scheme);
    222 	uri->scheme = STRNDUP(*str, cur - *str);
    223     }
    224     *str = cur;
    225     return(0);
    226 }
    227 
    228 /**
    229  * xmlParse3986Fragment:
    230  * @uri:  pointer to an URI structure
    231  * @str:  pointer to the string to analyze
    232  *
    233  * Parse the query part of an URI
    234  *
    235  * fragment      = *( pchar / "/" / "?" )
    236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
    237  *       in the fragment identifier but this is used very broadly for
    238  *       xpointer scheme selection, so we are allowing it here to not break
    239  *       for example all the DocBook processing chains.
    240  *
    241  * Returns 0 or the error code
    242  */
    243 static int
    244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
    245 {
    246     const char *cur;
    247 
    248     if (str == NULL)
    249         return (-1);
    250 
    251     cur = *str;
    252 
    253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    254            (*cur == '[') || (*cur == ']') ||
    255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    256         NEXT(cur);
    257     if (uri != NULL) {
    258         if (uri->fragment != NULL)
    259             xmlFree(uri->fragment);
    260 	if (uri->cleanup & 2)
    261 	    uri->fragment = STRNDUP(*str, cur - *str);
    262 	else
    263 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
    264     }
    265     *str = cur;
    266     return (0);
    267 }
    268 
    269 /**
    270  * xmlParse3986Query:
    271  * @uri:  pointer to an URI structure
    272  * @str:  pointer to the string to analyze
    273  *
    274  * Parse the query part of an URI
    275  *
    276  * query = *uric
    277  *
    278  * Returns 0 or the error code
    279  */
    280 static int
    281 xmlParse3986Query(xmlURIPtr uri, const char **str)
    282 {
    283     const char *cur;
    284 
    285     if (str == NULL)
    286         return (-1);
    287 
    288     cur = *str;
    289 
    290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    292         NEXT(cur);
    293     if (uri != NULL) {
    294         if (uri->query != NULL)
    295             xmlFree(uri->query);
    296 	if (uri->cleanup & 2)
    297 	    uri->query = STRNDUP(*str, cur - *str);
    298 	else
    299 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
    300 
    301 	/* Save the raw bytes of the query as well.
    302 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
    303 	 */
    304 	if (uri->query_raw != NULL)
    305 	    xmlFree (uri->query_raw);
    306 	uri->query_raw = STRNDUP (*str, cur - *str);
    307     }
    308     *str = cur;
    309     return (0);
    310 }
    311 
    312 /**
    313  * xmlParse3986Port:
    314  * @uri:  pointer to an URI structure
    315  * @str:  the string to analyze
    316  *
    317  * Parse a port part and fills in the appropriate fields
    318  * of the @uri structure
    319  *
    320  * port          = *DIGIT
    321  *
    322  * Returns 0 or the error code
    323  */
    324 static int
    325 xmlParse3986Port(xmlURIPtr uri, const char **str)
    326 {
    327     const char *cur = *str;
    328     unsigned port = 0; /* unsigned for defined overflow behavior */
    329 
    330     if (ISA_DIGIT(cur)) {
    331 	while (ISA_DIGIT(cur)) {
    332 	    port = port * 10 + (*cur - '0');
    333 
    334 	    cur++;
    335 	}
    336 	if (uri != NULL)
    337 	    uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
    338 	*str = cur;
    339 	return(0);
    340     }
    341     return(1);
    342 }
    343 
    344 /**
    345  * xmlParse3986Userinfo:
    346  * @uri:  pointer to an URI structure
    347  * @str:  the string to analyze
    348  *
    349  * Parse an user informations part and fills in the appropriate fields
    350  * of the @uri structure
    351  *
    352  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
    353  *
    354  * Returns 0 or the error code
    355  */
    356 static int
    357 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
    358 {
    359     const char *cur;
    360 
    361     cur = *str;
    362     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
    363            ISA_SUB_DELIM(cur) || (*cur == ':'))
    364 	NEXT(cur);
    365     if (*cur == '@') {
    366 	if (uri != NULL) {
    367 	    if (uri->user != NULL) xmlFree(uri->user);
    368 	    if (uri->cleanup & 2)
    369 		uri->user = STRNDUP(*str, cur - *str);
    370 	    else
    371 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
    372 	}
    373 	*str = cur;
    374 	return(0);
    375     }
    376     return(1);
    377 }
    378 
    379 /**
    380  * xmlParse3986DecOctet:
    381  * @str:  the string to analyze
    382  *
    383  *    dec-octet     = DIGIT                 ; 0-9
    384  *                  / %x31-39 DIGIT         ; 10-99
    385  *                  / "1" 2DIGIT            ; 100-199
    386  *                  / "2" %x30-34 DIGIT     ; 200-249
    387  *                  / "25" %x30-35          ; 250-255
    388  *
    389  * Skip a dec-octet.
    390  *
    391  * Returns 0 if found and skipped, 1 otherwise
    392  */
    393 static int
    394 xmlParse3986DecOctet(const char **str) {
    395     const char *cur = *str;
    396 
    397     if (!(ISA_DIGIT(cur)))
    398         return(1);
    399     if (!ISA_DIGIT(cur+1))
    400 	cur++;
    401     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
    402 	cur += 2;
    403     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
    404 	cur += 3;
    405     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
    406 	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
    407 	cur += 3;
    408     else if ((*cur == '2') && (*(cur + 1) == '5') &&
    409 	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
    410 	cur += 3;
    411     else
    412         return(1);
    413     *str = cur;
    414     return(0);
    415 }
    416 /**
    417  * xmlParse3986Host:
    418  * @uri:  pointer to an URI structure
    419  * @str:  the string to analyze
    420  *
    421  * Parse an host part and fills in the appropriate fields
    422  * of the @uri structure
    423  *
    424  * host          = IP-literal / IPv4address / reg-name
    425  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
    426  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    427  * reg-name      = *( unreserved / pct-encoded / sub-delims )
    428  *
    429  * Returns 0 or the error code
    430  */
    431 static int
    432 xmlParse3986Host(xmlURIPtr uri, const char **str)
    433 {
    434     const char *cur = *str;
    435     const char *host;
    436 
    437     host = cur;
    438     /*
    439      * IPv6 and future adressing scheme are enclosed between brackets
    440      */
    441     if (*cur == '[') {
    442         cur++;
    443 	while ((*cur != ']') && (*cur != 0))
    444 	    cur++;
    445 	if (*cur != ']')
    446 	    return(1);
    447 	cur++;
    448 	goto found;
    449     }
    450     /*
    451      * try to parse an IPv4
    452      */
    453     if (ISA_DIGIT(cur)) {
    454         if (xmlParse3986DecOctet(&cur) != 0)
    455 	    goto not_ipv4;
    456 	if (*cur != '.')
    457 	    goto not_ipv4;
    458 	cur++;
    459         if (xmlParse3986DecOctet(&cur) != 0)
    460 	    goto not_ipv4;
    461 	if (*cur != '.')
    462 	    goto not_ipv4;
    463         if (xmlParse3986DecOctet(&cur) != 0)
    464 	    goto not_ipv4;
    465 	if (*cur != '.')
    466 	    goto not_ipv4;
    467         if (xmlParse3986DecOctet(&cur) != 0)
    468 	    goto not_ipv4;
    469 	goto found;
    470 not_ipv4:
    471         cur = *str;
    472     }
    473     /*
    474      * then this should be a hostname which can be empty
    475      */
    476     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
    477         NEXT(cur);
    478 found:
    479     if (uri != NULL) {
    480 	if (uri->authority != NULL) xmlFree(uri->authority);
    481 	uri->authority = NULL;
    482 	if (uri->server != NULL) xmlFree(uri->server);
    483 	if (cur != host) {
    484 	    if (uri->cleanup & 2)
    485 		uri->server = STRNDUP(host, cur - host);
    486 	    else
    487 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
    488 	} else
    489 	    uri->server = NULL;
    490     }
    491     *str = cur;
    492     return(0);
    493 }
    494 
    495 /**
    496  * xmlParse3986Authority:
    497  * @uri:  pointer to an URI structure
    498  * @str:  the string to analyze
    499  *
    500  * Parse an authority part and fills in the appropriate fields
    501  * of the @uri structure
    502  *
    503  * authority     = [ userinfo "@" ] host [ ":" port ]
    504  *
    505  * Returns 0 or the error code
    506  */
    507 static int
    508 xmlParse3986Authority(xmlURIPtr uri, const char **str)
    509 {
    510     const char *cur;
    511     int ret;
    512 
    513     cur = *str;
    514     /*
    515      * try to parse an userinfo and check for the trailing @
    516      */
    517     ret = xmlParse3986Userinfo(uri, &cur);
    518     if ((ret != 0) || (*cur != '@'))
    519         cur = *str;
    520     else
    521         cur++;
    522     ret = xmlParse3986Host(uri, &cur);
    523     if (ret != 0) return(ret);
    524     if (*cur == ':') {
    525         cur++;
    526         ret = xmlParse3986Port(uri, &cur);
    527 	if (ret != 0) return(ret);
    528     }
    529     *str = cur;
    530     return(0);
    531 }
    532 
    533 /**
    534  * xmlParse3986Segment:
    535  * @str:  the string to analyze
    536  * @forbid: an optional forbidden character
    537  * @empty: allow an empty segment
    538  *
    539  * Parse a segment and fills in the appropriate fields
    540  * of the @uri structure
    541  *
    542  * segment       = *pchar
    543  * segment-nz    = 1*pchar
    544  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
    545  *               ; non-zero-length segment without any colon ":"
    546  *
    547  * Returns 0 or the error code
    548  */
    549 static int
    550 xmlParse3986Segment(const char **str, char forbid, int empty)
    551 {
    552     const char *cur;
    553 
    554     cur = *str;
    555     if (!ISA_PCHAR(cur)) {
    556         if (empty)
    557 	    return(0);
    558 	return(1);
    559     }
    560     while (ISA_PCHAR(cur) && (*cur != forbid))
    561         NEXT(cur);
    562     *str = cur;
    563     return (0);
    564 }
    565 
    566 /**
    567  * xmlParse3986PathAbEmpty:
    568  * @uri:  pointer to an URI structure
    569  * @str:  the string to analyze
    570  *
    571  * Parse an path absolute or empty and fills in the appropriate fields
    572  * of the @uri structure
    573  *
    574  * path-abempty  = *( "/" segment )
    575  *
    576  * Returns 0 or the error code
    577  */
    578 static int
    579 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
    580 {
    581     const char *cur;
    582     int ret;
    583 
    584     cur = *str;
    585 
    586     while (*cur == '/') {
    587         cur++;
    588 	ret = xmlParse3986Segment(&cur, 0, 1);
    589 	if (ret != 0) return(ret);
    590     }
    591     if (uri != NULL) {
    592 	if (uri->path != NULL) xmlFree(uri->path);
    593         if (*str != cur) {
    594             if (uri->cleanup & 2)
    595                 uri->path = STRNDUP(*str, cur - *str);
    596             else
    597                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    598         } else {
    599             uri->path = NULL;
    600         }
    601     }
    602     *str = cur;
    603     return (0);
    604 }
    605 
    606 /**
    607  * xmlParse3986PathAbsolute:
    608  * @uri:  pointer to an URI structure
    609  * @str:  the string to analyze
    610  *
    611  * Parse an path absolute and fills in the appropriate fields
    612  * of the @uri structure
    613  *
    614  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
    615  *
    616  * Returns 0 or the error code
    617  */
    618 static int
    619 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
    620 {
    621     const char *cur;
    622     int ret;
    623 
    624     cur = *str;
    625 
    626     if (*cur != '/')
    627         return(1);
    628     cur++;
    629     ret = xmlParse3986Segment(&cur, 0, 0);
    630     if (ret == 0) {
    631 	while (*cur == '/') {
    632 	    cur++;
    633 	    ret = xmlParse3986Segment(&cur, 0, 1);
    634 	    if (ret != 0) return(ret);
    635 	}
    636     }
    637     if (uri != NULL) {
    638 	if (uri->path != NULL) xmlFree(uri->path);
    639         if (cur != *str) {
    640             if (uri->cleanup & 2)
    641                 uri->path = STRNDUP(*str, cur - *str);
    642             else
    643                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    644         } else {
    645             uri->path = NULL;
    646         }
    647     }
    648     *str = cur;
    649     return (0);
    650 }
    651 
    652 /**
    653  * xmlParse3986PathRootless:
    654  * @uri:  pointer to an URI structure
    655  * @str:  the string to analyze
    656  *
    657  * Parse an path without root and fills in the appropriate fields
    658  * of the @uri structure
    659  *
    660  * path-rootless = segment-nz *( "/" segment )
    661  *
    662  * Returns 0 or the error code
    663  */
    664 static int
    665 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
    666 {
    667     const char *cur;
    668     int ret;
    669 
    670     cur = *str;
    671 
    672     ret = xmlParse3986Segment(&cur, 0, 0);
    673     if (ret != 0) return(ret);
    674     while (*cur == '/') {
    675         cur++;
    676 	ret = xmlParse3986Segment(&cur, 0, 1);
    677 	if (ret != 0) return(ret);
    678     }
    679     if (uri != NULL) {
    680 	if (uri->path != NULL) xmlFree(uri->path);
    681         if (cur != *str) {
    682             if (uri->cleanup & 2)
    683                 uri->path = STRNDUP(*str, cur - *str);
    684             else
    685                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    686         } else {
    687             uri->path = NULL;
    688         }
    689     }
    690     *str = cur;
    691     return (0);
    692 }
    693 
    694 /**
    695  * xmlParse3986PathNoScheme:
    696  * @uri:  pointer to an URI structure
    697  * @str:  the string to analyze
    698  *
    699  * Parse an path which is not a scheme and fills in the appropriate fields
    700  * of the @uri structure
    701  *
    702  * path-noscheme = segment-nz-nc *( "/" segment )
    703  *
    704  * Returns 0 or the error code
    705  */
    706 static int
    707 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
    708 {
    709     const char *cur;
    710     int ret;
    711 
    712     cur = *str;
    713 
    714     ret = xmlParse3986Segment(&cur, ':', 0);
    715     if (ret != 0) return(ret);
    716     while (*cur == '/') {
    717         cur++;
    718 	ret = xmlParse3986Segment(&cur, 0, 1);
    719 	if (ret != 0) return(ret);
    720     }
    721     if (uri != NULL) {
    722 	if (uri->path != NULL) xmlFree(uri->path);
    723         if (cur != *str) {
    724             if (uri->cleanup & 2)
    725                 uri->path = STRNDUP(*str, cur - *str);
    726             else
    727                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    728         } else {
    729             uri->path = NULL;
    730         }
    731     }
    732     *str = cur;
    733     return (0);
    734 }
    735 
    736 /**
    737  * xmlParse3986HierPart:
    738  * @uri:  pointer to an URI structure
    739  * @str:  the string to analyze
    740  *
    741  * Parse an hierarchical part and fills in the appropriate fields
    742  * of the @uri structure
    743  *
    744  * hier-part     = "//" authority path-abempty
    745  *                / path-absolute
    746  *                / path-rootless
    747  *                / path-empty
    748  *
    749  * Returns 0 or the error code
    750  */
    751 static int
    752 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
    753 {
    754     const char *cur;
    755     int ret;
    756 
    757     cur = *str;
    758 
    759     if ((*cur == '/') && (*(cur + 1) == '/')) {
    760         cur += 2;
    761 	ret = xmlParse3986Authority(uri, &cur);
    762 	if (ret != 0) return(ret);
    763 	if (uri->server == NULL)
    764 	    uri->port = -1;
    765 	ret = xmlParse3986PathAbEmpty(uri, &cur);
    766 	if (ret != 0) return(ret);
    767 	*str = cur;
    768 	return(0);
    769     } else if (*cur == '/') {
    770         ret = xmlParse3986PathAbsolute(uri, &cur);
    771 	if (ret != 0) return(ret);
    772     } else if (ISA_PCHAR(cur)) {
    773         ret = xmlParse3986PathRootless(uri, &cur);
    774 	if (ret != 0) return(ret);
    775     } else {
    776 	/* path-empty is effectively empty */
    777 	if (uri != NULL) {
    778 	    if (uri->path != NULL) xmlFree(uri->path);
    779 	    uri->path = NULL;
    780 	}
    781     }
    782     *str = cur;
    783     return (0);
    784 }
    785 
    786 /**
    787  * xmlParse3986RelativeRef:
    788  * @uri:  pointer to an URI structure
    789  * @str:  the string to analyze
    790  *
    791  * Parse an URI string and fills in the appropriate fields
    792  * of the @uri structure
    793  *
    794  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
    795  * relative-part = "//" authority path-abempty
    796  *               / path-absolute
    797  *               / path-noscheme
    798  *               / path-empty
    799  *
    800  * Returns 0 or the error code
    801  */
    802 static int
    803 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
    804     int ret;
    805 
    806     if ((*str == '/') && (*(str + 1) == '/')) {
    807         str += 2;
    808 	ret = xmlParse3986Authority(uri, &str);
    809 	if (ret != 0) return(ret);
    810 	ret = xmlParse3986PathAbEmpty(uri, &str);
    811 	if (ret != 0) return(ret);
    812     } else if (*str == '/') {
    813 	ret = xmlParse3986PathAbsolute(uri, &str);
    814 	if (ret != 0) return(ret);
    815     } else if (ISA_PCHAR(str)) {
    816         ret = xmlParse3986PathNoScheme(uri, &str);
    817 	if (ret != 0) return(ret);
    818     } else {
    819 	/* path-empty is effectively empty */
    820 	if (uri != NULL) {
    821 	    if (uri->path != NULL) xmlFree(uri->path);
    822 	    uri->path = NULL;
    823 	}
    824     }
    825 
    826     if (*str == '?') {
    827 	str++;
    828 	ret = xmlParse3986Query(uri, &str);
    829 	if (ret != 0) return(ret);
    830     }
    831     if (*str == '#') {
    832 	str++;
    833 	ret = xmlParse3986Fragment(uri, &str);
    834 	if (ret != 0) return(ret);
    835     }
    836     if (*str != 0) {
    837 	xmlCleanURI(uri);
    838 	return(1);
    839     }
    840     return(0);
    841 }
    842 
    843 
    844 /**
    845  * xmlParse3986URI:
    846  * @uri:  pointer to an URI structure
    847  * @str:  the string to analyze
    848  *
    849  * Parse an URI string and fills in the appropriate fields
    850  * of the @uri structure
    851  *
    852  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    853  *
    854  * Returns 0 or the error code
    855  */
    856 static int
    857 xmlParse3986URI(xmlURIPtr uri, const char *str) {
    858     int ret;
    859 
    860     ret = xmlParse3986Scheme(uri, &str);
    861     if (ret != 0) return(ret);
    862     if (*str != ':') {
    863 	return(1);
    864     }
    865     str++;
    866     ret = xmlParse3986HierPart(uri, &str);
    867     if (ret != 0) return(ret);
    868     if (*str == '?') {
    869 	str++;
    870 	ret = xmlParse3986Query(uri, &str);
    871 	if (ret != 0) return(ret);
    872     }
    873     if (*str == '#') {
    874 	str++;
    875 	ret = xmlParse3986Fragment(uri, &str);
    876 	if (ret != 0) return(ret);
    877     }
    878     if (*str != 0) {
    879 	xmlCleanURI(uri);
    880 	return(1);
    881     }
    882     return(0);
    883 }
    884 
    885 /**
    886  * xmlParse3986URIReference:
    887  * @uri:  pointer to an URI structure
    888  * @str:  the string to analyze
    889  *
    890  * Parse an URI reference string and fills in the appropriate fields
    891  * of the @uri structure
    892  *
    893  * URI-reference = URI / relative-ref
    894  *
    895  * Returns 0 or the error code
    896  */
    897 static int
    898 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
    899     int ret;
    900 
    901     if (str == NULL)
    902 	return(-1);
    903     xmlCleanURI(uri);
    904 
    905     /*
    906      * Try first to parse absolute refs, then fallback to relative if
    907      * it fails.
    908      */
    909     ret = xmlParse3986URI(uri, str);
    910     if (ret != 0) {
    911 	xmlCleanURI(uri);
    912         ret = xmlParse3986RelativeRef(uri, str);
    913 	if (ret != 0) {
    914 	    xmlCleanURI(uri);
    915 	    return(ret);
    916 	}
    917     }
    918     return(0);
    919 }
    920 
    921 /**
    922  * xmlParseURI:
    923  * @str:  the URI string to analyze
    924  *
    925  * Parse an URI based on RFC 3986
    926  *
    927  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    928  *
    929  * Returns a newly built xmlURIPtr or NULL in case of error
    930  */
    931 xmlURIPtr
    932 xmlParseURI(const char *str) {
    933     xmlURIPtr uri;
    934     int ret;
    935 
    936     if (str == NULL)
    937 	return(NULL);
    938     uri = xmlCreateURI();
    939     if (uri != NULL) {
    940 	ret = xmlParse3986URIReference(uri, str);
    941         if (ret) {
    942 	    xmlFreeURI(uri);
    943 	    return(NULL);
    944 	}
    945     }
    946     return(uri);
    947 }
    948 
    949 /**
    950  * xmlParseURIReference:
    951  * @uri:  pointer to an URI structure
    952  * @str:  the string to analyze
    953  *
    954  * Parse an URI reference string based on RFC 3986 and fills in the
    955  * appropriate fields of the @uri structure
    956  *
    957  * URI-reference = URI / relative-ref
    958  *
    959  * Returns 0 or the error code
    960  */
    961 int
    962 xmlParseURIReference(xmlURIPtr uri, const char *str) {
    963     return(xmlParse3986URIReference(uri, str));
    964 }
    965 
    966 /**
    967  * xmlParseURIRaw:
    968  * @str:  the URI string to analyze
    969  * @raw:  if 1 unescaping of URI pieces are disabled
    970  *
    971  * Parse an URI but allows to keep intact the original fragments.
    972  *
    973  * URI-reference = URI / relative-ref
    974  *
    975  * Returns a newly built xmlURIPtr or NULL in case of error
    976  */
    977 xmlURIPtr
    978 xmlParseURIRaw(const char *str, int raw) {
    979     xmlURIPtr uri;
    980     int ret;
    981 
    982     if (str == NULL)
    983 	return(NULL);
    984     uri = xmlCreateURI();
    985     if (uri != NULL) {
    986         if (raw) {
    987 	    uri->cleanup |= 2;
    988 	}
    989 	ret = xmlParseURIReference(uri, str);
    990         if (ret) {
    991 	    xmlFreeURI(uri);
    992 	    return(NULL);
    993 	}
    994     }
    995     return(uri);
    996 }
    997 
    998 /************************************************************************
    999  *									*
   1000  *			Generic URI structure functions			*
   1001  *									*
   1002  ************************************************************************/
   1003 
   1004 /**
   1005  * xmlCreateURI:
   1006  *
   1007  * Simply creates an empty xmlURI
   1008  *
   1009  * Returns the new structure or NULL in case of error
   1010  */
   1011 xmlURIPtr
   1012 xmlCreateURI(void) {
   1013     xmlURIPtr ret;
   1014 
   1015     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
   1016     if (ret == NULL) {
   1017         xmlURIErrMemory("creating URI structure\n");
   1018 	return(NULL);
   1019     }
   1020     memset(ret, 0, sizeof(xmlURI));
   1021     return(ret);
   1022 }
   1023 
   1024 /**
   1025  * xmlSaveUriRealloc:
   1026  *
   1027  * Function to handle properly a reallocation when saving an URI
   1028  * Also imposes some limit on the length of an URI string output
   1029  */
   1030 static xmlChar *
   1031 xmlSaveUriRealloc(xmlChar *ret, int *max) {
   1032     xmlChar *temp;
   1033     int tmp;
   1034 
   1035     if (*max > MAX_URI_LENGTH) {
   1036         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
   1037         return(NULL);
   1038     }
   1039     tmp = *max * 2;
   1040     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
   1041     if (temp == NULL) {
   1042         xmlURIErrMemory("saving URI\n");
   1043         return(NULL);
   1044     }
   1045     *max = tmp;
   1046     return(temp);
   1047 }
   1048 
   1049 /**
   1050  * xmlSaveUri:
   1051  * @uri:  pointer to an xmlURI
   1052  *
   1053  * Save the URI as an escaped string
   1054  *
   1055  * Returns a new string (to be deallocated by caller)
   1056  */
   1057 xmlChar *
   1058 xmlSaveUri(xmlURIPtr uri) {
   1059     xmlChar *ret = NULL;
   1060     xmlChar *temp;
   1061     const char *p;
   1062     int len;
   1063     int max;
   1064 
   1065     if (uri == NULL) return(NULL);
   1066 
   1067 
   1068     max = 80;
   1069     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
   1070     if (ret == NULL) {
   1071         xmlURIErrMemory("saving URI\n");
   1072 	return(NULL);
   1073     }
   1074     len = 0;
   1075 
   1076     if (uri->scheme != NULL) {
   1077 	p = uri->scheme;
   1078 	while (*p != 0) {
   1079 	    if (len >= max) {
   1080                 temp = xmlSaveUriRealloc(ret, &max);
   1081                 if (temp == NULL) goto mem_error;
   1082 		ret = temp;
   1083 	    }
   1084 	    ret[len++] = *p++;
   1085 	}
   1086 	if (len >= max) {
   1087             temp = xmlSaveUriRealloc(ret, &max);
   1088             if (temp == NULL) goto mem_error;
   1089             ret = temp;
   1090 	}
   1091 	ret[len++] = ':';
   1092     }
   1093     if (uri->opaque != NULL) {
   1094 	p = uri->opaque;
   1095 	while (*p != 0) {
   1096 	    if (len + 3 >= max) {
   1097                 temp = xmlSaveUriRealloc(ret, &max);
   1098                 if (temp == NULL) goto mem_error;
   1099                 ret = temp;
   1100 	    }
   1101 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
   1102 		ret[len++] = *p++;
   1103 	    else {
   1104 		int val = *(unsigned char *)p++;
   1105 		int hi = val / 0x10, lo = val % 0x10;
   1106 		ret[len++] = '%';
   1107 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1108 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1109 	    }
   1110 	}
   1111     } else {
   1112 	if ((uri->server != NULL) || (uri->port == -1)) {
   1113 	    if (len + 3 >= max) {
   1114                 temp = xmlSaveUriRealloc(ret, &max);
   1115                 if (temp == NULL) goto mem_error;
   1116                 ret = temp;
   1117 	    }
   1118 	    ret[len++] = '/';
   1119 	    ret[len++] = '/';
   1120 	    if (uri->user != NULL) {
   1121 		p = uri->user;
   1122 		while (*p != 0) {
   1123 		    if (len + 3 >= max) {
   1124                         temp = xmlSaveUriRealloc(ret, &max);
   1125                         if (temp == NULL) goto mem_error;
   1126                         ret = temp;
   1127 		    }
   1128 		    if ((IS_UNRESERVED(*(p))) ||
   1129 			((*(p) == ';')) || ((*(p) == ':')) ||
   1130 			((*(p) == '&')) || ((*(p) == '=')) ||
   1131 			((*(p) == '+')) || ((*(p) == '$')) ||
   1132 			((*(p) == ',')))
   1133 			ret[len++] = *p++;
   1134 		    else {
   1135 			int val = *(unsigned char *)p++;
   1136 			int hi = val / 0x10, lo = val % 0x10;
   1137 			ret[len++] = '%';
   1138 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1139 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1140 		    }
   1141 		}
   1142 		if (len + 3 >= max) {
   1143                     temp = xmlSaveUriRealloc(ret, &max);
   1144                     if (temp == NULL) goto mem_error;
   1145                     ret = temp;
   1146 		}
   1147 		ret[len++] = '@';
   1148 	    }
   1149 	    if (uri->server != NULL) {
   1150 		p = uri->server;
   1151 		while (*p != 0) {
   1152 		    if (len >= max) {
   1153 			temp = xmlSaveUriRealloc(ret, &max);
   1154 			if (temp == NULL) goto mem_error;
   1155 			ret = temp;
   1156 		    }
   1157 		    ret[len++] = *p++;
   1158 		}
   1159 		if (uri->port > 0) {
   1160 		    if (len + 10 >= max) {
   1161 			temp = xmlSaveUriRealloc(ret, &max);
   1162 			if (temp == NULL) goto mem_error;
   1163 			ret = temp;
   1164 		    }
   1165 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
   1166 		}
   1167 	    }
   1168 	} else if (uri->authority != NULL) {
   1169 	    if (len + 3 >= max) {
   1170                 temp = xmlSaveUriRealloc(ret, &max);
   1171                 if (temp == NULL) goto mem_error;
   1172                 ret = temp;
   1173 	    }
   1174 	    ret[len++] = '/';
   1175 	    ret[len++] = '/';
   1176 	    p = uri->authority;
   1177 	    while (*p != 0) {
   1178 		if (len + 3 >= max) {
   1179                     temp = xmlSaveUriRealloc(ret, &max);
   1180                     if (temp == NULL) goto mem_error;
   1181                     ret = temp;
   1182 		}
   1183 		if ((IS_UNRESERVED(*(p))) ||
   1184                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
   1185                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
   1186                     ((*(p) == '=')) || ((*(p) == '+')))
   1187 		    ret[len++] = *p++;
   1188 		else {
   1189 		    int val = *(unsigned char *)p++;
   1190 		    int hi = val / 0x10, lo = val % 0x10;
   1191 		    ret[len++] = '%';
   1192 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1193 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1194 		}
   1195 	    }
   1196 	} else if (uri->scheme != NULL) {
   1197 	    if (len + 3 >= max) {
   1198                 temp = xmlSaveUriRealloc(ret, &max);
   1199                 if (temp == NULL) goto mem_error;
   1200                 ret = temp;
   1201 	    }
   1202 	}
   1203 	if (uri->path != NULL) {
   1204 	    p = uri->path;
   1205 	    /*
   1206 	     * the colon in file:///d: should not be escaped or
   1207 	     * Windows accesses fail later.
   1208 	     */
   1209 	    if ((uri->scheme != NULL) &&
   1210 		(p[0] == '/') &&
   1211 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
   1212 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
   1213 		(p[2] == ':') &&
   1214 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
   1215 		if (len + 3 >= max) {
   1216                     temp = xmlSaveUriRealloc(ret, &max);
   1217                     if (temp == NULL) goto mem_error;
   1218                     ret = temp;
   1219 		}
   1220 		ret[len++] = *p++;
   1221 		ret[len++] = *p++;
   1222 		ret[len++] = *p++;
   1223 	    }
   1224 	    while (*p != 0) {
   1225 		if (len + 3 >= max) {
   1226                     temp = xmlSaveUriRealloc(ret, &max);
   1227                     if (temp == NULL) goto mem_error;
   1228                     ret = temp;
   1229 		}
   1230 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
   1231                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
   1232 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
   1233 	            ((*(p) == ',')))
   1234 		    ret[len++] = *p++;
   1235 		else {
   1236 		    int val = *(unsigned char *)p++;
   1237 		    int hi = val / 0x10, lo = val % 0x10;
   1238 		    ret[len++] = '%';
   1239 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1240 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1241 		}
   1242 	    }
   1243 	}
   1244 	if (uri->query_raw != NULL) {
   1245 	    if (len + 1 >= max) {
   1246                 temp = xmlSaveUriRealloc(ret, &max);
   1247                 if (temp == NULL) goto mem_error;
   1248                 ret = temp;
   1249 	    }
   1250 	    ret[len++] = '?';
   1251 	    p = uri->query_raw;
   1252 	    while (*p != 0) {
   1253 		if (len + 1 >= max) {
   1254                     temp = xmlSaveUriRealloc(ret, &max);
   1255                     if (temp == NULL) goto mem_error;
   1256                     ret = temp;
   1257 		}
   1258 		ret[len++] = *p++;
   1259 	    }
   1260 	} else if (uri->query != NULL) {
   1261 	    if (len + 3 >= max) {
   1262                 temp = xmlSaveUriRealloc(ret, &max);
   1263                 if (temp == NULL) goto mem_error;
   1264                 ret = temp;
   1265 	    }
   1266 	    ret[len++] = '?';
   1267 	    p = uri->query;
   1268 	    while (*p != 0) {
   1269 		if (len + 3 >= max) {
   1270                     temp = xmlSaveUriRealloc(ret, &max);
   1271                     if (temp == NULL) goto mem_error;
   1272                     ret = temp;
   1273 		}
   1274 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
   1275 		    ret[len++] = *p++;
   1276 		else {
   1277 		    int val = *(unsigned char *)p++;
   1278 		    int hi = val / 0x10, lo = val % 0x10;
   1279 		    ret[len++] = '%';
   1280 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1281 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1282 		}
   1283 	    }
   1284 	}
   1285     }
   1286     if (uri->fragment != NULL) {
   1287 	if (len + 3 >= max) {
   1288             temp = xmlSaveUriRealloc(ret, &max);
   1289             if (temp == NULL) goto mem_error;
   1290             ret = temp;
   1291 	}
   1292 	ret[len++] = '#';
   1293 	p = uri->fragment;
   1294 	while (*p != 0) {
   1295 	    if (len + 3 >= max) {
   1296                 temp = xmlSaveUriRealloc(ret, &max);
   1297                 if (temp == NULL) goto mem_error;
   1298                 ret = temp;
   1299 	    }
   1300 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
   1301 		ret[len++] = *p++;
   1302 	    else {
   1303 		int val = *(unsigned char *)p++;
   1304 		int hi = val / 0x10, lo = val % 0x10;
   1305 		ret[len++] = '%';
   1306 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1307 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1308 	    }
   1309 	}
   1310     }
   1311     if (len >= max) {
   1312         temp = xmlSaveUriRealloc(ret, &max);
   1313         if (temp == NULL) goto mem_error;
   1314         ret = temp;
   1315     }
   1316     ret[len] = 0;
   1317     return(ret);
   1318 
   1319 mem_error:
   1320     xmlFree(ret);
   1321     return(NULL);
   1322 }
   1323 
   1324 /**
   1325  * xmlPrintURI:
   1326  * @stream:  a FILE* for the output
   1327  * @uri:  pointer to an xmlURI
   1328  *
   1329  * Prints the URI in the stream @stream.
   1330  */
   1331 void
   1332 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
   1333     xmlChar *out;
   1334 
   1335     out = xmlSaveUri(uri);
   1336     if (out != NULL) {
   1337 	fprintf(stream, "%s", (char *) out);
   1338 	xmlFree(out);
   1339     }
   1340 }
   1341 
   1342 /**
   1343  * xmlCleanURI:
   1344  * @uri:  pointer to an xmlURI
   1345  *
   1346  * Make sure the xmlURI struct is free of content
   1347  */
   1348 static void
   1349 xmlCleanURI(xmlURIPtr uri) {
   1350     if (uri == NULL) return;
   1351 
   1352     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1353     uri->scheme = NULL;
   1354     if (uri->server != NULL) xmlFree(uri->server);
   1355     uri->server = NULL;
   1356     if (uri->user != NULL) xmlFree(uri->user);
   1357     uri->user = NULL;
   1358     if (uri->path != NULL) xmlFree(uri->path);
   1359     uri->path = NULL;
   1360     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1361     uri->fragment = NULL;
   1362     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1363     uri->opaque = NULL;
   1364     if (uri->authority != NULL) xmlFree(uri->authority);
   1365     uri->authority = NULL;
   1366     if (uri->query != NULL) xmlFree(uri->query);
   1367     uri->query = NULL;
   1368     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1369     uri->query_raw = NULL;
   1370 }
   1371 
   1372 /**
   1373  * xmlFreeURI:
   1374  * @uri:  pointer to an xmlURI
   1375  *
   1376  * Free up the xmlURI struct
   1377  */
   1378 void
   1379 xmlFreeURI(xmlURIPtr uri) {
   1380     if (uri == NULL) return;
   1381 
   1382     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1383     if (uri->server != NULL) xmlFree(uri->server);
   1384     if (uri->user != NULL) xmlFree(uri->user);
   1385     if (uri->path != NULL) xmlFree(uri->path);
   1386     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1387     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1388     if (uri->authority != NULL) xmlFree(uri->authority);
   1389     if (uri->query != NULL) xmlFree(uri->query);
   1390     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1391     xmlFree(uri);
   1392 }
   1393 
   1394 /************************************************************************
   1395  *									*
   1396  *			Helper functions				*
   1397  *									*
   1398  ************************************************************************/
   1399 
   1400 /**
   1401  * xmlNormalizeURIPath:
   1402  * @path:  pointer to the path string
   1403  *
   1404  * Applies the 5 normalization steps to a path string--that is, RFC 2396
   1405  * Section 5.2, steps 6.c through 6.g.
   1406  *
   1407  * Normalization occurs directly on the string, no new allocation is done
   1408  *
   1409  * Returns 0 or an error code
   1410  */
   1411 int
   1412 xmlNormalizeURIPath(char *path) {
   1413     char *cur, *out;
   1414 
   1415     if (path == NULL)
   1416 	return(-1);
   1417 
   1418     /* Skip all initial "/" chars.  We want to get to the beginning of the
   1419      * first non-empty segment.
   1420      */
   1421     cur = path;
   1422     while (cur[0] == '/')
   1423       ++cur;
   1424     if (cur[0] == '\0')
   1425       return(0);
   1426 
   1427     /* Keep everything we've seen so far.  */
   1428     out = cur;
   1429 
   1430     /*
   1431      * Analyze each segment in sequence for cases (c) and (d).
   1432      */
   1433     while (cur[0] != '\0') {
   1434 	/*
   1435 	 * c) All occurrences of "./", where "." is a complete path segment,
   1436 	 *    are removed from the buffer string.
   1437 	 */
   1438 	if ((cur[0] == '.') && (cur[1] == '/')) {
   1439 	    cur += 2;
   1440 	    /* '//' normalization should be done at this point too */
   1441 	    while (cur[0] == '/')
   1442 		cur++;
   1443 	    continue;
   1444 	}
   1445 
   1446 	/*
   1447 	 * d) If the buffer string ends with "." as a complete path segment,
   1448 	 *    that "." is removed.
   1449 	 */
   1450 	if ((cur[0] == '.') && (cur[1] == '\0'))
   1451 	    break;
   1452 
   1453 	/* Otherwise keep the segment.  */
   1454 	while (cur[0] != '/') {
   1455             if (cur[0] == '\0')
   1456               goto done_cd;
   1457 	    (out++)[0] = (cur++)[0];
   1458 	}
   1459 	/* nomalize // */
   1460 	while ((cur[0] == '/') && (cur[1] == '/'))
   1461 	    cur++;
   1462 
   1463         (out++)[0] = (cur++)[0];
   1464     }
   1465  done_cd:
   1466     out[0] = '\0';
   1467 
   1468     /* Reset to the beginning of the first segment for the next sequence.  */
   1469     cur = path;
   1470     while (cur[0] == '/')
   1471       ++cur;
   1472     if (cur[0] == '\0')
   1473 	return(0);
   1474 
   1475     /*
   1476      * Analyze each segment in sequence for cases (e) and (f).
   1477      *
   1478      * e) All occurrences of "<segment>/../", where <segment> is a
   1479      *    complete path segment not equal to "..", are removed from the
   1480      *    buffer string.  Removal of these path segments is performed
   1481      *    iteratively, removing the leftmost matching pattern on each
   1482      *    iteration, until no matching pattern remains.
   1483      *
   1484      * f) If the buffer string ends with "<segment>/..", where <segment>
   1485      *    is a complete path segment not equal to "..", that
   1486      *    "<segment>/.." is removed.
   1487      *
   1488      * To satisfy the "iterative" clause in (e), we need to collapse the
   1489      * string every time we find something that needs to be removed.  Thus,
   1490      * we don't need to keep two pointers into the string: we only need a
   1491      * "current position" pointer.
   1492      */
   1493     while (1) {
   1494         char *segp, *tmp;
   1495 
   1496         /* At the beginning of each iteration of this loop, "cur" points to
   1497          * the first character of the segment we want to examine.
   1498          */
   1499 
   1500         /* Find the end of the current segment.  */
   1501         segp = cur;
   1502         while ((segp[0] != '/') && (segp[0] != '\0'))
   1503           ++segp;
   1504 
   1505         /* If this is the last segment, we're done (we need at least two
   1506          * segments to meet the criteria for the (e) and (f) cases).
   1507          */
   1508         if (segp[0] == '\0')
   1509           break;
   1510 
   1511         /* If the first segment is "..", or if the next segment _isn't_ "..",
   1512          * keep this segment and try the next one.
   1513          */
   1514         ++segp;
   1515         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
   1516             || ((segp[0] != '.') || (segp[1] != '.')
   1517                 || ((segp[2] != '/') && (segp[2] != '\0')))) {
   1518           cur = segp;
   1519           continue;
   1520         }
   1521 
   1522         /* If we get here, remove this segment and the next one and back up
   1523          * to the previous segment (if there is one), to implement the
   1524          * "iteratively" clause.  It's pretty much impossible to back up
   1525          * while maintaining two pointers into the buffer, so just compact
   1526          * the whole buffer now.
   1527          */
   1528 
   1529         /* If this is the end of the buffer, we're done.  */
   1530         if (segp[2] == '\0') {
   1531           cur[0] = '\0';
   1532           break;
   1533         }
   1534         /* Valgrind complained, strcpy(cur, segp + 3); */
   1535         /* string will overlap, do not use strcpy */
   1536         tmp = cur;
   1537         segp += 3;
   1538         while ((*tmp++ = *segp++) != 0)
   1539           ;
   1540 
   1541         /* If there are no previous segments, then keep going from here.  */
   1542         segp = cur;
   1543         while ((segp > path) && ((--segp)[0] == '/'))
   1544           ;
   1545         if (segp == path)
   1546           continue;
   1547 
   1548         /* "segp" is pointing to the end of a previous segment; find it's
   1549          * start.  We need to back up to the previous segment and start
   1550          * over with that to handle things like "foo/bar/../..".  If we
   1551          * don't do this, then on the first pass we'll remove the "bar/..",
   1552          * but be pointing at the second ".." so we won't realize we can also
   1553          * remove the "foo/..".
   1554          */
   1555         cur = segp;
   1556         while ((cur > path) && (cur[-1] != '/'))
   1557           --cur;
   1558     }
   1559     out[0] = '\0';
   1560 
   1561     /*
   1562      * g) If the resulting buffer string still begins with one or more
   1563      *    complete path segments of "..", then the reference is
   1564      *    considered to be in error. Implementations may handle this
   1565      *    error by retaining these components in the resolved path (i.e.,
   1566      *    treating them as part of the final URI), by removing them from
   1567      *    the resolved path (i.e., discarding relative levels above the
   1568      *    root), or by avoiding traversal of the reference.
   1569      *
   1570      * We discard them from the final path.
   1571      */
   1572     if (path[0] == '/') {
   1573       cur = path;
   1574       while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
   1575              && ((cur[3] == '/') || (cur[3] == '\0')))
   1576 	cur += 3;
   1577 
   1578       if (cur != path) {
   1579 	out = path;
   1580 	while (cur[0] != '\0')
   1581           (out++)[0] = (cur++)[0];
   1582 	out[0] = 0;
   1583       }
   1584     }
   1585 
   1586     return(0);
   1587 }
   1588 
   1589 static int is_hex(char c) {
   1590     if (((c >= '0') && (c <= '9')) ||
   1591         ((c >= 'a') && (c <= 'f')) ||
   1592         ((c >= 'A') && (c <= 'F')))
   1593 	return(1);
   1594     return(0);
   1595 }
   1596 
   1597 /**
   1598  * xmlURIUnescapeString:
   1599  * @str:  the string to unescape
   1600  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
   1601  * @target:  optional destination buffer
   1602  *
   1603  * Unescaping routine, but does not check that the string is an URI. The
   1604  * output is a direct unsigned char translation of %XX values (no encoding)
   1605  * Note that the length of the result can only be smaller or same size as
   1606  * the input string.
   1607  *
   1608  * Returns a copy of the string, but unescaped, will return NULL only in case
   1609  * of error
   1610  */
   1611 char *
   1612 xmlURIUnescapeString(const char *str, int len, char *target) {
   1613     char *ret, *out;
   1614     const char *in;
   1615 
   1616     if (str == NULL)
   1617 	return(NULL);
   1618     if (len <= 0) len = strlen(str);
   1619     if (len < 0) return(NULL);
   1620 
   1621     if (target == NULL) {
   1622 	ret = (char *) xmlMallocAtomic(len + 1);
   1623 	if (ret == NULL) {
   1624             xmlURIErrMemory("unescaping URI value\n");
   1625 	    return(NULL);
   1626 	}
   1627     } else
   1628 	ret = target;
   1629     in = str;
   1630     out = ret;
   1631     while(len > 0) {
   1632 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
   1633 	    in++;
   1634 	    if ((*in >= '0') && (*in <= '9'))
   1635 	        *out = (*in - '0');
   1636 	    else if ((*in >= 'a') && (*in <= 'f'))
   1637 	        *out = (*in - 'a') + 10;
   1638 	    else if ((*in >= 'A') && (*in <= 'F'))
   1639 	        *out = (*in - 'A') + 10;
   1640 	    in++;
   1641 	    if ((*in >= '0') && (*in <= '9'))
   1642 	        *out = *out * 16 + (*in - '0');
   1643 	    else if ((*in >= 'a') && (*in <= 'f'))
   1644 	        *out = *out * 16 + (*in - 'a') + 10;
   1645 	    else if ((*in >= 'A') && (*in <= 'F'))
   1646 	        *out = *out * 16 + (*in - 'A') + 10;
   1647 	    in++;
   1648 	    len -= 3;
   1649 	    out++;
   1650 	} else {
   1651 	    *out++ = *in++;
   1652 	    len--;
   1653 	}
   1654     }
   1655     *out = 0;
   1656     return(ret);
   1657 }
   1658 
   1659 /**
   1660  * xmlURIEscapeStr:
   1661  * @str:  string to escape
   1662  * @list: exception list string of chars not to escape
   1663  *
   1664  * This routine escapes a string to hex, ignoring reserved characters (a-z)
   1665  * and the characters in the exception list.
   1666  *
   1667  * Returns a new escaped string or NULL in case of error.
   1668  */
   1669 xmlChar *
   1670 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
   1671     xmlChar *ret, ch;
   1672     xmlChar *temp;
   1673     const xmlChar *in;
   1674     int len, out;
   1675 
   1676     if (str == NULL)
   1677 	return(NULL);
   1678     if (str[0] == 0)
   1679 	return(xmlStrdup(str));
   1680     len = xmlStrlen(str);
   1681     if (!(len > 0)) return(NULL);
   1682 
   1683     len += 20;
   1684     ret = (xmlChar *) xmlMallocAtomic(len);
   1685     if (ret == NULL) {
   1686         xmlURIErrMemory("escaping URI value\n");
   1687 	return(NULL);
   1688     }
   1689     in = (const xmlChar *) str;
   1690     out = 0;
   1691     while(*in != 0) {
   1692 	if (len - out <= 3) {
   1693             temp = xmlSaveUriRealloc(ret, &len);
   1694 	    if (temp == NULL) {
   1695                 xmlURIErrMemory("escaping URI value\n");
   1696 		xmlFree(ret);
   1697 		return(NULL);
   1698 	    }
   1699 	    ret = temp;
   1700 	}
   1701 
   1702 	ch = *in;
   1703 
   1704 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
   1705 	    unsigned char val;
   1706 	    ret[out++] = '%';
   1707 	    val = ch >> 4;
   1708 	    if (val <= 9)
   1709 		ret[out++] = '0' + val;
   1710 	    else
   1711 		ret[out++] = 'A' + val - 0xA;
   1712 	    val = ch & 0xF;
   1713 	    if (val <= 9)
   1714 		ret[out++] = '0' + val;
   1715 	    else
   1716 		ret[out++] = 'A' + val - 0xA;
   1717 	    in++;
   1718 	} else {
   1719 	    ret[out++] = *in++;
   1720 	}
   1721 
   1722     }
   1723     ret[out] = 0;
   1724     return(ret);
   1725 }
   1726 
   1727 /**
   1728  * xmlURIEscape:
   1729  * @str:  the string of the URI to escape
   1730  *
   1731  * Escaping routine, does not do validity checks !
   1732  * It will try to escape the chars needing this, but this is heuristic
   1733  * based it's impossible to be sure.
   1734  *
   1735  * Returns an copy of the string, but escaped
   1736  *
   1737  * 25 May 2001
   1738  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
   1739  * according to RFC2396.
   1740  *   - Carl Douglas
   1741  */
   1742 xmlChar *
   1743 xmlURIEscape(const xmlChar * str)
   1744 {
   1745     xmlChar *ret, *segment = NULL;
   1746     xmlURIPtr uri;
   1747     int ret2;
   1748 
   1749 #define NULLCHK(p) if(!p) { \
   1750          xmlURIErrMemory("escaping URI value\n"); \
   1751          xmlFreeURI(uri); \
   1752          return NULL; } \
   1753 
   1754     if (str == NULL)
   1755         return (NULL);
   1756 
   1757     uri = xmlCreateURI();
   1758     if (uri != NULL) {
   1759 	/*
   1760 	 * Allow escaping errors in the unescaped form
   1761 	 */
   1762         uri->cleanup = 1;
   1763         ret2 = xmlParseURIReference(uri, (const char *)str);
   1764         if (ret2) {
   1765             xmlFreeURI(uri);
   1766             return (NULL);
   1767         }
   1768     }
   1769 
   1770     if (!uri)
   1771         return NULL;
   1772 
   1773     ret = NULL;
   1774 
   1775     if (uri->scheme) {
   1776         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
   1777         NULLCHK(segment)
   1778         ret = xmlStrcat(ret, segment);
   1779         ret = xmlStrcat(ret, BAD_CAST ":");
   1780         xmlFree(segment);
   1781     }
   1782 
   1783     if (uri->authority) {
   1784         segment =
   1785             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
   1786         NULLCHK(segment)
   1787         ret = xmlStrcat(ret, BAD_CAST "//");
   1788         ret = xmlStrcat(ret, segment);
   1789         xmlFree(segment);
   1790     }
   1791 
   1792     if (uri->user) {
   1793         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
   1794         NULLCHK(segment)
   1795 		ret = xmlStrcat(ret,BAD_CAST "//");
   1796         ret = xmlStrcat(ret, segment);
   1797         ret = xmlStrcat(ret, BAD_CAST "@");
   1798         xmlFree(segment);
   1799     }
   1800 
   1801     if (uri->server) {
   1802         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
   1803         NULLCHK(segment)
   1804 		if (uri->user == NULL)
   1805 		ret = xmlStrcat(ret, BAD_CAST "//");
   1806         ret = xmlStrcat(ret, segment);
   1807         xmlFree(segment);
   1808     }
   1809 
   1810     if (uri->port) {
   1811         xmlChar port[10];
   1812 
   1813         snprintf((char *) port, 10, "%d", uri->port);
   1814         ret = xmlStrcat(ret, BAD_CAST ":");
   1815         ret = xmlStrcat(ret, port);
   1816     }
   1817 
   1818     if (uri->path) {
   1819         segment =
   1820             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
   1821         NULLCHK(segment)
   1822         ret = xmlStrcat(ret, segment);
   1823         xmlFree(segment);
   1824     }
   1825 
   1826     if (uri->query_raw) {
   1827         ret = xmlStrcat(ret, BAD_CAST "?");
   1828         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
   1829     }
   1830     else if (uri->query) {
   1831         segment =
   1832             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
   1833         NULLCHK(segment)
   1834         ret = xmlStrcat(ret, BAD_CAST "?");
   1835         ret = xmlStrcat(ret, segment);
   1836         xmlFree(segment);
   1837     }
   1838 
   1839     if (uri->opaque) {
   1840         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
   1841         NULLCHK(segment)
   1842         ret = xmlStrcat(ret, segment);
   1843         xmlFree(segment);
   1844     }
   1845 
   1846     if (uri->fragment) {
   1847         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
   1848         NULLCHK(segment)
   1849         ret = xmlStrcat(ret, BAD_CAST "#");
   1850         ret = xmlStrcat(ret, segment);
   1851         xmlFree(segment);
   1852     }
   1853 
   1854     xmlFreeURI(uri);
   1855 #undef NULLCHK
   1856 
   1857     return (ret);
   1858 }
   1859 
   1860 /************************************************************************
   1861  *									*
   1862  *			Public functions				*
   1863  *									*
   1864  ************************************************************************/
   1865 
   1866 /**
   1867  * xmlBuildURI:
   1868  * @URI:  the URI instance found in the document
   1869  * @base:  the base value
   1870  *
   1871  * Computes the final URI of the reference done by checking that
   1872  * the given URI is valid, and building the final URI using the
   1873  * base URI. This is processed according to section 5.2 of the
   1874  * RFC 2396
   1875  *
   1876  * 5.2. Resolving Relative References to Absolute Form
   1877  *
   1878  * Returns a new URI string (to be freed by the caller) or NULL in case
   1879  *         of error.
   1880  */
   1881 xmlChar *
   1882 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
   1883     xmlChar *val = NULL;
   1884     int ret, len, indx, cur, out;
   1885     xmlURIPtr ref = NULL;
   1886     xmlURIPtr bas = NULL;
   1887     xmlURIPtr res = NULL;
   1888 
   1889     /*
   1890      * 1) The URI reference is parsed into the potential four components and
   1891      *    fragment identifier, as described in Section 4.3.
   1892      *
   1893      *    NOTE that a completely empty URI is treated by modern browsers
   1894      *    as a reference to "." rather than as a synonym for the current
   1895      *    URI.  Should we do that here?
   1896      */
   1897     if (URI == NULL)
   1898 	ret = -1;
   1899     else {
   1900 	if (*URI) {
   1901 	    ref = xmlCreateURI();
   1902 	    if (ref == NULL)
   1903 		goto done;
   1904 	    ret = xmlParseURIReference(ref, (const char *) URI);
   1905 	}
   1906 	else
   1907 	    ret = 0;
   1908     }
   1909     if (ret != 0)
   1910 	goto done;
   1911     if ((ref != NULL) && (ref->scheme != NULL)) {
   1912 	/*
   1913 	 * The URI is absolute don't modify.
   1914 	 */
   1915 	val = xmlStrdup(URI);
   1916 	goto done;
   1917     }
   1918     if (base == NULL)
   1919 	ret = -1;
   1920     else {
   1921 	bas = xmlCreateURI();
   1922 	if (bas == NULL)
   1923 	    goto done;
   1924 	ret = xmlParseURIReference(bas, (const char *) base);
   1925     }
   1926     if (ret != 0) {
   1927 	if (ref)
   1928 	    val = xmlSaveUri(ref);
   1929 	goto done;
   1930     }
   1931     if (ref == NULL) {
   1932 	/*
   1933 	 * the base fragment must be ignored
   1934 	 */
   1935 	if (bas->fragment != NULL) {
   1936 	    xmlFree(bas->fragment);
   1937 	    bas->fragment = NULL;
   1938 	}
   1939 	val = xmlSaveUri(bas);
   1940 	goto done;
   1941     }
   1942 
   1943     /*
   1944      * 2) If the path component is empty and the scheme, authority, and
   1945      *    query components are undefined, then it is a reference to the
   1946      *    current document and we are done.  Otherwise, the reference URI's
   1947      *    query and fragment components are defined as found (or not found)
   1948      *    within the URI reference and not inherited from the base URI.
   1949      *
   1950      *    NOTE that in modern browsers, the parsing differs from the above
   1951      *    in the following aspect:  the query component is allowed to be
   1952      *    defined while still treating this as a reference to the current
   1953      *    document.
   1954      */
   1955     res = xmlCreateURI();
   1956     if (res == NULL)
   1957 	goto done;
   1958     if ((ref->scheme == NULL) && (ref->path == NULL) &&
   1959 	((ref->authority == NULL) && (ref->server == NULL))) {
   1960 	if (bas->scheme != NULL)
   1961 	    res->scheme = xmlMemStrdup(bas->scheme);
   1962 	if (bas->authority != NULL)
   1963 	    res->authority = xmlMemStrdup(bas->authority);
   1964 	else if ((bas->server != NULL) || (bas->port == -1)) {
   1965 	    if (bas->server != NULL)
   1966 		res->server = xmlMemStrdup(bas->server);
   1967 	    if (bas->user != NULL)
   1968 		res->user = xmlMemStrdup(bas->user);
   1969 	    res->port = bas->port;
   1970 	}
   1971 	if (bas->path != NULL)
   1972 	    res->path = xmlMemStrdup(bas->path);
   1973 	if (ref->query_raw != NULL)
   1974 	    res->query_raw = xmlMemStrdup (ref->query_raw);
   1975 	else if (ref->query != NULL)
   1976 	    res->query = xmlMemStrdup(ref->query);
   1977 	else if (bas->query_raw != NULL)
   1978 	    res->query_raw = xmlMemStrdup(bas->query_raw);
   1979 	else if (bas->query != NULL)
   1980 	    res->query = xmlMemStrdup(bas->query);
   1981 	if (ref->fragment != NULL)
   1982 	    res->fragment = xmlMemStrdup(ref->fragment);
   1983 	goto step_7;
   1984     }
   1985 
   1986     /*
   1987      * 3) If the scheme component is defined, indicating that the reference
   1988      *    starts with a scheme name, then the reference is interpreted as an
   1989      *    absolute URI and we are done.  Otherwise, the reference URI's
   1990      *    scheme is inherited from the base URI's scheme component.
   1991      */
   1992     if (ref->scheme != NULL) {
   1993 	val = xmlSaveUri(ref);
   1994 	goto done;
   1995     }
   1996     if (bas->scheme != NULL)
   1997 	res->scheme = xmlMemStrdup(bas->scheme);
   1998 
   1999     if (ref->query_raw != NULL)
   2000 	res->query_raw = xmlMemStrdup(ref->query_raw);
   2001     else if (ref->query != NULL)
   2002 	res->query = xmlMemStrdup(ref->query);
   2003     if (ref->fragment != NULL)
   2004 	res->fragment = xmlMemStrdup(ref->fragment);
   2005 
   2006     /*
   2007      * 4) If the authority component is defined, then the reference is a
   2008      *    network-path and we skip to step 7.  Otherwise, the reference
   2009      *    URI's authority is inherited from the base URI's authority
   2010      *    component, which will also be undefined if the URI scheme does not
   2011      *    use an authority component.
   2012      */
   2013     if ((ref->authority != NULL) || (ref->server != NULL)) {
   2014 	if (ref->authority != NULL)
   2015 	    res->authority = xmlMemStrdup(ref->authority);
   2016 	else {
   2017 	    res->server = xmlMemStrdup(ref->server);
   2018 	    if (ref->user != NULL)
   2019 		res->user = xmlMemStrdup(ref->user);
   2020             res->port = ref->port;
   2021 	}
   2022 	if (ref->path != NULL)
   2023 	    res->path = xmlMemStrdup(ref->path);
   2024 	goto step_7;
   2025     }
   2026     if (bas->authority != NULL)
   2027 	res->authority = xmlMemStrdup(bas->authority);
   2028     else if ((bas->server != NULL) || (bas->port == -1)) {
   2029 	if (bas->server != NULL)
   2030 	    res->server = xmlMemStrdup(bas->server);
   2031 	if (bas->user != NULL)
   2032 	    res->user = xmlMemStrdup(bas->user);
   2033 	res->port = bas->port;
   2034     }
   2035 
   2036     /*
   2037      * 5) If the path component begins with a slash character ("/"), then
   2038      *    the reference is an absolute-path and we skip to step 7.
   2039      */
   2040     if ((ref->path != NULL) && (ref->path[0] == '/')) {
   2041 	res->path = xmlMemStrdup(ref->path);
   2042 	goto step_7;
   2043     }
   2044 
   2045 
   2046     /*
   2047      * 6) If this step is reached, then we are resolving a relative-path
   2048      *    reference.  The relative path needs to be merged with the base
   2049      *    URI's path.  Although there are many ways to do this, we will
   2050      *    describe a simple method using a separate string buffer.
   2051      *
   2052      * Allocate a buffer large enough for the result string.
   2053      */
   2054     len = 2; /* extra / and 0 */
   2055     if (ref->path != NULL)
   2056 	len += strlen(ref->path);
   2057     if (bas->path != NULL)
   2058 	len += strlen(bas->path);
   2059     res->path = (char *) xmlMallocAtomic(len);
   2060     if (res->path == NULL) {
   2061         xmlURIErrMemory("resolving URI against base\n");
   2062 	goto done;
   2063     }
   2064     res->path[0] = 0;
   2065 
   2066     /*
   2067      * a) All but the last segment of the base URI's path component is
   2068      *    copied to the buffer.  In other words, any characters after the
   2069      *    last (right-most) slash character, if any, are excluded.
   2070      */
   2071     cur = 0;
   2072     out = 0;
   2073     if (bas->path != NULL) {
   2074 	while (bas->path[cur] != 0) {
   2075 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
   2076 		cur++;
   2077 	    if (bas->path[cur] == 0)
   2078 		break;
   2079 
   2080 	    cur++;
   2081 	    while (out < cur) {
   2082 		res->path[out] = bas->path[out];
   2083 		out++;
   2084 	    }
   2085 	}
   2086     }
   2087     res->path[out] = 0;
   2088 
   2089     /*
   2090      * b) The reference's path component is appended to the buffer
   2091      *    string.
   2092      */
   2093     if (ref->path != NULL && ref->path[0] != 0) {
   2094 	indx = 0;
   2095 	/*
   2096 	 * Ensure the path includes a '/'
   2097 	 */
   2098 	if ((out == 0) && (bas->server != NULL))
   2099 	    res->path[out++] = '/';
   2100 	while (ref->path[indx] != 0) {
   2101 	    res->path[out++] = ref->path[indx++];
   2102 	}
   2103     }
   2104     res->path[out] = 0;
   2105 
   2106     /*
   2107      * Steps c) to h) are really path normalization steps
   2108      */
   2109     xmlNormalizeURIPath(res->path);
   2110 
   2111 step_7:
   2112 
   2113     /*
   2114      * 7) The resulting URI components, including any inherited from the
   2115      *    base URI, are recombined to give the absolute form of the URI
   2116      *    reference.
   2117      */
   2118     val = xmlSaveUri(res);
   2119 
   2120 done:
   2121     if (ref != NULL)
   2122 	xmlFreeURI(ref);
   2123     if (bas != NULL)
   2124 	xmlFreeURI(bas);
   2125     if (res != NULL)
   2126 	xmlFreeURI(res);
   2127     return(val);
   2128 }
   2129 
   2130 /**
   2131  * xmlBuildRelativeURI:
   2132  * @URI:  the URI reference under consideration
   2133  * @base:  the base value
   2134  *
   2135  * Expresses the URI of the reference in terms relative to the
   2136  * base.  Some examples of this operation include:
   2137  *     base = "http://site1.com/docs/book1.html"
   2138  *        URI input                        URI returned
   2139  *     docs/pic1.gif                    pic1.gif
   2140  *     docs/img/pic1.gif                img/pic1.gif
   2141  *     img/pic1.gif                     ../img/pic1.gif
   2142  *     http://site1.com/docs/pic1.gif   pic1.gif
   2143  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
   2144  *
   2145  *     base = "docs/book1.html"
   2146  *        URI input                        URI returned
   2147  *     docs/pic1.gif                    pic1.gif
   2148  *     docs/img/pic1.gif                img/pic1.gif
   2149  *     img/pic1.gif                     ../img/pic1.gif
   2150  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
   2151  *
   2152  *
   2153  * Note: if the URI reference is really wierd or complicated, it may be
   2154  *       worthwhile to first convert it into a "nice" one by calling
   2155  *       xmlBuildURI (using 'base') before calling this routine,
   2156  *       since this routine (for reasonable efficiency) assumes URI has
   2157  *       already been through some validation.
   2158  *
   2159  * Returns a new URI string (to be freed by the caller) or NULL in case
   2160  * error.
   2161  */
   2162 xmlChar *
   2163 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
   2164 {
   2165     xmlChar *val = NULL;
   2166     int ret;
   2167     int ix;
   2168     int nbslash = 0;
   2169     int len;
   2170     xmlURIPtr ref = NULL;
   2171     xmlURIPtr bas = NULL;
   2172     xmlChar *bptr, *uptr, *vptr;
   2173     int remove_path = 0;
   2174 
   2175     if ((URI == NULL) || (*URI == 0))
   2176 	return NULL;
   2177 
   2178     /*
   2179      * First parse URI into a standard form
   2180      */
   2181     ref = xmlCreateURI ();
   2182     if (ref == NULL)
   2183 	return NULL;
   2184     /* If URI not already in "relative" form */
   2185     if (URI[0] != '.') {
   2186 	ret = xmlParseURIReference (ref, (const char *) URI);
   2187 	if (ret != 0)
   2188 	    goto done;		/* Error in URI, return NULL */
   2189     } else
   2190 	ref->path = (char *)xmlStrdup(URI);
   2191 
   2192     /*
   2193      * Next parse base into the same standard form
   2194      */
   2195     if ((base == NULL) || (*base == 0)) {
   2196 	val = xmlStrdup (URI);
   2197 	goto done;
   2198     }
   2199     bas = xmlCreateURI ();
   2200     if (bas == NULL)
   2201 	goto done;
   2202     if (base[0] != '.') {
   2203 	ret = xmlParseURIReference (bas, (const char *) base);
   2204 	if (ret != 0)
   2205 	    goto done;		/* Error in base, return NULL */
   2206     } else
   2207 	bas->path = (char *)xmlStrdup(base);
   2208 
   2209     /*
   2210      * If the scheme / server on the URI differs from the base,
   2211      * just return the URI
   2212      */
   2213     if ((ref->scheme != NULL) &&
   2214 	((bas->scheme == NULL) ||
   2215 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
   2216 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
   2217 	val = xmlStrdup (URI);
   2218 	goto done;
   2219     }
   2220     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
   2221 	val = xmlStrdup(BAD_CAST "");
   2222 	goto done;
   2223     }
   2224     if (bas->path == NULL) {
   2225 	val = xmlStrdup((xmlChar *)ref->path);
   2226 	goto done;
   2227     }
   2228     if (ref->path == NULL) {
   2229         ref->path = (char *) "/";
   2230 	remove_path = 1;
   2231     }
   2232 
   2233     /*
   2234      * At this point (at last!) we can compare the two paths
   2235      *
   2236      * First we take care of the special case where either of the
   2237      * two path components may be missing (bug 316224)
   2238      */
   2239     bptr = (xmlChar *)bas->path;
   2240     {
   2241         xmlChar *rptr = (xmlChar *) ref->path;
   2242         int pos = 0;
   2243 
   2244         /*
   2245          * Next we compare the two strings and find where they first differ
   2246          */
   2247 	if ((*rptr == '.') && (rptr[1] == '/'))
   2248             rptr += 2;
   2249 	if ((*bptr == '.') && (bptr[1] == '/'))
   2250             bptr += 2;
   2251 	else if ((*bptr == '/') && (*rptr != '/'))
   2252 	    bptr++;
   2253 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
   2254 	    pos++;
   2255 
   2256 	if (bptr[pos] == rptr[pos]) {
   2257 	    val = xmlStrdup(BAD_CAST "");
   2258 	    goto done;		/* (I can't imagine why anyone would do this) */
   2259 	}
   2260 
   2261 	/*
   2262 	 * In URI, "back up" to the last '/' encountered.  This will be the
   2263 	 * beginning of the "unique" suffix of URI
   2264 	 */
   2265 	ix = pos;
   2266 	for (; ix > 0; ix--) {
   2267 	    if (rptr[ix - 1] == '/')
   2268 		break;
   2269 	}
   2270 	uptr = (xmlChar *)&rptr[ix];
   2271 
   2272 	/*
   2273 	 * In base, count the number of '/' from the differing point
   2274 	 */
   2275 	for (; bptr[ix] != 0; ix++) {
   2276 	    if (bptr[ix] == '/')
   2277 		nbslash++;
   2278 	}
   2279 
   2280 	/*
   2281 	 * e.g: URI="foo/" base="foo/bar" -> "./"
   2282 	 */
   2283 	if (nbslash == 0 && !uptr[0]) {
   2284 	    val = xmlStrdup(BAD_CAST "./");
   2285 	    goto done;
   2286 	}
   2287 
   2288 	len = xmlStrlen (uptr) + 1;
   2289     }
   2290 
   2291     if (nbslash == 0) {
   2292 	if (uptr != NULL)
   2293 	    /* exception characters from xmlSaveUri */
   2294 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
   2295 	goto done;
   2296     }
   2297 
   2298     /*
   2299      * Allocate just enough space for the returned string -
   2300      * length of the remainder of the URI, plus enough space
   2301      * for the "../" groups, plus one for the terminator
   2302      */
   2303     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
   2304     if (val == NULL) {
   2305         xmlURIErrMemory("building relative URI\n");
   2306 	goto done;
   2307     }
   2308     vptr = val;
   2309     /*
   2310      * Put in as many "../" as needed
   2311      */
   2312     for (; nbslash>0; nbslash--) {
   2313 	*vptr++ = '.';
   2314 	*vptr++ = '.';
   2315 	*vptr++ = '/';
   2316     }
   2317     /*
   2318      * Finish up with the end of the URI
   2319      */
   2320     if (uptr != NULL) {
   2321         if ((vptr > val) && (len > 0) &&
   2322 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
   2323 	    memcpy (vptr, uptr + 1, len - 1);
   2324 	    vptr[len - 2] = 0;
   2325 	} else {
   2326 	    memcpy (vptr, uptr, len);
   2327 	    vptr[len - 1] = 0;
   2328 	}
   2329     } else {
   2330 	vptr[len - 1] = 0;
   2331     }
   2332 
   2333     /* escape the freshly-built path */
   2334     vptr = val;
   2335 	/* exception characters from xmlSaveUri */
   2336     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
   2337     xmlFree(vptr);
   2338 
   2339 done:
   2340     /*
   2341      * Free the working variables
   2342      */
   2343     if (remove_path != 0)
   2344         ref->path = NULL;
   2345     if (ref != NULL)
   2346 	xmlFreeURI (ref);
   2347     if (bas != NULL)
   2348 	xmlFreeURI (bas);
   2349 
   2350     return val;
   2351 }
   2352 
   2353 /**
   2354  * xmlCanonicPath:
   2355  * @path:  the resource locator in a filesystem notation
   2356  *
   2357  * Constructs a canonic path from the specified path.
   2358  *
   2359  * Returns a new canonic path, or a duplicate of the path parameter if the
   2360  * construction fails. The caller is responsible for freeing the memory occupied
   2361  * by the returned string. If there is insufficient memory available, or the
   2362  * argument is NULL, the function returns NULL.
   2363  */
   2364 #define IS_WINDOWS_PATH(p)					\
   2365 	((p != NULL) &&						\
   2366 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
   2367 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
   2368 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
   2369 xmlChar *
   2370 xmlCanonicPath(const xmlChar *path)
   2371 {
   2372 /*
   2373  * For Windows implementations, additional work needs to be done to
   2374  * replace backslashes in pathnames with "forward slashes"
   2375  */
   2376 #if defined(_WIN32) && !defined(__CYGWIN__)
   2377     int len = 0;
   2378     char *p = NULL;
   2379 #endif
   2380     xmlURIPtr uri;
   2381     xmlChar *ret;
   2382     const xmlChar *absuri;
   2383 
   2384     if (path == NULL)
   2385 	return(NULL);
   2386 
   2387 #if defined(_WIN32)
   2388     /*
   2389      * We must not change the backslashes to slashes if the the path
   2390      * starts with \\?\
   2391      * Those paths can be up to 32k characters long.
   2392      * Was added specifically for OpenOffice, those paths can't be converted
   2393      * to URIs anyway.
   2394      */
   2395     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
   2396         (path[3] == '\\') )
   2397 	return xmlStrdup((const xmlChar *) path);
   2398 #endif
   2399 
   2400 	/* sanitize filename starting with // so it can be used as URI */
   2401     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
   2402         path++;
   2403 
   2404     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2405 	xmlFreeURI(uri);
   2406 	return xmlStrdup(path);
   2407     }
   2408 
   2409     /* Check if this is an "absolute uri" */
   2410     absuri = xmlStrstr(path, BAD_CAST "://");
   2411     if (absuri != NULL) {
   2412         int l, j;
   2413 	unsigned char c;
   2414 	xmlChar *escURI;
   2415 
   2416         /*
   2417 	 * this looks like an URI where some parts have not been
   2418 	 * escaped leading to a parsing problem.  Check that the first
   2419 	 * part matches a protocol.
   2420 	 */
   2421 	l = absuri - path;
   2422 	/* Bypass if first part (part before the '://') is > 20 chars */
   2423 	if ((l <= 0) || (l > 20))
   2424 	    goto path_processing;
   2425 	/* Bypass if any non-alpha characters are present in first part */
   2426 	for (j = 0;j < l;j++) {
   2427 	    c = path[j];
   2428 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
   2429 	        goto path_processing;
   2430 	}
   2431 
   2432 	/* Escape all except the characters specified in the supplied path */
   2433         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
   2434 	if (escURI != NULL) {
   2435 	    /* Try parsing the escaped path */
   2436 	    uri = xmlParseURI((const char *) escURI);
   2437 	    /* If successful, return the escaped string */
   2438 	    if (uri != NULL) {
   2439 	        xmlFreeURI(uri);
   2440 		return escURI;
   2441 	    }
   2442             xmlFree(escURI);
   2443 	}
   2444     }
   2445 
   2446 path_processing:
   2447 /* For Windows implementations, replace backslashes with 'forward slashes' */
   2448 #if defined(_WIN32) && !defined(__CYGWIN__)
   2449     /*
   2450      * Create a URI structure
   2451      */
   2452     uri = xmlCreateURI();
   2453     if (uri == NULL) {		/* Guard against 'out of memory' */
   2454         return(NULL);
   2455     }
   2456 
   2457     len = xmlStrlen(path);
   2458     if ((len > 2) && IS_WINDOWS_PATH(path)) {
   2459         /* make the scheme 'file' */
   2460 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
   2461 	/* allocate space for leading '/' + path + string terminator */
   2462 	uri->path = xmlMallocAtomic(len + 2);
   2463 	if (uri->path == NULL) {
   2464 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
   2465 	    return(NULL);
   2466 	}
   2467 	/* Put in leading '/' plus path */
   2468 	uri->path[0] = '/';
   2469 	p = uri->path + 1;
   2470 	strncpy(p, (char *) path, len + 1);
   2471     } else {
   2472 	uri->path = (char *) xmlStrdup(path);
   2473 	if (uri->path == NULL) {
   2474 	    xmlFreeURI(uri);
   2475 	    return(NULL);
   2476 	}
   2477 	p = uri->path;
   2478     }
   2479     /* Now change all occurences of '\' to '/' */
   2480     while (*p != '\0') {
   2481 	if (*p == '\\')
   2482 	    *p = '/';
   2483 	p++;
   2484     }
   2485 
   2486     if (uri->scheme == NULL) {
   2487 	ret = xmlStrdup((const xmlChar *) uri->path);
   2488     } else {
   2489 	ret = xmlSaveUri(uri);
   2490     }
   2491 
   2492     xmlFreeURI(uri);
   2493 #else
   2494     ret = xmlStrdup((const xmlChar *) path);
   2495 #endif
   2496     return(ret);
   2497 }
   2498 
   2499 /**
   2500  * xmlPathToURI:
   2501  * @path:  the resource locator in a filesystem notation
   2502  *
   2503  * Constructs an URI expressing the existing path
   2504  *
   2505  * Returns a new URI, or a duplicate of the path parameter if the
   2506  * construction fails. The caller is responsible for freeing the memory
   2507  * occupied by the returned string. If there is insufficient memory available,
   2508  * or the argument is NULL, the function returns NULL.
   2509  */
   2510 xmlChar *
   2511 xmlPathToURI(const xmlChar *path)
   2512 {
   2513     xmlURIPtr uri;
   2514     xmlURI temp;
   2515     xmlChar *ret, *cal;
   2516 
   2517     if (path == NULL)
   2518         return(NULL);
   2519 
   2520     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2521 	xmlFreeURI(uri);
   2522 	return xmlStrdup(path);
   2523     }
   2524     cal = xmlCanonicPath(path);
   2525     if (cal == NULL)
   2526         return(NULL);
   2527 #if defined(_WIN32) && !defined(__CYGWIN__)
   2528     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
   2529        If 'cal' is a valid URI allready then we are done here, as continuing would make
   2530        it invalid. */
   2531     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
   2532 	xmlFreeURI(uri);
   2533 	return cal;
   2534     }
   2535     /* 'cal' can contain a relative path with backslashes. If that is processed
   2536        by xmlSaveURI, they will be escaped and the external entity loader machinery
   2537        will fail. So convert them to slashes. Misuse 'ret' for walking. */
   2538     ret = cal;
   2539     while (*ret != '\0') {
   2540 	if (*ret == '\\')
   2541 	    *ret = '/';
   2542 	ret++;
   2543     }
   2544 #endif
   2545     memset(&temp, 0, sizeof(temp));
   2546     temp.path = (char *) cal;
   2547     ret = xmlSaveUri(&temp);
   2548     xmlFree(cal);
   2549     return(ret);
   2550 }
   2551 #define bottom_uri
   2552 #include "elfgcchack.h"
   2553