Home | History | Annotate | Download | only in libxml2
      1 /**
      2  * uri.c: set of generic URI related routines
      3  *
      4  * Reference: RFCs 3986, 2732 and 2373
      5  *
      6  * See Copyright for the status of this software.
      7  *
      8  * daniel (at) veillard.com
      9  */
     10 
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
     20 
     21 /**
     22  * MAX_URI_LENGTH:
     23  *
     24  * The definition of the URI regexp in the above RFC has no size limit
     25  * In practice they are usually relativey short except for the
     26  * data URI scheme as defined in RFC 2397. Even for data URI the usual
     27  * maximum size before hitting random practical limits is around 64 KB
     28  * and 4KB is usually a maximum admitted limit for proper operations.
     29  * The value below is more a security limit than anything else and
     30  * really should never be hit by 'normal' operations
     31  * Set to 1 MByte in 2012, this is only enforced on output
     32  */
     33 #define MAX_URI_LENGTH 1024 * 1024
     34 
     35 static void
     36 xmlURIErrMemory(const char *extra)
     37 {
     38     if (extra)
     39         __xmlRaiseError(NULL, NULL, NULL,
     40                         NULL, NULL, XML_FROM_URI,
     41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     42                         extra, NULL, NULL, 0, 0,
     43                         "Memory allocation failed : %s\n", extra);
     44     else
     45         __xmlRaiseError(NULL, NULL, NULL,
     46                         NULL, NULL, XML_FROM_URI,
     47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     48                         NULL, NULL, NULL, 0, 0,
     49                         "Memory allocation failed\n");
     50 }
     51 
/*
 * Forward declaration: xmlCleanURI() is called by the RFC 3986 parsing
 * routines below; its definition is not part of this section of the file.
 */
static void xmlCleanURI(xmlURIPtr uri);
     53 
     54 /*
     55  * Old rule from 2396 used in legacy handling code
     56  * alpha    = lowalpha | upalpha
     57  */
     58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
     59 
     60 
     61 /*
     62  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
     63  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
     64  *            "u" | "v" | "w" | "x" | "y" | "z"
     65  */
     66 
     67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
     68 
     69 /*
     70  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
     71  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
     72  *           "U" | "V" | "W" | "X" | "Y" | "Z"
     73  */
     74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
     75 
     76 #ifdef IS_DIGIT
     77 #undef IS_DIGIT
     78 #endif
     79 /*
     80  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
     81  */
     82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
     83 
     84 /*
     85  * alphanum = alpha | digit
     86  */
     87 
     88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
     89 
     90 /*
     91  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     92  */
     93 
     94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
     95     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
     96     ((x) == '(') || ((x) == ')'))
     97 
     98 /*
     99  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
    100  */
    101 
    102 #define IS_UNWISE(p)                                                    \
    103       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
    104        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
    105        ((*(p) == ']')) || ((*(p) == '`')))
    106 /*
    107  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
    108  *            "[" | "]"
    109  */
    110 
    111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    112         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    113         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
    114         ((x) == ']'))
    115 
    116 /*
    117  * unreserved = alphanum | mark
    118  */
    119 
    120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
    121 
    122 /*
    123  * Skip to next pointer char, handle escaped sequences
    124  */
    125 
    126 #define NEXT(p) ((*p == '%')? p += 3 : p++)
    127 
    128 /*
    129  * Productions from the spec.
    130  *
    131  *    authority     = server | reg_name
    132  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
    133  *                        ";" | ":" | "@" | "&" | "=" | "+" )
    134  *
    135  * path          = [ abs_path | opaque_part ]
    136  */
    137 
    138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
    139 
    140 /************************************************************************
    141  *									*
    142  *                         RFC 3986 parser				*
    143  *									*
    144  ************************************************************************/
    145 
    146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
    147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
    148                       ((*(p) >= 'A') && (*(p) <= 'Z')))
    149 #define ISA_HEXDIG(p)							\
    150        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
    151         ((*(p) >= 'A') && (*(p) <= 'F')))
    152 
    153 /*
    154  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    155  *                     / "*" / "+" / "," / ";" / "="
    156  */
    157 #define ISA_SUB_DELIM(p)						\
    158       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
    159        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
    160        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
    161        ((*(p) == '=')) || ((*(p) == '\'')))
    162 
    163 /*
    164  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    165  */
    166 #define ISA_GEN_DELIM(p)						\
    167       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
    168        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
    169        ((*(p) == '@')))
    170 
    171 /*
    172  *    reserved      = gen-delims / sub-delims
    173  */
    174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
    175 
    176 /*
    177  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    178  */
    179 #define ISA_UNRESERVED(p)						\
    180       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
    181        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
    182 
    183 /*
    184  *    pct-encoded   = "%" HEXDIG HEXDIG
    185  */
    186 #define ISA_PCT_ENCODED(p)						\
    187      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
    188 
    189 /*
    190  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    191  */
    192 #define ISA_PCHAR(p)							\
    193      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
    194       ((*(p) == ':')) || ((*(p) == '@')))
    195 
    196 /**
    197  * xmlParse3986Scheme:
    198  * @uri:  pointer to an URI structure
    199  * @str:  pointer to the string to analyze
    200  *
    201  * Parse an URI scheme
    202  *
    203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    204  *
    205  * Returns 0 or the error code
    206  */
    207 static int
    208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
    209     const char *cur;
    210 
    211     if (str == NULL)
    212 	return(-1);
    213 
    214     cur = *str;
    215     if (!ISA_ALPHA(cur))
    216 	return(2);
    217     cur++;
    218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
    219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
    220     if (uri != NULL) {
    221 	if (uri->scheme != NULL) xmlFree(uri->scheme);
    222 	uri->scheme = STRNDUP(*str, cur - *str);
    223     }
    224     *str = cur;
    225     return(0);
    226 }
    227 
    228 /**
    229  * xmlParse3986Fragment:
    230  * @uri:  pointer to an URI structure
    231  * @str:  pointer to the string to analyze
    232  *
    233  * Parse the query part of an URI
    234  *
    235  * fragment      = *( pchar / "/" / "?" )
    236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
    237  *       in the fragment identifier but this is used very broadly for
    238  *       xpointer scheme selection, so we are allowing it here to not break
    239  *       for example all the DocBook processing chains.
    240  *
    241  * Returns 0 or the error code
    242  */
    243 static int
    244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
    245 {
    246     const char *cur;
    247 
    248     if (str == NULL)
    249         return (-1);
    250 
    251     cur = *str;
    252 
    253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    254            (*cur == '[') || (*cur == ']') ||
    255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    256         NEXT(cur);
    257     if (uri != NULL) {
    258         if (uri->fragment != NULL)
    259             xmlFree(uri->fragment);
    260 	if (uri->cleanup & 2)
    261 	    uri->fragment = STRNDUP(*str, cur - *str);
    262 	else
    263 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
    264     }
    265     *str = cur;
    266     return (0);
    267 }
    268 
    269 /**
    270  * xmlParse3986Query:
    271  * @uri:  pointer to an URI structure
    272  * @str:  pointer to the string to analyze
    273  *
    274  * Parse the query part of an URI
    275  *
    276  * query = *uric
    277  *
    278  * Returns 0 or the error code
    279  */
    280 static int
    281 xmlParse3986Query(xmlURIPtr uri, const char **str)
    282 {
    283     const char *cur;
    284 
    285     if (str == NULL)
    286         return (-1);
    287 
    288     cur = *str;
    289 
    290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    292         NEXT(cur);
    293     if (uri != NULL) {
    294         if (uri->query != NULL)
    295             xmlFree(uri->query);
    296 	if (uri->cleanup & 2)
    297 	    uri->query = STRNDUP(*str, cur - *str);
    298 	else
    299 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
    300 
    301 	/* Save the raw bytes of the query as well.
    302 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
    303 	 */
    304 	if (uri->query_raw != NULL)
    305 	    xmlFree (uri->query_raw);
    306 	uri->query_raw = STRNDUP (*str, cur - *str);
    307     }
    308     *str = cur;
    309     return (0);
    310 }
    311 
    312 /**
    313  * xmlParse3986Port:
    314  * @uri:  pointer to an URI structure
    315  * @str:  the string to analyze
    316  *
    317  * Parse a port  part and fills in the appropriate fields
    318  * of the @uri structure
    319  *
    320  * port          = *DIGIT
    321  *
    322  * Returns 0 or the error code
    323  */
    324 static int
    325 xmlParse3986Port(xmlURIPtr uri, const char **str)
    326 {
    327     const char *cur = *str;
    328 
    329     if (ISA_DIGIT(cur)) {
    330 	if (uri != NULL)
    331 	    uri->port = 0;
    332 	while (ISA_DIGIT(cur)) {
    333 	    if (uri != NULL)
    334 		uri->port = uri->port * 10 + (*cur - '0');
    335 	    cur++;
    336 	}
    337 	*str = cur;
    338 	return(0);
    339     }
    340     return(1);
    341 }
    342 
    343 /**
    344  * xmlParse3986Userinfo:
    345  * @uri:  pointer to an URI structure
    346  * @str:  the string to analyze
    347  *
    348  * Parse an user informations part and fills in the appropriate fields
    349  * of the @uri structure
    350  *
    351  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
    352  *
    353  * Returns 0 or the error code
    354  */
    355 static int
    356 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
    357 {
    358     const char *cur;
    359 
    360     cur = *str;
    361     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
    362            ISA_SUB_DELIM(cur) || (*cur == ':'))
    363 	NEXT(cur);
    364     if (*cur == '@') {
    365 	if (uri != NULL) {
    366 	    if (uri->user != NULL) xmlFree(uri->user);
    367 	    if (uri->cleanup & 2)
    368 		uri->user = STRNDUP(*str, cur - *str);
    369 	    else
    370 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
    371 	}
    372 	*str = cur;
    373 	return(0);
    374     }
    375     return(1);
    376 }
    377 
    378 /**
    379  * xmlParse3986DecOctet:
    380  * @str:  the string to analyze
    381  *
    382  *    dec-octet     = DIGIT                 ; 0-9
    383  *                  / %x31-39 DIGIT         ; 10-99
    384  *                  / "1" 2DIGIT            ; 100-199
    385  *                  / "2" %x30-34 DIGIT     ; 200-249
    386  *                  / "25" %x30-35          ; 250-255
    387  *
    388  * Skip a dec-octet.
    389  *
    390  * Returns 0 if found and skipped, 1 otherwise
    391  */
    392 static int
    393 xmlParse3986DecOctet(const char **str) {
    394     const char *cur = *str;
    395 
    396     if (!(ISA_DIGIT(cur)))
    397         return(1);
    398     if (!ISA_DIGIT(cur+1))
    399 	cur++;
    400     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
    401 	cur += 2;
    402     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
    403 	cur += 3;
    404     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
    405 	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
    406 	cur += 3;
    407     else if ((*cur == '2') && (*(cur + 1) == '5') &&
    408 	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
    409 	cur += 3;
    410     else
    411         return(1);
    412     *str = cur;
    413     return(0);
    414 }
    415 /**
    416  * xmlParse3986Host:
    417  * @uri:  pointer to an URI structure
    418  * @str:  the string to analyze
    419  *
    420  * Parse an host part and fills in the appropriate fields
    421  * of the @uri structure
    422  *
    423  * host          = IP-literal / IPv4address / reg-name
    424  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
    425  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    426  * reg-name      = *( unreserved / pct-encoded / sub-delims )
    427  *
    428  * Returns 0 or the error code
    429  */
    430 static int
    431 xmlParse3986Host(xmlURIPtr uri, const char **str)
    432 {
    433     const char *cur = *str;
    434     const char *host;
    435 
    436     host = cur;
    437     /*
    438      * IPv6 and future adressing scheme are enclosed between brackets
    439      */
    440     if (*cur == '[') {
    441         cur++;
    442 	while ((*cur != ']') && (*cur != 0))
    443 	    cur++;
    444 	if (*cur != ']')
    445 	    return(1);
    446 	cur++;
    447 	goto found;
    448     }
    449     /*
    450      * try to parse an IPv4
    451      */
    452     if (ISA_DIGIT(cur)) {
    453         if (xmlParse3986DecOctet(&cur) != 0)
    454 	    goto not_ipv4;
    455 	if (*cur != '.')
    456 	    goto not_ipv4;
    457 	cur++;
    458         if (xmlParse3986DecOctet(&cur) != 0)
    459 	    goto not_ipv4;
    460 	if (*cur != '.')
    461 	    goto not_ipv4;
    462         if (xmlParse3986DecOctet(&cur) != 0)
    463 	    goto not_ipv4;
    464 	if (*cur != '.')
    465 	    goto not_ipv4;
    466         if (xmlParse3986DecOctet(&cur) != 0)
    467 	    goto not_ipv4;
    468 	goto found;
    469 not_ipv4:
    470         cur = *str;
    471     }
    472     /*
    473      * then this should be a hostname which can be empty
    474      */
    475     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
    476         NEXT(cur);
    477 found:
    478     if (uri != NULL) {
    479 	if (uri->authority != NULL) xmlFree(uri->authority);
    480 	uri->authority = NULL;
    481 	if (uri->server != NULL) xmlFree(uri->server);
    482 	if (cur != host) {
    483 	    if (uri->cleanup & 2)
    484 		uri->server = STRNDUP(host, cur - host);
    485 	    else
    486 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
    487 	} else
    488 	    uri->server = NULL;
    489     }
    490     *str = cur;
    491     return(0);
    492 }
    493 
    494 /**
    495  * xmlParse3986Authority:
    496  * @uri:  pointer to an URI structure
    497  * @str:  the string to analyze
    498  *
    499  * Parse an authority part and fills in the appropriate fields
    500  * of the @uri structure
    501  *
    502  * authority     = [ userinfo "@" ] host [ ":" port ]
    503  *
    504  * Returns 0 or the error code
    505  */
    506 static int
    507 xmlParse3986Authority(xmlURIPtr uri, const char **str)
    508 {
    509     const char *cur;
    510     int ret;
    511 
    512     cur = *str;
    513     /*
    514      * try to parse an userinfo and check for the trailing @
    515      */
    516     ret = xmlParse3986Userinfo(uri, &cur);
    517     if ((ret != 0) || (*cur != '@'))
    518         cur = *str;
    519     else
    520         cur++;
    521     ret = xmlParse3986Host(uri, &cur);
    522     if (ret != 0) return(ret);
    523     if (*cur == ':') {
    524         cur++;
    525         ret = xmlParse3986Port(uri, &cur);
    526 	if (ret != 0) return(ret);
    527     }
    528     *str = cur;
    529     return(0);
    530 }
    531 
    532 /**
    533  * xmlParse3986Segment:
    534  * @str:  the string to analyze
    535  * @forbid: an optional forbidden character
    536  * @empty: allow an empty segment
    537  *
    538  * Parse a segment and fills in the appropriate fields
    539  * of the @uri structure
    540  *
    541  * segment       = *pchar
    542  * segment-nz    = 1*pchar
    543  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
    544  *               ; non-zero-length segment without any colon ":"
    545  *
    546  * Returns 0 or the error code
    547  */
    548 static int
    549 xmlParse3986Segment(const char **str, char forbid, int empty)
    550 {
    551     const char *cur;
    552 
    553     cur = *str;
    554     if (!ISA_PCHAR(cur)) {
    555         if (empty)
    556 	    return(0);
    557 	return(1);
    558     }
    559     while (ISA_PCHAR(cur) && (*cur != forbid))
    560         NEXT(cur);
    561     *str = cur;
    562     return (0);
    563 }
    564 
    565 /**
    566  * xmlParse3986PathAbEmpty:
    567  * @uri:  pointer to an URI structure
    568  * @str:  the string to analyze
    569  *
    570  * Parse an path absolute or empty and fills in the appropriate fields
    571  * of the @uri structure
    572  *
    573  * path-abempty  = *( "/" segment )
    574  *
    575  * Returns 0 or the error code
    576  */
    577 static int
    578 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
    579 {
    580     const char *cur;
    581     int ret;
    582 
    583     cur = *str;
    584 
    585     while (*cur == '/') {
    586         cur++;
    587 	ret = xmlParse3986Segment(&cur, 0, 1);
    588 	if (ret != 0) return(ret);
    589     }
    590     if (uri != NULL) {
    591 	if (uri->path != NULL) xmlFree(uri->path);
    592         if (*str != cur) {
    593             if (uri->cleanup & 2)
    594                 uri->path = STRNDUP(*str, cur - *str);
    595             else
    596                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    597         } else {
    598             uri->path = NULL;
    599         }
    600     }
    601     *str = cur;
    602     return (0);
    603 }
    604 
    605 /**
    606  * xmlParse3986PathAbsolute:
    607  * @uri:  pointer to an URI structure
    608  * @str:  the string to analyze
    609  *
    610  * Parse an path absolute and fills in the appropriate fields
    611  * of the @uri structure
    612  *
    613  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
    614  *
    615  * Returns 0 or the error code
    616  */
    617 static int
    618 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
    619 {
    620     const char *cur;
    621     int ret;
    622 
    623     cur = *str;
    624 
    625     if (*cur != '/')
    626         return(1);
    627     cur++;
    628     ret = xmlParse3986Segment(&cur, 0, 0);
    629     if (ret == 0) {
    630 	while (*cur == '/') {
    631 	    cur++;
    632 	    ret = xmlParse3986Segment(&cur, 0, 1);
    633 	    if (ret != 0) return(ret);
    634 	}
    635     }
    636     if (uri != NULL) {
    637 	if (uri->path != NULL) xmlFree(uri->path);
    638         if (cur != *str) {
    639             if (uri->cleanup & 2)
    640                 uri->path = STRNDUP(*str, cur - *str);
    641             else
    642                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    643         } else {
    644             uri->path = NULL;
    645         }
    646     }
    647     *str = cur;
    648     return (0);
    649 }
    650 
    651 /**
    652  * xmlParse3986PathRootless:
    653  * @uri:  pointer to an URI structure
    654  * @str:  the string to analyze
    655  *
    656  * Parse an path without root and fills in the appropriate fields
    657  * of the @uri structure
    658  *
    659  * path-rootless = segment-nz *( "/" segment )
    660  *
    661  * Returns 0 or the error code
    662  */
    663 static int
    664 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
    665 {
    666     const char *cur;
    667     int ret;
    668 
    669     cur = *str;
    670 
    671     ret = xmlParse3986Segment(&cur, 0, 0);
    672     if (ret != 0) return(ret);
    673     while (*cur == '/') {
    674         cur++;
    675 	ret = xmlParse3986Segment(&cur, 0, 1);
    676 	if (ret != 0) return(ret);
    677     }
    678     if (uri != NULL) {
    679 	if (uri->path != NULL) xmlFree(uri->path);
    680         if (cur != *str) {
    681             if (uri->cleanup & 2)
    682                 uri->path = STRNDUP(*str, cur - *str);
    683             else
    684                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    685         } else {
    686             uri->path = NULL;
    687         }
    688     }
    689     *str = cur;
    690     return (0);
    691 }
    692 
    693 /**
    694  * xmlParse3986PathNoScheme:
    695  * @uri:  pointer to an URI structure
    696  * @str:  the string to analyze
    697  *
    698  * Parse an path which is not a scheme and fills in the appropriate fields
    699  * of the @uri structure
    700  *
    701  * path-noscheme = segment-nz-nc *( "/" segment )
    702  *
    703  * Returns 0 or the error code
    704  */
    705 static int
    706 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
    707 {
    708     const char *cur;
    709     int ret;
    710 
    711     cur = *str;
    712 
    713     ret = xmlParse3986Segment(&cur, ':', 0);
    714     if (ret != 0) return(ret);
    715     while (*cur == '/') {
    716         cur++;
    717 	ret = xmlParse3986Segment(&cur, 0, 1);
    718 	if (ret != 0) return(ret);
    719     }
    720     if (uri != NULL) {
    721 	if (uri->path != NULL) xmlFree(uri->path);
    722         if (cur != *str) {
    723             if (uri->cleanup & 2)
    724                 uri->path = STRNDUP(*str, cur - *str);
    725             else
    726                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    727         } else {
    728             uri->path = NULL;
    729         }
    730     }
    731     *str = cur;
    732     return (0);
    733 }
    734 
    735 /**
    736  * xmlParse3986HierPart:
    737  * @uri:  pointer to an URI structure
    738  * @str:  the string to analyze
    739  *
    740  * Parse an hierarchical part and fills in the appropriate fields
    741  * of the @uri structure
    742  *
    743  * hier-part     = "//" authority path-abempty
    744  *                / path-absolute
    745  *                / path-rootless
    746  *                / path-empty
    747  *
    748  * Returns 0 or the error code
    749  */
    750 static int
    751 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
    752 {
    753     const char *cur;
    754     int ret;
    755 
    756     cur = *str;
    757 
    758     if ((*cur == '/') && (*(cur + 1) == '/')) {
    759         cur += 2;
    760 	ret = xmlParse3986Authority(uri, &cur);
    761 	if (ret != 0) return(ret);
    762 	ret = xmlParse3986PathAbEmpty(uri, &cur);
    763 	if (ret != 0) return(ret);
    764 	*str = cur;
    765 	return(0);
    766     } else if (*cur == '/') {
    767         ret = xmlParse3986PathAbsolute(uri, &cur);
    768 	if (ret != 0) return(ret);
    769     } else if (ISA_PCHAR(cur)) {
    770         ret = xmlParse3986PathRootless(uri, &cur);
    771 	if (ret != 0) return(ret);
    772     } else {
    773 	/* path-empty is effectively empty */
    774 	if (uri != NULL) {
    775 	    if (uri->path != NULL) xmlFree(uri->path);
    776 	    uri->path = NULL;
    777 	}
    778     }
    779     *str = cur;
    780     return (0);
    781 }
    782 
    783 /**
    784  * xmlParse3986RelativeRef:
    785  * @uri:  pointer to an URI structure
    786  * @str:  the string to analyze
    787  *
    788  * Parse an URI string and fills in the appropriate fields
    789  * of the @uri structure
    790  *
    791  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
    792  * relative-part = "//" authority path-abempty
    793  *               / path-absolute
    794  *               / path-noscheme
    795  *               / path-empty
    796  *
    797  * Returns 0 or the error code
    798  */
    799 static int
    800 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
    801     int ret;
    802 
    803     if ((*str == '/') && (*(str + 1) == '/')) {
    804         str += 2;
    805 	ret = xmlParse3986Authority(uri, &str);
    806 	if (ret != 0) return(ret);
    807 	ret = xmlParse3986PathAbEmpty(uri, &str);
    808 	if (ret != 0) return(ret);
    809     } else if (*str == '/') {
    810 	ret = xmlParse3986PathAbsolute(uri, &str);
    811 	if (ret != 0) return(ret);
    812     } else if (ISA_PCHAR(str)) {
    813         ret = xmlParse3986PathNoScheme(uri, &str);
    814 	if (ret != 0) return(ret);
    815     } else {
    816 	/* path-empty is effectively empty */
    817 	if (uri != NULL) {
    818 	    if (uri->path != NULL) xmlFree(uri->path);
    819 	    uri->path = NULL;
    820 	}
    821     }
    822 
    823     if (*str == '?') {
    824 	str++;
    825 	ret = xmlParse3986Query(uri, &str);
    826 	if (ret != 0) return(ret);
    827     }
    828     if (*str == '#') {
    829 	str++;
    830 	ret = xmlParse3986Fragment(uri, &str);
    831 	if (ret != 0) return(ret);
    832     }
    833     if (*str != 0) {
    834 	xmlCleanURI(uri);
    835 	return(1);
    836     }
    837     return(0);
    838 }
    839 
    840 
    841 /**
    842  * xmlParse3986URI:
    843  * @uri:  pointer to an URI structure
    844  * @str:  the string to analyze
    845  *
    846  * Parse an URI string and fills in the appropriate fields
    847  * of the @uri structure
    848  *
    849  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    850  *
    851  * Returns 0 or the error code
    852  */
    853 static int
    854 xmlParse3986URI(xmlURIPtr uri, const char *str) {
    855     int ret;
    856 
    857     ret = xmlParse3986Scheme(uri, &str);
    858     if (ret != 0) return(ret);
    859     if (*str != ':') {
    860 	return(1);
    861     }
    862     str++;
    863     ret = xmlParse3986HierPart(uri, &str);
    864     if (ret != 0) return(ret);
    865     if (*str == '?') {
    866 	str++;
    867 	ret = xmlParse3986Query(uri, &str);
    868 	if (ret != 0) return(ret);
    869     }
    870     if (*str == '#') {
    871 	str++;
    872 	ret = xmlParse3986Fragment(uri, &str);
    873 	if (ret != 0) return(ret);
    874     }
    875     if (*str != 0) {
    876 	xmlCleanURI(uri);
    877 	return(1);
    878     }
    879     return(0);
    880 }
    881 
    882 /**
    883  * xmlParse3986URIReference:
    884  * @uri:  pointer to an URI structure
    885  * @str:  the string to analyze
    886  *
    887  * Parse an URI reference string and fills in the appropriate fields
    888  * of the @uri structure
    889  *
    890  * URI-reference = URI / relative-ref
    891  *
    892  * Returns 0 or the error code
    893  */
    894 static int
    895 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
    896     int ret;
    897 
    898     if (str == NULL)
    899 	return(-1);
    900     xmlCleanURI(uri);
    901 
    902     /*
    903      * Try first to parse absolute refs, then fallback to relative if
    904      * it fails.
    905      */
    906     ret = xmlParse3986URI(uri, str);
    907     if (ret != 0) {
    908 	xmlCleanURI(uri);
    909         ret = xmlParse3986RelativeRef(uri, str);
    910 	if (ret != 0) {
    911 	    xmlCleanURI(uri);
    912 	    return(ret);
    913 	}
    914     }
    915     return(0);
    916 }
    917 
    918 /**
    919  * xmlParseURI:
    920  * @str:  the URI string to analyze
    921  *
    922  * Parse an URI based on RFC 3986
    923  *
    924  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    925  *
    926  * Returns a newly built xmlURIPtr or NULL in case of error
    927  */
    928 xmlURIPtr
    929 xmlParseURI(const char *str) {
    930     xmlURIPtr uri;
    931     int ret;
    932 
    933     if (str == NULL)
    934 	return(NULL);
    935     uri = xmlCreateURI();
    936     if (uri != NULL) {
    937 	ret = xmlParse3986URIReference(uri, str);
    938         if (ret) {
    939 	    xmlFreeURI(uri);
    940 	    return(NULL);
    941 	}
    942     }
    943     return(uri);
    944 }
    945 
    946 /**
    947  * xmlParseURIReference:
    948  * @uri:  pointer to an URI structure
    949  * @str:  the string to analyze
    950  *
    951  * Parse an URI reference string based on RFC 3986 and fills in the
    952  * appropriate fields of the @uri structure
    953  *
    954  * URI-reference = URI / relative-ref
    955  *
    956  * Returns 0 or the error code
    957  */
    958 int
    959 xmlParseURIReference(xmlURIPtr uri, const char *str) {
    960     return(xmlParse3986URIReference(uri, str));
    961 }
    962 
    963 /**
    964  * xmlParseURIRaw:
    965  * @str:  the URI string to analyze
    966  * @raw:  if 1 unescaping of URI pieces are disabled
    967  *
    968  * Parse an URI but allows to keep intact the original fragments.
    969  *
    970  * URI-reference = URI / relative-ref
    971  *
    972  * Returns a newly built xmlURIPtr or NULL in case of error
    973  */
    974 xmlURIPtr
    975 xmlParseURIRaw(const char *str, int raw) {
    976     xmlURIPtr uri;
    977     int ret;
    978 
    979     if (str == NULL)
    980 	return(NULL);
    981     uri = xmlCreateURI();
    982     if (uri != NULL) {
    983         if (raw) {
    984 	    uri->cleanup |= 2;
    985 	}
    986 	ret = xmlParseURIReference(uri, str);
    987         if (ret) {
    988 	    xmlFreeURI(uri);
    989 	    return(NULL);
    990 	}
    991     }
    992     return(uri);
    993 }
    994 
    995 /************************************************************************
    996  *									*
    997  *			Generic URI structure functions			*
    998  *									*
    999  ************************************************************************/
   1000 
   1001 /**
   1002  * xmlCreateURI:
   1003  *
   1004  * Simply creates an empty xmlURI
   1005  *
   1006  * Returns the new structure or NULL in case of error
   1007  */
   1008 xmlURIPtr
   1009 xmlCreateURI(void) {
   1010     xmlURIPtr ret;
   1011 
   1012     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
   1013     if (ret == NULL) {
   1014         xmlURIErrMemory("creating URI structure\n");
   1015 	return(NULL);
   1016     }
   1017     memset(ret, 0, sizeof(xmlURI));
   1018     return(ret);
   1019 }
   1020 
   1021 /**
   1022  * xmlSaveUriRealloc:
   1023  *
   1024  * Function to handle properly a reallocation when saving an URI
   1025  * Also imposes some limit on the length of an URI string output
   1026  */
   1027 static xmlChar *
   1028 xmlSaveUriRealloc(xmlChar *ret, int *max) {
   1029     xmlChar *temp;
   1030     int tmp;
   1031 
   1032     if (*max > MAX_URI_LENGTH) {
   1033         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
   1034         return(NULL);
   1035     }
   1036     tmp = *max * 2;
   1037     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
   1038     if (temp == NULL) {
   1039         xmlURIErrMemory("saving URI\n");
   1040         return(NULL);
   1041     }
   1042     *max = tmp;
   1043     return(temp);
   1044 }
   1045 
   1046 /**
   1047  * xmlSaveUri:
   1048  * @uri:  pointer to an xmlURI
   1049  *
   1050  * Save the URI as an escaped string
   1051  *
   1052  * Returns a new string (to be deallocated by caller)
   1053  */
   1054 xmlChar *
   1055 xmlSaveUri(xmlURIPtr uri) {
   1056     xmlChar *ret = NULL;
   1057     xmlChar *temp;
   1058     const char *p;
   1059     int len;
   1060     int max;
   1061 
   1062     if (uri == NULL) return(NULL);
   1063 
   1064 
   1065     max = 80;
   1066     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
   1067     if (ret == NULL) {
   1068         xmlURIErrMemory("saving URI\n");
   1069 	return(NULL);
   1070     }
   1071     len = 0;
   1072 
   1073     if (uri->scheme != NULL) {
   1074 	p = uri->scheme;
   1075 	while (*p != 0) {
   1076 	    if (len >= max) {
   1077                 temp = xmlSaveUriRealloc(ret, &max);
   1078                 if (temp == NULL) goto mem_error;
   1079 		ret = temp;
   1080 	    }
   1081 	    ret[len++] = *p++;
   1082 	}
   1083 	if (len >= max) {
   1084             temp = xmlSaveUriRealloc(ret, &max);
   1085             if (temp == NULL) goto mem_error;
   1086             ret = temp;
   1087 	}
   1088 	ret[len++] = ':';
   1089     }
   1090     if (uri->opaque != NULL) {
   1091 	p = uri->opaque;
   1092 	while (*p != 0) {
   1093 	    if (len + 3 >= max) {
   1094                 temp = xmlSaveUriRealloc(ret, &max);
   1095                 if (temp == NULL) goto mem_error;
   1096                 ret = temp;
   1097 	    }
   1098 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
   1099 		ret[len++] = *p++;
   1100 	    else {
   1101 		int val = *(unsigned char *)p++;
   1102 		int hi = val / 0x10, lo = val % 0x10;
   1103 		ret[len++] = '%';
   1104 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1105 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1106 	    }
   1107 	}
   1108     } else {
   1109 	if (uri->server != NULL) {
   1110 	    if (len + 3 >= max) {
   1111                 temp = xmlSaveUriRealloc(ret, &max);
   1112                 if (temp == NULL) goto mem_error;
   1113                 ret = temp;
   1114 	    }
   1115 	    ret[len++] = '/';
   1116 	    ret[len++] = '/';
   1117 	    if (uri->user != NULL) {
   1118 		p = uri->user;
   1119 		while (*p != 0) {
   1120 		    if (len + 3 >= max) {
   1121                         temp = xmlSaveUriRealloc(ret, &max);
   1122                         if (temp == NULL) goto mem_error;
   1123                         ret = temp;
   1124 		    }
   1125 		    if ((IS_UNRESERVED(*(p))) ||
   1126 			((*(p) == ';')) || ((*(p) == ':')) ||
   1127 			((*(p) == '&')) || ((*(p) == '=')) ||
   1128 			((*(p) == '+')) || ((*(p) == '$')) ||
   1129 			((*(p) == ',')))
   1130 			ret[len++] = *p++;
   1131 		    else {
   1132 			int val = *(unsigned char *)p++;
   1133 			int hi = val / 0x10, lo = val % 0x10;
   1134 			ret[len++] = '%';
   1135 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1136 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1137 		    }
   1138 		}
   1139 		if (len + 3 >= max) {
   1140                     temp = xmlSaveUriRealloc(ret, &max);
   1141                     if (temp == NULL) goto mem_error;
   1142                     ret = temp;
   1143 		}
   1144 		ret[len++] = '@';
   1145 	    }
   1146 	    p = uri->server;
   1147 	    while (*p != 0) {
   1148 		if (len >= max) {
   1149                     temp = xmlSaveUriRealloc(ret, &max);
   1150                     if (temp == NULL) goto mem_error;
   1151                     ret = temp;
   1152 		}
   1153 		ret[len++] = *p++;
   1154 	    }
   1155 	    if (uri->port > 0) {
   1156 		if (len + 10 >= max) {
   1157                     temp = xmlSaveUriRealloc(ret, &max);
   1158                     if (temp == NULL) goto mem_error;
   1159                     ret = temp;
   1160 		}
   1161 		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
   1162 	    }
   1163 	} else if (uri->authority != NULL) {
   1164 	    if (len + 3 >= max) {
   1165                 temp = xmlSaveUriRealloc(ret, &max);
   1166                 if (temp == NULL) goto mem_error;
   1167                 ret = temp;
   1168 	    }
   1169 	    ret[len++] = '/';
   1170 	    ret[len++] = '/';
   1171 	    p = uri->authority;
   1172 	    while (*p != 0) {
   1173 		if (len + 3 >= max) {
   1174                     temp = xmlSaveUriRealloc(ret, &max);
   1175                     if (temp == NULL) goto mem_error;
   1176                     ret = temp;
   1177 		}
   1178 		if ((IS_UNRESERVED(*(p))) ||
   1179                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
   1180                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
   1181                     ((*(p) == '=')) || ((*(p) == '+')))
   1182 		    ret[len++] = *p++;
   1183 		else {
   1184 		    int val = *(unsigned char *)p++;
   1185 		    int hi = val / 0x10, lo = val % 0x10;
   1186 		    ret[len++] = '%';
   1187 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1188 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1189 		}
   1190 	    }
   1191 	} else if (uri->scheme != NULL) {
   1192 	    if (len + 3 >= max) {
   1193                 temp = xmlSaveUriRealloc(ret, &max);
   1194                 if (temp == NULL) goto mem_error;
   1195                 ret = temp;
   1196 	    }
   1197 	    ret[len++] = '/';
   1198 	    ret[len++] = '/';
   1199 	}
   1200 	if (uri->path != NULL) {
   1201 	    p = uri->path;
   1202 	    /*
   1203 	     * the colon in file:///d: should not be escaped or
   1204 	     * Windows accesses fail later.
   1205 	     */
   1206 	    if ((uri->scheme != NULL) &&
   1207 		(p[0] == '/') &&
   1208 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
   1209 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
   1210 		(p[2] == ':') &&
   1211 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
   1212 		if (len + 3 >= max) {
   1213                     temp = xmlSaveUriRealloc(ret, &max);
   1214                     if (temp == NULL) goto mem_error;
   1215                     ret = temp;
   1216 		}
   1217 		ret[len++] = *p++;
   1218 		ret[len++] = *p++;
   1219 		ret[len++] = *p++;
   1220 	    }
   1221 	    while (*p != 0) {
   1222 		if (len + 3 >= max) {
   1223                     temp = xmlSaveUriRealloc(ret, &max);
   1224                     if (temp == NULL) goto mem_error;
   1225                     ret = temp;
   1226 		}
   1227 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
   1228                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
   1229 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
   1230 	            ((*(p) == ',')))
   1231 		    ret[len++] = *p++;
   1232 		else {
   1233 		    int val = *(unsigned char *)p++;
   1234 		    int hi = val / 0x10, lo = val % 0x10;
   1235 		    ret[len++] = '%';
   1236 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1237 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1238 		}
   1239 	    }
   1240 	}
   1241 	if (uri->query_raw != NULL) {
   1242 	    if (len + 1 >= max) {
   1243                 temp = xmlSaveUriRealloc(ret, &max);
   1244                 if (temp == NULL) goto mem_error;
   1245                 ret = temp;
   1246 	    }
   1247 	    ret[len++] = '?';
   1248 	    p = uri->query_raw;
   1249 	    while (*p != 0) {
   1250 		if (len + 1 >= max) {
   1251                     temp = xmlSaveUriRealloc(ret, &max);
   1252                     if (temp == NULL) goto mem_error;
   1253                     ret = temp;
   1254 		}
   1255 		ret[len++] = *p++;
   1256 	    }
   1257 	} else if (uri->query != NULL) {
   1258 	    if (len + 3 >= max) {
   1259                 temp = xmlSaveUriRealloc(ret, &max);
   1260                 if (temp == NULL) goto mem_error;
   1261                 ret = temp;
   1262 	    }
   1263 	    ret[len++] = '?';
   1264 	    p = uri->query;
   1265 	    while (*p != 0) {
   1266 		if (len + 3 >= max) {
   1267                     temp = xmlSaveUriRealloc(ret, &max);
   1268                     if (temp == NULL) goto mem_error;
   1269                     ret = temp;
   1270 		}
   1271 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
   1272 		    ret[len++] = *p++;
   1273 		else {
   1274 		    int val = *(unsigned char *)p++;
   1275 		    int hi = val / 0x10, lo = val % 0x10;
   1276 		    ret[len++] = '%';
   1277 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1278 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1279 		}
   1280 	    }
   1281 	}
   1282     }
   1283     if (uri->fragment != NULL) {
   1284 	if (len + 3 >= max) {
   1285             temp = xmlSaveUriRealloc(ret, &max);
   1286             if (temp == NULL) goto mem_error;
   1287             ret = temp;
   1288 	}
   1289 	ret[len++] = '#';
   1290 	p = uri->fragment;
   1291 	while (*p != 0) {
   1292 	    if (len + 3 >= max) {
   1293                 temp = xmlSaveUriRealloc(ret, &max);
   1294                 if (temp == NULL) goto mem_error;
   1295                 ret = temp;
   1296 	    }
   1297 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
   1298 		ret[len++] = *p++;
   1299 	    else {
   1300 		int val = *(unsigned char *)p++;
   1301 		int hi = val / 0x10, lo = val % 0x10;
   1302 		ret[len++] = '%';
   1303 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
   1304 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
   1305 	    }
   1306 	}
   1307     }
   1308     if (len >= max) {
   1309         temp = xmlSaveUriRealloc(ret, &max);
   1310         if (temp == NULL) goto mem_error;
   1311         ret = temp;
   1312     }
   1313     ret[len] = 0;
   1314     return(ret);
   1315 
   1316 mem_error:
   1317     xmlFree(ret);
   1318     return(NULL);
   1319 }
   1320 
   1321 /**
   1322  * xmlPrintURI:
   1323  * @stream:  a FILE* for the output
   1324  * @uri:  pointer to an xmlURI
   1325  *
   1326  * Prints the URI in the stream @stream.
   1327  */
   1328 void
   1329 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
   1330     xmlChar *out;
   1331 
   1332     out = xmlSaveUri(uri);
   1333     if (out != NULL) {
   1334 	fprintf(stream, "%s", (char *) out);
   1335 	xmlFree(out);
   1336     }
   1337 }
   1338 
   1339 /**
   1340  * xmlCleanURI:
   1341  * @uri:  pointer to an xmlURI
   1342  *
   1343  * Make sure the xmlURI struct is free of content
   1344  */
   1345 static void
   1346 xmlCleanURI(xmlURIPtr uri) {
   1347     if (uri == NULL) return;
   1348 
   1349     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1350     uri->scheme = NULL;
   1351     if (uri->server != NULL) xmlFree(uri->server);
   1352     uri->server = NULL;
   1353     if (uri->user != NULL) xmlFree(uri->user);
   1354     uri->user = NULL;
   1355     if (uri->path != NULL) xmlFree(uri->path);
   1356     uri->path = NULL;
   1357     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1358     uri->fragment = NULL;
   1359     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1360     uri->opaque = NULL;
   1361     if (uri->authority != NULL) xmlFree(uri->authority);
   1362     uri->authority = NULL;
   1363     if (uri->query != NULL) xmlFree(uri->query);
   1364     uri->query = NULL;
   1365     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1366     uri->query_raw = NULL;
   1367 }
   1368 
   1369 /**
   1370  * xmlFreeURI:
   1371  * @uri:  pointer to an xmlURI
   1372  *
   1373  * Free up the xmlURI struct
   1374  */
   1375 void
   1376 xmlFreeURI(xmlURIPtr uri) {
   1377     if (uri == NULL) return;
   1378 
   1379     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1380     if (uri->server != NULL) xmlFree(uri->server);
   1381     if (uri->user != NULL) xmlFree(uri->user);
   1382     if (uri->path != NULL) xmlFree(uri->path);
   1383     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1384     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1385     if (uri->authority != NULL) xmlFree(uri->authority);
   1386     if (uri->query != NULL) xmlFree(uri->query);
   1387     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1388     xmlFree(uri);
   1389 }
   1390 
   1391 /************************************************************************
   1392  *									*
   1393  *			Helper functions				*
   1394  *									*
   1395  ************************************************************************/
   1396 
   1397 /**
   1398  * xmlNormalizeURIPath:
   1399  * @path:  pointer to the path string
   1400  *
   1401  * Applies the 5 normalization steps to a path string--that is, RFC 2396
   1402  * Section 5.2, steps 6.c through 6.g.
   1403  *
   1404  * Normalization occurs directly on the string, no new allocation is done
   1405  *
   1406  * Returns 0 or an error code
   1407  */
   1408 int
   1409 xmlNormalizeURIPath(char *path) {
   1410     char *cur, *out;
   1411 
   1412     if (path == NULL)
   1413 	return(-1);
   1414 
   1415     /* Skip all initial "/" chars.  We want to get to the beginning of the
   1416      * first non-empty segment.
   1417      */
   1418     cur = path;
   1419     while (cur[0] == '/')
   1420       ++cur;
   1421     if (cur[0] == '\0')
   1422       return(0);
   1423 
   1424     /* Keep everything we've seen so far.  */
   1425     out = cur;
   1426 
   1427     /*
   1428      * Analyze each segment in sequence for cases (c) and (d).
   1429      */
   1430     while (cur[0] != '\0') {
   1431 	/*
   1432 	 * c) All occurrences of "./", where "." is a complete path segment,
   1433 	 *    are removed from the buffer string.
   1434 	 */
   1435 	if ((cur[0] == '.') && (cur[1] == '/')) {
   1436 	    cur += 2;
   1437 	    /* '//' normalization should be done at this point too */
   1438 	    while (cur[0] == '/')
   1439 		cur++;
   1440 	    continue;
   1441 	}
   1442 
   1443 	/*
   1444 	 * d) If the buffer string ends with "." as a complete path segment,
   1445 	 *    that "." is removed.
   1446 	 */
   1447 	if ((cur[0] == '.') && (cur[1] == '\0'))
   1448 	    break;
   1449 
   1450 	/* Otherwise keep the segment.  */
   1451 	while (cur[0] != '/') {
   1452             if (cur[0] == '\0')
   1453               goto done_cd;
   1454 	    (out++)[0] = (cur++)[0];
   1455 	}
   1456 	/* nomalize // */
   1457 	while ((cur[0] == '/') && (cur[1] == '/'))
   1458 	    cur++;
   1459 
   1460         (out++)[0] = (cur++)[0];
   1461     }
   1462  done_cd:
   1463     out[0] = '\0';
   1464 
   1465     /* Reset to the beginning of the first segment for the next sequence.  */
   1466     cur = path;
   1467     while (cur[0] == '/')
   1468       ++cur;
   1469     if (cur[0] == '\0')
   1470 	return(0);
   1471 
   1472     /*
   1473      * Analyze each segment in sequence for cases (e) and (f).
   1474      *
   1475      * e) All occurrences of "<segment>/../", where <segment> is a
   1476      *    complete path segment not equal to "..", are removed from the
   1477      *    buffer string.  Removal of these path segments is performed
   1478      *    iteratively, removing the leftmost matching pattern on each
   1479      *    iteration, until no matching pattern remains.
   1480      *
   1481      * f) If the buffer string ends with "<segment>/..", where <segment>
   1482      *    is a complete path segment not equal to "..", that
   1483      *    "<segment>/.." is removed.
   1484      *
   1485      * To satisfy the "iterative" clause in (e), we need to collapse the
   1486      * string every time we find something that needs to be removed.  Thus,
   1487      * we don't need to keep two pointers into the string: we only need a
   1488      * "current position" pointer.
   1489      */
   1490     while (1) {
   1491         char *segp, *tmp;
   1492 
   1493         /* At the beginning of each iteration of this loop, "cur" points to
   1494          * the first character of the segment we want to examine.
   1495          */
   1496 
   1497         /* Find the end of the current segment.  */
   1498         segp = cur;
   1499         while ((segp[0] != '/') && (segp[0] != '\0'))
   1500           ++segp;
   1501 
   1502         /* If this is the last segment, we're done (we need at least two
   1503          * segments to meet the criteria for the (e) and (f) cases).
   1504          */
   1505         if (segp[0] == '\0')
   1506           break;
   1507 
   1508         /* If the first segment is "..", or if the next segment _isn't_ "..",
   1509          * keep this segment and try the next one.
   1510          */
   1511         ++segp;
   1512         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
   1513             || ((segp[0] != '.') || (segp[1] != '.')
   1514                 || ((segp[2] != '/') && (segp[2] != '\0')))) {
   1515           cur = segp;
   1516           continue;
   1517         }
   1518 
   1519         /* If we get here, remove this segment and the next one and back up
   1520          * to the previous segment (if there is one), to implement the
   1521          * "iteratively" clause.  It's pretty much impossible to back up
   1522          * while maintaining two pointers into the buffer, so just compact
   1523          * the whole buffer now.
   1524          */
   1525 
   1526         /* If this is the end of the buffer, we're done.  */
   1527         if (segp[2] == '\0') {
   1528           cur[0] = '\0';
   1529           break;
   1530         }
   1531         /* Valgrind complained, strcpy(cur, segp + 3); */
   1532         /* string will overlap, do not use strcpy */
   1533         tmp = cur;
   1534         segp += 3;
   1535         while ((*tmp++ = *segp++) != 0)
   1536           ;
   1537 
   1538         /* If there are no previous segments, then keep going from here.  */
   1539         segp = cur;
   1540         while ((segp > path) && ((--segp)[0] == '/'))
   1541           ;
   1542         if (segp == path)
   1543           continue;
   1544 
   1545         /* "segp" is pointing to the end of a previous segment; find it's
   1546          * start.  We need to back up to the previous segment and start
   1547          * over with that to handle things like "foo/bar/../..".  If we
   1548          * don't do this, then on the first pass we'll remove the "bar/..",
   1549          * but be pointing at the second ".." so we won't realize we can also
   1550          * remove the "foo/..".
   1551          */
   1552         cur = segp;
   1553         while ((cur > path) && (cur[-1] != '/'))
   1554           --cur;
   1555     }
   1556     out[0] = '\0';
   1557 
   1558     /*
   1559      * g) If the resulting buffer string still begins with one or more
   1560      *    complete path segments of "..", then the reference is
   1561      *    considered to be in error. Implementations may handle this
   1562      *    error by retaining these components in the resolved path (i.e.,
   1563      *    treating them as part of the final URI), by removing them from
   1564      *    the resolved path (i.e., discarding relative levels above the
   1565      *    root), or by avoiding traversal of the reference.
   1566      *
   1567      * We discard them from the final path.
   1568      */
   1569     if (path[0] == '/') {
   1570       cur = path;
   1571       while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
   1572              && ((cur[3] == '/') || (cur[3] == '\0')))
   1573 	cur += 3;
   1574 
   1575       if (cur != path) {
   1576 	out = path;
   1577 	while (cur[0] != '\0')
   1578           (out++)[0] = (cur++)[0];
   1579 	out[0] = 0;
   1580       }
   1581     }
   1582 
   1583     return(0);
   1584 }
   1585 
   1586 static int is_hex(char c) {
   1587     if (((c >= '0') && (c <= '9')) ||
   1588         ((c >= 'a') && (c <= 'f')) ||
   1589         ((c >= 'A') && (c <= 'F')))
   1590 	return(1);
   1591     return(0);
   1592 }
   1593 
   1594 /**
   1595  * xmlURIUnescapeString:
   1596  * @str:  the string to unescape
   1597  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
   1598  * @target:  optional destination buffer
   1599  *
   1600  * Unescaping routine, but does not check that the string is an URI. The
   1601  * output is a direct unsigned char translation of %XX values (no encoding)
   1602  * Note that the length of the result can only be smaller or same size as
   1603  * the input string.
   1604  *
   1605  * Returns a copy of the string, but unescaped, will return NULL only in case
   1606  * of error
   1607  */
   1608 char *
   1609 xmlURIUnescapeString(const char *str, int len, char *target) {
   1610     char *ret, *out;
   1611     const char *in;
   1612 
   1613     if (str == NULL)
   1614 	return(NULL);
   1615     if (len <= 0) len = strlen(str);
   1616     if (len < 0) return(NULL);
   1617 
   1618     if (target == NULL) {
   1619 	ret = (char *) xmlMallocAtomic(len + 1);
   1620 	if (ret == NULL) {
   1621             xmlURIErrMemory("unescaping URI value\n");
   1622 	    return(NULL);
   1623 	}
   1624     } else
   1625 	ret = target;
   1626     in = str;
   1627     out = ret;
   1628     while(len > 0) {
   1629 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
   1630 	    in++;
   1631 	    if ((*in >= '0') && (*in <= '9'))
   1632 	        *out = (*in - '0');
   1633 	    else if ((*in >= 'a') && (*in <= 'f'))
   1634 	        *out = (*in - 'a') + 10;
   1635 	    else if ((*in >= 'A') && (*in <= 'F'))
   1636 	        *out = (*in - 'A') + 10;
   1637 	    in++;
   1638 	    if ((*in >= '0') && (*in <= '9'))
   1639 	        *out = *out * 16 + (*in - '0');
   1640 	    else if ((*in >= 'a') && (*in <= 'f'))
   1641 	        *out = *out * 16 + (*in - 'a') + 10;
   1642 	    else if ((*in >= 'A') && (*in <= 'F'))
   1643 	        *out = *out * 16 + (*in - 'A') + 10;
   1644 	    in++;
   1645 	    len -= 3;
   1646 	    out++;
   1647 	} else {
   1648 	    *out++ = *in++;
   1649 	    len--;
   1650 	}
   1651     }
   1652     *out = 0;
   1653     return(ret);
   1654 }
   1655 
   1656 /**
   1657  * xmlURIEscapeStr:
   1658  * @str:  string to escape
   1659  * @list: exception list string of chars not to escape
   1660  *
   1661  * This routine escapes a string to hex, ignoring reserved characters (a-z)
   1662  * and the characters in the exception list.
   1663  *
   1664  * Returns a new escaped string or NULL in case of error.
   1665  */
   1666 xmlChar *
   1667 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
   1668     xmlChar *ret, ch;
   1669     xmlChar *temp;
   1670     const xmlChar *in;
   1671     int len, out;
   1672 
   1673     if (str == NULL)
   1674 	return(NULL);
   1675     if (str[0] == 0)
   1676 	return(xmlStrdup(str));
   1677     len = xmlStrlen(str);
   1678     if (!(len > 0)) return(NULL);
   1679 
   1680     len += 20;
   1681     ret = (xmlChar *) xmlMallocAtomic(len);
   1682     if (ret == NULL) {
   1683         xmlURIErrMemory("escaping URI value\n");
   1684 	return(NULL);
   1685     }
   1686     in = (const xmlChar *) str;
   1687     out = 0;
   1688     while(*in != 0) {
   1689 	if (len - out <= 3) {
   1690             temp = xmlSaveUriRealloc(ret, &len);
   1691 	    if (temp == NULL) {
   1692                 xmlURIErrMemory("escaping URI value\n");
   1693 		xmlFree(ret);
   1694 		return(NULL);
   1695 	    }
   1696 	    ret = temp;
   1697 	}
   1698 
   1699 	ch = *in;
   1700 
   1701 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
   1702 	    unsigned char val;
   1703 	    ret[out++] = '%';
   1704 	    val = ch >> 4;
   1705 	    if (val <= 9)
   1706 		ret[out++] = '0' + val;
   1707 	    else
   1708 		ret[out++] = 'A' + val - 0xA;
   1709 	    val = ch & 0xF;
   1710 	    if (val <= 9)
   1711 		ret[out++] = '0' + val;
   1712 	    else
   1713 		ret[out++] = 'A' + val - 0xA;
   1714 	    in++;
   1715 	} else {
   1716 	    ret[out++] = *in++;
   1717 	}
   1718 
   1719     }
   1720     ret[out] = 0;
   1721     return(ret);
   1722 }
   1723 
   1724 /**
   1725  * xmlURIEscape:
   1726  * @str:  the string of the URI to escape
   1727  *
   1728  * Escaping routine, does not do validity checks !
   1729  * It will try to escape the chars needing this, but this is heuristic
   1730  * based it's impossible to be sure.
   1731  *
   1732  * Returns an copy of the string, but escaped
   1733  *
   1734  * 25 May 2001
   1735  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
   1736  * according to RFC2396.
   1737  *   - Carl Douglas
   1738  */
   1739 xmlChar *
   1740 xmlURIEscape(const xmlChar * str)
   1741 {
   1742     xmlChar *ret, *segment = NULL;
   1743     xmlURIPtr uri;
   1744     int ret2;
   1745 
   1746 #define NULLCHK(p) if(!p) { \
   1747          xmlURIErrMemory("escaping URI value\n"); \
   1748          xmlFreeURI(uri); \
   1749          return NULL; } \
   1750 
   1751     if (str == NULL)
   1752         return (NULL);
   1753 
   1754     uri = xmlCreateURI();
   1755     if (uri != NULL) {
   1756 	/*
   1757 	 * Allow escaping errors in the unescaped form
   1758 	 */
   1759         uri->cleanup = 1;
   1760         ret2 = xmlParseURIReference(uri, (const char *)str);
   1761         if (ret2) {
   1762             xmlFreeURI(uri);
   1763             return (NULL);
   1764         }
   1765     }
   1766 
   1767     if (!uri)
   1768         return NULL;
   1769 
   1770     ret = NULL;
   1771 
   1772     if (uri->scheme) {
   1773         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
   1774         NULLCHK(segment)
   1775         ret = xmlStrcat(ret, segment);
   1776         ret = xmlStrcat(ret, BAD_CAST ":");
   1777         xmlFree(segment);
   1778     }
   1779 
   1780     if (uri->authority) {
   1781         segment =
   1782             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
   1783         NULLCHK(segment)
   1784         ret = xmlStrcat(ret, BAD_CAST "//");
   1785         ret = xmlStrcat(ret, segment);
   1786         xmlFree(segment);
   1787     }
   1788 
   1789     if (uri->user) {
   1790         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
   1791         NULLCHK(segment)
   1792 		ret = xmlStrcat(ret,BAD_CAST "//");
   1793         ret = xmlStrcat(ret, segment);
   1794         ret = xmlStrcat(ret, BAD_CAST "@");
   1795         xmlFree(segment);
   1796     }
   1797 
   1798     if (uri->server) {
   1799         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
   1800         NULLCHK(segment)
   1801 		if (uri->user == NULL)
   1802 		ret = xmlStrcat(ret, BAD_CAST "//");
   1803         ret = xmlStrcat(ret, segment);
   1804         xmlFree(segment);
   1805     }
   1806 
   1807     if (uri->port) {
   1808         xmlChar port[10];
   1809 
   1810         snprintf((char *) port, 10, "%d", uri->port);
   1811         ret = xmlStrcat(ret, BAD_CAST ":");
   1812         ret = xmlStrcat(ret, port);
   1813     }
   1814 
   1815     if (uri->path) {
   1816         segment =
   1817             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
   1818         NULLCHK(segment)
   1819         ret = xmlStrcat(ret, segment);
   1820         xmlFree(segment);
   1821     }
   1822 
   1823     if (uri->query_raw) {
   1824         ret = xmlStrcat(ret, BAD_CAST "?");
   1825         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
   1826     }
   1827     else if (uri->query) {
   1828         segment =
   1829             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
   1830         NULLCHK(segment)
   1831         ret = xmlStrcat(ret, BAD_CAST "?");
   1832         ret = xmlStrcat(ret, segment);
   1833         xmlFree(segment);
   1834     }
   1835 
   1836     if (uri->opaque) {
   1837         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
   1838         NULLCHK(segment)
   1839         ret = xmlStrcat(ret, segment);
   1840         xmlFree(segment);
   1841     }
   1842 
   1843     if (uri->fragment) {
   1844         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
   1845         NULLCHK(segment)
   1846         ret = xmlStrcat(ret, BAD_CAST "#");
   1847         ret = xmlStrcat(ret, segment);
   1848         xmlFree(segment);
   1849     }
   1850 
   1851     xmlFreeURI(uri);
   1852 #undef NULLCHK
   1853 
   1854     return (ret);
   1855 }
   1856 
   1857 /************************************************************************
   1858  *									*
   1859  *			Public functions				*
   1860  *									*
   1861  ************************************************************************/
   1862 
   1863 /**
   1864  * xmlBuildURI:
   1865  * @URI:  the URI instance found in the document
   1866  * @base:  the base value
   1867  *
    1868  * Computes the final URI of the reference done by checking that
   1869  * the given URI is valid, and building the final URI using the
   1870  * base URI. This is processed according to section 5.2 of the
   1871  * RFC 2396
   1872  *
   1873  * 5.2. Resolving Relative References to Absolute Form
   1874  *
   1875  * Returns a new URI string (to be freed by the caller) or NULL in case
   1876  *         of error.
   1877  */
   1878 xmlChar *
   1879 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
   1880     xmlChar *val = NULL;
   1881     int ret, len, indx, cur, out;
   1882     xmlURIPtr ref = NULL;
   1883     xmlURIPtr bas = NULL;
   1884     xmlURIPtr res = NULL;
   1885 
   1886     /*
   1887      * 1) The URI reference is parsed into the potential four components and
   1888      *    fragment identifier, as described in Section 4.3.
   1889      *
   1890      *    NOTE that a completely empty URI is treated by modern browsers
   1891      *    as a reference to "." rather than as a synonym for the current
   1892      *    URI.  Should we do that here?
   1893      */
   1894     if (URI == NULL)
   1895 	ret = -1;
   1896     else {
   1897 	if (*URI) {
   1898 	    ref = xmlCreateURI();
   1899 	    if (ref == NULL)
   1900 		goto done;
   1901 	    ret = xmlParseURIReference(ref, (const char *) URI);
   1902 	}
   1903 	else
   1904 	    ret = 0;
   1905     }
   1906     if (ret != 0)
   1907 	goto done;
   1908     if ((ref != NULL) && (ref->scheme != NULL)) {
   1909 	/*
   1910 	 * The URI is absolute don't modify.
   1911 	 */
   1912 	val = xmlStrdup(URI);
   1913 	goto done;
   1914     }
   1915     if (base == NULL)
   1916 	ret = -1;
   1917     else {
   1918 	bas = xmlCreateURI();
   1919 	if (bas == NULL)
   1920 	    goto done;
   1921 	ret = xmlParseURIReference(bas, (const char *) base);
   1922     }
   1923     if (ret != 0) {
   1924 	if (ref)
   1925 	    val = xmlSaveUri(ref);
   1926 	goto done;
   1927     }
   1928     if (ref == NULL) {
   1929 	/*
   1930 	 * the base fragment must be ignored
   1931 	 */
   1932 	if (bas->fragment != NULL) {
   1933 	    xmlFree(bas->fragment);
   1934 	    bas->fragment = NULL;
   1935 	}
   1936 	val = xmlSaveUri(bas);
   1937 	goto done;
   1938     }
   1939 
   1940     /*
   1941      * 2) If the path component is empty and the scheme, authority, and
   1942      *    query components are undefined, then it is a reference to the
   1943      *    current document and we are done.  Otherwise, the reference URI's
   1944      *    query and fragment components are defined as found (or not found)
   1945      *    within the URI reference and not inherited from the base URI.
   1946      *
   1947      *    NOTE that in modern browsers, the parsing differs from the above
   1948      *    in the following aspect:  the query component is allowed to be
   1949      *    defined while still treating this as a reference to the current
   1950      *    document.
   1951      */
   1952     res = xmlCreateURI();
   1953     if (res == NULL)
   1954 	goto done;
   1955     if ((ref->scheme == NULL) && (ref->path == NULL) &&
   1956 	((ref->authority == NULL) && (ref->server == NULL))) {
   1957 	if (bas->scheme != NULL)
   1958 	    res->scheme = xmlMemStrdup(bas->scheme);
   1959 	if (bas->authority != NULL)
   1960 	    res->authority = xmlMemStrdup(bas->authority);
   1961 	else if (bas->server != NULL) {
   1962 	    res->server = xmlMemStrdup(bas->server);
   1963 	    if (bas->user != NULL)
   1964 		res->user = xmlMemStrdup(bas->user);
   1965 	    res->port = bas->port;
   1966 	}
   1967 	if (bas->path != NULL)
   1968 	    res->path = xmlMemStrdup(bas->path);
   1969 	if (ref->query_raw != NULL)
   1970 	    res->query_raw = xmlMemStrdup (ref->query_raw);
   1971 	else if (ref->query != NULL)
   1972 	    res->query = xmlMemStrdup(ref->query);
   1973 	else if (bas->query_raw != NULL)
   1974 	    res->query_raw = xmlMemStrdup(bas->query_raw);
   1975 	else if (bas->query != NULL)
   1976 	    res->query = xmlMemStrdup(bas->query);
   1977 	if (ref->fragment != NULL)
   1978 	    res->fragment = xmlMemStrdup(ref->fragment);
   1979 	goto step_7;
   1980     }
   1981 
   1982     /*
   1983      * 3) If the scheme component is defined, indicating that the reference
   1984      *    starts with a scheme name, then the reference is interpreted as an
   1985      *    absolute URI and we are done.  Otherwise, the reference URI's
   1986      *    scheme is inherited from the base URI's scheme component.
   1987      */
   1988     if (ref->scheme != NULL) {
   1989 	val = xmlSaveUri(ref);
   1990 	goto done;
   1991     }
   1992     if (bas->scheme != NULL)
   1993 	res->scheme = xmlMemStrdup(bas->scheme);
   1994 
   1995     if (ref->query_raw != NULL)
   1996 	res->query_raw = xmlMemStrdup(ref->query_raw);
   1997     else if (ref->query != NULL)
   1998 	res->query = xmlMemStrdup(ref->query);
   1999     if (ref->fragment != NULL)
   2000 	res->fragment = xmlMemStrdup(ref->fragment);
   2001 
   2002     /*
   2003      * 4) If the authority component is defined, then the reference is a
   2004      *    network-path and we skip to step 7.  Otherwise, the reference
   2005      *    URI's authority is inherited from the base URI's authority
   2006      *    component, which will also be undefined if the URI scheme does not
   2007      *    use an authority component.
   2008      */
   2009     if ((ref->authority != NULL) || (ref->server != NULL)) {
   2010 	if (ref->authority != NULL)
   2011 	    res->authority = xmlMemStrdup(ref->authority);
   2012 	else {
   2013 	    res->server = xmlMemStrdup(ref->server);
   2014 	    if (ref->user != NULL)
   2015 		res->user = xmlMemStrdup(ref->user);
   2016             res->port = ref->port;
   2017 	}
   2018 	if (ref->path != NULL)
   2019 	    res->path = xmlMemStrdup(ref->path);
   2020 	goto step_7;
   2021     }
   2022     if (bas->authority != NULL)
   2023 	res->authority = xmlMemStrdup(bas->authority);
   2024     else if (bas->server != NULL) {
   2025 	res->server = xmlMemStrdup(bas->server);
   2026 	if (bas->user != NULL)
   2027 	    res->user = xmlMemStrdup(bas->user);
   2028 	res->port = bas->port;
   2029     }
   2030 
   2031     /*
   2032      * 5) If the path component begins with a slash character ("/"), then
   2033      *    the reference is an absolute-path and we skip to step 7.
   2034      */
   2035     if ((ref->path != NULL) && (ref->path[0] == '/')) {
   2036 	res->path = xmlMemStrdup(ref->path);
   2037 	goto step_7;
   2038     }
   2039 
   2040 
   2041     /*
   2042      * 6) If this step is reached, then we are resolving a relative-path
   2043      *    reference.  The relative path needs to be merged with the base
   2044      *    URI's path.  Although there are many ways to do this, we will
   2045      *    describe a simple method using a separate string buffer.
   2046      *
   2047      * Allocate a buffer large enough for the result string.
   2048      */
   2049     len = 2; /* extra / and 0 */
   2050     if (ref->path != NULL)
   2051 	len += strlen(ref->path);
   2052     if (bas->path != NULL)
   2053 	len += strlen(bas->path);
   2054     res->path = (char *) xmlMallocAtomic(len);
   2055     if (res->path == NULL) {
   2056         xmlURIErrMemory("resolving URI against base\n");
   2057 	goto done;
   2058     }
   2059     res->path[0] = 0;
   2060 
   2061     /*
   2062      * a) All but the last segment of the base URI's path component is
   2063      *    copied to the buffer.  In other words, any characters after the
   2064      *    last (right-most) slash character, if any, are excluded.
   2065      */
   2066     cur = 0;
   2067     out = 0;
   2068     if (bas->path != NULL) {
   2069 	while (bas->path[cur] != 0) {
   2070 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
   2071 		cur++;
   2072 	    if (bas->path[cur] == 0)
   2073 		break;
   2074 
   2075 	    cur++;
   2076 	    while (out < cur) {
   2077 		res->path[out] = bas->path[out];
   2078 		out++;
   2079 	    }
   2080 	}
   2081     }
   2082     res->path[out] = 0;
   2083 
   2084     /*
   2085      * b) The reference's path component is appended to the buffer
   2086      *    string.
   2087      */
   2088     if (ref->path != NULL && ref->path[0] != 0) {
   2089 	indx = 0;
   2090 	/*
   2091 	 * Ensure the path includes a '/'
   2092 	 */
   2093 	if ((out == 0) && (bas->server != NULL))
   2094 	    res->path[out++] = '/';
   2095 	while (ref->path[indx] != 0) {
   2096 	    res->path[out++] = ref->path[indx++];
   2097 	}
   2098     }
   2099     res->path[out] = 0;
   2100 
   2101     /*
   2102      * Steps c) to h) are really path normalization steps
   2103      */
   2104     xmlNormalizeURIPath(res->path);
   2105 
   2106 step_7:
   2107 
   2108     /*
   2109      * 7) The resulting URI components, including any inherited from the
   2110      *    base URI, are recombined to give the absolute form of the URI
   2111      *    reference.
   2112      */
   2113     val = xmlSaveUri(res);
   2114 
   2115 done:
   2116     if (ref != NULL)
   2117 	xmlFreeURI(ref);
   2118     if (bas != NULL)
   2119 	xmlFreeURI(bas);
   2120     if (res != NULL)
   2121 	xmlFreeURI(res);
   2122     return(val);
   2123 }
   2124 
   2125 /**
   2126  * xmlBuildRelativeURI:
   2127  * @URI:  the URI reference under consideration
   2128  * @base:  the base value
   2129  *
   2130  * Expresses the URI of the reference in terms relative to the
   2131  * base.  Some examples of this operation include:
   2132  *     base = "http://site1.com/docs/book1.html"
   2133  *        URI input                        URI returned
   2134  *     docs/pic1.gif                    pic1.gif
   2135  *     docs/img/pic1.gif                img/pic1.gif
   2136  *     img/pic1.gif                     ../img/pic1.gif
   2137  *     http://site1.com/docs/pic1.gif   pic1.gif
   2138  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
   2139  *
   2140  *     base = "docs/book1.html"
   2141  *        URI input                        URI returned
   2142  *     docs/pic1.gif                    pic1.gif
   2143  *     docs/img/pic1.gif                img/pic1.gif
   2144  *     img/pic1.gif                     ../img/pic1.gif
   2145  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
   2146  *
   2147  *
   2148  * Note: if the URI reference is really wierd or complicated, it may be
   2149  *       worthwhile to first convert it into a "nice" one by calling
   2150  *       xmlBuildURI (using 'base') before calling this routine,
   2151  *       since this routine (for reasonable efficiency) assumes URI has
   2152  *       already been through some validation.
   2153  *
   2154  * Returns a new URI string (to be freed by the caller) or NULL in case
   2155  * error.
   2156  */
   2157 xmlChar *
   2158 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
   2159 {
   2160     xmlChar *val = NULL;
   2161     int ret;
   2162     int ix;
   2163     int pos = 0;
   2164     int nbslash = 0;
   2165     int len;
   2166     xmlURIPtr ref = NULL;
   2167     xmlURIPtr bas = NULL;
   2168     xmlChar *bptr, *uptr, *vptr;
   2169     int remove_path = 0;
   2170 
   2171     if ((URI == NULL) || (*URI == 0))
   2172 	return NULL;
   2173 
   2174     /*
   2175      * First parse URI into a standard form
   2176      */
   2177     ref = xmlCreateURI ();
   2178     if (ref == NULL)
   2179 	return NULL;
   2180     /* If URI not already in "relative" form */
   2181     if (URI[0] != '.') {
   2182 	ret = xmlParseURIReference (ref, (const char *) URI);
   2183 	if (ret != 0)
   2184 	    goto done;		/* Error in URI, return NULL */
   2185     } else
   2186 	ref->path = (char *)xmlStrdup(URI);
   2187 
   2188     /*
   2189      * Next parse base into the same standard form
   2190      */
   2191     if ((base == NULL) || (*base == 0)) {
   2192 	val = xmlStrdup (URI);
   2193 	goto done;
   2194     }
   2195     bas = xmlCreateURI ();
   2196     if (bas == NULL)
   2197 	goto done;
   2198     if (base[0] != '.') {
   2199 	ret = xmlParseURIReference (bas, (const char *) base);
   2200 	if (ret != 0)
   2201 	    goto done;		/* Error in base, return NULL */
   2202     } else
   2203 	bas->path = (char *)xmlStrdup(base);
   2204 
   2205     /*
   2206      * If the scheme / server on the URI differs from the base,
   2207      * just return the URI
   2208      */
   2209     if ((ref->scheme != NULL) &&
   2210 	((bas->scheme == NULL) ||
   2211 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
   2212 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
   2213 	val = xmlStrdup (URI);
   2214 	goto done;
   2215     }
   2216     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
   2217 	val = xmlStrdup(BAD_CAST "");
   2218 	goto done;
   2219     }
   2220     if (bas->path == NULL) {
   2221 	val = xmlStrdup((xmlChar *)ref->path);
   2222 	goto done;
   2223     }
   2224     if (ref->path == NULL) {
   2225         ref->path = (char *) "/";
   2226 	remove_path = 1;
   2227     }
   2228 
   2229     /*
   2230      * At this point (at last!) we can compare the two paths
   2231      *
   2232      * First we take care of the special case where either of the
   2233      * two path components may be missing (bug 316224)
   2234      */
   2235     if (bas->path == NULL) {
   2236 	if (ref->path != NULL) {
   2237 	    uptr = (xmlChar *) ref->path;
   2238 	    if (*uptr == '/')
   2239 		uptr++;
   2240 	    /* exception characters from xmlSaveUri */
   2241 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
   2242 	}
   2243 	goto done;
   2244     }
   2245     bptr = (xmlChar *)bas->path;
   2246     if (ref->path == NULL) {
   2247 	for (ix = 0; bptr[ix] != 0; ix++) {
   2248 	    if (bptr[ix] == '/')
   2249 		nbslash++;
   2250 	}
   2251 	uptr = NULL;
   2252 	len = 1;	/* this is for a string terminator only */
   2253     } else {
   2254     /*
   2255      * Next we compare the two strings and find where they first differ
   2256      */
   2257 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
   2258             pos += 2;
   2259 	if ((*bptr == '.') && (bptr[1] == '/'))
   2260             bptr += 2;
   2261 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
   2262 	    bptr++;
   2263 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
   2264 	    pos++;
   2265 
   2266 	if (bptr[pos] == ref->path[pos]) {
   2267 	    val = xmlStrdup(BAD_CAST "");
   2268 	    goto done;		/* (I can't imagine why anyone would do this) */
   2269 	}
   2270 
   2271 	/*
   2272 	 * In URI, "back up" to the last '/' encountered.  This will be the
   2273 	 * beginning of the "unique" suffix of URI
   2274 	 */
   2275 	ix = pos;
   2276 	if ((ref->path[ix] == '/') && (ix > 0))
   2277 	    ix--;
   2278 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
   2279 	    ix -= 2;
   2280 	for (; ix > 0; ix--) {
   2281 	    if (ref->path[ix] == '/')
   2282 		break;
   2283 	}
   2284 	if (ix == 0) {
   2285 	    uptr = (xmlChar *)ref->path;
   2286 	} else {
   2287 	    ix++;
   2288 	    uptr = (xmlChar *)&ref->path[ix];
   2289 	}
   2290 
   2291 	/*
   2292 	 * In base, count the number of '/' from the differing point
   2293 	 */
   2294 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
   2295 	    for (; bptr[ix] != 0; ix++) {
   2296 		if (bptr[ix] == '/')
   2297 		    nbslash++;
   2298 	    }
   2299 	}
   2300 	len = xmlStrlen (uptr) + 1;
   2301     }
   2302 
   2303     if (nbslash == 0) {
   2304 	if (uptr != NULL)
   2305 	    /* exception characters from xmlSaveUri */
   2306 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
   2307 	goto done;
   2308     }
   2309 
   2310     /*
   2311      * Allocate just enough space for the returned string -
   2312      * length of the remainder of the URI, plus enough space
   2313      * for the "../" groups, plus one for the terminator
   2314      */
   2315     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
   2316     if (val == NULL) {
   2317         xmlURIErrMemory("building relative URI\n");
   2318 	goto done;
   2319     }
   2320     vptr = val;
   2321     /*
   2322      * Put in as many "../" as needed
   2323      */
   2324     for (; nbslash>0; nbslash--) {
   2325 	*vptr++ = '.';
   2326 	*vptr++ = '.';
   2327 	*vptr++ = '/';
   2328     }
   2329     /*
   2330      * Finish up with the end of the URI
   2331      */
   2332     if (uptr != NULL) {
   2333         if ((vptr > val) && (len > 0) &&
   2334 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
   2335 	    memcpy (vptr, uptr + 1, len - 1);
   2336 	    vptr[len - 2] = 0;
   2337 	} else {
   2338 	    memcpy (vptr, uptr, len);
   2339 	    vptr[len - 1] = 0;
   2340 	}
   2341     } else {
   2342 	vptr[len - 1] = 0;
   2343     }
   2344 
   2345     /* escape the freshly-built path */
   2346     vptr = val;
   2347 	/* exception characters from xmlSaveUri */
   2348     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
   2349     xmlFree(vptr);
   2350 
   2351 done:
   2352     /*
   2353      * Free the working variables
   2354      */
   2355     if (remove_path != 0)
   2356         ref->path = NULL;
   2357     if (ref != NULL)
   2358 	xmlFreeURI (ref);
   2359     if (bas != NULL)
   2360 	xmlFreeURI (bas);
   2361 
   2362     return val;
   2363 }
   2364 
   2365 /**
   2366  * xmlCanonicPath:
   2367  * @path:  the resource locator in a filesystem notation
   2368  *
   2369  * Constructs a canonic path from the specified path.
   2370  *
   2371  * Returns a new canonic path, or a duplicate of the path parameter if the
   2372  * construction fails. The caller is responsible for freeing the memory occupied
   2373  * by the returned string. If there is insufficient memory available, or the
   2374  * argument is NULL, the function returns NULL.
   2375  */
   2376 #define IS_WINDOWS_PATH(p)					\
   2377 	((p != NULL) &&						\
   2378 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
   2379 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
   2380 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
   2381 xmlChar *
   2382 xmlCanonicPath(const xmlChar *path)
   2383 {
   2384 /*
   2385  * For Windows implementations, additional work needs to be done to
   2386  * replace backslashes in pathnames with "forward slashes"
   2387  */
   2388 #if defined(_WIN32) && !defined(__CYGWIN__)
   2389     int len = 0;
   2390     int i = 0;
   2391     xmlChar *p = NULL;
   2392 #endif
   2393     xmlURIPtr uri;
   2394     xmlChar *ret;
   2395     const xmlChar *absuri;
   2396 
   2397     if (path == NULL)
   2398 	return(NULL);
   2399 
   2400 #if defined(_WIN32)
   2401     /*
   2402      * We must not change the backslashes to slashes if the the path
   2403      * starts with \\?\
   2404      * Those paths can be up to 32k characters long.
   2405      * Was added specifically for OpenOffice, those paths can't be converted
   2406      * to URIs anyway.
   2407      */
   2408     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
   2409         (path[3] == '\\') )
   2410 	return xmlStrdup((const xmlChar *) path);
   2411 #endif
   2412 
   2413 	/* sanitize filename starting with // so it can be used as URI */
   2414     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
   2415         path++;
   2416 
   2417     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2418 	xmlFreeURI(uri);
   2419 	return xmlStrdup(path);
   2420     }
   2421 
   2422     /* Check if this is an "absolute uri" */
   2423     absuri = xmlStrstr(path, BAD_CAST "://");
   2424     if (absuri != NULL) {
   2425         int l, j;
   2426 	unsigned char c;
   2427 	xmlChar *escURI;
   2428 
   2429         /*
   2430 	 * this looks like an URI where some parts have not been
   2431 	 * escaped leading to a parsing problem.  Check that the first
   2432 	 * part matches a protocol.
   2433 	 */
   2434 	l = absuri - path;
   2435 	/* Bypass if first part (part before the '://') is > 20 chars */
   2436 	if ((l <= 0) || (l > 20))
   2437 	    goto path_processing;
   2438 	/* Bypass if any non-alpha characters are present in first part */
   2439 	for (j = 0;j < l;j++) {
   2440 	    c = path[j];
   2441 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
   2442 	        goto path_processing;
   2443 	}
   2444 
   2445 	/* Escape all except the characters specified in the supplied path */
   2446         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
   2447 	if (escURI != NULL) {
   2448 	    /* Try parsing the escaped path */
   2449 	    uri = xmlParseURI((const char *) escURI);
   2450 	    /* If successful, return the escaped string */
   2451 	    if (uri != NULL) {
   2452 	        xmlFreeURI(uri);
   2453 		return escURI;
   2454 	    }
   2455 	}
   2456     }
   2457 
   2458 path_processing:
   2459 /* For Windows implementations, replace backslashes with 'forward slashes' */
   2460 #if defined(_WIN32) && !defined(__CYGWIN__)
   2461     /*
   2462      * Create a URI structure
   2463      */
   2464     uri = xmlCreateURI();
   2465     if (uri == NULL) {		/* Guard against 'out of memory' */
   2466         return(NULL);
   2467     }
   2468 
   2469     len = xmlStrlen(path);
   2470     if ((len > 2) && IS_WINDOWS_PATH(path)) {
   2471         /* make the scheme 'file' */
   2472 	uri->scheme = xmlStrdup(BAD_CAST "file");
   2473 	/* allocate space for leading '/' + path + string terminator */
   2474 	uri->path = xmlMallocAtomic(len + 2);
   2475 	if (uri->path == NULL) {
   2476 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
   2477 	    return(NULL);
   2478 	}
   2479 	/* Put in leading '/' plus path */
   2480 	uri->path[0] = '/';
   2481 	p = uri->path + 1;
   2482 	strncpy(p, path, len + 1);
   2483     } else {
   2484 	uri->path = xmlStrdup(path);
   2485 	if (uri->path == NULL) {
   2486 	    xmlFreeURI(uri);
   2487 	    return(NULL);
   2488 	}
   2489 	p = uri->path;
   2490     }
   2491     /* Now change all occurences of '\' to '/' */
   2492     while (*p != '\0') {
   2493 	if (*p == '\\')
   2494 	    *p = '/';
   2495 	p++;
   2496     }
   2497 
   2498     if (uri->scheme == NULL) {
   2499 	ret = xmlStrdup((const xmlChar *) uri->path);
   2500     } else {
   2501 	ret = xmlSaveUri(uri);
   2502     }
   2503 
   2504     xmlFreeURI(uri);
   2505 #else
   2506     ret = xmlStrdup((const xmlChar *) path);
   2507 #endif
   2508     return(ret);
   2509 }
   2510 
   2511 /**
   2512  * xmlPathToURI:
   2513  * @path:  the resource locator in a filesystem notation
   2514  *
   2515  * Constructs an URI expressing the existing path
   2516  *
   2517  * Returns a new URI, or a duplicate of the path parameter if the
   2518  * construction fails. The caller is responsible for freeing the memory
   2519  * occupied by the returned string. If there is insufficient memory available,
   2520  * or the argument is NULL, the function returns NULL.
   2521  */
   2522 xmlChar *
   2523 xmlPathToURI(const xmlChar *path)
   2524 {
   2525     xmlURIPtr uri;
   2526     xmlURI temp;
   2527     xmlChar *ret, *cal;
   2528 
   2529     if (path == NULL)
   2530         return(NULL);
   2531 
   2532     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2533 	xmlFreeURI(uri);
   2534 	return xmlStrdup(path);
   2535     }
   2536     cal = xmlCanonicPath(path);
   2537     if (cal == NULL)
   2538         return(NULL);
   2539 #if defined(_WIN32) && !defined(__CYGWIN__)
   2540     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
   2541        If 'cal' is a valid URI allready then we are done here, as continuing would make
   2542        it invalid. */
   2543     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
   2544 	xmlFreeURI(uri);
   2545 	return cal;
   2546     }
   2547     /* 'cal' can contain a relative path with backslashes. If that is processed
   2548        by xmlSaveURI, they will be escaped and the external entity loader machinery
   2549        will fail. So convert them to slashes. Misuse 'ret' for walking. */
   2550     ret = cal;
   2551     while (*ret != '\0') {
   2552 	if (*ret == '\\')
   2553 	    *ret = '/';
   2554 	ret++;
   2555     }
   2556 #endif
   2557     memset(&temp, 0, sizeof(temp));
   2558     temp.path = (char *) cal;
   2559     ret = xmlSaveUri(&temp);
   2560     xmlFree(cal);
   2561     return(ret);
   2562 }
   2563 #define bottom_uri
   2564 #include "elfgcchack.h"
   2565