Home | History | Annotate | Download | only in libxml2
      1 /**
      2  * uri.c: set of generic URI related routines
      3  *
      4  * Reference: RFCs 3986, 2732 and 2373
      5  *
      6  * See Copyright for the status of this software.
      7  *
      8  * daniel (at) veillard.com
      9  */
     10 
     11 #define IN_LIBXML
     12 #include "libxml.h"
     13 
     14 #include <string.h>
     15 #include <limits.h>
     16 
     17 #include <libxml/xmlmemory.h>
     18 #include <libxml/uri.h>
     19 #include <libxml/globals.h>
     20 #include <libxml/xmlerror.h>
     21 
     22 /**
     23  * MAX_URI_LENGTH:
     24  *
     25  * The definition of the URI regexp in the above RFC has no size limit
     26  * In practice they are usually relativey short except for the
     27  * data URI scheme as defined in RFC 2397. Even for data URI the usual
     28  * maximum size before hitting random practical limits is around 64 KB
     29  * and 4KB is usually a maximum admitted limit for proper operations.
     30  * The value below is more a security limit than anything else and
     31  * really should never be hit by 'normal' operations
     32  * Set to 1 MByte in 2012, this is only enforced on output
     33  */
     34 #define MAX_URI_LENGTH 1024 * 1024
     35 
     36 static void
     37 xmlURIErrMemory(const char *extra)
     38 {
     39     if (extra)
     40         __xmlRaiseError(NULL, NULL, NULL,
     41                         NULL, NULL, XML_FROM_URI,
     42                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     43                         extra, NULL, NULL, 0, 0,
     44                         "Memory allocation failed : %s\n", extra);
     45     else
     46         __xmlRaiseError(NULL, NULL, NULL,
     47                         NULL, NULL, XML_FROM_URI,
     48                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
     49                         NULL, NULL, NULL, 0, 0,
     50                         "Memory allocation failed\n");
     51 }
     52 
     53 static void xmlCleanURI(xmlURIPtr uri);
     54 
     55 /*
     56  * Old rule from 2396 used in legacy handling code
     57  * alpha    = lowalpha | upalpha
     58  */
     59 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
     60 
     61 
     62 /*
     63  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
     64  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
     65  *            "u" | "v" | "w" | "x" | "y" | "z"
     66  */
     67 
     68 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
     69 
     70 /*
     71  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
     72  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
     73  *           "U" | "V" | "W" | "X" | "Y" | "Z"
     74  */
     75 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
     76 
     77 #ifdef IS_DIGIT
     78 #undef IS_DIGIT
     79 #endif
     80 /*
     81  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
     82  */
     83 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
     84 
     85 /*
     86  * alphanum = alpha | digit
     87  */
     88 
     89 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
     90 
     91 /*
     92  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
     93  */
     94 
     95 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
     96     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
     97     ((x) == '(') || ((x) == ')'))
     98 
     99 /*
    100  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
    101  */
    102 
    103 #define IS_UNWISE(p)                                                    \
    104       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
    105        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
    106        ((*(p) == ']')) || ((*(p) == '`')))
    107 /*
    108  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
    109  *            "[" | "]"
    110  */
    111 
    112 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
    113         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
    114         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
    115         ((x) == ']'))
    116 
    117 /*
    118  * unreserved = alphanum | mark
    119  */
    120 
    121 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
    122 
    123 /*
    124  * Skip to next pointer char, handle escaped sequences
    125  */
    126 
    127 #define NEXT(p) ((*p == '%')? p += 3 : p++)
    128 
    129 /*
    130  * Productions from the spec.
    131  *
    132  *    authority     = server | reg_name
    133  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
    134  *                        ";" | ":" | "@" | "&" | "=" | "+" )
    135  *
    136  * path          = [ abs_path | opaque_part ]
    137  */
    138 
    139 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
    140 
    141 /************************************************************************
    142  *									*
    143  *                         RFC 3986 parser				*
    144  *									*
    145  ************************************************************************/
    146 
    147 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
    148 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
    149                       ((*(p) >= 'A') && (*(p) <= 'Z')))
    150 #define ISA_HEXDIG(p)							\
    151        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
    152         ((*(p) >= 'A') && (*(p) <= 'F')))
    153 
    154 /*
    155  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
    156  *                     / "*" / "+" / "," / ";" / "="
    157  */
    158 #define ISA_SUB_DELIM(p)						\
    159       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
    160        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
    161        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
    162        ((*(p) == '=')) || ((*(p) == '\'')))
    163 
    164 /*
    165  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    166  */
    167 #define ISA_GEN_DELIM(p)						\
    168       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
    169        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
    170        ((*(p) == '@')))
    171 
    172 /*
    173  *    reserved      = gen-delims / sub-delims
    174  */
    175 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
    176 
    177 /*
    178  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
    179  */
    180 #define ISA_UNRESERVED(p)						\
    181       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
    182        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
    183 
    184 /*
    185  *    pct-encoded   = "%" HEXDIG HEXDIG
    186  */
    187 #define ISA_PCT_ENCODED(p)						\
    188      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
    189 
    190 /*
    191  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
    192  */
    193 #define ISA_PCHAR(p)							\
    194      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
    195       ((*(p) == ':')) || ((*(p) == '@')))
    196 
    197 /**
    198  * xmlParse3986Scheme:
    199  * @uri:  pointer to an URI structure
    200  * @str:  pointer to the string to analyze
    201  *
    202  * Parse an URI scheme
    203  *
    204  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    205  *
    206  * Returns 0 or the error code
    207  */
    208 static int
    209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
    210     const char *cur;
    211 
    212     if (str == NULL)
    213 	return(-1);
    214 
    215     cur = *str;
    216     if (!ISA_ALPHA(cur))
    217 	return(2);
    218     cur++;
    219     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
    220            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
    221     if (uri != NULL) {
    222 	if (uri->scheme != NULL) xmlFree(uri->scheme);
    223 	uri->scheme = STRNDUP(*str, cur - *str);
    224     }
    225     *str = cur;
    226     return(0);
    227 }
    228 
    229 /**
    230  * xmlParse3986Fragment:
    231  * @uri:  pointer to an URI structure
    232  * @str:  pointer to the string to analyze
    233  *
    234  * Parse the query part of an URI
    235  *
    236  * fragment      = *( pchar / "/" / "?" )
    237  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
    238  *       in the fragment identifier but this is used very broadly for
    239  *       xpointer scheme selection, so we are allowing it here to not break
    240  *       for example all the DocBook processing chains.
    241  *
    242  * Returns 0 or the error code
    243  */
    244 static int
    245 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
    246 {
    247     const char *cur;
    248 
    249     if (str == NULL)
    250         return (-1);
    251 
    252     cur = *str;
    253 
    254     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    255            (*cur == '[') || (*cur == ']') ||
    256            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    257         NEXT(cur);
    258     if (uri != NULL) {
    259         if (uri->fragment != NULL)
    260             xmlFree(uri->fragment);
    261 	if (uri->cleanup & 2)
    262 	    uri->fragment = STRNDUP(*str, cur - *str);
    263 	else
    264 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
    265     }
    266     *str = cur;
    267     return (0);
    268 }
    269 
    270 /**
    271  * xmlParse3986Query:
    272  * @uri:  pointer to an URI structure
    273  * @str:  pointer to the string to analyze
    274  *
    275  * Parse the query part of an URI
    276  *
    277  * query = *uric
    278  *
    279  * Returns 0 or the error code
    280  */
    281 static int
    282 xmlParse3986Query(xmlURIPtr uri, const char **str)
    283 {
    284     const char *cur;
    285 
    286     if (str == NULL)
    287         return (-1);
    288 
    289     cur = *str;
    290 
    291     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
    292            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
    293         NEXT(cur);
    294     if (uri != NULL) {
    295         if (uri->query != NULL)
    296             xmlFree(uri->query);
    297 	if (uri->cleanup & 2)
    298 	    uri->query = STRNDUP(*str, cur - *str);
    299 	else
    300 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
    301 
    302 	/* Save the raw bytes of the query as well.
    303 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
    304 	 */
    305 	if (uri->query_raw != NULL)
    306 	    xmlFree (uri->query_raw);
    307 	uri->query_raw = STRNDUP (*str, cur - *str);
    308     }
    309     *str = cur;
    310     return (0);
    311 }
    312 
    313 /**
    314  * xmlParse3986Port:
    315  * @uri:  pointer to an URI structure
    316  * @str:  the string to analyze
    317  *
    318  * Parse a port part and fills in the appropriate fields
    319  * of the @uri structure
    320  *
    321  * port          = *DIGIT
    322  *
    323  * Returns 0 or the error code
    324  */
    325 static int
    326 xmlParse3986Port(xmlURIPtr uri, const char **str)
    327 {
    328     const char *cur = *str;
    329     unsigned port = 0; /* unsigned for defined overflow behavior */
    330 
    331     if (ISA_DIGIT(cur)) {
    332 	while (ISA_DIGIT(cur)) {
    333 	    port = port * 10 + (*cur - '0');
    334 
    335 	    cur++;
    336 	}
    337 	if (uri != NULL)
    338 	    uri->port = port & USHRT_MAX; /* port value modulo INT_MAX+1 */
    339 	*str = cur;
    340 	return(0);
    341     }
    342     return(1);
    343 }
    344 
    345 /**
    346  * xmlParse3986Userinfo:
    347  * @uri:  pointer to an URI structure
    348  * @str:  the string to analyze
    349  *
    350  * Parse an user informations part and fills in the appropriate fields
    351  * of the @uri structure
    352  *
    353  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
    354  *
    355  * Returns 0 or the error code
    356  */
    357 static int
    358 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
    359 {
    360     const char *cur;
    361 
    362     cur = *str;
    363     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
    364            ISA_SUB_DELIM(cur) || (*cur == ':'))
    365 	NEXT(cur);
    366     if (*cur == '@') {
    367 	if (uri != NULL) {
    368 	    if (uri->user != NULL) xmlFree(uri->user);
    369 	    if (uri->cleanup & 2)
    370 		uri->user = STRNDUP(*str, cur - *str);
    371 	    else
    372 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
    373 	}
    374 	*str = cur;
    375 	return(0);
    376     }
    377     return(1);
    378 }
    379 
    380 /**
    381  * xmlParse3986DecOctet:
    382  * @str:  the string to analyze
    383  *
    384  *    dec-octet     = DIGIT                 ; 0-9
    385  *                  / %x31-39 DIGIT         ; 10-99
    386  *                  / "1" 2DIGIT            ; 100-199
    387  *                  / "2" %x30-34 DIGIT     ; 200-249
    388  *                  / "25" %x30-35          ; 250-255
    389  *
    390  * Skip a dec-octet.
    391  *
    392  * Returns 0 if found and skipped, 1 otherwise
    393  */
    394 static int
    395 xmlParse3986DecOctet(const char **str) {
    396     const char *cur = *str;
    397 
    398     if (!(ISA_DIGIT(cur)))
    399         return(1);
    400     if (!ISA_DIGIT(cur+1))
    401 	cur++;
    402     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
    403 	cur += 2;
    404     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
    405 	cur += 3;
    406     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
    407 	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
    408 	cur += 3;
    409     else if ((*cur == '2') && (*(cur + 1) == '5') &&
    410 	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
    411 	cur += 3;
    412     else
    413         return(1);
    414     *str = cur;
    415     return(0);
    416 }
    417 /**
    418  * xmlParse3986Host:
    419  * @uri:  pointer to an URI structure
    420  * @str:  the string to analyze
    421  *
    422  * Parse an host part and fills in the appropriate fields
    423  * of the @uri structure
    424  *
    425  * host          = IP-literal / IPv4address / reg-name
    426  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
    427  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    428  * reg-name      = *( unreserved / pct-encoded / sub-delims )
    429  *
    430  * Returns 0 or the error code
    431  */
    432 static int
    433 xmlParse3986Host(xmlURIPtr uri, const char **str)
    434 {
    435     const char *cur = *str;
    436     const char *host;
    437 
    438     host = cur;
    439     /*
    440      * IPv6 and future adressing scheme are enclosed between brackets
    441      */
    442     if (*cur == '[') {
    443         cur++;
    444 	while ((*cur != ']') && (*cur != 0))
    445 	    cur++;
    446 	if (*cur != ']')
    447 	    return(1);
    448 	cur++;
    449 	goto found;
    450     }
    451     /*
    452      * try to parse an IPv4
    453      */
    454     if (ISA_DIGIT(cur)) {
    455         if (xmlParse3986DecOctet(&cur) != 0)
    456 	    goto not_ipv4;
    457 	if (*cur != '.')
    458 	    goto not_ipv4;
    459 	cur++;
    460         if (xmlParse3986DecOctet(&cur) != 0)
    461 	    goto not_ipv4;
    462 	if (*cur != '.')
    463 	    goto not_ipv4;
    464         if (xmlParse3986DecOctet(&cur) != 0)
    465 	    goto not_ipv4;
    466 	if (*cur != '.')
    467 	    goto not_ipv4;
    468         if (xmlParse3986DecOctet(&cur) != 0)
    469 	    goto not_ipv4;
    470 	goto found;
    471 not_ipv4:
    472         cur = *str;
    473     }
    474     /*
    475      * then this should be a hostname which can be empty
    476      */
    477     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
    478         NEXT(cur);
    479 found:
    480     if (uri != NULL) {
    481 	if (uri->authority != NULL) xmlFree(uri->authority);
    482 	uri->authority = NULL;
    483 	if (uri->server != NULL) xmlFree(uri->server);
    484 	if (cur != host) {
    485 	    if (uri->cleanup & 2)
    486 		uri->server = STRNDUP(host, cur - host);
    487 	    else
    488 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
    489 	} else
    490 	    uri->server = NULL;
    491     }
    492     *str = cur;
    493     return(0);
    494 }
    495 
    496 /**
    497  * xmlParse3986Authority:
    498  * @uri:  pointer to an URI structure
    499  * @str:  the string to analyze
    500  *
    501  * Parse an authority part and fills in the appropriate fields
    502  * of the @uri structure
    503  *
    504  * authority     = [ userinfo "@" ] host [ ":" port ]
    505  *
    506  * Returns 0 or the error code
    507  */
    508 static int
    509 xmlParse3986Authority(xmlURIPtr uri, const char **str)
    510 {
    511     const char *cur;
    512     int ret;
    513 
    514     cur = *str;
    515     /*
    516      * try to parse an userinfo and check for the trailing @
    517      */
    518     ret = xmlParse3986Userinfo(uri, &cur);
    519     if ((ret != 0) || (*cur != '@'))
    520         cur = *str;
    521     else
    522         cur++;
    523     ret = xmlParse3986Host(uri, &cur);
    524     if (ret != 0) return(ret);
    525     if (*cur == ':') {
    526         cur++;
    527         ret = xmlParse3986Port(uri, &cur);
    528 	if (ret != 0) return(ret);
    529     }
    530     *str = cur;
    531     return(0);
    532 }
    533 
    534 /**
    535  * xmlParse3986Segment:
    536  * @str:  the string to analyze
    537  * @forbid: an optional forbidden character
    538  * @empty: allow an empty segment
    539  *
    540  * Parse a segment and fills in the appropriate fields
    541  * of the @uri structure
    542  *
    543  * segment       = *pchar
    544  * segment-nz    = 1*pchar
    545  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
    546  *               ; non-zero-length segment without any colon ":"
    547  *
    548  * Returns 0 or the error code
    549  */
    550 static int
    551 xmlParse3986Segment(const char **str, char forbid, int empty)
    552 {
    553     const char *cur;
    554 
    555     cur = *str;
    556     if (!ISA_PCHAR(cur)) {
    557         if (empty)
    558 	    return(0);
    559 	return(1);
    560     }
    561     while (ISA_PCHAR(cur) && (*cur != forbid))
    562         NEXT(cur);
    563     *str = cur;
    564     return (0);
    565 }
    566 
    567 /**
    568  * xmlParse3986PathAbEmpty:
    569  * @uri:  pointer to an URI structure
    570  * @str:  the string to analyze
    571  *
    572  * Parse an path absolute or empty and fills in the appropriate fields
    573  * of the @uri structure
    574  *
    575  * path-abempty  = *( "/" segment )
    576  *
    577  * Returns 0 or the error code
    578  */
    579 static int
    580 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
    581 {
    582     const char *cur;
    583     int ret;
    584 
    585     cur = *str;
    586 
    587     while (*cur == '/') {
    588         cur++;
    589 	ret = xmlParse3986Segment(&cur, 0, 1);
    590 	if (ret != 0) return(ret);
    591     }
    592     if (uri != NULL) {
    593 	if (uri->path != NULL) xmlFree(uri->path);
    594         if (*str != cur) {
    595             if (uri->cleanup & 2)
    596                 uri->path = STRNDUP(*str, cur - *str);
    597             else
    598                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    599         } else {
    600             uri->path = NULL;
    601         }
    602     }
    603     *str = cur;
    604     return (0);
    605 }
    606 
    607 /**
    608  * xmlParse3986PathAbsolute:
    609  * @uri:  pointer to an URI structure
    610  * @str:  the string to analyze
    611  *
    612  * Parse an path absolute and fills in the appropriate fields
    613  * of the @uri structure
    614  *
    615  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
    616  *
    617  * Returns 0 or the error code
    618  */
    619 static int
    620 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
    621 {
    622     const char *cur;
    623     int ret;
    624 
    625     cur = *str;
    626 
    627     if (*cur != '/')
    628         return(1);
    629     cur++;
    630     ret = xmlParse3986Segment(&cur, 0, 0);
    631     if (ret == 0) {
    632 	while (*cur == '/') {
    633 	    cur++;
    634 	    ret = xmlParse3986Segment(&cur, 0, 1);
    635 	    if (ret != 0) return(ret);
    636 	}
    637     }
    638     if (uri != NULL) {
    639 	if (uri->path != NULL) xmlFree(uri->path);
    640         if (cur != *str) {
    641             if (uri->cleanup & 2)
    642                 uri->path = STRNDUP(*str, cur - *str);
    643             else
    644                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    645         } else {
    646             uri->path = NULL;
    647         }
    648     }
    649     *str = cur;
    650     return (0);
    651 }
    652 
    653 /**
    654  * xmlParse3986PathRootless:
    655  * @uri:  pointer to an URI structure
    656  * @str:  the string to analyze
    657  *
    658  * Parse an path without root and fills in the appropriate fields
    659  * of the @uri structure
    660  *
    661  * path-rootless = segment-nz *( "/" segment )
    662  *
    663  * Returns 0 or the error code
    664  */
    665 static int
    666 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
    667 {
    668     const char *cur;
    669     int ret;
    670 
    671     cur = *str;
    672 
    673     ret = xmlParse3986Segment(&cur, 0, 0);
    674     if (ret != 0) return(ret);
    675     while (*cur == '/') {
    676         cur++;
    677 	ret = xmlParse3986Segment(&cur, 0, 1);
    678 	if (ret != 0) return(ret);
    679     }
    680     if (uri != NULL) {
    681 	if (uri->path != NULL) xmlFree(uri->path);
    682         if (cur != *str) {
    683             if (uri->cleanup & 2)
    684                 uri->path = STRNDUP(*str, cur - *str);
    685             else
    686                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    687         } else {
    688             uri->path = NULL;
    689         }
    690     }
    691     *str = cur;
    692     return (0);
    693 }
    694 
    695 /**
    696  * xmlParse3986PathNoScheme:
    697  * @uri:  pointer to an URI structure
    698  * @str:  the string to analyze
    699  *
    700  * Parse an path which is not a scheme and fills in the appropriate fields
    701  * of the @uri structure
    702  *
    703  * path-noscheme = segment-nz-nc *( "/" segment )
    704  *
    705  * Returns 0 or the error code
    706  */
    707 static int
    708 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
    709 {
    710     const char *cur;
    711     int ret;
    712 
    713     cur = *str;
    714 
    715     ret = xmlParse3986Segment(&cur, ':', 0);
    716     if (ret != 0) return(ret);
    717     while (*cur == '/') {
    718         cur++;
    719 	ret = xmlParse3986Segment(&cur, 0, 1);
    720 	if (ret != 0) return(ret);
    721     }
    722     if (uri != NULL) {
    723 	if (uri->path != NULL) xmlFree(uri->path);
    724         if (cur != *str) {
    725             if (uri->cleanup & 2)
    726                 uri->path = STRNDUP(*str, cur - *str);
    727             else
    728                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
    729         } else {
    730             uri->path = NULL;
    731         }
    732     }
    733     *str = cur;
    734     return (0);
    735 }
    736 
    737 /**
    738  * xmlParse3986HierPart:
    739  * @uri:  pointer to an URI structure
    740  * @str:  the string to analyze
    741  *
    742  * Parse an hierarchical part and fills in the appropriate fields
    743  * of the @uri structure
    744  *
    745  * hier-part     = "//" authority path-abempty
    746  *                / path-absolute
    747  *                / path-rootless
    748  *                / path-empty
    749  *
    750  * Returns 0 or the error code
    751  */
    752 static int
    753 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
    754 {
    755     const char *cur;
    756     int ret;
    757 
    758     cur = *str;
    759 
    760     if ((*cur == '/') && (*(cur + 1) == '/')) {
    761         cur += 2;
    762 	ret = xmlParse3986Authority(uri, &cur);
    763 	if (ret != 0) return(ret);
    764 	if (uri->server == NULL)
    765 	    uri->port = -1;
    766 	ret = xmlParse3986PathAbEmpty(uri, &cur);
    767 	if (ret != 0) return(ret);
    768 	*str = cur;
    769 	return(0);
    770     } else if (*cur == '/') {
    771         ret = xmlParse3986PathAbsolute(uri, &cur);
    772 	if (ret != 0) return(ret);
    773     } else if (ISA_PCHAR(cur)) {
    774         ret = xmlParse3986PathRootless(uri, &cur);
    775 	if (ret != 0) return(ret);
    776     } else {
    777 	/* path-empty is effectively empty */
    778 	if (uri != NULL) {
    779 	    if (uri->path != NULL) xmlFree(uri->path);
    780 	    uri->path = NULL;
    781 	}
    782     }
    783     *str = cur;
    784     return (0);
    785 }
    786 
    787 /**
    788  * xmlParse3986RelativeRef:
    789  * @uri:  pointer to an URI structure
    790  * @str:  the string to analyze
    791  *
    792  * Parse an URI string and fills in the appropriate fields
    793  * of the @uri structure
    794  *
    795  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
    796  * relative-part = "//" authority path-abempty
    797  *               / path-absolute
    798  *               / path-noscheme
    799  *               / path-empty
    800  *
    801  * Returns 0 or the error code
    802  */
    803 static int
    804 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
    805     int ret;
    806 
    807     if ((*str == '/') && (*(str + 1) == '/')) {
    808         str += 2;
    809 	ret = xmlParse3986Authority(uri, &str);
    810 	if (ret != 0) return(ret);
    811 	ret = xmlParse3986PathAbEmpty(uri, &str);
    812 	if (ret != 0) return(ret);
    813     } else if (*str == '/') {
    814 	ret = xmlParse3986PathAbsolute(uri, &str);
    815 	if (ret != 0) return(ret);
    816     } else if (ISA_PCHAR(str)) {
    817         ret = xmlParse3986PathNoScheme(uri, &str);
    818 	if (ret != 0) return(ret);
    819     } else {
    820 	/* path-empty is effectively empty */
    821 	if (uri != NULL) {
    822 	    if (uri->path != NULL) xmlFree(uri->path);
    823 	    uri->path = NULL;
    824 	}
    825     }
    826 
    827     if (*str == '?') {
    828 	str++;
    829 	ret = xmlParse3986Query(uri, &str);
    830 	if (ret != 0) return(ret);
    831     }
    832     if (*str == '#') {
    833 	str++;
    834 	ret = xmlParse3986Fragment(uri, &str);
    835 	if (ret != 0) return(ret);
    836     }
    837     if (*str != 0) {
    838 	xmlCleanURI(uri);
    839 	return(1);
    840     }
    841     return(0);
    842 }
    843 
    844 
    845 /**
    846  * xmlParse3986URI:
    847  * @uri:  pointer to an URI structure
    848  * @str:  the string to analyze
    849  *
    850  * Parse an URI string and fills in the appropriate fields
    851  * of the @uri structure
    852  *
    853  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
    854  *
    855  * Returns 0 or the error code
    856  */
    857 static int
    858 xmlParse3986URI(xmlURIPtr uri, const char *str) {
    859     int ret;
    860 
    861     ret = xmlParse3986Scheme(uri, &str);
    862     if (ret != 0) return(ret);
    863     if (*str != ':') {
    864 	return(1);
    865     }
    866     str++;
    867     ret = xmlParse3986HierPart(uri, &str);
    868     if (ret != 0) return(ret);
    869     if (*str == '?') {
    870 	str++;
    871 	ret = xmlParse3986Query(uri, &str);
    872 	if (ret != 0) return(ret);
    873     }
    874     if (*str == '#') {
    875 	str++;
    876 	ret = xmlParse3986Fragment(uri, &str);
    877 	if (ret != 0) return(ret);
    878     }
    879     if (*str != 0) {
    880 	xmlCleanURI(uri);
    881 	return(1);
    882     }
    883     return(0);
    884 }
    885 
    886 /**
    887  * xmlParse3986URIReference:
    888  * @uri:  pointer to an URI structure
    889  * @str:  the string to analyze
    890  *
    891  * Parse an URI reference string and fills in the appropriate fields
    892  * of the @uri structure
    893  *
    894  * URI-reference = URI / relative-ref
    895  *
    896  * Returns 0 or the error code
    897  */
    898 static int
    899 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
    900     int ret;
    901 
    902     if (str == NULL)
    903 	return(-1);
    904     xmlCleanURI(uri);
    905 
    906     /*
    907      * Try first to parse absolute refs, then fallback to relative if
    908      * it fails.
    909      */
    910     ret = xmlParse3986URI(uri, str);
    911     if (ret != 0) {
    912 	xmlCleanURI(uri);
    913         ret = xmlParse3986RelativeRef(uri, str);
    914 	if (ret != 0) {
    915 	    xmlCleanURI(uri);
    916 	    return(ret);
    917 	}
    918     }
    919     return(0);
    920 }
    921 
    922 /**
    923  * xmlParseURI:
    924  * @str:  the URI string to analyze
    925  *
    926  * Parse an URI based on RFC 3986
    927  *
    928  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    929  *
    930  * Returns a newly built xmlURIPtr or NULL in case of error
    931  */
    932 xmlURIPtr
    933 xmlParseURI(const char *str) {
    934     xmlURIPtr uri;
    935     int ret;
    936 
    937     if (str == NULL)
    938 	return(NULL);
    939     uri = xmlCreateURI();
    940     if (uri != NULL) {
    941 	ret = xmlParse3986URIReference(uri, str);
    942         if (ret) {
    943 	    xmlFreeURI(uri);
    944 	    return(NULL);
    945 	}
    946     }
    947     return(uri);
    948 }
    949 
    950 /**
    951  * xmlParseURIReference:
    952  * @uri:  pointer to an URI structure
    953  * @str:  the string to analyze
    954  *
    955  * Parse an URI reference string based on RFC 3986 and fills in the
    956  * appropriate fields of the @uri structure
    957  *
    958  * URI-reference = URI / relative-ref
    959  *
    960  * Returns 0 or the error code
    961  */
    962 int
    963 xmlParseURIReference(xmlURIPtr uri, const char *str) {
    964     return(xmlParse3986URIReference(uri, str));
    965 }
    966 
    967 /**
    968  * xmlParseURIRaw:
    969  * @str:  the URI string to analyze
    970  * @raw:  if 1 unescaping of URI pieces are disabled
    971  *
    972  * Parse an URI but allows to keep intact the original fragments.
    973  *
    974  * URI-reference = URI / relative-ref
    975  *
    976  * Returns a newly built xmlURIPtr or NULL in case of error
    977  */
    978 xmlURIPtr
    979 xmlParseURIRaw(const char *str, int raw) {
    980     xmlURIPtr uri;
    981     int ret;
    982 
    983     if (str == NULL)
    984 	return(NULL);
    985     uri = xmlCreateURI();
    986     if (uri != NULL) {
    987         if (raw) {
    988 	    uri->cleanup |= 2;
    989 	}
    990 	ret = xmlParseURIReference(uri, str);
    991         if (ret) {
    992 	    xmlFreeURI(uri);
    993 	    return(NULL);
    994 	}
    995     }
    996     return(uri);
    997 }
    998 
    999 /************************************************************************
   1000  *									*
   1001  *			Generic URI structure functions			*
   1002  *									*
   1003  ************************************************************************/
   1004 
   1005 /**
   1006  * xmlCreateURI:
   1007  *
   1008  * Simply creates an empty xmlURI
   1009  *
   1010  * Returns the new structure or NULL in case of error
   1011  */
   1012 xmlURIPtr
   1013 xmlCreateURI(void) {
   1014     xmlURIPtr ret;
   1015 
   1016     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
   1017     if (ret == NULL) {
   1018         xmlURIErrMemory("creating URI structure\n");
   1019 	return(NULL);
   1020     }
   1021     memset(ret, 0, sizeof(xmlURI));
   1022     return(ret);
   1023 }
   1024 
   1025 /**
   1026  * xmlSaveUriRealloc:
   1027  *
   1028  * Function to handle properly a reallocation when saving an URI
   1029  * Also imposes some limit on the length of an URI string output
   1030  */
   1031 static xmlChar *
   1032 xmlSaveUriRealloc(xmlChar *ret, int *max) {
   1033     xmlChar *temp;
   1034     int tmp;
   1035 
   1036     if (*max > MAX_URI_LENGTH) {
   1037         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
   1038         return(NULL);
   1039     }
   1040     tmp = *max * 2;
   1041     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
   1042     if (temp == NULL) {
   1043         xmlURIErrMemory("saving URI\n");
   1044         return(NULL);
   1045     }
   1046     *max = tmp;
   1047     return(temp);
   1048 }
   1049 
/**
 * xmlSaveUri:
 * @uri:  pointer to an xmlURI
 *
 * Save the URI as an escaped string.
 *
 * The components are written in this order: the scheme followed by ':',
 * then either the opaque part, or the hierarchical part ("//" with
 * user/server/port or with the authority string, then the path, then
 * the query), and finally '#' with the fragment.  Scheme, server and
 * query_raw are copied verbatim; in the other components every byte
 * that is not accepted as-is for that component is written as "%XX"
 * using uppercase hexadecimal digits.
 *
 * Returns a new string (to be deallocated by caller), or NULL if @uri
 * is NULL or if the output buffer could not be allocated or grown.
 */
xmlChar *
xmlSaveUri(xmlURIPtr uri) {
    xmlChar *ret = NULL;
    xmlChar *temp;
    const char *p;
    int len;
    int max;

    if (uri == NULL) return(NULL);


    /*
     * "max" is the usable capacity; the allocation is always one byte
     * larger so there is room for the final terminator.
     */
    max = 80;
    ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
    if (ret == NULL) {
        xmlURIErrMemory("saving URI\n");
	return(NULL);
    }
    len = 0;

    /* Scheme: copied byte for byte, then the ':' separator. */
    if (uri->scheme != NULL) {
	p = uri->scheme;
	while (*p != 0) {
	    if (len >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
		ret = temp;
	    }
	    ret[len++] = *p++;
	}
	if (len >= max) {
            temp = xmlSaveUriRealloc(ret, &max);
            if (temp == NULL) goto mem_error;
            ret = temp;
	}
	ret[len++] = ':';
    }
    if (uri->opaque != NULL) {
	/*
	 * Opaque part: when present it replaces authority, path and query.
	 * The "len + 3" checks here and below reserve room for the worst
	 * case of one input byte expanding to the three characters "%XX".
	 */
	p = uri->opaque;
	while (*p != 0) {
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
		ret[len++] = *p++;
	    else {
		/* Read the byte as unsigned so hi and lo are in 0..15. */
		int val = *(unsigned char *)p++;
		int hi = val / 0x10, lo = val % 0x10;
		ret[len++] = '%';
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
	    }
	}
    } else {
	/*
	 * Hierarchical form.
	 * NOTE(review): port == -1 looks like a marker set elsewhere for an
	 * authority that has no server (e.g. "file:///") - confirm against
	 * the parser.
	 */
	if ((uri->server != NULL) || (uri->port == -1)) {
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    ret[len++] = '/';
	    ret[len++] = '/';
	    /* User info, escaped, followed by '@'. */
	    if (uri->user != NULL) {
		p = uri->user;
		while (*p != 0) {
		    if (len + 3 >= max) {
                        temp = xmlSaveUriRealloc(ret, &max);
                        if (temp == NULL) goto mem_error;
                        ret = temp;
		    }
		    if ((IS_UNRESERVED(*(p))) ||
			((*(p) == ';')) || ((*(p) == ':')) ||
			((*(p) == '&')) || ((*(p) == '=')) ||
			((*(p) == '+')) || ((*(p) == '$')) ||
			((*(p) == ',')))
			ret[len++] = *p++;
		    else {
			int val = *(unsigned char *)p++;
			int hi = val / 0x10, lo = val % 0x10;
			ret[len++] = '%';
			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
		    }
		}
		if (len + 3 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		ret[len++] = '@';
	    }
	    /* Server name is copied without any escaping. */
	    if (uri->server != NULL) {
		p = uri->server;
		while (*p != 0) {
		    if (len >= max) {
			temp = xmlSaveUriRealloc(ret, &max);
			if (temp == NULL) goto mem_error;
			ret = temp;
		    }
		    ret[len++] = *p++;
		}
		/* Only strictly positive ports are written, as ":<port>". */
		if (uri->port > 0) {
		    if (len + 10 >= max) {
			temp = xmlSaveUriRealloc(ret, &max);
			if (temp == NULL) goto mem_error;
			ret = temp;
		    }
		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
		}
	    }
	} else if (uri->authority != NULL) {
	    /* No server: fall back to the authority string, escaped. */
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    ret[len++] = '/';
	    ret[len++] = '/';
	    p = uri->authority;
	    while (*p != 0) {
		if (len + 3 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		if ((IS_UNRESERVED(*(p))) ||
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
                    ((*(p) == '=')) || ((*(p) == '+')))
		    ret[len++] = *p++;
		else {
		    int val = *(unsigned char *)p++;
		    int hi = val / 0x10, lo = val % 0x10;
		    ret[len++] = '%';
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
		}
	    }
	} else if (uri->scheme != NULL) {
	    /* This branch only grows the buffer if needed; it writes nothing. */
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	}
	if (uri->path != NULL) {
	    p = uri->path;
	    /*
	     * the colon in file:///d: should not be escaped or
	     * Windows accesses fail later.
	     * So for the "file" scheme a leading "/<letter>:" is copied
	     * verbatim before the generic escaping loop runs.
	     */
	    if ((uri->scheme != NULL) &&
		(p[0] == '/') &&
		(((p[1] >= 'a') && (p[1] <= 'z')) ||
		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
		(p[2] == ':') &&
	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
		if (len + 3 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		ret[len++] = *p++;
		ret[len++] = *p++;
		ret[len++] = *p++;
	    }
	    /* Rest of the path: note that ':' is escaped here. */
	    while (*p != 0) {
		if (len + 3 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
	            ((*(p) == ',')))
		    ret[len++] = *p++;
		else {
		    int val = *(unsigned char *)p++;
		    int hi = val / 0x10, lo = val % 0x10;
		    ret[len++] = '%';
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
		}
	    }
	}
	/* The raw query takes precedence and is copied without escaping. */
	if (uri->query_raw != NULL) {
	    if (len + 1 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    ret[len++] = '?';
	    p = uri->query_raw;
	    while (*p != 0) {
		if (len + 1 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		ret[len++] = *p++;
	    }
	} else if (uri->query != NULL) {
	    /* Otherwise the query is written with escaping. */
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    ret[len++] = '?';
	    p = uri->query;
	    while (*p != 0) {
		if (len + 3 >= max) {
                    temp = xmlSaveUriRealloc(ret, &max);
                    if (temp == NULL) goto mem_error;
                    ret = temp;
		}
		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
		    ret[len++] = *p++;
		else {
		    int val = *(unsigned char *)p++;
		    int hi = val / 0x10, lo = val % 0x10;
		    ret[len++] = '%';
		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
		}
	    }
	}
    }
    /* Fragment: written for both the opaque and the hierarchical form. */
    if (uri->fragment != NULL) {
	if (len + 3 >= max) {
            temp = xmlSaveUriRealloc(ret, &max);
            if (temp == NULL) goto mem_error;
            ret = temp;
	}
	ret[len++] = '#';
	p = uri->fragment;
	while (*p != 0) {
	    if (len + 3 >= max) {
                temp = xmlSaveUriRealloc(ret, &max);
                if (temp == NULL) goto mem_error;
                ret = temp;
	    }
	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
		ret[len++] = *p++;
	    else {
		int val = *(unsigned char *)p++;
		int hi = val / 0x10, lo = val % 0x10;
		ret[len++] = '%';
		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
	    }
	}
    }
    /* Terminate the string, growing the buffer one last time if full. */
    if (len >= max) {
        temp = xmlSaveUriRealloc(ret, &max);
        if (temp == NULL) goto mem_error;
        ret = temp;
    }
    ret[len] = 0;
    return(ret);

mem_error:
    /* A failed grow leaves "ret" pointing at the old buffer; release it. */
    xmlFree(ret);
    return(NULL);
}
   1324 
   1325 /**
   1326  * xmlPrintURI:
   1327  * @stream:  a FILE* for the output
   1328  * @uri:  pointer to an xmlURI
   1329  *
   1330  * Prints the URI in the stream @stream.
   1331  */
   1332 void
   1333 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
   1334     xmlChar *out;
   1335 
   1336     out = xmlSaveUri(uri);
   1337     if (out != NULL) {
   1338 	fprintf(stream, "%s", (char *) out);
   1339 	xmlFree(out);
   1340     }
   1341 }
   1342 
   1343 /**
   1344  * xmlCleanURI:
   1345  * @uri:  pointer to an xmlURI
   1346  *
   1347  * Make sure the xmlURI struct is free of content
   1348  */
   1349 static void
   1350 xmlCleanURI(xmlURIPtr uri) {
   1351     if (uri == NULL) return;
   1352 
   1353     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1354     uri->scheme = NULL;
   1355     if (uri->server != NULL) xmlFree(uri->server);
   1356     uri->server = NULL;
   1357     if (uri->user != NULL) xmlFree(uri->user);
   1358     uri->user = NULL;
   1359     if (uri->path != NULL) xmlFree(uri->path);
   1360     uri->path = NULL;
   1361     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1362     uri->fragment = NULL;
   1363     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1364     uri->opaque = NULL;
   1365     if (uri->authority != NULL) xmlFree(uri->authority);
   1366     uri->authority = NULL;
   1367     if (uri->query != NULL) xmlFree(uri->query);
   1368     uri->query = NULL;
   1369     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1370     uri->query_raw = NULL;
   1371 }
   1372 
   1373 /**
   1374  * xmlFreeURI:
   1375  * @uri:  pointer to an xmlURI
   1376  *
   1377  * Free up the xmlURI struct
   1378  */
   1379 void
   1380 xmlFreeURI(xmlURIPtr uri) {
   1381     if (uri == NULL) return;
   1382 
   1383     if (uri->scheme != NULL) xmlFree(uri->scheme);
   1384     if (uri->server != NULL) xmlFree(uri->server);
   1385     if (uri->user != NULL) xmlFree(uri->user);
   1386     if (uri->path != NULL) xmlFree(uri->path);
   1387     if (uri->fragment != NULL) xmlFree(uri->fragment);
   1388     if (uri->opaque != NULL) xmlFree(uri->opaque);
   1389     if (uri->authority != NULL) xmlFree(uri->authority);
   1390     if (uri->query != NULL) xmlFree(uri->query);
   1391     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
   1392     xmlFree(uri);
   1393 }
   1394 
   1395 /************************************************************************
   1396  *									*
   1397  *			Helper functions				*
   1398  *									*
   1399  ************************************************************************/
   1400 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * The work is done in three passes over the same buffer: the first
 * removes "." segments (and collapses runs of '/'), the second removes
 * "<segment>/.." pairs, and the third drops any ".." segments left at
 * the start of an absolute path.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
	return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
      return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" is the read position and "out" the write position; "out"
     * never gets ahead of "cur", so copying in place is safe.
     */
    while (cur[0] != '\0') {
	/*
	 * c) All occurrences of "./", where "." is a complete path segment,
	 *    are removed from the buffer string.
	 */
	if ((cur[0] == '.') && (cur[1] == '/')) {
	    cur += 2;
	    /* '//' normalization should be done at this point too */
	    while (cur[0] == '/')
		cur++;
	    continue;
	}

	/*
	 * d) If the buffer string ends with "." as a complete path segment,
	 *    that "." is removed.
	 */
	if ((cur[0] == '.') && (cur[1] == '\0'))
	    break;

	/* Otherwise keep the segment.  */
	while (cur[0] != '/') {
            if (cur[0] == '\0')
              goto done_cd;
	    (out++)[0] = (cur++)[0];
	}
	/* normalize //: skip all but the last '/' of a run */
	while ((cur[0] == '/') && (cur[1] == '/'))
	    cur++;

        /* Copy the single remaining '/' that ends this segment. */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
      ++cur;
    if (cur[0] == '\0')
	return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
          ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
          break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         * After the increment "segp" points just past the '/', so
         * "segp == cur+3" means the current segment is exactly two
         * characters long.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
          cur = segp;
          continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
          cur[0] = '\0';
          break;
        }
        /* Valgrind complained, strcpy(cur, segp + 3); */
        /* string will overlap, do not use strcpy */
        /* Shift the tail (everything after "../") down onto "cur". */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
          ;

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
          ;
        if (segp == path)
          continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
          --cur;
    }
    /*
     * "out" still holds the end position recorded after the (c)/(d) pass.
     * The loop above only shortens the string, so this store lands at or
     * beyond the current terminator (it appears to be redundant).
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     * This is only done when the path starts with '/'.
     */
    if (path[0] == '/') {
      cur = path;
      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
             && ((cur[3] == '/') || (cur[3] == '\0')))
	cur += 3;

      /* Some leading "/.." were skipped: move the remainder to the front. */
      if (cur != path) {
	out = path;
	while (cur[0] != '\0')
          (out++)[0] = (cur++)[0];
	out[0] = 0;
      }
    }

    return(0);
}
   1589 
   1590 static int is_hex(char c) {
   1591     if (((c >= '0') && (c <= '9')) ||
   1592         ((c >= 'a') && (c <= 'f')) ||
   1593         ((c >= 'A') && (c <= 'F')))
   1594 	return(1);
   1595     return(0);
   1596 }
   1597 
   1598 /**
   1599  * xmlURIUnescapeString:
   1600  * @str:  the string to unescape
   1601  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
   1602  * @target:  optional destination buffer
   1603  *
   1604  * Unescaping routine, but does not check that the string is an URI. The
   1605  * output is a direct unsigned char translation of %XX values (no encoding)
   1606  * Note that the length of the result can only be smaller or same size as
   1607  * the input string.
   1608  *
   1609  * Returns a copy of the string, but unescaped, will return NULL only in case
   1610  * of error
   1611  */
   1612 char *
   1613 xmlURIUnescapeString(const char *str, int len, char *target) {
   1614     char *ret, *out;
   1615     const char *in;
   1616 
   1617     if (str == NULL)
   1618 	return(NULL);
   1619     if (len <= 0) len = strlen(str);
   1620     if (len < 0) return(NULL);
   1621 
   1622     if (target == NULL) {
   1623 	ret = (char *) xmlMallocAtomic(len + 1);
   1624 	if (ret == NULL) {
   1625             xmlURIErrMemory("unescaping URI value\n");
   1626 	    return(NULL);
   1627 	}
   1628     } else
   1629 	ret = target;
   1630     in = str;
   1631     out = ret;
   1632     while(len > 0) {
   1633 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
   1634 	    in++;
   1635 	    if ((*in >= '0') && (*in <= '9'))
   1636 	        *out = (*in - '0');
   1637 	    else if ((*in >= 'a') && (*in <= 'f'))
   1638 	        *out = (*in - 'a') + 10;
   1639 	    else if ((*in >= 'A') && (*in <= 'F'))
   1640 	        *out = (*in - 'A') + 10;
   1641 	    in++;
   1642 	    if ((*in >= '0') && (*in <= '9'))
   1643 	        *out = *out * 16 + (*in - '0');
   1644 	    else if ((*in >= 'a') && (*in <= 'f'))
   1645 	        *out = *out * 16 + (*in - 'a') + 10;
   1646 	    else if ((*in >= 'A') && (*in <= 'F'))
   1647 	        *out = *out * 16 + (*in - 'A') + 10;
   1648 	    in++;
   1649 	    len -= 3;
   1650 	    out++;
   1651 	} else {
   1652 	    *out++ = *in++;
   1653 	    len--;
   1654 	}
   1655     }
   1656     *out = 0;
   1657     return(ret);
   1658 }
   1659 
   1660 /**
   1661  * xmlURIEscapeStr:
   1662  * @str:  string to escape
   1663  * @list: exception list string of chars not to escape
   1664  *
   1665  * This routine escapes a string to hex, ignoring reserved characters (a-z)
   1666  * and the characters in the exception list.
   1667  *
   1668  * Returns a new escaped string or NULL in case of error.
   1669  */
   1670 xmlChar *
   1671 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
   1672     xmlChar *ret, ch;
   1673     xmlChar *temp;
   1674     const xmlChar *in;
   1675     int len, out;
   1676 
   1677     if (str == NULL)
   1678 	return(NULL);
   1679     if (str[0] == 0)
   1680 	return(xmlStrdup(str));
   1681     len = xmlStrlen(str);
   1682     if (!(len > 0)) return(NULL);
   1683 
   1684     len += 20;
   1685     ret = (xmlChar *) xmlMallocAtomic(len);
   1686     if (ret == NULL) {
   1687         xmlURIErrMemory("escaping URI value\n");
   1688 	return(NULL);
   1689     }
   1690     in = (const xmlChar *) str;
   1691     out = 0;
   1692     while(*in != 0) {
   1693 	if (len - out <= 3) {
   1694             temp = xmlSaveUriRealloc(ret, &len);
   1695 	    if (temp == NULL) {
   1696                 xmlURIErrMemory("escaping URI value\n");
   1697 		xmlFree(ret);
   1698 		return(NULL);
   1699 	    }
   1700 	    ret = temp;
   1701 	}
   1702 
   1703 	ch = *in;
   1704 
   1705 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
   1706 	    unsigned char val;
   1707 	    ret[out++] = '%';
   1708 	    val = ch >> 4;
   1709 	    if (val <= 9)
   1710 		ret[out++] = '0' + val;
   1711 	    else
   1712 		ret[out++] = 'A' + val - 0xA;
   1713 	    val = ch & 0xF;
   1714 	    if (val <= 9)
   1715 		ret[out++] = '0' + val;
   1716 	    else
   1717 		ret[out++] = 'A' + val - 0xA;
   1718 	    in++;
   1719 	} else {
   1720 	    ret[out++] = *in++;
   1721 	}
   1722 
   1723     }
   1724     ret[out] = 0;
   1725     return(ret);
   1726 }
   1727 
   1728 /**
   1729  * xmlURIEscape:
   1730  * @str:  the string of the URI to escape
   1731  *
   1732  * Escaping routine, does not do validity checks !
   1733  * It will try to escape the chars needing this, but this is heuristic
   1734  * based it's impossible to be sure.
   1735  *
   1736  * Returns an copy of the string, but escaped
   1737  *
   1738  * 25 May 2001
   1739  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
   1740  * according to RFC2396.
   1741  *   - Carl Douglas
   1742  */
   1743 xmlChar *
   1744 xmlURIEscape(const xmlChar * str)
   1745 {
   1746     xmlChar *ret, *segment = NULL;
   1747     xmlURIPtr uri;
   1748     int ret2;
   1749 
   1750 #define NULLCHK(p) if(!p) { \
   1751          xmlURIErrMemory("escaping URI value\n"); \
   1752          xmlFreeURI(uri); \
   1753          return NULL; } \
   1754 
   1755     if (str == NULL)
   1756         return (NULL);
   1757 
   1758     uri = xmlCreateURI();
   1759     if (uri != NULL) {
   1760 	/*
   1761 	 * Allow escaping errors in the unescaped form
   1762 	 */
   1763         uri->cleanup = 1;
   1764         ret2 = xmlParseURIReference(uri, (const char *)str);
   1765         if (ret2) {
   1766             xmlFreeURI(uri);
   1767             return (NULL);
   1768         }
   1769     }
   1770 
   1771     if (!uri)
   1772         return NULL;
   1773 
   1774     ret = NULL;
   1775 
   1776     if (uri->scheme) {
   1777         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
   1778         NULLCHK(segment)
   1779         ret = xmlStrcat(ret, segment);
   1780         ret = xmlStrcat(ret, BAD_CAST ":");
   1781         xmlFree(segment);
   1782     }
   1783 
   1784     if (uri->authority) {
   1785         segment =
   1786             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
   1787         NULLCHK(segment)
   1788         ret = xmlStrcat(ret, BAD_CAST "//");
   1789         ret = xmlStrcat(ret, segment);
   1790         xmlFree(segment);
   1791     }
   1792 
   1793     if (uri->user) {
   1794         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
   1795         NULLCHK(segment)
   1796 		ret = xmlStrcat(ret,BAD_CAST "//");
   1797         ret = xmlStrcat(ret, segment);
   1798         ret = xmlStrcat(ret, BAD_CAST "@");
   1799         xmlFree(segment);
   1800     }
   1801 
   1802     if (uri->server) {
   1803         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
   1804         NULLCHK(segment)
   1805 		if (uri->user == NULL)
   1806 		ret = xmlStrcat(ret, BAD_CAST "//");
   1807         ret = xmlStrcat(ret, segment);
   1808         xmlFree(segment);
   1809     }
   1810 
   1811     if (uri->port) {
   1812         xmlChar port[10];
   1813 
   1814         snprintf((char *) port, 10, "%d", uri->port);
   1815         ret = xmlStrcat(ret, BAD_CAST ":");
   1816         ret = xmlStrcat(ret, port);
   1817     }
   1818 
   1819     if (uri->path) {
   1820         segment =
   1821             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
   1822         NULLCHK(segment)
   1823         ret = xmlStrcat(ret, segment);
   1824         xmlFree(segment);
   1825     }
   1826 
   1827     if (uri->query_raw) {
   1828         ret = xmlStrcat(ret, BAD_CAST "?");
   1829         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
   1830     }
   1831     else if (uri->query) {
   1832         segment =
   1833             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
   1834         NULLCHK(segment)
   1835         ret = xmlStrcat(ret, BAD_CAST "?");
   1836         ret = xmlStrcat(ret, segment);
   1837         xmlFree(segment);
   1838     }
   1839 
   1840     if (uri->opaque) {
   1841         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
   1842         NULLCHK(segment)
   1843         ret = xmlStrcat(ret, segment);
   1844         xmlFree(segment);
   1845     }
   1846 
   1847     if (uri->fragment) {
   1848         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
   1849         NULLCHK(segment)
   1850         ret = xmlStrcat(ret, BAD_CAST "#");
   1851         ret = xmlStrcat(ret, segment);
   1852         xmlFree(segment);
   1853     }
   1854 
   1855     xmlFreeURI(uri);
   1856 #undef NULLCHK
   1857 
   1858     return (ret);
   1859 }
   1860 
   1861 /************************************************************************
   1862  *									*
   1863  *			Public functions				*
   1864  *									*
   1865  ************************************************************************/
   1866 
   1867 /**
   1868  * xmlBuildURI:
   1869  * @URI:  the URI instance found in the document
   1870  * @base:  the base value
   1871  *
 * Computes the final URI of the reference done by checking that
   1873  * the given URI is valid, and building the final URI using the
   1874  * base URI. This is processed according to section 5.2 of the
   1875  * RFC 2396
   1876  *
   1877  * 5.2. Resolving Relative References to Absolute Form
   1878  *
   1879  * Returns a new URI string (to be freed by the caller) or NULL in case
   1880  *         of error.
   1881  */
   1882 xmlChar *
   1883 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
   1884     xmlChar *val = NULL;
   1885     int ret, len, indx, cur, out;
   1886     xmlURIPtr ref = NULL;
   1887     xmlURIPtr bas = NULL;
   1888     xmlURIPtr res = NULL;
   1889 
   1890     /*
   1891      * 1) The URI reference is parsed into the potential four components and
   1892      *    fragment identifier, as described in Section 4.3.
   1893      *
   1894      *    NOTE that a completely empty URI is treated by modern browsers
   1895      *    as a reference to "." rather than as a synonym for the current
   1896      *    URI.  Should we do that here?
   1897      */
   1898     if (URI == NULL)
   1899 	ret = -1;
   1900     else {
   1901 	if (*URI) {
   1902 	    ref = xmlCreateURI();
   1903 	    if (ref == NULL)
   1904 		goto done;
   1905 	    ret = xmlParseURIReference(ref, (const char *) URI);
   1906 	}
   1907 	else
   1908 	    ret = 0;
   1909     }
   1910     if (ret != 0)
   1911 	goto done;
   1912     if ((ref != NULL) && (ref->scheme != NULL)) {
   1913 	/*
   1914 	 * The URI is absolute don't modify.
   1915 	 */
   1916 	val = xmlStrdup(URI);
   1917 	goto done;
   1918     }
   1919     if (base == NULL)
   1920 	ret = -1;
   1921     else {
   1922 	bas = xmlCreateURI();
   1923 	if (bas == NULL)
   1924 	    goto done;
   1925 	ret = xmlParseURIReference(bas, (const char *) base);
   1926     }
   1927     if (ret != 0) {
   1928 	if (ref)
   1929 	    val = xmlSaveUri(ref);
   1930 	goto done;
   1931     }
   1932     if (ref == NULL) {
   1933 	/*
   1934 	 * the base fragment must be ignored
   1935 	 */
   1936 	if (bas->fragment != NULL) {
   1937 	    xmlFree(bas->fragment);
   1938 	    bas->fragment = NULL;
   1939 	}
   1940 	val = xmlSaveUri(bas);
   1941 	goto done;
   1942     }
   1943 
   1944     /*
   1945      * 2) If the path component is empty and the scheme, authority, and
   1946      *    query components are undefined, then it is a reference to the
   1947      *    current document and we are done.  Otherwise, the reference URI's
   1948      *    query and fragment components are defined as found (or not found)
   1949      *    within the URI reference and not inherited from the base URI.
   1950      *
   1951      *    NOTE that in modern browsers, the parsing differs from the above
   1952      *    in the following aspect:  the query component is allowed to be
   1953      *    defined while still treating this as a reference to the current
   1954      *    document.
   1955      */
   1956     res = xmlCreateURI();
   1957     if (res == NULL)
   1958 	goto done;
   1959     if ((ref->scheme == NULL) && (ref->path == NULL) &&
   1960 	((ref->authority == NULL) && (ref->server == NULL))) {
   1961 	if (bas->scheme != NULL)
   1962 	    res->scheme = xmlMemStrdup(bas->scheme);
   1963 	if (bas->authority != NULL)
   1964 	    res->authority = xmlMemStrdup(bas->authority);
   1965 	else if (bas->server != NULL) {
   1966 	    res->server = xmlMemStrdup(bas->server);
   1967 	    if (bas->user != NULL)
   1968 		res->user = xmlMemStrdup(bas->user);
   1969 	    res->port = bas->port;
   1970 	}
   1971 	if (bas->path != NULL)
   1972 	    res->path = xmlMemStrdup(bas->path);
   1973 	if (ref->query_raw != NULL)
   1974 	    res->query_raw = xmlMemStrdup (ref->query_raw);
   1975 	else if (ref->query != NULL)
   1976 	    res->query = xmlMemStrdup(ref->query);
   1977 	else if (bas->query_raw != NULL)
   1978 	    res->query_raw = xmlMemStrdup(bas->query_raw);
   1979 	else if (bas->query != NULL)
   1980 	    res->query = xmlMemStrdup(bas->query);
   1981 	if (ref->fragment != NULL)
   1982 	    res->fragment = xmlMemStrdup(ref->fragment);
   1983 	goto step_7;
   1984     }
   1985 
   1986     /*
   1987      * 3) If the scheme component is defined, indicating that the reference
   1988      *    starts with a scheme name, then the reference is interpreted as an
   1989      *    absolute URI and we are done.  Otherwise, the reference URI's
   1990      *    scheme is inherited from the base URI's scheme component.
   1991      */
   1992     if (ref->scheme != NULL) {
   1993 	val = xmlSaveUri(ref);
   1994 	goto done;
   1995     }
   1996     if (bas->scheme != NULL)
   1997 	res->scheme = xmlMemStrdup(bas->scheme);
   1998 
   1999     if (ref->query_raw != NULL)
   2000 	res->query_raw = xmlMemStrdup(ref->query_raw);
   2001     else if (ref->query != NULL)
   2002 	res->query = xmlMemStrdup(ref->query);
   2003     if (ref->fragment != NULL)
   2004 	res->fragment = xmlMemStrdup(ref->fragment);
   2005 
   2006     /*
   2007      * 4) If the authority component is defined, then the reference is a
   2008      *    network-path and we skip to step 7.  Otherwise, the reference
   2009      *    URI's authority is inherited from the base URI's authority
   2010      *    component, which will also be undefined if the URI scheme does not
   2011      *    use an authority component.
   2012      */
   2013     if ((ref->authority != NULL) || (ref->server != NULL)) {
   2014 	if (ref->authority != NULL)
   2015 	    res->authority = xmlMemStrdup(ref->authority);
   2016 	else {
   2017 	    res->server = xmlMemStrdup(ref->server);
   2018 	    if (ref->user != NULL)
   2019 		res->user = xmlMemStrdup(ref->user);
   2020             res->port = ref->port;
   2021 	}
   2022 	if (ref->path != NULL)
   2023 	    res->path = xmlMemStrdup(ref->path);
   2024 	goto step_7;
   2025     }
   2026     if (bas->authority != NULL)
   2027 	res->authority = xmlMemStrdup(bas->authority);
   2028     else if (bas->server != NULL) {
   2029 	res->server = xmlMemStrdup(bas->server);
   2030 	if (bas->user != NULL)
   2031 	    res->user = xmlMemStrdup(bas->user);
   2032 	res->port = bas->port;
   2033     }
   2034 
   2035     /*
   2036      * 5) If the path component begins with a slash character ("/"), then
   2037      *    the reference is an absolute-path and we skip to step 7.
   2038      */
   2039     if ((ref->path != NULL) && (ref->path[0] == '/')) {
   2040 	res->path = xmlMemStrdup(ref->path);
   2041 	goto step_7;
   2042     }
   2043 
   2044 
   2045     /*
   2046      * 6) If this step is reached, then we are resolving a relative-path
   2047      *    reference.  The relative path needs to be merged with the base
   2048      *    URI's path.  Although there are many ways to do this, we will
   2049      *    describe a simple method using a separate string buffer.
   2050      *
   2051      * Allocate a buffer large enough for the result string.
   2052      */
   2053     len = 2; /* extra / and 0 */
   2054     if (ref->path != NULL)
   2055 	len += strlen(ref->path);
   2056     if (bas->path != NULL)
   2057 	len += strlen(bas->path);
   2058     res->path = (char *) xmlMallocAtomic(len);
   2059     if (res->path == NULL) {
   2060         xmlURIErrMemory("resolving URI against base\n");
   2061 	goto done;
   2062     }
   2063     res->path[0] = 0;
   2064 
   2065     /*
   2066      * a) All but the last segment of the base URI's path component is
   2067      *    copied to the buffer.  In other words, any characters after the
   2068      *    last (right-most) slash character, if any, are excluded.
   2069      */
   2070     cur = 0;
   2071     out = 0;
   2072     if (bas->path != NULL) {
   2073 	while (bas->path[cur] != 0) {
   2074 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
   2075 		cur++;
   2076 	    if (bas->path[cur] == 0)
   2077 		break;
   2078 
   2079 	    cur++;
   2080 	    while (out < cur) {
   2081 		res->path[out] = bas->path[out];
   2082 		out++;
   2083 	    }
   2084 	}
   2085     }
   2086     res->path[out] = 0;
   2087 
   2088     /*
   2089      * b) The reference's path component is appended to the buffer
   2090      *    string.
   2091      */
   2092     if (ref->path != NULL && ref->path[0] != 0) {
   2093 	indx = 0;
   2094 	/*
   2095 	 * Ensure the path includes a '/'
   2096 	 */
   2097 	if ((out == 0) && (bas->server != NULL))
   2098 	    res->path[out++] = '/';
   2099 	while (ref->path[indx] != 0) {
   2100 	    res->path[out++] = ref->path[indx++];
   2101 	}
   2102     }
   2103     res->path[out] = 0;
   2104 
   2105     /*
   2106      * Steps c) to h) are really path normalization steps
   2107      */
   2108     xmlNormalizeURIPath(res->path);
   2109 
   2110 step_7:
   2111 
   2112     /*
   2113      * 7) The resulting URI components, including any inherited from the
   2114      *    base URI, are recombined to give the absolute form of the URI
   2115      *    reference.
   2116      */
   2117     val = xmlSaveUri(res);
   2118 
   2119 done:
   2120     if (ref != NULL)
   2121 	xmlFreeURI(ref);
   2122     if (bas != NULL)
   2123 	xmlFreeURI(bas);
   2124     if (res != NULL)
   2125 	xmlFreeURI(res);
   2126     return(val);
   2127 }
   2128 
   2129 /**
   2130  * xmlBuildRelativeURI:
   2131  * @URI:  the URI reference under consideration
   2132  * @base:  the base value
   2133  *
   2134  * Expresses the URI of the reference in terms relative to the
   2135  * base.  Some examples of this operation include:
   2136  *     base = "http://site1.com/docs/book1.html"
   2137  *        URI input                        URI returned
   2138  *     docs/pic1.gif                    pic1.gif
   2139  *     docs/img/pic1.gif                img/pic1.gif
   2140  *     img/pic1.gif                     ../img/pic1.gif
   2141  *     http://site1.com/docs/pic1.gif   pic1.gif
   2142  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
   2143  *
   2144  *     base = "docs/book1.html"
   2145  *        URI input                        URI returned
   2146  *     docs/pic1.gif                    pic1.gif
   2147  *     docs/img/pic1.gif                img/pic1.gif
   2148  *     img/pic1.gif                     ../img/pic1.gif
   2149  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
   2150  *
   2151  *
   2152  * Note: if the URI reference is really wierd or complicated, it may be
   2153  *       worthwhile to first convert it into a "nice" one by calling
   2154  *       xmlBuildURI (using 'base') before calling this routine,
   2155  *       since this routine (for reasonable efficiency) assumes URI has
   2156  *       already been through some validation.
   2157  *
   2158  * Returns a new URI string (to be freed by the caller) or NULL in case
   2159  * error.
   2160  */
   2161 xmlChar *
   2162 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
   2163 {
   2164     xmlChar *val = NULL;
   2165     int ret;
   2166     int ix;
   2167     int pos = 0;
   2168     int nbslash = 0;
   2169     int len;
   2170     xmlURIPtr ref = NULL;
   2171     xmlURIPtr bas = NULL;
   2172     xmlChar *bptr, *uptr, *vptr;
   2173     int remove_path = 0;
   2174 
   2175     if ((URI == NULL) || (*URI == 0))
   2176 	return NULL;
   2177 
   2178     /*
   2179      * First parse URI into a standard form
   2180      */
   2181     ref = xmlCreateURI ();
   2182     if (ref == NULL)
   2183 	return NULL;
   2184     /* If URI not already in "relative" form */
   2185     if (URI[0] != '.') {
   2186 	ret = xmlParseURIReference (ref, (const char *) URI);
   2187 	if (ret != 0)
   2188 	    goto done;		/* Error in URI, return NULL */
   2189     } else
   2190 	ref->path = (char *)xmlStrdup(URI);
   2191 
   2192     /*
   2193      * Next parse base into the same standard form
   2194      */
   2195     if ((base == NULL) || (*base == 0)) {
   2196 	val = xmlStrdup (URI);
   2197 	goto done;
   2198     }
   2199     bas = xmlCreateURI ();
   2200     if (bas == NULL)
   2201 	goto done;
   2202     if (base[0] != '.') {
   2203 	ret = xmlParseURIReference (bas, (const char *) base);
   2204 	if (ret != 0)
   2205 	    goto done;		/* Error in base, return NULL */
   2206     } else
   2207 	bas->path = (char *)xmlStrdup(base);
   2208 
   2209     /*
   2210      * If the scheme / server on the URI differs from the base,
   2211      * just return the URI
   2212      */
   2213     if ((ref->scheme != NULL) &&
   2214 	((bas->scheme == NULL) ||
   2215 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
   2216 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
   2217 	val = xmlStrdup (URI);
   2218 	goto done;
   2219     }
   2220     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
   2221 	val = xmlStrdup(BAD_CAST "");
   2222 	goto done;
   2223     }
   2224     if (bas->path == NULL) {
   2225 	val = xmlStrdup((xmlChar *)ref->path);
   2226 	goto done;
   2227     }
   2228     if (ref->path == NULL) {
   2229         ref->path = (char *) "/";
   2230 	remove_path = 1;
   2231     }
   2232 
   2233     /*
   2234      * At this point (at last!) we can compare the two paths
   2235      *
   2236      * First we take care of the special case where either of the
   2237      * two path components may be missing (bug 316224)
   2238      */
   2239     if (bas->path == NULL) {
   2240 	if (ref->path != NULL) {
   2241 	    uptr = (xmlChar *) ref->path;
   2242 	    if (*uptr == '/')
   2243 		uptr++;
   2244 	    /* exception characters from xmlSaveUri */
   2245 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
   2246 	}
   2247 	goto done;
   2248     }
   2249     bptr = (xmlChar *)bas->path;
   2250     if (ref->path == NULL) {
   2251 	for (ix = 0; bptr[ix] != 0; ix++) {
   2252 	    if (bptr[ix] == '/')
   2253 		nbslash++;
   2254 	}
   2255 	uptr = NULL;
   2256 	len = 1;	/* this is for a string terminator only */
   2257     } else {
   2258     /*
   2259      * Next we compare the two strings and find where they first differ
   2260      */
   2261 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
   2262             pos += 2;
   2263 	if ((*bptr == '.') && (bptr[1] == '/'))
   2264             bptr += 2;
   2265 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
   2266 	    bptr++;
   2267 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
   2268 	    pos++;
   2269 
   2270 	if (bptr[pos] == ref->path[pos]) {
   2271 	    val = xmlStrdup(BAD_CAST "");
   2272 	    goto done;		/* (I can't imagine why anyone would do this) */
   2273 	}
   2274 
   2275 	/*
   2276 	 * In URI, "back up" to the last '/' encountered.  This will be the
   2277 	 * beginning of the "unique" suffix of URI
   2278 	 */
   2279 	ix = pos;
   2280 	if ((ref->path[ix] == '/') && (ix > 0))
   2281 	    ix--;
   2282 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
   2283 	    ix -= 2;
   2284 	for (; ix > 0; ix--) {
   2285 	    if (ref->path[ix] == '/')
   2286 		break;
   2287 	}
   2288 	if (ix == 0) {
   2289 	    uptr = (xmlChar *)ref->path;
   2290 	} else {
   2291 	    ix++;
   2292 	    uptr = (xmlChar *)&ref->path[ix];
   2293 	}
   2294 
   2295 	/*
   2296 	 * In base, count the number of '/' from the differing point
   2297 	 */
   2298 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
   2299 	    for (; bptr[ix] != 0; ix++) {
   2300 		if (bptr[ix] == '/')
   2301 		    nbslash++;
   2302 	    }
   2303 	}
   2304 	len = xmlStrlen (uptr) + 1;
   2305     }
   2306 
   2307     if (nbslash == 0) {
   2308 	if (uptr != NULL)
   2309 	    /* exception characters from xmlSaveUri */
   2310 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
   2311 	goto done;
   2312     }
   2313 
   2314     /*
   2315      * Allocate just enough space for the returned string -
   2316      * length of the remainder of the URI, plus enough space
   2317      * for the "../" groups, plus one for the terminator
   2318      */
   2319     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
   2320     if (val == NULL) {
   2321         xmlURIErrMemory("building relative URI\n");
   2322 	goto done;
   2323     }
   2324     vptr = val;
   2325     /*
   2326      * Put in as many "../" as needed
   2327      */
   2328     for (; nbslash>0; nbslash--) {
   2329 	*vptr++ = '.';
   2330 	*vptr++ = '.';
   2331 	*vptr++ = '/';
   2332     }
   2333     /*
   2334      * Finish up with the end of the URI
   2335      */
   2336     if (uptr != NULL) {
   2337         if ((vptr > val) && (len > 0) &&
   2338 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
   2339 	    memcpy (vptr, uptr + 1, len - 1);
   2340 	    vptr[len - 2] = 0;
   2341 	} else {
   2342 	    memcpy (vptr, uptr, len);
   2343 	    vptr[len - 1] = 0;
   2344 	}
   2345     } else {
   2346 	vptr[len - 1] = 0;
   2347     }
   2348 
   2349     /* escape the freshly-built path */
   2350     vptr = val;
   2351 	/* exception characters from xmlSaveUri */
   2352     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
   2353     xmlFree(vptr);
   2354 
   2355 done:
   2356     /*
   2357      * Free the working variables
   2358      */
   2359     if (remove_path != 0)
   2360         ref->path = NULL;
   2361     if (ref != NULL)
   2362 	xmlFreeURI (ref);
   2363     if (bas != NULL)
   2364 	xmlFreeURI (bas);
   2365 
   2366     return val;
   2367 }
   2368 
   2369 /**
   2370  * xmlCanonicPath:
   2371  * @path:  the resource locator in a filesystem notation
   2372  *
   2373  * Constructs a canonic path from the specified path.
   2374  *
   2375  * Returns a new canonic path, or a duplicate of the path parameter if the
   2376  * construction fails. The caller is responsible for freeing the memory occupied
   2377  * by the returned string. If there is insufficient memory available, or the
   2378  * argument is NULL, the function returns NULL.
   2379  */
   2380 #define IS_WINDOWS_PATH(p)					\
   2381 	((p != NULL) &&						\
   2382 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
   2383 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
   2384 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
   2385 xmlChar *
   2386 xmlCanonicPath(const xmlChar *path)
   2387 {
   2388 /*
   2389  * For Windows implementations, additional work needs to be done to
   2390  * replace backslashes in pathnames with "forward slashes"
   2391  */
   2392 #if defined(_WIN32) && !defined(__CYGWIN__)
   2393     int len = 0;
   2394     int i = 0;
   2395     xmlChar *p = NULL;
   2396 #endif
   2397     xmlURIPtr uri;
   2398     xmlChar *ret;
   2399     const xmlChar *absuri;
   2400 
   2401     if (path == NULL)
   2402 	return(NULL);
   2403 
   2404 #if defined(_WIN32)
   2405     /*
   2406      * We must not change the backslashes to slashes if the the path
   2407      * starts with \\?\
   2408      * Those paths can be up to 32k characters long.
   2409      * Was added specifically for OpenOffice, those paths can't be converted
   2410      * to URIs anyway.
   2411      */
   2412     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
   2413         (path[3] == '\\') )
   2414 	return xmlStrdup((const xmlChar *) path);
   2415 #endif
   2416 
   2417 	/* sanitize filename starting with // so it can be used as URI */
   2418     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
   2419         path++;
   2420 
   2421     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2422 	xmlFreeURI(uri);
   2423 	return xmlStrdup(path);
   2424     }
   2425 
   2426     /* Check if this is an "absolute uri" */
   2427     absuri = xmlStrstr(path, BAD_CAST "://");
   2428     if (absuri != NULL) {
   2429         int l, j;
   2430 	unsigned char c;
   2431 	xmlChar *escURI;
   2432 
   2433         /*
   2434 	 * this looks like an URI where some parts have not been
   2435 	 * escaped leading to a parsing problem.  Check that the first
   2436 	 * part matches a protocol.
   2437 	 */
   2438 	l = absuri - path;
   2439 	/* Bypass if first part (part before the '://') is > 20 chars */
   2440 	if ((l <= 0) || (l > 20))
   2441 	    goto path_processing;
   2442 	/* Bypass if any non-alpha characters are present in first part */
   2443 	for (j = 0;j < l;j++) {
   2444 	    c = path[j];
   2445 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
   2446 	        goto path_processing;
   2447 	}
   2448 
   2449 	/* Escape all except the characters specified in the supplied path */
   2450         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
   2451 	if (escURI != NULL) {
   2452 	    /* Try parsing the escaped path */
   2453 	    uri = xmlParseURI((const char *) escURI);
   2454 	    /* If successful, return the escaped string */
   2455 	    if (uri != NULL) {
   2456 	        xmlFreeURI(uri);
   2457 		return escURI;
   2458 	    }
   2459 	}
   2460     }
   2461 
   2462 path_processing:
   2463 /* For Windows implementations, replace backslashes with 'forward slashes' */
   2464 #if defined(_WIN32) && !defined(__CYGWIN__)
   2465     /*
   2466      * Create a URI structure
   2467      */
   2468     uri = xmlCreateURI();
   2469     if (uri == NULL) {		/* Guard against 'out of memory' */
   2470         return(NULL);
   2471     }
   2472 
   2473     len = xmlStrlen(path);
   2474     if ((len > 2) && IS_WINDOWS_PATH(path)) {
   2475         /* make the scheme 'file' */
   2476 	uri->scheme = xmlStrdup(BAD_CAST "file");
   2477 	/* allocate space for leading '/' + path + string terminator */
   2478 	uri->path = xmlMallocAtomic(len + 2);
   2479 	if (uri->path == NULL) {
   2480 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
   2481 	    return(NULL);
   2482 	}
   2483 	/* Put in leading '/' plus path */
   2484 	uri->path[0] = '/';
   2485 	p = uri->path + 1;
   2486 	strncpy(p, path, len + 1);
   2487     } else {
   2488 	uri->path = xmlStrdup(path);
   2489 	if (uri->path == NULL) {
   2490 	    xmlFreeURI(uri);
   2491 	    return(NULL);
   2492 	}
   2493 	p = uri->path;
   2494     }
   2495     /* Now change all occurences of '\' to '/' */
   2496     while (*p != '\0') {
   2497 	if (*p == '\\')
   2498 	    *p = '/';
   2499 	p++;
   2500     }
   2501 
   2502     if (uri->scheme == NULL) {
   2503 	ret = xmlStrdup((const xmlChar *) uri->path);
   2504     } else {
   2505 	ret = xmlSaveUri(uri);
   2506     }
   2507 
   2508     xmlFreeURI(uri);
   2509 #else
   2510     ret = xmlStrdup((const xmlChar *) path);
   2511 #endif
   2512     return(ret);
   2513 }
   2514 
   2515 /**
   2516  * xmlPathToURI:
   2517  * @path:  the resource locator in a filesystem notation
   2518  *
   2519  * Constructs an URI expressing the existing path
   2520  *
   2521  * Returns a new URI, or a duplicate of the path parameter if the
   2522  * construction fails. The caller is responsible for freeing the memory
   2523  * occupied by the returned string. If there is insufficient memory available,
   2524  * or the argument is NULL, the function returns NULL.
   2525  */
   2526 xmlChar *
   2527 xmlPathToURI(const xmlChar *path)
   2528 {
   2529     xmlURIPtr uri;
   2530     xmlURI temp;
   2531     xmlChar *ret, *cal;
   2532 
   2533     if (path == NULL)
   2534         return(NULL);
   2535 
   2536     if ((uri = xmlParseURI((const char *) path)) != NULL) {
   2537 	xmlFreeURI(uri);
   2538 	return xmlStrdup(path);
   2539     }
   2540     cal = xmlCanonicPath(path);
   2541     if (cal == NULL)
   2542         return(NULL);
   2543 #if defined(_WIN32) && !defined(__CYGWIN__)
   2544     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
   2545        If 'cal' is a valid URI allready then we are done here, as continuing would make
   2546        it invalid. */
   2547     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
   2548 	xmlFreeURI(uri);
   2549 	return cal;
   2550     }
   2551     /* 'cal' can contain a relative path with backslashes. If that is processed
   2552        by xmlSaveURI, they will be escaped and the external entity loader machinery
   2553        will fail. So convert them to slashes. Misuse 'ret' for walking. */
   2554     ret = cal;
   2555     while (*ret != '\0') {
   2556 	if (*ret == '\\')
   2557 	    *ret = '/';
   2558 	ret++;
   2559     }
   2560 #endif
   2561     memset(&temp, 0, sizeof(temp));
   2562     temp.path = (char *) cal;
   2563     ret = xmlSaveUri(&temp);
   2564     xmlFree(cal);
   2565     return(ret);
   2566 }
   2567 #define bottom_uri
   2568 #include "elfgcchack.h"
   2569