Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright (C) 2007 Michael Brown <mbrown (at) fensystems.co.uk>.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License as
      6  * published by the Free Software Foundation; either version 2 of the
      7  * License, or any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program; if not, write to the Free Software
     16  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     17  */
     18 
     19 FILE_LICENCE ( GPL2_OR_LATER );
     20 
     21 /** @file
     22  *
     23  * Uniform Resource Identifiers
     24  *
     25  */
     26 
     27 #include <stdint.h>
     28 #include <stdlib.h>
     29 #include <string.h>
     30 #include <libgen.h>
     31 #include <ctype.h>
     32 #include <gpxe/vsprintf.h>
     33 #include <gpxe/uri.h>
     34 
     35 /**
     36  * Dump URI for debugging
     37  *
     38  * @v uri		URI
     39  */
     40 static void dump_uri ( struct uri *uri ) {
     41 	if ( ! uri )
     42 		return;
     43 	if ( uri->scheme )
     44 		DBG ( " scheme \"%s\"", uri->scheme );
     45 	if ( uri->opaque )
     46 		DBG ( " opaque \"%s\"", uri->opaque );
     47 	if ( uri->user )
     48 		DBG ( " user \"%s\"", uri->user );
     49 	if ( uri->password )
     50 		DBG ( " password \"%s\"", uri->password );
     51 	if ( uri->host )
     52 		DBG ( " host \"%s\"", uri->host );
     53 	if ( uri->port )
     54 		DBG ( " port \"%s\"", uri->port );
     55 	if ( uri->path )
     56 		DBG ( " path \"%s\"", uri->path );
     57 	if ( uri->query )
     58 		DBG ( " query \"%s\"", uri->query );
     59 	if ( uri->fragment )
     60 		DBG ( " fragment \"%s\"", uri->fragment );
     61 }
     62 
     63 /**
     64  * Parse URI
     65  *
     66  * @v uri_string	URI as a string
     67  * @ret uri		URI
     68  *
     69  * Splits a URI into its component parts.  The return URI structure is
     70  * dynamically allocated and must eventually be freed by calling
     71  * uri_put().
     72  */
     73 struct uri * parse_uri ( const char *uri_string ) {
     74 	struct uri *uri;
     75 	char *raw;
     76 	char *tmp;
     77 	char *path = NULL;
     78 	char *authority = NULL;
     79 	int i;
     80 	size_t raw_len;
     81 
     82 	/* Allocate space for URI struct and a copy of the string */
     83 	raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
     84 	uri = zalloc ( sizeof ( *uri ) + raw_len );
     85 	if ( ! uri )
     86 		return NULL;
     87 	raw = ( ( ( char * ) uri ) + sizeof ( *uri ) );
     88 
     89 	/* Copy in the raw string */
     90 	memcpy ( raw, uri_string, raw_len );
     91 
     92 	/* Start by chopping off the fragment, if it exists */
     93 	if ( ( tmp = strchr ( raw, '#' ) ) ) {
     94 		*(tmp++) = '\0';
     95 		uri->fragment = tmp;
     96 	}
     97 
     98 	/* Identify absolute/relative URI.  We ignore schemes that are
     99 	 * apparently only a single character long, since otherwise we
    100 	 * misinterpret a DOS-style path name ("C:\path\to\file") as a
    101 	 * URI with scheme="C",opaque="\path\to\file".
    102 	 */
    103 	if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) {
    104 		/* Absolute URI: identify hierarchical/opaque */
    105 		uri->scheme = raw;
    106 		*(tmp++) = '\0';
    107 		if ( *tmp == '/' ) {
    108 			/* Absolute URI with hierarchical part */
    109 			path = tmp;
    110 		} else {
    111 			/* Absolute URI with opaque part */
    112 			uri->opaque = tmp;
    113 		}
    114 	} else {
    115 		/* Relative URI */
    116 		path = raw;
    117 	}
    118 
    119 	/* If we don't have a path (i.e. we have an absolute URI with
    120 	 * an opaque portion, we're already finished processing
    121 	 */
    122 	if ( ! path )
    123 		goto done;
    124 
    125 	/* Chop off the query, if it exists */
    126 	if ( ( tmp = strchr ( path, '?' ) ) ) {
    127 		*(tmp++) = '\0';
    128 		uri->query = tmp;
    129 	}
    130 
    131 	/* Identify net/absolute/relative path */
    132 	if ( strncmp ( path, "//", 2 ) == 0 ) {
    133 		/* Net path.  If this is terminated by the first '/'
    134 		 * of an absolute path, then we have no space for a
    135 		 * terminator after the authority field, so shuffle
    136 		 * the authority down by one byte, overwriting one of
    137 		 * the two slashes.
    138 		 */
    139 		authority = ( path + 2 );
    140 		if ( ( tmp = strchr ( authority, '/' ) ) ) {
    141 			/* Shuffle down */
    142 			uri->path = tmp;
    143 			memmove ( ( authority - 1 ), authority,
    144 				  ( tmp - authority ) );
    145 			authority--;
    146 			*(--tmp) = '\0';
    147 		}
    148 	} else {
    149 		/* Absolute/relative path */
    150 		uri->path = path;
    151 	}
    152 
    153 	/* Split authority into user[:password] and host[:port] portions */
    154 	if ( ( tmp = strchr ( authority, '@' ) ) ) {
    155 		/* Has user[:password] */
    156 		*(tmp++) = '\0';
    157 		uri->host = tmp;
    158 		uri->user = authority;
    159 		if ( ( tmp = strchr ( authority, ':' ) ) ) {
    160 			/* Has password */
    161 			*(tmp++) = '\0';
    162 			uri->password = tmp;
    163 		}
    164 	} else {
    165 		/* No user:password */
    166 		uri->host = authority;
    167 	}
    168 
    169 	/* Split host into host[:port] */
    170 	if ( ( tmp = strchr ( uri->host, ':' ) ) ) {
    171 		*(tmp++) = '\0';
    172 		uri->port = tmp;
    173 	}
    174 
    175 	/* Decode fields that should be decoded */
    176 	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
    177 		const char *field = uri_get_field ( uri, i );
    178 		if ( field && ( URI_ENCODED & ( 1 << i ) ) )
    179 			uri_decode ( field, ( char * ) field,
    180 				     strlen ( field ) + 1 /* NUL */ );
    181 	}
    182 
    183  done:
    184 	DBG ( "URI \"%s\" split into", uri_string );
    185 	dump_uri ( uri );
    186 	DBG ( "\n" );
    187 
    188 	return uri;
    189 }
    190 
    191 /**
    192  * Get port from URI
    193  *
    194  * @v uri		URI, or NULL
    195  * @v default_port	Default port to use if none specified in URI
    196  * @ret port		Port
    197  */
    198 unsigned int uri_port ( struct uri *uri, unsigned int default_port ) {
    199 	if ( ( ! uri ) || ( ! uri->port ) )
    200 		return default_port;
    201 	return ( strtoul ( uri->port, NULL, 0 ) );
    202 }
    203 
    204 /**
    205  * Unparse URI
    206  *
    207  * @v buf		Buffer to fill with URI string
    208  * @v size		Size of buffer
    209  * @v uri		URI to write into buffer, or NULL
    210  * @v fields		Bitmask of fields to include in URI string, or URI_ALL
    211  * @ret len		Length of URI string
    212  */
    213 int unparse_uri ( char *buf, size_t size, struct uri *uri,
    214 		  unsigned int fields ) {
    215 	/* List of characters that typically go before certain fields */
    216 	static char separators[] = { /* scheme */ 0, /* opaque */ ':',
    217 				     /* user */ 0, /* password */ ':',
    218 				     /* host */ '@', /* port */ ':',
    219 				     /* path */ 0, /* query */ '?',
    220 				     /* fragment */ '#' };
    221 	int used = 0;
    222 	int i;
    223 
    224 	DBG ( "URI unparsing" );
    225 	dump_uri ( uri );
    226 	DBG ( "\n" );
    227 
    228 	/* Ensure buffer is NUL-terminated */
    229 	if ( size )
    230 		buf[0] = '\0';
    231 
    232 	/* Special-case NULL URI */
    233 	if ( ! uri )
    234 		return 0;
    235 
    236 	/* Iterate through requested fields */
    237 	for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) {
    238 		const char *field = uri_get_field ( uri, i );
    239 		char sep = separators[i];
    240 
    241 		/* Ensure `fields' only contains bits for fields that exist */
    242 		if ( ! field )
    243 			fields &= ~( 1 << i );
    244 
    245 		/* Store this field if we were asked to */
    246 		if ( fields & ( 1 << i ) ) {
    247 			/* Print :// if we're non-opaque and had a scheme */
    248 			if ( ( fields & URI_SCHEME_BIT ) &&
    249 			     ( i > URI_OPAQUE ) ) {
    250 				used += ssnprintf ( buf + used, size - used,
    251 						    "://" );
    252 				/* Only print :// once */
    253 				fields &= ~URI_SCHEME_BIT;
    254 			}
    255 
    256 			/* Only print separator if an earlier field exists */
    257 			if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) )
    258 				used += ssnprintf ( buf + used, size - used,
    259 						    "%c", sep );
    260 
    261 			/* Print contents of field, possibly encoded */
    262 			if ( URI_ENCODED & ( 1 << i ) )
    263 				used += uri_encode ( field, buf + used,
    264 						     size - used, i );
    265 			else
    266 				used += ssnprintf ( buf + used, size - used,
    267 						    "%s", field );
    268 		}
    269 	}
    270 
    271 	return used;
    272 }
    273 
    274 /**
    275  * Duplicate URI
    276  *
    277  * @v uri		URI
    278  * @ret uri		Duplicate URI
    279  *
    280  * Creates a modifiable copy of a URI.
    281  */
    282 struct uri * uri_dup ( struct uri *uri ) {
    283 	size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 );
    284 	char buf[len];
    285 
    286 	unparse_uri ( buf, len, uri, URI_ALL );
    287 	return parse_uri ( buf );
    288 }
    289 
    290 /**
    291  * Resolve base+relative path
    292  *
    293  * @v base_uri		Base path
    294  * @v relative_uri	Relative path
    295  * @ret resolved_uri	Resolved path
    296  *
    297  * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative
    298  * path (e.g. "initrd.gz") and produces a new path
    299  * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
    300  * portion of the base path will automatically be stripped; this
    301  * matches the semantics used when resolving the path component of
    302  * URIs.
    303  */
    304 char * resolve_path ( const char *base_path,
    305 		      const char *relative_path ) {
    306 	size_t base_len = ( strlen ( base_path ) + 1 );
    307 	char base_path_copy[base_len];
    308 	char *base_tmp = base_path_copy;
    309 	char *resolved;
    310 
    311 	/* If relative path is absolute, just re-use it */
    312 	if ( relative_path[0] == '/' )
    313 		return strdup ( relative_path );
    314 
    315 	/* Create modifiable copy of path for dirname() */
    316 	memcpy ( base_tmp, base_path, base_len );
    317 	base_tmp = dirname ( base_tmp );
    318 
    319 	/* Process "./" and "../" elements */
    320 	while ( *relative_path == '.' ) {
    321 		relative_path++;
    322 		if ( *relative_path == 0 ) {
    323 			/* Do nothing */
    324 		} else if ( *relative_path == '/' ) {
    325 			relative_path++;
    326 		} else if ( *relative_path == '.' ) {
    327 			relative_path++;
    328 			if ( *relative_path == 0 ) {
    329 				base_tmp = dirname ( base_tmp );
    330 			} else if ( *relative_path == '/' ) {
    331 				base_tmp = dirname ( base_tmp );
    332 				relative_path++;
    333 			} else {
    334 				relative_path -= 2;
    335 				break;
    336 			}
    337 		} else {
    338 			relative_path--;
    339 			break;
    340 		}
    341 	}
    342 
    343 	/* Create and return new path */
    344 	if ( asprintf ( &resolved, "%s%s%s", base_tmp,
    345 			( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
    346 			  "" : "/" ), relative_path ) < 0 )
    347 		return NULL;
    348 
    349 	return resolved;
    350 }
    351 
    352 /**
    353  * Resolve base+relative URI
    354  *
    355  * @v base_uri		Base URI, or NULL
    356  * @v relative_uri	Relative URI
    357  * @ret resolved_uri	Resolved URI
    358  *
    359  * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a
    360  * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
    361  * (e.g. "http://etherboot.org/initrds/initrd.gz").
    362  */
    363 struct uri * resolve_uri ( struct uri *base_uri,
    364 			   struct uri *relative_uri ) {
    365 	struct uri tmp_uri;
    366 	char *tmp_path = NULL;
    367 	struct uri *new_uri;
    368 
    369 	/* If relative URI is absolute, just re-use it */
    370 	if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
    371 		return uri_get ( relative_uri );
    372 
    373 	/* Mangle URI */
    374 	memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
    375 	if ( relative_uri->path ) {
    376 		tmp_path = resolve_path ( ( base_uri->path ?
    377 					    base_uri->path : "/" ),
    378 					  relative_uri->path );
    379 		tmp_uri.path = tmp_path;
    380 		tmp_uri.query = relative_uri->query;
    381 		tmp_uri.fragment = relative_uri->fragment;
    382 	} else if ( relative_uri->query ) {
    383 		tmp_uri.query = relative_uri->query;
    384 		tmp_uri.fragment = relative_uri->fragment;
    385 	} else if ( relative_uri->fragment ) {
    386 		tmp_uri.fragment = relative_uri->fragment;
    387 	}
    388 
    389 	/* Create demangled URI */
    390 	new_uri = uri_dup ( &tmp_uri );
    391 	free ( tmp_path );
    392 	return new_uri;
    393 }
    394 
    395 /**
    396  * Test for unreserved URI characters
    397  *
    398  * @v c			Character to test
    399  * @v field		Field of URI in which character lies
    400  * @ret is_unreserved	Character is an unreserved character
    401  */
    402 static int is_unreserved_uri_char ( int c, int field ) {
    403 	/* According to RFC3986, the unreserved character set is
    404 	 *
    405 	 * A-Z a-z 0-9 - _ . ~
    406 	 *
    407 	 * but we also pass & ; = in queries, / in paths,
    408 	 * and everything in opaques
    409 	 */
    410 	int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) ||
    411 		    ( c == '-' ) || ( c == '_' ) ||
    412 		    ( c == '.' ) || ( c == '~' ) );
    413 
    414 	if ( field == URI_QUERY )
    415 		ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' );
    416 
    417 	if ( field == URI_PATH )
    418 		ok = ok || ( c == '/' );
    419 
    420 	if ( field == URI_OPAQUE )
    421 		ok = 1;
    422 
    423 	return ok;
    424 }
    425 
    426 /**
    427  * URI-encode string
    428  *
    429  * @v raw_string	String to be URI-encoded
    430  * @v buf		Buffer to contain encoded string
    431  * @v len		Length of buffer
    432  * @v field		Field of URI in which string lies
    433  * @ret len		Length of encoded string (excluding NUL)
    434  */
    435 size_t uri_encode ( const char *raw_string, char *buf, ssize_t len,
    436 		    int field ) {
    437 	ssize_t remaining = len;
    438 	size_t used;
    439 	unsigned char c;
    440 
    441 	if ( len > 0 )
    442 		buf[0] = '\0';
    443 
    444 	while ( ( c = *(raw_string++) ) ) {
    445 		if ( is_unreserved_uri_char ( c, field ) ) {
    446 			used = ssnprintf ( buf, remaining, "%c", c );
    447 		} else {
    448 			used = ssnprintf ( buf, remaining, "%%%02X", c );
    449 		}
    450 		buf += used;
    451 		remaining -= used;
    452 	}
    453 
    454 	return ( len - remaining );
    455 }
    456 
    457 /**
    458  * Decode URI-encoded string
    459  *
    460  * @v encoded_string	URI-encoded string
    461  * @v buf		Buffer to contain decoded string
    462  * @v len		Length of buffer
    463  * @ret len		Length of decoded string (excluding NUL)
    464  *
    465  * This function may be used in-place, with @a buf the same as
    466  * @a encoded_string.
    467  */
    468 size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) {
    469 	ssize_t remaining;
    470 	char hexbuf[3];
    471 	char *hexbuf_end;
    472 	unsigned char c;
    473 
    474 	for ( remaining = len; *encoded_string; remaining-- ) {
    475 		if ( *encoded_string == '%' ) {
    476 			encoded_string++;
    477 			snprintf ( hexbuf, sizeof ( hexbuf ), "%s",
    478 				   encoded_string );
    479 			c = strtoul ( hexbuf, &hexbuf_end, 16 );
    480 			encoded_string += ( hexbuf_end - hexbuf );
    481 		} else {
    482 			c = *(encoded_string++);
    483 		}
    484 		if ( remaining > 1 )
    485 			*buf++ = c;
    486 	}
    487 
    488 	if ( len )
    489 		*buf = 0;
    490 
    491 	return ( len - remaining );
    492 }
    493