1 /* 2 * Copyright (C) 2007 Michael Brown <mbrown (at) fensystems.co.uk>. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License as 6 * published by the Free Software Foundation; either version 2 of the 7 * License, or any later version. 8 * 9 * This program is distributed in the hope that it will be useful, but 10 * WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 */ 18 19 FILE_LICENCE ( GPL2_OR_LATER ); 20 21 /** @file 22 * 23 * Uniform Resource Identifiers 24 * 25 */ 26 27 #include <stdint.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <libgen.h> 31 #include <ctype.h> 32 #include <gpxe/vsprintf.h> 33 #include <gpxe/uri.h> 34 35 /** 36 * Dump URI for debugging 37 * 38 * @v uri URI 39 */ 40 static void dump_uri ( struct uri *uri ) { 41 if ( ! uri ) 42 return; 43 if ( uri->scheme ) 44 DBG ( " scheme \"%s\"", uri->scheme ); 45 if ( uri->opaque ) 46 DBG ( " opaque \"%s\"", uri->opaque ); 47 if ( uri->user ) 48 DBG ( " user \"%s\"", uri->user ); 49 if ( uri->password ) 50 DBG ( " password \"%s\"", uri->password ); 51 if ( uri->host ) 52 DBG ( " host \"%s\"", uri->host ); 53 if ( uri->port ) 54 DBG ( " port \"%s\"", uri->port ); 55 if ( uri->path ) 56 DBG ( " path \"%s\"", uri->path ); 57 if ( uri->query ) 58 DBG ( " query \"%s\"", uri->query ); 59 if ( uri->fragment ) 60 DBG ( " fragment \"%s\"", uri->fragment ); 61 } 62 63 /** 64 * Parse URI 65 * 66 * @v uri_string URI as a string 67 * @ret uri URI 68 * 69 * Splits a URI into its component parts. The return URI structure is 70 * dynamically allocated and must eventually be freed by calling 71 * uri_put(). 72 */ 73 struct uri * parse_uri ( const char *uri_string ) { 74 struct uri *uri; 75 char *raw; 76 char *tmp; 77 char *path = NULL; 78 char *authority = NULL; 79 int i; 80 size_t raw_len; 81 82 /* Allocate space for URI struct and a copy of the string */ 83 raw_len = ( strlen ( uri_string ) + 1 /* NUL */ ); 84 uri = zalloc ( sizeof ( *uri ) + raw_len ); 85 if ( ! uri ) 86 return NULL; 87 raw = ( ( ( char * ) uri ) + sizeof ( *uri ) ); 88 89 /* Copy in the raw string */ 90 memcpy ( raw, uri_string, raw_len ); 91 92 /* Start by chopping off the fragment, if it exists */ 93 if ( ( tmp = strchr ( raw, '#' ) ) ) { 94 *(tmp++) = '\0'; 95 uri->fragment = tmp; 96 } 97 98 /* Identify absolute/relative URI. We ignore schemes that are 99 * apparently only a single character long, since otherwise we 100 * misinterpret a DOS-style path name ("C:\path\to\file") as a 101 * URI with scheme="C",opaque="\path\to\file". 102 */ 103 if ( ( tmp = strchr ( raw, ':' ) ) && ( tmp > ( raw + 1 ) ) ) { 104 /* Absolute URI: identify hierarchical/opaque */ 105 uri->scheme = raw; 106 *(tmp++) = '\0'; 107 if ( *tmp == '/' ) { 108 /* Absolute URI with hierarchical part */ 109 path = tmp; 110 } else { 111 /* Absolute URI with opaque part */ 112 uri->opaque = tmp; 113 } 114 } else { 115 /* Relative URI */ 116 path = raw; 117 } 118 119 /* If we don't have a path (i.e. we have an absolute URI with 120 * an opaque portion, we're already finished processing 121 */ 122 if ( ! path ) 123 goto done; 124 125 /* Chop off the query, if it exists */ 126 if ( ( tmp = strchr ( path, '?' ) ) ) { 127 *(tmp++) = '\0'; 128 uri->query = tmp; 129 } 130 131 /* Identify net/absolute/relative path */ 132 if ( strncmp ( path, "//", 2 ) == 0 ) { 133 /* Net path. If this is terminated by the first '/' 134 * of an absolute path, then we have no space for a 135 * terminator after the authority field, so shuffle 136 * the authority down by one byte, overwriting one of 137 * the two slashes. 138 */ 139 authority = ( path + 2 ); 140 if ( ( tmp = strchr ( authority, '/' ) ) ) { 141 /* Shuffle down */ 142 uri->path = tmp; 143 memmove ( ( authority - 1 ), authority, 144 ( tmp - authority ) ); 145 authority--; 146 *(--tmp) = '\0'; 147 } 148 } else { 149 /* Absolute/relative path */ 150 uri->path = path; 151 } 152 153 /* Split authority into user[:password] and host[:port] portions */ 154 if ( ( tmp = strchr ( authority, '@' ) ) ) { 155 /* Has user[:password] */ 156 *(tmp++) = '\0'; 157 uri->host = tmp; 158 uri->user = authority; 159 if ( ( tmp = strchr ( authority, ':' ) ) ) { 160 /* Has password */ 161 *(tmp++) = '\0'; 162 uri->password = tmp; 163 } 164 } else { 165 /* No user:password */ 166 uri->host = authority; 167 } 168 169 /* Split host into host[:port] */ 170 if ( ( tmp = strchr ( uri->host, ':' ) ) ) { 171 *(tmp++) = '\0'; 172 uri->port = tmp; 173 } 174 175 /* Decode fields that should be decoded */ 176 for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) { 177 const char *field = uri_get_field ( uri, i ); 178 if ( field && ( URI_ENCODED & ( 1 << i ) ) ) 179 uri_decode ( field, ( char * ) field, 180 strlen ( field ) + 1 /* NUL */ ); 181 } 182 183 done: 184 DBG ( "URI \"%s\" split into", uri_string ); 185 dump_uri ( uri ); 186 DBG ( "\n" ); 187 188 return uri; 189 } 190 191 /** 192 * Get port from URI 193 * 194 * @v uri URI, or NULL 195 * @v default_port Default port to use if none specified in URI 196 * @ret port Port 197 */ 198 unsigned int uri_port ( struct uri *uri, unsigned int default_port ) { 199 if ( ( ! uri ) || ( ! uri->port ) ) 200 return default_port; 201 return ( strtoul ( uri->port, NULL, 0 ) ); 202 } 203 204 /** 205 * Unparse URI 206 * 207 * @v buf Buffer to fill with URI string 208 * @v size Size of buffer 209 * @v uri URI to write into buffer, or NULL 210 * @v fields Bitmask of fields to include in URI string, or URI_ALL 211 * @ret len Length of URI string 212 */ 213 int unparse_uri ( char *buf, size_t size, struct uri *uri, 214 unsigned int fields ) { 215 /* List of characters that typically go before certain fields */ 216 static char separators[] = { /* scheme */ 0, /* opaque */ ':', 217 /* user */ 0, /* password */ ':', 218 /* host */ '@', /* port */ ':', 219 /* path */ 0, /* query */ '?', 220 /* fragment */ '#' }; 221 int used = 0; 222 int i; 223 224 DBG ( "URI unparsing" ); 225 dump_uri ( uri ); 226 DBG ( "\n" ); 227 228 /* Ensure buffer is NUL-terminated */ 229 if ( size ) 230 buf[0] = '\0'; 231 232 /* Special-case NULL URI */ 233 if ( ! uri ) 234 return 0; 235 236 /* Iterate through requested fields */ 237 for ( i = URI_FIRST_FIELD; i <= URI_LAST_FIELD; i++ ) { 238 const char *field = uri_get_field ( uri, i ); 239 char sep = separators[i]; 240 241 /* Ensure `fields' only contains bits for fields that exist */ 242 if ( ! field ) 243 fields &= ~( 1 << i ); 244 245 /* Store this field if we were asked to */ 246 if ( fields & ( 1 << i ) ) { 247 /* Print :// if we're non-opaque and had a scheme */ 248 if ( ( fields & URI_SCHEME_BIT ) && 249 ( i > URI_OPAQUE ) ) { 250 used += ssnprintf ( buf + used, size - used, 251 "://" ); 252 /* Only print :// once */ 253 fields &= ~URI_SCHEME_BIT; 254 } 255 256 /* Only print separator if an earlier field exists */ 257 if ( sep && ( fields & ( ( 1 << i ) - 1 ) ) ) 258 used += ssnprintf ( buf + used, size - used, 259 "%c", sep ); 260 261 /* Print contents of field, possibly encoded */ 262 if ( URI_ENCODED & ( 1 << i ) ) 263 used += uri_encode ( field, buf + used, 264 size - used, i ); 265 else 266 used += ssnprintf ( buf + used, size - used, 267 "%s", field ); 268 } 269 } 270 271 return used; 272 } 273 274 /** 275 * Duplicate URI 276 * 277 * @v uri URI 278 * @ret uri Duplicate URI 279 * 280 * Creates a modifiable copy of a URI. 281 */ 282 struct uri * uri_dup ( struct uri *uri ) { 283 size_t len = ( unparse_uri ( NULL, 0, uri, URI_ALL ) + 1 ); 284 char buf[len]; 285 286 unparse_uri ( buf, len, uri, URI_ALL ); 287 return parse_uri ( buf ); 288 } 289 290 /** 291 * Resolve base+relative path 292 * 293 * @v base_uri Base path 294 * @v relative_uri Relative path 295 * @ret resolved_uri Resolved path 296 * 297 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative 298 * path (e.g. "initrd.gz") and produces a new path 299 * (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory 300 * portion of the base path will automatically be stripped; this 301 * matches the semantics used when resolving the path component of 302 * URIs. 303 */ 304 char * resolve_path ( const char *base_path, 305 const char *relative_path ) { 306 size_t base_len = ( strlen ( base_path ) + 1 ); 307 char base_path_copy[base_len]; 308 char *base_tmp = base_path_copy; 309 char *resolved; 310 311 /* If relative path is absolute, just re-use it */ 312 if ( relative_path[0] == '/' ) 313 return strdup ( relative_path ); 314 315 /* Create modifiable copy of path for dirname() */ 316 memcpy ( base_tmp, base_path, base_len ); 317 base_tmp = dirname ( base_tmp ); 318 319 /* Process "./" and "../" elements */ 320 while ( *relative_path == '.' ) { 321 relative_path++; 322 if ( *relative_path == 0 ) { 323 /* Do nothing */ 324 } else if ( *relative_path == '/' ) { 325 relative_path++; 326 } else if ( *relative_path == '.' ) { 327 relative_path++; 328 if ( *relative_path == 0 ) { 329 base_tmp = dirname ( base_tmp ); 330 } else if ( *relative_path == '/' ) { 331 base_tmp = dirname ( base_tmp ); 332 relative_path++; 333 } else { 334 relative_path -= 2; 335 break; 336 } 337 } else { 338 relative_path--; 339 break; 340 } 341 } 342 343 /* Create and return new path */ 344 if ( asprintf ( &resolved, "%s%s%s", base_tmp, 345 ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ? 346 "" : "/" ), relative_path ) < 0 ) 347 return NULL; 348 349 return resolved; 350 } 351 352 /** 353 * Resolve base+relative URI 354 * 355 * @v base_uri Base URI, or NULL 356 * @v relative_uri Relative URI 357 * @ret resolved_uri Resolved URI 358 * 359 * Takes a base URI (e.g. "http://etherboot.org/kernels/vmlinuz" and a 360 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI 361 * (e.g. "http://etherboot.org/initrds/initrd.gz"). 362 */ 363 struct uri * resolve_uri ( struct uri *base_uri, 364 struct uri *relative_uri ) { 365 struct uri tmp_uri; 366 char *tmp_path = NULL; 367 struct uri *new_uri; 368 369 /* If relative URI is absolute, just re-use it */ 370 if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) ) 371 return uri_get ( relative_uri ); 372 373 /* Mangle URI */ 374 memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) ); 375 if ( relative_uri->path ) { 376 tmp_path = resolve_path ( ( base_uri->path ? 377 base_uri->path : "/" ), 378 relative_uri->path ); 379 tmp_uri.path = tmp_path; 380 tmp_uri.query = relative_uri->query; 381 tmp_uri.fragment = relative_uri->fragment; 382 } else if ( relative_uri->query ) { 383 tmp_uri.query = relative_uri->query; 384 tmp_uri.fragment = relative_uri->fragment; 385 } else if ( relative_uri->fragment ) { 386 tmp_uri.fragment = relative_uri->fragment; 387 } 388 389 /* Create demangled URI */ 390 new_uri = uri_dup ( &tmp_uri ); 391 free ( tmp_path ); 392 return new_uri; 393 } 394 395 /** 396 * Test for unreserved URI characters 397 * 398 * @v c Character to test 399 * @v field Field of URI in which character lies 400 * @ret is_unreserved Character is an unreserved character 401 */ 402 static int is_unreserved_uri_char ( int c, int field ) { 403 /* According to RFC3986, the unreserved character set is 404 * 405 * A-Z a-z 0-9 - _ . ~ 406 * 407 * but we also pass & ; = in queries, / in paths, 408 * and everything in opaques 409 */ 410 int ok = ( isupper ( c ) || islower ( c ) || isdigit ( c ) || 411 ( c == '-' ) || ( c == '_' ) || 412 ( c == '.' ) || ( c == '~' ) ); 413 414 if ( field == URI_QUERY ) 415 ok = ok || ( c == ';' ) || ( c == '&' ) || ( c == '=' ); 416 417 if ( field == URI_PATH ) 418 ok = ok || ( c == '/' ); 419 420 if ( field == URI_OPAQUE ) 421 ok = 1; 422 423 return ok; 424 } 425 426 /** 427 * URI-encode string 428 * 429 * @v raw_string String to be URI-encoded 430 * @v buf Buffer to contain encoded string 431 * @v len Length of buffer 432 * @v field Field of URI in which string lies 433 * @ret len Length of encoded string (excluding NUL) 434 */ 435 size_t uri_encode ( const char *raw_string, char *buf, ssize_t len, 436 int field ) { 437 ssize_t remaining = len; 438 size_t used; 439 unsigned char c; 440 441 if ( len > 0 ) 442 buf[0] = '\0'; 443 444 while ( ( c = *(raw_string++) ) ) { 445 if ( is_unreserved_uri_char ( c, field ) ) { 446 used = ssnprintf ( buf, remaining, "%c", c ); 447 } else { 448 used = ssnprintf ( buf, remaining, "%%%02X", c ); 449 } 450 buf += used; 451 remaining -= used; 452 } 453 454 return ( len - remaining ); 455 } 456 457 /** 458 * Decode URI-encoded string 459 * 460 * @v encoded_string URI-encoded string 461 * @v buf Buffer to contain decoded string 462 * @v len Length of buffer 463 * @ret len Length of decoded string (excluding NUL) 464 * 465 * This function may be used in-place, with @a buf the same as 466 * @a encoded_string. 467 */ 468 size_t uri_decode ( const char *encoded_string, char *buf, ssize_t len ) { 469 ssize_t remaining; 470 char hexbuf[3]; 471 char *hexbuf_end; 472 unsigned char c; 473 474 for ( remaining = len; *encoded_string; remaining-- ) { 475 if ( *encoded_string == '%' ) { 476 encoded_string++; 477 snprintf ( hexbuf, sizeof ( hexbuf ), "%s", 478 encoded_string ); 479 c = strtoul ( hexbuf, &hexbuf_end, 16 ); 480 encoded_string += ( hexbuf_end - hexbuf ); 481 } else { 482 c = *(encoded_string++); 483 } 484 if ( remaining > 1 ) 485 *buf++ = c; 486 } 487 488 if ( len ) 489 *buf = 0; 490 491 return ( len - remaining ); 492 } 493