Home | History | Annotate | Download | only in dae
      1 /*
      2 * Copyright 2006 Sony Computer Entertainment Inc.
      3 *
      4 * Licensed under the MIT Open Source License, for details please see license.txt or the website
      5 * http://www.opensource.org/licenses/mit-license.php
      6 *
      7 */
      8 
      9 #include <algorithm>
     10 #include <dae.h>
     11 #include <dae/daeURI.h>
     12 #include <ctype.h>
     13 #include <dae/daeDocument.h>
     14 #include <dae/daeErrorHandler.h>
     15 #include <dae/daeUtils.h>
     16 #include <pcrecpp.h>
     17 
     18 using namespace std;
     19 using namespace cdom;
     20 
     21 void daeURI::initialize() {
     22 	reset();
     23 	container = NULL;
     24 }
     25 
     26 daeURI::~daeURI() { }
     27 
     28 daeURI::daeURI(DAE& dae) : dae(&dae) {
     29 	initialize();
     30 }
     31 
     32 daeURI::daeURI(DAE& dae, const string& uriStr, daeBool nofrag) : dae(&dae) {
     33 	initialize();
     34 
     35 	if (nofrag) {
     36 		size_t pos = uriStr.find_last_of('#');
     37 		if (pos != string::npos) {
     38 			set(uriStr.substr(0, pos));
     39 			return;
     40 		}
     41 	}
     42 
     43 	set(uriStr);
     44 }
     45 
     46 daeURI::daeURI(const daeURI& baseURI, const string& uriStr) : dae(baseURI.getDAE())
     47 {
     48 	initialize();
     49 	set(uriStr, &baseURI);
     50 }
     51 
     52 daeURI::daeURI(const daeURI& copyFrom_) : dae(copyFrom_.getDAE()), container(NULL)
     53 {
     54 	initialize();
     55 	copyFrom(copyFrom_);
     56 }
     57 
     58 daeURI::daeURI(daeElement& container_, const std::string& uriStr)
     59 	: dae(container_.getDAE())
     60 {
     61 	initialize();
     62 	container = &container_;
     63 	set(uriStr);
     64 }
     65 
     66 daeURI::daeURI(DAE& dae, daeElement& container_, const string& uriStr)
     67 	: dae(&dae)
     68 {
     69 	initialize();
     70 	container = &container_;
     71 	set(uriStr);
     72 }
     73 
     74 void
     75 daeURI::copyFrom(const daeURI& copyFrom)
     76 {
     77 	if (!container)
     78 		container = copyFrom.container;
     79 	set(copyFrom.originalStr());
     80 }
     81 
     82 daeURI& daeURI::operator=(const daeURI& other) {
     83 	copyFrom(other);
     84 	return *this;
     85 }
     86 
     87 daeURI& daeURI::operator=(const string& uriStr) {
     88 	set(uriStr);
     89 	return *this;
     90 }
     91 
     92 void daeURI::reset() {
     93 	// Clear everything except the container, which doesn't change for the lifetime of the daeURI
     94 	uriString	         = "";
     95 	originalURIString	 = "";
     96 	_scheme            = "";
     97 	_authority	       = "";
     98 	_path              = "";
     99 	_query             = "";
    100 	_fragment          = "";
    101 }
    102 
    103 DAE* daeURI::getDAE() const {
    104 	return dae;
    105 }
    106 
    107 
    108 const string& daeURI::str() const {
    109 	return uriString;
    110 }
    111 
    112 const string& daeURI::originalStr() const {
    113 	return originalURIString;
    114 }
    115 
    116 daeString daeURI::getURI() const {
    117 	return str().c_str();
    118 }
    119 
    120 daeString daeURI::getOriginalURI() const {
    121 	return originalStr().c_str();
    122 }
    123 
    124 
    125 namespace {
    126 	void parsePath(const string& path,
    127 	               /* out */ string& dir,
    128 	               /* out */ string& baseName,
    129 	               /* out */ string& extension) {
    130 		// !!!steveT Currently, if we have a file name that begins with a '.', as in
    131 		// ".emacs", that will be treated as having no base name with an extension
    132 		// of ".emacs". We might want to change this behavior, so that the base name
    133 		// is considered ".emacs" and the extension is empty. I think this is more
    134 		// in line with what path parsers in other libraries/languages do, and it
    135 		// more accurately reflects the intended structure of the file name.
    136 
    137         // The following implementation cannot handle paths like this:
    138         // /tmp/se.3/file
    139         //static pcrecpp::RE re("(.*/)?([^.]*)?(\\..*)?");
    140 		//dir = baseName = extension = "";
    141 		//re.FullMatch(path, &dir, &baseName, &extension);
    142 
    143         static pcrecpp::RE findDir("(.*/)?(.*)?");
    144         static pcrecpp::RE findExt("([^.]*)?(\\..*)?");
    145         string tmpFile;
    146         dir = baseName = extension = tmpFile = "";
    147         findDir.PartialMatch(path, &dir, &tmpFile);
    148         findExt.PartialMatch(tmpFile, &baseName, &extension);
    149 	}
    150 }
    151 
    152 void daeURI::set(const string& uriStr_, const daeURI* baseURI) {
    153 	// We make a copy of the uriStr so that set(originalURIString, ...) works properly.
    154 	string uriStr = uriStr_;
    155 	reset();
    156 	originalURIString = uriStr;
    157 
    158 	if (!parseUriRef(uriStr, _scheme, _authority, _path, _query, _fragment)) {
    159 		reset();
    160 		return;
    161 	}
    162 
    163 	validate(baseURI);
    164 }
    165 
    166 void daeURI::set(const string& scheme_,
    167                  const string& authority_,
    168                  const string& path_,
    169                  const string& query_,
    170                  const string& fragment_,
    171                  const daeURI* baseURI)
    172 {
    173 	set(assembleUri(scheme_, authority_, path_, query_, fragment_), baseURI);
    174 }
    175 
    176 void daeURI::setURI(daeString _URIString, const daeURI* baseURI) {
    177 	string uriStr = _URIString ? _URIString : "";
    178 	set(uriStr, baseURI);
    179 }
    180 
    181 
    182 const string& daeURI::scheme() const { return _scheme; }
    183 const string& daeURI::authority() const { return _authority; }
    184 const string& daeURI::path() const { return _path; }
    185 const string& daeURI::query() const { return _query; }
    186 const string& daeURI::fragment() const { return _fragment; }
    187 const string& daeURI::id() const { return fragment(); }
    188 
    189 
    190 namespace {
    191 	string addSlashToEnd(const string& s) {
    192 		return (!s.empty() && s[s.length()-1] != '/')  ?  s + '/' : s;
    193 	}
    194 }
    195 
    196 void daeURI::pathComponents(string& dir, string& baseName, string& ext) const {
    197 	parsePath(_path, dir, baseName, ext);
    198 }
    199 
    200 string daeURI::pathDir() const {
    201 	string dir, base, ext;
    202 	parsePath(_path, dir, base, ext);
    203 	return dir;
    204 }
    205 
    206 string daeURI::pathFileBase() const {
    207 	string dir, base, ext;
    208 	parsePath(_path, dir, base, ext);
    209 	return base;
    210 }
    211 
    212 string daeURI::pathExt() const {
    213 	string dir, base, ext;
    214 	parsePath(_path, dir, base, ext);
    215 	return ext;
    216 }
    217 
    218 string daeURI::pathFile() const {
    219 	string dir, base, ext;
    220 	parsePath(_path, dir, base, ext);
    221 	return base + ext;
    222 }
    223 
    224 void daeURI::path(const string& dir, const string& baseName, const string& ext) {
    225 	path(addSlashToEnd(dir) + baseName + ext);
    226 }
    227 
    228 void daeURI::pathDir(const string& dir) {
    229 	string tmp, base, ext;
    230 	parsePath(_path, tmp, base, ext);
    231 	path(addSlashToEnd(dir), base, ext);
    232 }
    233 
    234 void daeURI::pathFileBase(const string& baseName) {
    235 	string dir, tmp, ext;
    236 	parsePath(_path, dir, tmp, ext);
    237 	path(dir, baseName, ext);
    238 }
    239 
    240 void daeURI::pathExt(const string& ext) {
    241 	string dir, base, tmp;
    242 	parsePath(_path, dir, base, tmp);
    243 	path(dir, base, ext);
    244 }
    245 
    246 void daeURI::pathFile(const string& file) {
    247 	string dir, base, ext;
    248 	parsePath(_path, dir, base, ext);
    249 	path(dir, file, "");
    250 }
    251 
    252 
    253 daeString daeURI::getScheme() const { return _scheme.c_str(); }
    254 daeString daeURI::getProtocol() const {	return getScheme(); }
    255 daeString daeURI::getAuthority() const { return _authority.c_str(); }
    256 daeString daeURI::getPath() const { return _path.c_str(); }
    257 daeString daeURI::getQuery() const { return _query.c_str(); }
    258 daeString daeURI::getFragment() const { return _fragment.c_str(); }
    259 daeString daeURI::getID() const { return getFragment(); }
    260 daeBool daeURI::getPath(daeChar *dest, daeInt size) const {
    261 	if (int(_path.length()) < size) {
    262 		strcpy(dest, _path.c_str());
    263 		return true;
    264 	}
    265 	return false;
    266 }
    267 
    268 
    269 void daeURI::scheme(const string& scheme_) { set(scheme_, _authority, _path, _query, _fragment); };
    270 void daeURI::authority(const string& authority_) { set(_scheme, authority_, _path, _query, _fragment); }
    271 void daeURI::path(const string& path_) { set(_scheme, _authority, path_, _query, _fragment); }
    272 void daeURI::query(const string& query_) { set(_scheme, _authority, _path, query_, _fragment); }
    273 void daeURI::fragment(const string& fragment_) { set(_scheme, _authority, _path, _query, fragment_); }
    274 void daeURI::id(const string& id) { fragment(id); }
    275 
    276 void
    277 daeURI::print()
    278 {
    279 	fprintf(stderr,"URI(%s)\n",uriString.c_str());
    280 	fprintf(stderr,"scheme = %s\n",_scheme.c_str());
    281 	fprintf(stderr,"authority = %s\n",_authority.c_str());
    282 	fprintf(stderr,"path = %s\n",_path.c_str());
    283 	fprintf(stderr,"query = %s\n",_query.c_str());
    284 	fprintf(stderr,"fragment = %s\n",_fragment.c_str());
    285 	fprintf(stderr,"URI without base = %s\n",originalURIString.c_str());
    286 	fflush(stderr);
    287 }
    288 
    289 namespace {
    290 	void normalize(string& path) {
    291 		daeURI::normalizeURIPath(const_cast<char*>(path.c_str()));
    292 		path = path.substr(0, strlen(path.c_str()));
    293 	}
    294 }
    295 
    296 void
    297 daeURI::validate(const daeURI* baseURI)
    298 {
    299 	// If no base URI was supplied, use the container's document URI. If there's
    300 	// no container or the container doesn't have a doc URI, use the application
    301 	// base URI.
    302 	if (!baseURI) {
    303 		if (container)
    304         {
    305             if (container->getDocument())
    306             {
    307                 if (container->getDocument()->isZAERootDocument())
    308                     baseURI = &container->getDocument()->getExtractedFileURI();
    309                 else
    310                     baseURI = container->getDocumentURI();
    311             }
    312         }
    313         if (!baseURI)
    314             baseURI = &dae->getBaseURI();
    315 		if (this == baseURI)
    316 			return;
    317 	}
    318 
    319 	// This is rewritten according to the updated rfc 3986
    320 	if (!_scheme.empty()) // if defined(R.scheme) then
    321 	{
    322 		// Everything stays the same except path which we normalize
    323 		// T.scheme    = R.scheme;
    324 		// T.authority = R.authority;
    325 		// T.path      = remove_dot_segments(R.path);
    326 		// T.query     = R.query;
    327 		normalize(_path);
    328 	}
    329 	else
    330 	{
    331 		if (!_authority.empty()) // if defined(R.authority) then
    332 		{
    333 			// Authority and query stay the same, path is normalized
    334 			// T.authority = R.authority;
    335 			// T.path      = remove_dot_segments(R.path);
    336 			// T.query     = R.query;
    337 			normalize(_path);
    338 		}
    339 		else
    340 		{
    341 			if (_path.empty())  // if (R.path == "") then
    342 			{
    343 				// T.path = Base.path;
    344 				_path = baseURI->_path;
    345 
    346 				//if defined(R.query) then
    347 				//   T.query = R.query;
    348 				//else
    349 				//   T.query = Base.query;
    350 				//endif;
    351 				if (_query.empty())
    352 					_query = baseURI->_query;
    353 			}
    354 			else
    355 			{
    356 				if (_path[0] == '/')  // if (R.path starts-with "/") then
    357 				{
    358 					// T.path = remove_dot_segments(R.path);
    359 					normalize(_path);
    360 				}
    361 				else
    362 				{
    363 					// T.path = merge(Base.path, R.path);
    364 					if (!baseURI->_authority.empty() && baseURI->_path.empty()) // authority defined, path empty
    365 						_path.insert(0, "/");
    366 					else {
    367 						string dir, baseName, ext;
    368 						parsePath(baseURI->_path, dir, baseName, ext);
    369 						_path = dir + _path;
    370 					}
    371 					// T.path = remove_dot_segments(T.path);
    372 					normalize(_path);
    373 				}
    374 				// T.query = R.query;
    375 			}
    376 			// T.authority = Base.authority;
    377 			_authority = baseURI->_authority;
    378 		}
    379 		// T.scheme = Base.scheme;
    380 		_scheme = baseURI->_scheme;
    381 	}
    382 	// T.fragment = R.fragment;
    383 
    384 	// Reassemble all this into a string version of the URI
    385 	uriString = assembleUri(_scheme, _authority, _path, _query, _fragment);
    386 }
    387 
    388 daeElementRef daeURI::getElement() const {
    389 	return internalResolveElement();
    390 }
    391 
    392 daeElement* daeURI::internalResolveElement() const {
    393 	if (uriString.empty())
    394 		return NULL;
    395 
    396 	return dae->getURIResolvers().resolveElement(*this);
    397 }
    398 
    399 void daeURI::resolveElement() { }
    400 
    401 void daeURI::setContainer(daeElement* cont) {
    402 	container = cont;
    403 	// Since we have a new container element, the base URI may have changed. Re-resolve.
    404 	set(originalURIString);
    405 }
    406 
    407 daeBool daeURI::isExternalReference() const {
    408 	if (uriString.empty())
    409 		return false;
    410 
    411 	if (container && container->getDocumentURI()) {
    412 		daeURI* docURI = container->getDocumentURI();
    413 		if (_path != docURI->_path ||
    414 		    _scheme != docURI->_scheme ||
    415 		    _authority != docURI->_authority) {
    416 			return true;
    417 		}
    418 	}
    419 
    420 	return false;
    421 }
    422 
    423 
    424 daeDocument* daeURI::getReferencedDocument() const {
    425 	string doc = assembleUri(_scheme, _authority, _path, "", "");
    426 	return dae->getDatabase()->getDocument(doc.c_str(), true);
    427 }
    428 
    429 daeURI::ResolveState daeURI::getState() const {
    430 	return uriString.empty() ? uri_empty : uri_loaded;
    431 }
    432 
    433 void daeURI::setState(ResolveState newState) { }
    434 
    435 
    436 // This code is loosely based on the RFC 2396 normalization code from
    437 // libXML. Specifically it does the RFC steps 6.c->6.g from section 5.2
    438 // The path is modified in place, there is no error return.
    439 void daeURI::normalizeURIPath(char* path)
    440 {
    441 	char *cur, // location we are currently processing
    442 	     *out; // Everything from this back we are done with
    443 
    444 	// Return if the path pointer is null
    445 
    446 	if (path == NULL) return;
    447 
    448 	// Skip any initial / characters to get us to the start of the first segment
    449 
    450 	for(cur=path; *cur == '/'; cur++);
    451 
    452 	// Return if we hit the end of the string
    453 
    454 	if (*cur == 0) return;
    455 
    456 	// Keep everything we've seen so far.
    457 
    458 	out = cur;
    459 
    460 	// Analyze each segment in sequence for cases (c) and (d).
    461 
    462 	while (*cur != 0)
    463 	{
    464 		// (c) All occurrences of "./", where "." is a complete path segment, are removed from the buffer string.
    465 
    466 		if ((*cur == '.') && (*(cur+1) == '/'))
    467 		{
    468 			cur += 2;
    469 			// If there were multiple slashes, skip them too
    470 			while (*cur == '/') cur++;
    471 			continue;
    472 		}
    473 
    474 		// (d) If the buffer string ends with "." as a complete path segment, that "." is removed.
    475 
    476 		if ((*cur == '.') && (*(cur+1) == 0))
    477 			break;
    478 
    479 		// If we passed the above tests copy the segment to the output side
    480 
    481 		while (*cur != '/' && *cur != 0)
    482 		{
    483 			*(out++) = *(cur++);
    484 		}
    485 
    486 		if(*cur != 0)
    487 		{
    488 			// Skip any occurrances of // at the end of the segment
    489 
    490 			while ((*cur == '/') && (*(cur+1) == '/')) cur++;
    491 
    492 			// Bring the last character in the segment (/ or a null terminator) into the output
    493 
    494 			*(out++) = *(cur++);
    495 		}
    496 	}
    497 
    498 	*out = 0;
    499 
    500     // Restart at the beginning of the first segment for the next part
    501 
    502 	for(cur=path; *cur == '/'; cur++);
    503 	if (*cur == 0) return;
    504 
    505 	// Analyze each segment in sequence for cases (e) and (f).
    506 	//
    507 	// e) All occurrences of "<segment>/../", where <segment> is a
    508 	//    complete path segment not equal to "..", are removed from the
    509 	//    buffer string.  Removal of these path segments is performed
    510 	//    iteratively, removing the leftmost matching pattern on each
    511 	//    iteration, until no matching pattern remains.
    512 	//
    513 	// f) If the buffer string ends with "<segment>/..", where <segment>
    514 	//    is a complete path segment not equal to "..", that
    515 	//    "<segment>/.." is removed.
    516 	//
    517 	// To satisfy the "iterative" clause in (e), we need to collapse the
    518 	// string every time we find something that needs to be removed.  Thus,
    519 	// we don't need to keep two pointers into the string: we only need a
    520 	// "current position" pointer.
    521 	//
    522 	while (true)
    523 	{
    524 		char *segp, *tmp;
    525 
    526 		// At the beginning of each iteration of this loop, "cur" points to
    527 		// the first character of the segment we want to examine.
    528 
    529 		// Find the end of the current segment.
    530 
    531 		for(segp = cur;(*segp != '/') && (*segp != 0); ++segp);
    532 
    533 		// If this is the last segment, we're done (we need at least two
    534 		// segments to meet the criteria for the (e) and (f) cases).
    535 
    536 		if (*segp == 0)
    537 			break;
    538 
    539 		// If the first segment is "..", or if the next segment _isn't_ "..",
    540 		// keep this segment and try the next one.
    541 
    542 		++segp;
    543 		if (((*cur == '.') && (cur[1] == '.') && (segp == cur+3))
    544             || ((*segp != '.') || (segp[1] != '.')
    545             || ((segp[2] != '/') && (segp[2] != 0))))
    546 		{
    547 			cur = segp;
    548 			continue;
    549 		}
    550 
    551 		// If we get here, remove this segment and the next one and back up
    552 		// to the previous segment (if there is one), to implement the
    553 		// "iteratively" clause.  It's pretty much impossible to back up
    554 		// while maintaining two pointers into the buffer, so just compact
    555 		// the whole buffer now.
    556 
    557 		// If this is the end of the buffer, we're done.
    558 
    559 		if (segp[2] == 0)
    560 		{
    561 			*cur = 0;
    562 			break;
    563 		}
    564 
    565 		// Strings overlap during this copy, but not in a bad way, just avoid using strcpy
    566 
    567 		tmp = cur;
    568 		segp += 3;
    569 		while ((*(tmp++) = *(segp++)) != 0);
    570 
    571 		// If there are no previous segments, then keep going from here.
    572 
    573 		segp = cur;
    574 		while ((segp > path) && (*(--segp) == '/'));
    575 
    576 		if (segp == path)
    577 			continue;
    578 
    579 		// "segp" is pointing to the end of a previous segment; find it's
    580 		// start.  We need to back up to the previous segment and start
    581 		// over with that to handle things like "foo/bar/../..".  If we
    582 		// don't do this, then on the first pass we'll remove the "bar/..",
    583 		// but be pointing at the second ".." so we won't realize we can also
    584 		// remove the "foo/..".
    585 
    586 		for(cur = segp;(cur > path) && (*(cur-1) != '/'); cur--);
    587 	}
    588 
    589 	*out = 0;
    590 
    591 	// g) If the resulting buffer string still begins with one or more
    592 	//    complete path segments of "..", then the reference is
    593 	//    considered to be in error. Implementations may handle this
    594 	//    error by retaining these components in the resolved path (i.e.,
    595 	//    treating them as part of the final URI), by removing them from
    596 	//    the resolved path (i.e., discarding relative levels above the
    597 	//    root), or by avoiding traversal of the reference.
    598 	//
    599 	// We discard them from the final path.
    600 
    601 	if (*path == '/')
    602 	{
    603 		for(cur=path; (*cur == '/') && (cur[1] == '.') && (cur[2] == '.') && ((cur[3] == '/') || (cur[3] == 0)); cur += 3);
    604 
    605 		if (cur != path)
    606 		{
    607 			for(out=path; *cur != 0; *(out++) = *(cur++));
    608 
    609 			*out = 0;
    610 		}
    611 	}
    612 	return;
    613 }
    614 
    615 // This function will take a resolved URI and create a version of it that is relative to
    616 // another existing URI.  The new URI is stored in the "originalURI"
    617 int daeURI::makeRelativeTo(const daeURI* relativeToURI)
    618 {
    619 	// Can only do this function if both URIs have the same scheme and authority
    620 	if (_scheme != relativeToURI->_scheme  ||  _authority != relativeToURI->_authority)
    621 		return DAE_ERR_INVALID_CALL;
    622 
    623 	// advance till we find a segment that doesn't match
    624 	const char *this_path        = getPath();
    625 	const char *relativeTo_path  = relativeToURI->getPath();
    626 	const char *this_slash       = this_path;
    627 	const char *relativeTo_slash = relativeTo_path;
    628 
    629 	while((*this_path == *relativeTo_path) && *this_path)
    630 	{
    631 		if(*this_path == '/')
    632 		{
    633 			this_slash = this_path;
    634 			relativeTo_slash = relativeTo_path;
    635 		}
    636 		this_path++;
    637 		relativeTo_path++;
    638 	}
    639 
    640 	// Decide how many ../ segments are needed (Filepath should always end in a /)
    641 	int segment_count = 0;
    642 	relativeTo_slash++;
    643 	while(*relativeTo_slash != 0)
    644 	{
    645 		if(*relativeTo_slash == '/')
    646 			segment_count ++;
    647 		relativeTo_slash++;
    648 	}
    649 	this_slash++;
    650 
    651 	string newPath;
    652 	for (int i = 0; i < segment_count; i++)
    653 		newPath += "../";
    654 	newPath += this_slash;
    655 
    656 	set("", "", newPath, _query, _fragment, relativeToURI);
    657 	return(DAE_OK);
    658 }
    659 
    660 
    661 daeBool daeURIResolver::_loadExternalDocuments = true;
    662 
    663 daeURIResolver::daeURIResolver(DAE& dae) : dae(&dae) { }
    664 
    665 daeURIResolver::~daeURIResolver() { }
    666 
    667 void daeURIResolver::setAutoLoadExternalDocuments( daeBool load )
    668 {
    669 	_loadExternalDocuments = load;
    670 }
    671 
    672 daeBool daeURIResolver::getAutoLoadExternalDocuments()
    673 {
    674 	return _loadExternalDocuments;
    675 }
    676 
    677 
    678 daeURIResolverList::daeURIResolverList() { }
    679 
    680 daeURIResolverList::~daeURIResolverList() {
    681 	for (size_t i = 0; i < resolvers.getCount(); i++)
    682 		delete resolvers[i];
    683 }
    684 
    685 daeTArray<daeURIResolver*>& daeURIResolverList::list() {
    686 	return resolvers;
    687 }
    688 
    689 daeElement* daeURIResolverList::resolveElement(const daeURI& uri) {
    690 	for (size_t i = 0; i < resolvers.getCount(); i++)
    691 		if (daeElement* elt = resolvers[i]->resolveElement(uri))
    692 			return elt;
    693 	return NULL;
    694 }
    695 
    696 
    697 // Returns true if parsing succeeded, false otherwise. Parsing can fail if the uri
    698 // reference isn't properly formed.
    699 bool cdom::parseUriRef(const string& uriRef,
    700                        string& scheme,
    701                        string& authority,
    702                        string& path,
    703                        string& query,
    704                        string& fragment) {
    705 	// This regular expression for parsing URI references comes from the URI spec:
    706 	//   http://tools.ietf.org/html/rfc3986#appendix-B
    707 	static pcrecpp::RE re("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
    708 	string s1, s3, s6, s8;
    709 	if (re.FullMatch(uriRef, &s1, &scheme, &s3, &authority, &path, &s6, &query, &s8, &fragment))
    710 		return true;
    711 
    712 	return false;
    713 }
    714 
    715 namespace {
    716 	string safeSubstr(const string& s, size_t offset, size_t length) {
    717 		string result = s.substr(offset, min(length, s.length() - offset));
    718 		result.resize(length, '\0');
    719 		return result;
    720 	}
    721 }
    722 
    723 string cdom::assembleUri(const string& scheme,
    724                          const string& authority,
    725                          const string& path,
    726                          const string& query,
    727                          const string& fragment,
    728                          bool forceLibxmlCompatible) {
    729 	string p = safeSubstr(path, 0, 3);
    730 	bool libxmlHack = forceLibxmlCompatible && scheme == "file";
    731 	bool uncPath = false;
    732 	string uri;
    733 
    734 	if (!scheme.empty())
    735 		uri += scheme + ":";
    736 
    737 	if (!authority.empty() || libxmlHack || (p[0] == '/' && p[1] == '/'))
    738 		uri += "//";
    739 	if (!authority.empty()) {
    740 		if (libxmlHack) {
    741 			// We have a UNC path URI of the form file://otherMachine/file.dae.
    742 			// Convert it to file://///otherMachine/file.dae, which is how libxml
    743 			// does UNC paths.
    744 			uri += "///" + authority;
    745 			uncPath = true;
    746 		}
    747 		else {
    748 			uri += authority;
    749 		}
    750 	}
    751 
    752 	if (!uncPath && libxmlHack && getSystemType() == Windows) {
    753 		// We have to be delicate in how we pass absolute path URIs to libxml on Windows.
    754 		// If the path is an absolute path with no drive letter, add an extra slash to
    755 		// appease libxml.
    756 		if (p[0] == '/' && p[1] != '/' && p[2] != ':') {
    757 			uri += "/";
    758 		}
    759 	}
    760 	uri += path;
    761 
    762 	if (!query.empty())
    763 		uri += "?" + query;
    764 	if (!fragment.empty())
    765 		uri += "#" + fragment;
    766 
    767 	return uri;
    768 }
    769 
    770 string cdom::fixUriForLibxml(const string& uriRef) {
    771 	string scheme, authority, path, query, fragment;
    772 	cdom::parseUriRef(uriRef, scheme, authority, path, query, fragment);
    773 	return assembleUri(scheme, authority, path, query, fragment, true);
    774 }
    775 
    776 
    777 string cdom::nativePathToUri(const string& nativePath, systemType type) {
    778 	string uri = nativePath;
    779 
    780 	if (type == Windows) {
    781 		// Convert "c:\" to "/c:/"
    782 		if (uri.length() >= 2  &&  isalpha(uri[0])  &&  uri[1] == ':')
    783 			uri.insert(0, "/");
    784 		// Convert backslashes to forward slashes
    785 		uri = replace(uri, "\\", "/");
    786 	}
    787 
    788 	// Convert spaces to %20
    789 	uri = replace(uri, " ", "%20");
    790 
    791 	return uri;
    792 }
    793 
    794 string cdom::filePathToUri(const string& filePath) {
    795 	return nativePathToUri(filePath);
    796 }
    797 
    798 string cdom::uriToNativePath(const string& uriRef, systemType type) {
    799 	string scheme, authority, path, query, fragment;
    800 	parseUriRef(uriRef, scheme, authority, path, query, fragment);
    801 
    802 	// Make sure we have a file scheme URI, or that it doesn't have a scheme
    803 	if (!scheme.empty()  &&  scheme != "file")
    804 		return "";
    805 
    806 	string filePath;
    807 
    808 	if (type == Windows) {
    809 		if (!authority.empty())
    810 			filePath += string("\\\\") + authority; // UNC path
    811 
    812 		// Replace two leading slashes with one leading slash, so that
    813 		// ///otherComputer/file.dae becomes //otherComputer/file.dae and
    814 		// //folder/file.dae becomes /folder/file.dae
    815 		if (path.length() >= 2  &&  path[0] == '/'  &&  path[1] == '/')
    816 			path.erase(0, 1);
    817 
    818 		// Convert "/C:/" to "C:/"
    819 		if (path.length() >= 3  &&  path[0] == '/'  &&  path[2] == ':')
    820 			path.erase(0, 1);
    821 
    822 		// Convert forward slashes to back slashes
    823 		path = replace(path, "/", "\\");
    824 	}
    825 
    826 	filePath += path;
    827 
    828 	// Replace %20 with space
    829 	filePath = replace(filePath, "%20", " ");
    830 
    831 	return filePath;
    832 }
    833 
    834 string cdom::uriToFilePath(const string& uriRef) {
    835 	return uriToNativePath(uriRef);
    836 }
    837