Home | History | Annotate | Download | only in tinyxml
      1 /*
      2 www.sourceforge.net/projects/tinyxml
      3 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
      4 
      5 This software is provided 'as-is', without any express or implied
      6 warranty. In no event will the authors be held liable for any
      7 damages arising from the use of this software.
      8 
      9 Permission is granted to anyone to use this software for any
     10 purpose, including commercial applications, and to alter it and
     11 redistribute it freely, subject to the following restrictions:
     12 
     13 1. The origin of this software must not be misrepresented; you must
     14 not claim that you wrote the original software. If you use this
     15 software in a product, an acknowledgment in the product documentation
     16 would be appreciated but is not required.
     17 
     18 2. Altered source versions must be plainly marked as such, and
     19 must not be misrepresented as being the original software.
     20 
     21 3. This notice may not be removed or altered from any source
     22 distribution.
     23 */
     24 
     25 #include "tinyxml.h"
     26 #include <ctype.h>
     27 #include <stddef.h>
     28 
     29 //#define DEBUG_PARSER
     30 
// The predefined XML entities recognised by GetEntity(). Each row holds the
// escaped text, the length of that text in bytes, and the single character
// it stands for.
//
// Note that "PutString" hardcodes the same list. This
// is less flexible than it appears. Changing the entries
// or order will break PutString.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
	{ "&amp;",  5, '&' },
	{ "&lt;",   4, '<' },
	{ "&gt;",   4, '>' },
	{ "&quot;", 6, '\"' },
	{ "&apos;", 6, '\'' }
};
     42 
// Bunch of unicode info at:
//		http://www.unicode.org/faq/utf_bom.html
// Including the basis of the table below, which gives the number of bytes in
// a UTF-8 sequence from its lead byte. A 1 is stored for invalid lead bytes --
// although the result will be junk, pass it through as much as possible.
// Beware of these 3-byte sequences, which this parser treats as zero-width:
//				ef bb bf (Microsoft "lead bytes", i.e. the byte order mark)
//				ef bf be
//				ef bf bf

// The three bytes of the UTF-8 byte order mark, in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;

// Indexed by the lead byte; each row covers 16 consecutive byte values.
const int TiXmlBase::utf8ByteTable[256] =
{
	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x00
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x10
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x20
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x30
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x40
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x50
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x60
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x70	End of ASCII range
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x80 0x80 to 0xc1 invalid
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x90
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xa0
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xb0
		1,	1,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xc0 0xc2 to 0xdf 2 byte
		2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xd0
		3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	// 0xe0 0xe0 to 0xef 3 byte
		4,	4,	4,	4,	4,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1	// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
     77 
     78 
     79 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
     80 {
     81 	const unsigned long BYTE_MASK = 0xBF;
     82 	const unsigned long BYTE_MARK = 0x80;
     83 	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
     84 
     85 	if (input < 0x80)
     86 		*length = 1;
     87 	else if ( input < 0x800 )
     88 		*length = 2;
     89 	else if ( input < 0x10000 )
     90 		*length = 3;
     91 	else if ( input < 0x200000 )
     92 		*length = 4;
     93 	else
     94 		{ *length = 0; return; }	// This code won't covert this correctly anyway.
     95 
     96 	output += *length;
     97 
     98 	// Scary scary fall throughs.
     99 	switch (*length)
    100 	{
    101 		case 4:
    102 			--output;
    103 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
    104 			input >>= 6;
    105 		case 3:
    106 			--output;
    107 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
    108 			input >>= 6;
    109 		case 2:
    110 			--output;
    111 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
    112 			input >>= 6;
    113 		case 1:
    114 			--output;
    115 			*output = (char)(input | FIRST_BYTE_MARK[*length]);
    116 	}
    117 }
    118 
    119 
    120 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
    121 {
    122 	// This will only work for low-ascii, everything else is assumed to be a valid
    123 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
    124 	// to figure out alhabetical vs. not across encoding. So take a very
    125 	// conservative approach.
    126 
    127 //	if ( encoding == TIXML_ENCODING_UTF8 )
    128 //	{
    129 		if ( anyByte < 127 )
    130 			return isalpha( anyByte );
    131 		else
    132 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
    133 //	}
    134 //	else
    135 //	{
    136 //		return isalpha( anyByte );
    137 //	}
    138 }
    139 
    140 
    141 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
    142 {
    143 	// This will only work for low-ascii, everything else is assumed to be a valid
    144 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
    145 	// to figure out alhabetical vs. not across encoding. So take a very
    146 	// conservative approach.
    147 
    148 //	if ( encoding == TIXML_ENCODING_UTF8 )
    149 //	{
    150 		if ( anyByte < 127 )
    151 			return isalnum( anyByte );
    152 		else
    153 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
    154 //	}
    155 //	else
    156 //	{
    157 //		return isalnum( anyByte );
    158 //	}
    159 }
    160 
    161 
    162 class TiXmlParsingData
    163 {
    164 	friend class TiXmlDocument;
    165   public:
    166 	void Stamp( const char* now, TiXmlEncoding encoding );
    167 
    168 	const TiXmlCursor& Cursor()	{ return cursor; }
    169 
    170   private:
    171 	// Only used by the document!
    172 	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
    173 	{
    174 		assert( start );
    175 		stamp = start;
    176 		tabsize = _tabsize;
    177 		cursor.row = row;
    178 		cursor.col = col;
    179 	}
    180 
    181 	TiXmlCursor		cursor;
    182 	const char*		stamp;
    183 	int				tabsize;
    184 };
    185 
    186 
    187 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
    188 {
    189 	assert( now );
    190 
    191 	// Do nothing if the tabsize is 0.
    192 	if ( tabsize < 1 )
    193 	{
    194 		return;
    195 	}
    196 
    197 	// Get the current row, column.
    198 	int row = cursor.row;
    199 	int col = cursor.col;
    200 	const char* p = stamp;
    201 	assert( p );
    202 
    203 	while ( p < now )
    204 	{
    205 		// Treat p as unsigned, so we have a happy compiler.
    206 		const unsigned char* pU = (const unsigned char*)p;
    207 
    208 		// Code contributed by Fletcher Dunn: (modified by lee)
    209 		switch (*pU) {
    210 			case 0:
    211 				// We *should* never get here, but in case we do, don't
    212 				// advance past the terminating null character, ever
    213 				return;
    214 
    215 			case '\r':
    216 				// bump down to the next line
    217 				++row;
    218 				col = 0;
    219 				// Eat the character
    220 				++p;
    221 
    222 				// Check for \r\n sequence, and treat this as a single character
    223 				if (*p == '\n') {
    224 					++p;
    225 				}
    226 				break;
    227 
    228 			case '\n':
    229 				// bump down to the next line
    230 				++row;
    231 				col = 0;
    232 
    233 				// Eat the character
    234 				++p;
    235 
    236 				// Check for \n\r sequence, and treat this as a single
    237 				// character.  (Yes, this bizarre thing does occur still
    238 				// on some arcane platforms...)
    239 				if (*p == '\r') {
    240 					++p;
    241 				}
    242 				break;
    243 
    244 			case '\t':
    245 				// Eat the character
    246 				++p;
    247 
    248 				// Skip to next tab stop
    249 				col = (col / tabsize + 1) * tabsize;
    250 				break;
    251 
    252 			case TIXML_UTF_LEAD_0:
    253 				if ( encoding == TIXML_ENCODING_UTF8 )
    254 				{
    255 					if ( *(p+1) && *(p+2) )
    256 					{
    257 						// In these cases, don't advance the column. These are
    258 						// 0-width spaces.
    259 						if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
    260 							p += 3;
    261 						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
    262 							p += 3;
    263 						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
    264 							p += 3;
    265 						else
    266 							{ p +=3; ++col; }	// A normal character.
    267 					}
    268 				}
    269 				else
    270 				{
    271 					++p;
    272 					++col;
    273 				}
    274 				break;
    275 
    276 			default:
    277 				if ( encoding == TIXML_ENCODING_UTF8 )
    278 				{
    279 					// Eat the 1 to 4 byte utf8 character.
    280 					int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
    281 					if ( step == 0 )
    282 						step = 1;		// Error case from bad encoding, but handle gracefully.
    283 					p += step;
    284 
    285 					// Just advance one column, of course.
    286 					++col;
    287 				}
    288 				else
    289 				{
    290 					++p;
    291 					++col;
    292 				}
    293 				break;
    294 		}
    295 	}
    296 	cursor.row = row;
    297 	cursor.col = col;
    298 	assert( cursor.row >= -1 );
    299 	assert( cursor.col >= -1 );
    300 	stamp = p;
    301 	assert( stamp );
    302 }
    303 
    304 
    305 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
    306 {
    307 	if ( !p || !*p )
    308 	{
    309 		return 0;
    310 	}
    311 	if ( encoding == TIXML_ENCODING_UTF8 )
    312 	{
    313 		while ( *p )
    314 		{
    315 			const unsigned char* pU = (const unsigned char*)p;
    316 
    317 			// Skip the stupid Microsoft UTF-8 Byte order marks
    318 			if (	*(pU+0)==TIXML_UTF_LEAD_0
    319 				 && *(pU+1)==TIXML_UTF_LEAD_1
    320 				 && *(pU+2)==TIXML_UTF_LEAD_2 )
    321 			{
    322 				p += 3;
    323 				continue;
    324 			}
    325 			else if(*(pU+0)==TIXML_UTF_LEAD_0
    326 				 && *(pU+1)==0xbfU
    327 				 && *(pU+2)==0xbeU )
    328 			{
    329 				p += 3;
    330 				continue;
    331 			}
    332 			else if(*(pU+0)==TIXML_UTF_LEAD_0
    333 				 && *(pU+1)==0xbfU
    334 				 && *(pU+2)==0xbfU )
    335 			{
    336 				p += 3;
    337 				continue;
    338 			}
    339 
    340 			if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )		// Still using old rules for white space.
    341 				++p;
    342 			else
    343 				break;
    344 		}
    345 	}
    346 	else
    347 	{
    348 		while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
    349 			++p;
    350 	}
    351 
    352 	return p;
    353 }
    354 
    355 #ifdef TIXML_USE_STL
    356 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
    357 {
    358 	for( ;; )
    359 	{
    360 		if ( !in->good() ) return false;
    361 
    362 		int c = in->peek();
    363 		// At this scope, we can't get to a document. So fail silently.
    364 		if ( !IsWhiteSpace( c ) || c <= 0 )
    365 			return true;
    366 
    367 		*tag += (char) in->get();
    368 	}
    369 }
    370 
    371 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
    372 {
    373 	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
    374 	while ( in->good() )
    375 	{
    376 		int c = in->peek();
    377 		if ( c == character )
    378 			return true;
    379 		if ( c <= 0 )		// Silent failure: can't get document at this scope
    380 			return false;
    381 
    382 		in->get();
    383 		*tag += (char) c;
    384 	}
    385 	return false;
    386 }
    387 #endif
    388 
    389 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
    390 {
    391 	*name = "";
    392 	assert( p );
    393 
    394 	// Names start with letters or underscores.
    395 	// Of course, in unicode, tinyxml has no idea what a letter *is*. The
    396 	// algorithm is generous.
    397 	//
    398 	// After that, they can be letters, underscores, numbers,
    399 	// hyphens, or colons. (Colons are valid ony for namespaces,
    400 	// but tinyxml can't tell namespaces from names.)
    401 	if (    p && *p
    402 		 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
    403 	{
    404 		while(		p && *p
    405 				&&	(		IsAlphaNum( (unsigned char ) *p, encoding )
    406 						 || *p == '_'
    407 						 || *p == '-'
    408 						 || *p == '.'
    409 						 || *p == ':' ) )
    410 		{
    411 			(*name) += *p;
    412 			++p;
    413 		}
    414 		return p;
    415 	}
    416 	return 0;
    417 }
    418 
    419 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
    420 {
    421 	// Presume an entity, and pull it out.
    422     TIXML_STRING ent;
    423 	int i;
    424 	*length = 0;
    425 
    426 	if ( *(p+1) && *(p+1) == '#' && *(p+2) )
    427 	{
    428 		unsigned long ucs = 0;
    429 		ptrdiff_t delta = 0;
    430 		unsigned mult = 1;
    431 
    432 		if ( *(p+2) == 'x' )
    433 		{
    434 			// Hexadecimal.
    435 			if ( !*(p+3) ) return 0;
    436 
    437 			const char* q = p+3;
    438 			q = strchr( q, ';' );
    439 
    440 			if ( !q || !*q ) return 0;
    441 
    442 			delta = q-p;
    443 			--q;
    444 
    445 			while ( *q != 'x' )
    446 			{
    447 				if ( *q >= '0' && *q <= '9' )
    448 					ucs += mult * (*q - '0');
    449 				else if ( *q >= 'a' && *q <= 'f' )
    450 					ucs += mult * (*q - 'a' + 10);
    451 				else if ( *q >= 'A' && *q <= 'F' )
    452 					ucs += mult * (*q - 'A' + 10 );
    453 				else
    454 					return 0;
    455 				mult *= 16;
    456 				--q;
    457 			}
    458 		}
    459 		else
    460 		{
    461 			// Decimal.
    462 			if ( !*(p+2) ) return 0;
    463 
    464 			const char* q = p+2;
    465 			q = strchr( q, ';' );
    466 
    467 			if ( !q || !*q ) return 0;
    468 
    469 			delta = q-p;
    470 			--q;
    471 
    472 			while ( *q != '#' )
    473 			{
    474 				if ( *q >= '0' && *q <= '9' )
    475 					ucs += mult * (*q - '0');
    476 				else
    477 					return 0;
    478 				mult *= 10;
    479 				--q;
    480 			}
    481 		}
    482 		if ( encoding == TIXML_ENCODING_UTF8 )
    483 		{
    484 			// convert the UCS to UTF-8
    485 			ConvertUTF32ToUTF8( ucs, value, length );
    486 		}
    487 		else
    488 		{
    489 			*value = (char)ucs;
    490 			*length = 1;
    491 		}
    492 		return p + delta + 1;
    493 	}
    494 
    495 	// Now try to match it.
    496 	for( i=0; i<NUM_ENTITY; ++i )
    497 	{
    498 		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
    499 		{
    500 			assert( strlen( entity[i].str ) == entity[i].strLength );
    501 			*value = entity[i].chr;
    502 			*length = 1;
    503 			return ( p + entity[i].strLength );
    504 		}
    505 	}
    506 
    507 	// So it wasn't an entity, its unrecognized, or something like that.
    508 	*value = *p;	// Don't put back the last one, since we return it!
    509 	return p+1;
    510 }
    511 
    512 
    513 bool TiXmlBase::StringEqual( const char* p,
    514 							 const char* tag,
    515 							 bool ignoreCase,
    516 							 TiXmlEncoding encoding )
    517 {
    518 	assert( p );
    519 	assert( tag );
    520 	if ( !p || !*p )
    521 	{
    522 		assert( 0 );
    523 		return false;
    524 	}
    525 
    526 	const char* q = p;
    527 
    528 	if ( ignoreCase )
    529 	{
    530 		while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
    531 		{
    532 			++q;
    533 			++tag;
    534 		}
    535 
    536 		if ( *tag == 0 )
    537 			return true;
    538 	}
    539 	else
    540 	{
    541 		while ( *q && *tag && *q == *tag )
    542 		{
    543 			++q;
    544 			++tag;
    545 		}
    546 
    547 		if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
    548 			return true;
    549 	}
    550 	return false;
    551 }
    552 
    553 const char* TiXmlBase::ReadText(	const char* p,
    554 									TIXML_STRING * text,
    555 									bool trimWhiteSpace,
    556 									const char* endTag,
    557 									bool caseInsensitive,
    558 									TiXmlEncoding encoding )
    559 {
    560     *text = "";
    561 	if (    !trimWhiteSpace			// certain tags always keep whitespace
    562 		 || !condenseWhiteSpace )	// if true, whitespace is always kept
    563 	{
    564 		// Keep all the white space.
    565 		while (	   p && *p
    566 				&& !StringEqual( p, endTag, caseInsensitive, encoding )
    567 			  )
    568 		{
    569 			int len;
    570 			char cArr[4] = { 0, 0, 0, 0 };
    571 			p = GetChar( p, cArr, &len, encoding );
    572 			text->append( cArr, len );
    573 		}
    574 	}
    575 	else
    576 	{
    577 		bool whitespace = false;
    578 
    579 		// Remove leading white space:
    580 		p = SkipWhiteSpace( p, encoding );
    581 		while (	   p && *p
    582 				&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
    583 		{
    584 			if ( *p == '\r' || *p == '\n' )
    585 			{
    586 				whitespace = true;
    587 				++p;
    588 			}
    589 			else if ( IsWhiteSpace( *p ) )
    590 			{
    591 				whitespace = true;
    592 				++p;
    593 			}
    594 			else
    595 			{
    596 				// If we've found whitespace, add it before the
    597 				// new character. Any whitespace just becomes a space.
    598 				if ( whitespace )
    599 				{
    600 					(*text) += ' ';
    601 					whitespace = false;
    602 				}
    603 				int len;
    604 				char cArr[4] = { 0, 0, 0, 0 };
    605 				p = GetChar( p, cArr, &len, encoding );
    606 				if ( len == 1 )
    607 					(*text) += cArr[0];	// more efficient
    608 				else
    609 					text->append( cArr, len );
    610 			}
    611 		}
    612 	}
    613 	return p + strlen( endTag );
    614 }
    615 
    616 #ifdef TIXML_USE_STL
    617 
    618 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
    619 {
    620 	// The basic issue with a document is that we don't know what we're
    621 	// streaming. Read something presumed to be a tag (and hope), then
    622 	// identify it, and call the appropriate stream method on the tag.
    623 	//
    624 	// This "pre-streaming" will never read the closing ">" so the
    625 	// sub-tag can orient itself.
    626 
    627 	if ( !StreamTo( in, '<', tag ) )
    628 	{
    629 		SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
    630 		return;
    631 	}
    632 
    633 	while ( in->good() )
    634 	{
    635 		int tagIndex = (int) tag->length();
    636 		while ( in->good() && in->peek() != '>' )
    637 		{
    638 			int c = in->get();
    639 			if ( c <= 0 )
    640 			{
    641 				SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
    642 				break;
    643 			}
    644 			(*tag) += (char) c;
    645 		}
    646 
    647 		if ( in->good() )
    648 		{
    649 			// We now have something we presume to be a node of
    650 			// some sort. Identify it, and call the node to
    651 			// continue streaming.
    652 			TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
    653 
    654 			if ( node )
    655 			{
    656 				node->StreamIn( in, tag );
    657 				bool isElement = node->ToElement() != 0;
    658 				delete node;
    659 				node = 0;
    660 
    661 				// If this is the root element, we're done. Parsing will be
    662 				// done by the >> operator.
    663 				if ( isElement )
    664 				{
    665 					return;
    666 				}
    667 			}
    668 			else
    669 			{
    670 				SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
    671 				return;
    672 			}
    673 		}
    674 	}
    675 	// We should have returned sooner.
    676 	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
    677 }
    678 
    679 #endif
    680 
    681 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
    682 {
    683 	ClearError();
    684 
    685 	// Parse away, at the document level. Since a document
    686 	// contains nothing but other tags, most of what happens
    687 	// here is skipping white space.
    688 	if ( !p || !*p )
    689 	{
    690 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
    691 		return 0;
    692 	}
    693 
    694 	// Note that, for a document, this needs to come
    695 	// before the while space skip, so that parsing
    696 	// starts from the pointer we are given.
    697 	location.Clear();
    698 	if ( prevData )
    699 	{
    700 		location.row = prevData->cursor.row;
    701 		location.col = prevData->cursor.col;
    702 	}
    703 	else
    704 	{
    705 		location.row = 0;
    706 		location.col = 0;
    707 	}
    708 	TiXmlParsingData data( p, TabSize(), location.row, location.col );
    709 	location = data.Cursor();
    710 
    711 	if ( encoding == TIXML_ENCODING_UNKNOWN )
    712 	{
    713 		// Check for the Microsoft UTF-8 lead bytes.
    714 		const unsigned char* pU = (const unsigned char*)p;
    715 		if (	*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
    716 			 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
    717 			 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
    718 		{
    719 			encoding = TIXML_ENCODING_UTF8;
    720 			useMicrosoftBOM = true;
    721 		}
    722 	}
    723 
    724     p = SkipWhiteSpace( p, encoding );
    725 	if ( !p )
    726 	{
    727 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
    728 		return 0;
    729 	}
    730 
    731 	while ( p && *p )
    732 	{
    733 		TiXmlNode* node = Identify( p, encoding );
    734 		if ( node )
    735 		{
    736 			p = node->Parse( p, &data, encoding );
    737 			LinkEndChild( node );
    738 		}
    739 		else
    740 		{
    741 			break;
    742 		}
    743 
    744 		// Did we get encoding info?
    745 		if (    encoding == TIXML_ENCODING_UNKNOWN
    746 			 && node->ToDeclaration() )
    747 		{
    748 			TiXmlDeclaration* dec = node->ToDeclaration();
    749 			const char* enc = dec->Encoding();
    750 			assert( enc );
    751 
    752 			if ( *enc == 0 )
    753 				encoding = TIXML_ENCODING_UTF8;
    754 			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
    755 				encoding = TIXML_ENCODING_UTF8;
    756 			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
    757 				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
    758 			else
    759 				encoding = TIXML_ENCODING_LEGACY;
    760 		}
    761 
    762 		p = SkipWhiteSpace( p, encoding );
    763 	}
    764 
    765 	// Was this empty?
    766 	if ( !firstChild ) {
    767 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
    768 		return 0;
    769 	}
    770 
    771 	// All is well.
    772 	return p;
    773 }
    774 
    775 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
    776 {
    777 	// The first error in a chain is more accurate - don't set again!
    778 	if ( error )
    779 		return;
    780 
    781 	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
    782 	error   = true;
    783 	errorId = err;
    784 	errorDesc = errorString[ errorId ];
    785 
    786 	errorLocation.Clear();
    787 	if ( pError && data )
    788 	{
    789 		data->Stamp( pError, encoding );
    790 		errorLocation = data->Cursor();
    791 	}
    792 }
    793 
    794 
    795 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
    796 {
    797 	TiXmlNode* returnNode = 0;
    798 
    799 	p = SkipWhiteSpace( p, encoding );
    800 	if( !p || !*p || *p != '<' )
    801 	{
    802 		return 0;
    803 	}
    804 
    805 	TiXmlDocument* doc = GetDocument();
    806 	p = SkipWhiteSpace( p, encoding );
    807 
    808 	if ( !p || !*p )
    809 	{
    810 		return 0;
    811 	}
    812 
    813 	// What is this thing?
    814 	// - Elements start with a letter or underscore, but xml is reserved.
    815 	// - Comments: <!--
    816 	// - Decleration: <?xml
    817 	// - Everthing else is unknown to tinyxml.
    818 	//
    819 
    820 	const char* xmlHeader = { "<?xml" };
    821 	const char* commentHeader = { "<!--" };
    822 	const char* dtdHeader = { "<!" };
    823 	const char* cdataHeader = { "<![CDATA[" };
    824 
    825 	if ( StringEqual( p, xmlHeader, true, encoding ) )
    826 	{
    827 		#ifdef DEBUG_PARSER
    828 			TIXML_LOG( "XML parsing Declaration\n" );
    829 		#endif
    830 		returnNode = new TiXmlDeclaration();
    831 	}
    832 	else if ( StringEqual( p, commentHeader, false, encoding ) )
    833 	{
    834 		#ifdef DEBUG_PARSER
    835 			TIXML_LOG( "XML parsing Comment\n" );
    836 		#endif
    837 		returnNode = new TiXmlComment();
    838 	}
    839 	else if ( StringEqual( p, cdataHeader, false, encoding ) )
    840 	{
    841 		#ifdef DEBUG_PARSER
    842 			TIXML_LOG( "XML parsing CDATA\n" );
    843 		#endif
    844 		TiXmlText* text = new TiXmlText( "" );
    845 		text->SetCDATA( true );
    846 		returnNode = text;
    847 	}
    848 	else if ( StringEqual( p, dtdHeader, false, encoding ) )
    849 	{
    850 		#ifdef DEBUG_PARSER
    851 			TIXML_LOG( "XML parsing Unknown(1)\n" );
    852 		#endif
    853 		returnNode = new TiXmlUnknown();
    854 	}
    855 	else if (    IsAlpha( *(p+1), encoding )
    856 			  || *(p+1) == '_' )
    857 	{
    858 		#ifdef DEBUG_PARSER
    859 			TIXML_LOG( "XML parsing Element\n" );
    860 		#endif
    861 		returnNode = new TiXmlElement( "" );
    862 	}
    863 	else
    864 	{
    865 		#ifdef DEBUG_PARSER
    866 			TIXML_LOG( "XML parsing Unknown(2)\n" );
    867 		#endif
    868 		returnNode = new TiXmlUnknown();
    869 	}
    870 
    871 	if ( returnNode )
    872 	{
    873 		// Set the parent, so it can report errors
    874 		returnNode->parent = this;
    875 	}
    876 	else
    877 	{
    878 		if ( doc )
    879 			doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
    880 	}
    881 	return returnNode;
    882 }
    883 
    884 #ifdef TIXML_USE_STL
    885 
    886 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
    887 {
    888 	// We're called with some amount of pre-parsing. That is, some of "this"
    889 	// element is in "tag". Go ahead and stream to the closing ">"
    890 	while( in->good() )
    891 	{
    892 		int c = in->get();
    893 		if ( c <= 0 )
    894 		{
    895 			TiXmlDocument* document = GetDocument();
    896 			if ( document )
    897 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
    898 			return;
    899 		}
    900 		(*tag) += (char) c ;
    901 
    902 		if ( c == '>' )
    903 			break;
    904 	}
    905 
    906 	if ( tag->length() < 3 ) return;
    907 
    908 	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
    909 	// If not, identify and stream.
    910 
    911 	if (    tag->at( tag->length() - 1 ) == '>'
    912 		 && tag->at( tag->length() - 2 ) == '/' )
    913 	{
    914 		// All good!
    915 		return;
    916 	}
    917 	else if ( tag->at( tag->length() - 1 ) == '>' )
    918 	{
    919 		// There is more. Could be:
    920 		//		text
    921 		//		closing tag
    922 		//		another node.
    923 		for ( ;; )
    924 		{
    925 			StreamWhiteSpace( in, tag );
    926 
    927 			// Do we have text?
    928 			if ( in->good() && in->peek() != '<' )
    929 			{
    930 				// Yep, text.
    931 				TiXmlText text( "" );
    932 				text.StreamIn( in, tag );
    933 
    934 				// What follows text is a closing tag or another node.
    935 				// Go around again and figure it out.
    936 				continue;
    937 			}
    938 
    939 			// We now have either a closing tag...or another node.
    940 			// We should be at a "<", regardless.
    941 			if ( !in->good() ) return;
    942 			assert( in->peek() == '<' );
    943 			int tagIndex = (int) tag->length();
    944 
    945 			bool closingTag = false;
    946 			bool firstCharFound = false;
    947 
    948 			for( ;; )
    949 			{
    950 				if ( !in->good() )
    951 					return;
    952 
    953 				int c = in->peek();
    954 				if ( c <= 0 )
    955 				{
    956 					TiXmlDocument* document = GetDocument();
    957 					if ( document )
    958 						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
    959 					return;
    960 				}
    961 
    962 				if ( c == '>' )
    963 					break;
    964 
    965 				*tag += (char) c;
    966 				in->get();
    967 
    968 				if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
    969 				{
    970 					firstCharFound = true;
    971 					if ( c == '/' )
    972 						closingTag = true;
    973 				}
    974 			}
    975 			// If it was a closing tag, then read in the closing '>' to clean up the input stream.
    976 			// If it was not, the streaming will be done by the tag.
    977 			if ( closingTag )
    978 			{
    979 				if ( !in->good() )
    980 					return;
    981 
    982 				int c = in->get();
    983 				if ( c <= 0 )
    984 				{
    985 					TiXmlDocument* document = GetDocument();
    986 					if ( document )
    987 						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
    988 					return;
    989 				}
    990 				assert( c == '>' );
    991 				*tag += (char) c;
    992 
    993 				// We are done, once we've found our closing tag.
    994 				return;
    995 			}
    996 			else
    997 			{
    998 				// If not a closing tag, id it, and stream.
    999 				const char* tagloc = tag->c_str() + tagIndex;
   1000 				TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
   1001 				if ( !node )
   1002 					return;
   1003 				node->StreamIn( in, tag );
   1004 				delete node;
   1005 				node = 0;
   1006 
   1007 				// No return: go around from the beginning: text, closing tag, or node.
   1008 			}
   1009 		}
   1010 	}
   1011 }
   1012 #endif
   1013 
   1014 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1015 {
   1016 	p = SkipWhiteSpace( p, encoding );
   1017 	TiXmlDocument* document = GetDocument();
   1018 
   1019 	if ( !p || !*p )
   1020 	{
   1021 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
   1022 		return 0;
   1023 	}
   1024 
   1025 	if ( data )
   1026 	{
   1027 		data->Stamp( p, encoding );
   1028 		location = data->Cursor();
   1029 	}
   1030 
   1031 	if ( *p != '<' )
   1032 	{
   1033 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
   1034 		return 0;
   1035 	}
   1036 
   1037 	p = SkipWhiteSpace( p+1, encoding );
   1038 
   1039 	// Read the name.
   1040 	const char* pErr = p;
   1041 
   1042     p = ReadName( p, &value, encoding );
   1043 	if ( !p || !*p )
   1044 	{
   1045 		if ( document )	document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
   1046 		return 0;
   1047 	}
   1048 
   1049     TIXML_STRING endTag ("</");
   1050 	endTag += value;
   1051 	endTag += ">";
   1052 
   1053 	// Check for and read attributes. Also look for an empty
   1054 	// tag or an end tag.
   1055 	while ( p && *p )
   1056 	{
   1057 		pErr = p;
   1058 		p = SkipWhiteSpace( p, encoding );
   1059 		if ( !p || !*p )
   1060 		{
   1061 			if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
   1062 			return 0;
   1063 		}
   1064 		if ( *p == '/' )
   1065 		{
   1066 			++p;
   1067 			// Empty tag.
   1068 			if ( *p  != '>' )
   1069 			{
   1070 				if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
   1071 				return 0;
   1072 			}
   1073 			return (p+1);
   1074 		}
   1075 		else if ( *p == '>' )
   1076 		{
   1077 			// Done with attributes (if there were any.)
   1078 			// Read the value -- which can include other
   1079 			// elements -- read the end tag, and return.
   1080 			++p;
   1081 			p = ReadValue( p, data, encoding );		// Note this is an Element method, and will set the error if one happens.
   1082 			if ( !p || !*p )
   1083 				return 0;
   1084 
   1085 			// We should find the end tag now
   1086 			if ( StringEqual( p, endTag.c_str(), false, encoding ) )
   1087 			{
   1088 				p += endTag.length();
   1089 				return p;
   1090 			}
   1091 			else
   1092 			{
   1093 				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
   1094 				return 0;
   1095 			}
   1096 		}
   1097 		else
   1098 		{
   1099 			// Try to read an attribute:
   1100 			TiXmlAttribute* attrib = new TiXmlAttribute();
   1101 			if ( !attrib )
   1102 			{
   1103 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
   1104 				return 0;
   1105 			}
   1106 
   1107 			attrib->SetDocument( document );
   1108 			const char* pErr = p;
   1109 			p = attrib->Parse( p, data, encoding );
   1110 
   1111 			if ( !p || !*p )
   1112 			{
   1113 				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
   1114 				delete attrib;
   1115 				return 0;
   1116 			}
   1117 
   1118 			// Handle the strange case of double attributes:
   1119 			TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
   1120 			if ( node )
   1121 			{
   1122 				node->SetValue( attrib->Value() );
   1123 				delete attrib;
   1124 				return 0;
   1125 			}
   1126 
   1127 			attributeSet.Add( attrib );
   1128 		}
   1129 	}
   1130 	return p;
   1131 }
   1132 
   1133 
   1134 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1135 {
   1136 	TiXmlDocument* document = GetDocument();
   1137 
   1138 	// Read in text and elements in any order.
   1139 	const char* pWithWhiteSpace = p;
   1140 	p = SkipWhiteSpace( p, encoding );
   1141 
   1142 	while ( p && *p )
   1143 	{
   1144 		if ( *p != '<' )
   1145 		{
   1146 			// Take what we have, make a text element.
   1147 			TiXmlText* textNode = new TiXmlText( "" );
   1148 
   1149 			if ( !textNode )
   1150 			{
   1151 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
   1152 				    return 0;
   1153 			}
   1154 
   1155 			if ( TiXmlBase::IsWhiteSpaceCondensed() )
   1156 			{
   1157 				p = textNode->Parse( p, data, encoding );
   1158 			}
   1159 			else
   1160 			{
   1161 				// Special case: we want to keep the white space
   1162 				// so that leading spaces aren't removed.
   1163 				p = textNode->Parse( pWithWhiteSpace, data, encoding );
   1164 			}
   1165 
   1166 			if ( !textNode->Blank() )
   1167 				LinkEndChild( textNode );
   1168 			else
   1169 				delete textNode;
   1170 		}
   1171 		else
   1172 		{
   1173 			// We hit a '<'
   1174 			// Have we hit a new element or an end tag? This could also be
   1175 			// a TiXmlText in the "CDATA" style.
   1176 			if ( StringEqual( p, "</", false, encoding ) )
   1177 			{
   1178 				return p;
   1179 			}
   1180 			else
   1181 			{
   1182 				TiXmlNode* node = Identify( p, encoding );
   1183 				if ( node )
   1184 				{
   1185 					p = node->Parse( p, data, encoding );
   1186 					LinkEndChild( node );
   1187 				}
   1188 				else
   1189 				{
   1190 					return 0;
   1191 				}
   1192 			}
   1193 		}
   1194 		pWithWhiteSpace = p;
   1195 		p = SkipWhiteSpace( p, encoding );
   1196 	}
   1197 
   1198 	if ( !p )
   1199 	{
   1200 		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
   1201 	}
   1202 	return p;
   1203 }
   1204 
   1205 
   1206 #ifdef TIXML_USE_STL
   1207 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
   1208 {
   1209 	while ( in->good() )
   1210 	{
   1211 		int c = in->get();
   1212 		if ( c <= 0 )
   1213 		{
   1214 			TiXmlDocument* document = GetDocument();
   1215 			if ( document )
   1216 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
   1217 			return;
   1218 		}
   1219 		(*tag) += (char) c;
   1220 
   1221 		if ( c == '>' )
   1222 		{
   1223 			// All is well.
   1224 			return;
   1225 		}
   1226 	}
   1227 }
   1228 #endif
   1229 
   1230 
   1231 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1232 {
   1233 	TiXmlDocument* document = GetDocument();
   1234 	p = SkipWhiteSpace( p, encoding );
   1235 
   1236 	if ( data )
   1237 	{
   1238 		data->Stamp( p, encoding );
   1239 		location = data->Cursor();
   1240 	}
   1241 	if ( !p || !*p || *p != '<' )
   1242 	{
   1243 		if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
   1244 		return 0;
   1245 	}
   1246 	++p;
   1247     value = "";
   1248 
   1249 	while ( p && *p && *p != '>' )
   1250 	{
   1251 		value += *p;
   1252 		++p;
   1253 	}
   1254 
   1255 	if ( !p )
   1256 	{
   1257 		if ( document )	document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
   1258 	}
   1259 	if ( *p == '>' )
   1260 		return p+1;
   1261 	return p;
   1262 }
   1263 
   1264 #ifdef TIXML_USE_STL
   1265 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
   1266 {
   1267 	while ( in->good() )
   1268 	{
   1269 		int c = in->get();
   1270 		if ( c <= 0 )
   1271 		{
   1272 			TiXmlDocument* document = GetDocument();
   1273 			if ( document )
   1274 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
   1275 			return;
   1276 		}
   1277 
   1278 		(*tag) += (char) c;
   1279 
   1280 		if ( c == '>'
   1281 			 && tag->at( tag->length() - 2 ) == '-'
   1282 			 && tag->at( tag->length() - 3 ) == '-' )
   1283 		{
   1284 			// All is well.
   1285 			return;
   1286 		}
   1287 	}
   1288 }
   1289 #endif
   1290 
   1291 
   1292 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1293 {
   1294 	TiXmlDocument* document = GetDocument();
   1295 	value = "";
   1296 
   1297 	p = SkipWhiteSpace( p, encoding );
   1298 
   1299 	if ( data )
   1300 	{
   1301 		data->Stamp( p, encoding );
   1302 		location = data->Cursor();
   1303 	}
   1304 	const char* startTag = "<!--";
   1305 	const char* endTag   = "-->";
   1306 
   1307 	if ( !StringEqual( p, startTag, false, encoding ) )
   1308 	{
   1309 		document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
   1310 		return 0;
   1311 	}
   1312 	p += strlen( startTag );
   1313 	p = ReadText( p, &value, false, endTag, false, encoding );
   1314 	return p;
   1315 }
   1316 
   1317 
   1318 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1319 {
   1320 	p = SkipWhiteSpace( p, encoding );
   1321 	if ( !p || !*p ) return 0;
   1322 
   1323 	int tabsize = 4;
   1324 	if ( document )
   1325 		tabsize = document->TabSize();
   1326 
   1327 	if ( data )
   1328 	{
   1329 		data->Stamp( p, encoding );
   1330 		location = data->Cursor();
   1331 	}
   1332 	// Read the name, the '=' and the value.
   1333 	const char* pErr = p;
   1334 	p = ReadName( p, &name, encoding );
   1335 	if ( !p || !*p )
   1336 	{
   1337 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
   1338 		return 0;
   1339 	}
   1340 	p = SkipWhiteSpace( p, encoding );
   1341 	if ( !p || !*p || *p != '=' )
   1342 	{
   1343 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
   1344 		return 0;
   1345 	}
   1346 
   1347 	++p;	// skip '='
   1348 	p = SkipWhiteSpace( p, encoding );
   1349 	if ( !p || !*p )
   1350 	{
   1351 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
   1352 		return 0;
   1353 	}
   1354 
   1355 	const char* end;
   1356 
   1357 	if ( *p == '\'' )
   1358 	{
   1359 		++p;
   1360 		end = "\'";
   1361 		p = ReadText( p, &value, false, end, false, encoding );
   1362 	}
   1363 	else if ( *p == '"' )
   1364 	{
   1365 		++p;
   1366 		end = "\"";
   1367 		p = ReadText( p, &value, false, end, false, encoding );
   1368 	}
   1369 	else
   1370 	{
   1371 		// All attribute values should be in single or double quotes.
   1372 		// But this is such a common error that the parser will try
   1373 		// its best, even without them.
   1374 		value = "";
   1375 		while (    p && *p										// existence
   1376 				&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'	// whitespace
   1377 				&& *p != '/' && *p != '>' )						// tag end
   1378 		{
   1379 			value += *p;
   1380 			++p;
   1381 		}
   1382 	}
   1383 	return p;
   1384 }
   1385 
   1386 #ifdef TIXML_USE_STL
   1387 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
   1388 {
   1389 	if ( cdata )
   1390 	{
   1391 		int c = in->get();
   1392 		if ( c <= 0 )
   1393 		{
   1394 			TiXmlDocument* document = GetDocument();
   1395 			if ( document )
   1396 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
   1397 			return;
   1398 		}
   1399 
   1400 		(*tag) += (char) c;
   1401 
   1402 		if ( c == '>'
   1403 			 && tag->at( tag->length() - 2 ) == ']'
   1404 			 && tag->at( tag->length() - 3 ) == ']' )
   1405 		{
   1406 			// All is well.
   1407 			return;
   1408 		}
   1409 	}
   1410 	else
   1411 	{
   1412 		while ( in->good() )
   1413 		{
   1414 			int c = in->peek();
   1415 			if ( c == '<' )
   1416 				return;
   1417 			if ( c <= 0 )
   1418 			{
   1419 				TiXmlDocument* document = GetDocument();
   1420 				if ( document )
   1421 					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
   1422 				return;
   1423 			}
   1424 
   1425 			(*tag) += (char) c;
   1426 			in->get();
   1427 		}
   1428 	}
   1429 }
   1430 #endif
   1431 
   1432 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
   1433 {
   1434 	value = "";
   1435 	TiXmlDocument* document = GetDocument();
   1436 
   1437 	if ( data )
   1438 	{
   1439 		data->Stamp( p, encoding );
   1440 		location = data->Cursor();
   1441 	}
   1442 
   1443 	const char* const startTag = "<![CDATA[";
   1444 	const char* const endTag   = "]]>";
   1445 
   1446 	if ( cdata || StringEqual( p, startTag, false, encoding ) )
   1447 	{
   1448 		cdata = true;
   1449 
   1450 		if ( !StringEqual( p, startTag, false, encoding ) )
   1451 		{
   1452 			document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
   1453 			return 0;
   1454 		}
   1455 		p += strlen( startTag );
   1456 
   1457 		// Keep all the white space, ignore the encoding, etc.
   1458 		while (	   p && *p
   1459 				&& !StringEqual( p, endTag, false, encoding )
   1460 			  )
   1461 		{
   1462 			value += *p;
   1463 			++p;
   1464 		}
   1465 
   1466 		TIXML_STRING dummy;
   1467 		p = ReadText( p, &dummy, false, endTag, false, encoding );
   1468 		return p;
   1469 	}
   1470 	else
   1471 	{
   1472 		bool ignoreWhite = true;
   1473 
   1474 		const char* end = "<";
   1475 		p = ReadText( p, &value, ignoreWhite, end, false, encoding );
   1476 		if ( p )
   1477 			return p-1;	// don't truncate the '<'
   1478 		return 0;
   1479 	}
   1480 }
   1481 
   1482 #ifdef TIXML_USE_STL
   1483 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
   1484 {
   1485 	while ( in->good() )
   1486 	{
   1487 		int c = in->get();
   1488 		if ( c <= 0 )
   1489 		{
   1490 			TiXmlDocument* document = GetDocument();
   1491 			if ( document )
   1492 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
   1493 			return;
   1494 		}
   1495 		(*tag) += (char) c;
   1496 
   1497 		if ( c == '>' )
   1498 		{
   1499 			// All is well.
   1500 			return;
   1501 		}
   1502 	}
   1503 }
   1504 #endif
   1505 
   1506 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
   1507 {
   1508 	p = SkipWhiteSpace( p, _encoding );
   1509 	// Find the beginning, find the end, and look for
   1510 	// the stuff in-between.
   1511 	TiXmlDocument* document = GetDocument();
   1512 	if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
   1513 	{
   1514 		if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
   1515 		return 0;
   1516 	}
   1517 	if ( data )
   1518 	{
   1519 		data->Stamp( p, _encoding );
   1520 		location = data->Cursor();
   1521 	}
   1522 	p += 5;
   1523 
   1524 	version = "";
   1525 	encoding = "";
   1526 	standalone = "";
   1527 
   1528 	while ( p && *p )
   1529 	{
   1530 		if ( *p == '>' )
   1531 		{
   1532 			++p;
   1533 			return p;
   1534 		}
   1535 
   1536 		p = SkipWhiteSpace( p, _encoding );
   1537 		if ( StringEqual( p, "version", true, _encoding ) )
   1538 		{
   1539 			TiXmlAttribute attrib;
   1540 			p = attrib.Parse( p, data, _encoding );
   1541 			version = attrib.Value();
   1542 		}
   1543 		else if ( StringEqual( p, "encoding", true, _encoding ) )
   1544 		{
   1545 			TiXmlAttribute attrib;
   1546 			p = attrib.Parse( p, data, _encoding );
   1547 			encoding = attrib.Value();
   1548 		}
   1549 		else if ( StringEqual( p, "standalone", true, _encoding ) )
   1550 		{
   1551 			TiXmlAttribute attrib;
   1552 			p = attrib.Parse( p, data, _encoding );
   1553 			standalone = attrib.Value();
   1554 		}
   1555 		else
   1556 		{
   1557 			// Read over whatever it is.
   1558 			while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
   1559 				++p;
   1560 		}
   1561 	}
   1562 	return 0;
   1563 }
   1564 
   1565 bool TiXmlText::Blank() const
   1566 {
   1567 	for ( unsigned i=0; i<value.length(); i++ )
   1568 		if ( !IsWhiteSpace( value[i] ) )
   1569 			return false;
   1570 	return true;
   1571 }
   1572 
   1573