1 /* 2 www.sourceforge.net/projects/tinyxml 3 Original code by Lee Thomason (www.grinninglizard.com) 4 5 This software is provided 'as-is', without any express or implied 6 warranty. In no event will the authors be held liable for any 7 damages arising from the use of this software. 8 9 Permission is granted to anyone to use this software for any 10 purpose, including commercial applications, and to alter it and 11 redistribute it freely, subject to the following restrictions: 12 13 1. The origin of this software must not be misrepresented; you must 14 not claim that you wrote the original software. If you use this 15 software in a product, an acknowledgment in the product documentation 16 would be appreciated but is not required. 17 18 2. Altered source versions must be plainly marked as such, and 19 must not be misrepresented as being the original software. 20 21 3. This notice may not be removed or altered from any source 22 distribution. 23 */ 24 25 #include <ctype.h> 26 #include <stddef.h> 27 28 #include "tinyxml.h" 29 30 //#define DEBUG_PARSER 31 #if defined( DEBUG_PARSER ) 32 # if defined( DEBUG ) && defined( _MSC_VER ) 33 # include <windows.h> 34 # define TIXML_LOG OutputDebugString 35 # else 36 # define TIXML_LOG printf 37 # endif 38 #endif 39 40 // Note tha "PutString" hardcodes the same list. This 41 // is less flexible than it appears. Changing the entries 42 // or order will break putstring. 43 TiXmlBase::Entity TiXmlBase::entity[ TiXmlBase::NUM_ENTITY ] = 44 { 45 { "&", 5, '&' }, 46 { "<", 4, '<' }, 47 { ">", 4, '>' }, 48 { """, 6, '\"' }, 49 { "'", 6, '\'' } 50 }; 51 52 // Bunch of unicode info at: 53 // http://www.unicode.org/faq/utf_bom.html 54 // Including the basic of this table, which determines the #bytes in the 55 // sequence from the lead byte. 1 placed for invalid sequences -- 56 // although the result will be junk, pass it through as much as possible. 57 // Beware of the non-characters in UTF-8: 58 // ef bb bf (Microsoft "lead bytes") 59 // ef bf be 60 // ef bf bf 61 62 const unsigned char TIXML_UTF_LEAD_0 = 0xefU; 63 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; 64 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; 65 66 const int TiXmlBase::utf8ByteTable[256] = 67 { 68 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 69 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 70 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 71 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 72 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 73 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range 77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid 78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 81 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte 82 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 83 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte 84 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid 85 }; 86 87 88 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) 89 { 90 const unsigned long BYTE_MASK = 0xBF; 91 const unsigned long BYTE_MARK = 0x80; 92 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 93 94 if (input < 0x80) 95 *length = 1; 96 else if ( input < 0x800 ) 97 *length = 2; 98 else if ( input < 0x10000 ) 99 *length = 3; 100 else if ( input < 0x200000 ) 101 *length = 4; 102 else 103 { *length = 0; return; } // This code won't covert this correctly anyway. 104 105 output += *length; 106 107 // Scary scary fall throughs. 108 switch (*length) 109 { 110 case 4: 111 --output; 112 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 113 input >>= 6; 114 case 3: 115 --output; 116 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 117 input >>= 6; 118 case 2: 119 --output; 120 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 121 input >>= 6; 122 case 1: 123 --output; 124 *output = (char)(input | FIRST_BYTE_MARK[*length]); 125 } 126 } 127 128 129 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 130 { 131 // This will only work for low-ascii, everything else is assumed to be a valid 132 // letter. I'm not sure this is the best approach, but it is quite tricky trying 133 // to figure out alhabetical vs. not across encoding. So take a very 134 // conservative approach. 135 136 // if ( encoding == TIXML_ENCODING_UTF8 ) 137 // { 138 if ( anyByte < 127 ) 139 return isalpha( anyByte ); 140 else 141 return 1; // What else to do? The unicode set is huge...get the english ones right. 142 // } 143 // else 144 // { 145 // return isalpha( anyByte ); 146 // } 147 } 148 149 150 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ ) 151 { 152 // This will only work for low-ascii, everything else is assumed to be a valid 153 // letter. I'm not sure this is the best approach, but it is quite tricky trying 154 // to figure out alhabetical vs. not across encoding. So take a very 155 // conservative approach. 156 157 // if ( encoding == TIXML_ENCODING_UTF8 ) 158 // { 159 if ( anyByte < 127 ) 160 return isalnum( anyByte ); 161 else 162 return 1; // What else to do? The unicode set is huge...get the english ones right. 163 // } 164 // else 165 // { 166 // return isalnum( anyByte ); 167 // } 168 } 169 170 171 class TiXmlParsingData 172 { 173 friend class TiXmlDocument; 174 public: 175 void Stamp( const char* now, TiXmlEncoding encoding ); 176 177 const TiXmlCursor& Cursor() const { return cursor; } 178 179 private: 180 // Only used by the document! 181 TiXmlParsingData( const char* start, int _tabsize, int row, int col ) 182 { 183 assert( start ); 184 stamp = start; 185 tabsize = _tabsize; 186 cursor.row = row; 187 cursor.col = col; 188 } 189 190 TiXmlCursor cursor; 191 const char* stamp; 192 int tabsize; 193 }; 194 195 196 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) 197 { 198 assert( now ); 199 200 // Do nothing if the tabsize is 0. 201 if ( tabsize < 1 ) 202 { 203 return; 204 } 205 206 // Get the current row, column. 207 int row = cursor.row; 208 int col = cursor.col; 209 const char* p = stamp; 210 assert( p ); 211 212 while ( p < now ) 213 { 214 // Treat p as unsigned, so we have a happy compiler. 215 const unsigned char* pU = (const unsigned char*)p; 216 217 // Code contributed by Fletcher Dunn: (modified by lee) 218 switch (*pU) { 219 case 0: 220 // We *should* never get here, but in case we do, don't 221 // advance past the terminating null character, ever 222 return; 223 224 case '\r': 225 // bump down to the next line 226 ++row; 227 col = 0; 228 // Eat the character 229 ++p; 230 231 // Check for \r\n sequence, and treat this as a single character 232 if (*p == '\n') { 233 ++p; 234 } 235 break; 236 237 case '\n': 238 // bump down to the next line 239 ++row; 240 col = 0; 241 242 // Eat the character 243 ++p; 244 245 // Check for \n\r sequence, and treat this as a single 246 // character. (Yes, this bizarre thing does occur still 247 // on some arcane platforms...) 248 if (*p == '\r') { 249 ++p; 250 } 251 break; 252 253 case '\t': 254 // Eat the character 255 ++p; 256 257 // Skip to next tab stop 258 col = (col / tabsize + 1) * tabsize; 259 break; 260 261 case TIXML_UTF_LEAD_0: 262 if ( encoding == TIXML_ENCODING_UTF8 ) 263 { 264 if ( *(p+1) && *(p+2) ) 265 { 266 // In these cases, don't advance the column. These are 267 // 0-width spaces. 268 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 ) 269 p += 3; 270 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU ) 271 p += 3; 272 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU ) 273 p += 3; 274 else 275 { p +=3; ++col; } // A normal character. 276 } 277 } 278 else 279 { 280 ++p; 281 ++col; 282 } 283 break; 284 285 default: 286 if ( encoding == TIXML_ENCODING_UTF8 ) 287 { 288 // Eat the 1 to 4 byte utf8 character. 289 int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)]; 290 if ( step == 0 ) 291 step = 1; // Error case from bad encoding, but handle gracefully. 292 p += step; 293 294 // Just advance one column, of course. 295 ++col; 296 } 297 else 298 { 299 ++p; 300 ++col; 301 } 302 break; 303 } 304 } 305 cursor.row = row; 306 cursor.col = col; 307 assert( cursor.row >= -1 ); 308 assert( cursor.col >= -1 ); 309 stamp = p; 310 assert( stamp ); 311 } 312 313 314 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) 315 { 316 if ( !p || !*p ) 317 { 318 return 0; 319 } 320 if ( encoding == TIXML_ENCODING_UTF8 ) 321 { 322 while ( *p ) 323 { 324 const unsigned char* pU = (const unsigned char*)p; 325 326 // Skip the stupid Microsoft UTF-8 Byte order marks 327 if ( *(pU+0)==TIXML_UTF_LEAD_0 328 && *(pU+1)==TIXML_UTF_LEAD_1 329 && *(pU+2)==TIXML_UTF_LEAD_2 ) 330 { 331 p += 3; 332 continue; 333 } 334 else if(*(pU+0)==TIXML_UTF_LEAD_0 335 && *(pU+1)==0xbfU 336 && *(pU+2)==0xbeU ) 337 { 338 p += 3; 339 continue; 340 } 341 else if(*(pU+0)==TIXML_UTF_LEAD_0 342 && *(pU+1)==0xbfU 343 && *(pU+2)==0xbfU ) 344 { 345 p += 3; 346 continue; 347 } 348 349 if ( IsWhiteSpace( *p ) ) // Still using old rules for white space. 350 ++p; 351 else 352 break; 353 } 354 } 355 else 356 { 357 while ( *p && IsWhiteSpace( *p ) ) 358 ++p; 359 } 360 361 return p; 362 } 363 364 #ifdef TIXML_USE_STL 365 /*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag ) 366 { 367 for( ;; ) 368 { 369 if ( !in->good() ) return false; 370 371 int c = in->peek(); 372 // At this scope, we can't get to a document. So fail silently. 373 if ( !IsWhiteSpace( c ) || c <= 0 ) 374 return true; 375 376 *tag += (char) in->get(); 377 } 378 } 379 380 /*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag ) 381 { 382 //assert( character > 0 && character < 128 ); // else it won't work in utf-8 383 while ( in->good() ) 384 { 385 int c = in->peek(); 386 if ( c == character ) 387 return true; 388 if ( c <= 0 ) // Silent failure: can't get document at this scope 389 return false; 390 391 in->get(); 392 *tag += (char) c; 393 } 394 return false; 395 } 396 #endif 397 398 // One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The 399 // "assign" optimization removes over 10% of the execution time. 400 // 401 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) 402 { 403 // Oddly, not supported on some comilers, 404 //name->clear(); 405 // So use this: 406 *name = ""; 407 assert( p ); 408 409 // Names start with letters or underscores. 410 // Of course, in unicode, tinyxml has no idea what a letter *is*. The 411 // algorithm is generous. 412 // 413 // After that, they can be letters, underscores, numbers, 414 // hyphens, or colons. (Colons are valid ony for namespaces, 415 // but tinyxml can't tell namespaces from names.) 416 if ( p && *p 417 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) 418 { 419 const char* start = p; 420 while( p && *p 421 && ( IsAlphaNum( (unsigned char ) *p, encoding ) 422 || *p == '_' 423 || *p == '-' 424 || *p == '.' 425 || *p == ':' ) ) 426 { 427 //(*name) += *p; // expensive 428 ++p; 429 } 430 if ( p-start > 0 ) { 431 name->assign( start, p-start ); 432 } 433 return p; 434 } 435 return 0; 436 } 437 438 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) 439 { 440 // Presume an entity, and pull it out. 441 TIXML_STRING ent; 442 int i; 443 *length = 0; 444 445 if ( *(p+1) && *(p+1) == '#' && *(p+2) ) 446 { 447 unsigned long ucs = 0; 448 ptrdiff_t delta = 0; 449 unsigned mult = 1; 450 451 if ( *(p+2) == 'x' ) 452 { 453 // Hexadecimal. 454 if ( !*(p+3) ) return 0; 455 456 const char* q = p+3; 457 q = strchr( q, ';' ); 458 459 if ( !q || !*q ) return 0; 460 461 delta = q-p; 462 --q; 463 464 while ( *q != 'x' ) 465 { 466 if ( *q >= '0' && *q <= '9' ) 467 ucs += mult * (*q - '0'); 468 else if ( *q >= 'a' && *q <= 'f' ) 469 ucs += mult * (*q - 'a' + 10); 470 else if ( *q >= 'A' && *q <= 'F' ) 471 ucs += mult * (*q - 'A' + 10 ); 472 else 473 return 0; 474 mult *= 16; 475 --q; 476 } 477 } 478 else 479 { 480 // Decimal. 481 if ( !*(p+2) ) return 0; 482 483 const char* q = p+2; 484 q = strchr( q, ';' ); 485 486 if ( !q || !*q ) return 0; 487 488 delta = q-p; 489 --q; 490 491 while ( *q != '#' ) 492 { 493 if ( *q >= '0' && *q <= '9' ) 494 ucs += mult * (*q - '0'); 495 else 496 return 0; 497 mult *= 10; 498 --q; 499 } 500 } 501 if ( encoding == TIXML_ENCODING_UTF8 ) 502 { 503 // convert the UCS to UTF-8 504 ConvertUTF32ToUTF8( ucs, value, length ); 505 } 506 else 507 { 508 *value = (char)ucs; 509 *length = 1; 510 } 511 return p + delta + 1; 512 } 513 514 // Now try to match it. 515 for( i=0; i<NUM_ENTITY; ++i ) 516 { 517 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) 518 { 519 assert( strlen( entity[i].str ) == entity[i].strLength ); 520 *value = entity[i].chr; 521 *length = 1; 522 return ( p + entity[i].strLength ); 523 } 524 } 525 526 // So it wasn't an entity, its unrecognized, or something like that. 527 *value = *p; // Don't put back the last one, since we return it! 528 //*length = 1; // Leave unrecognized entities - this doesn't really work. 529 // Just writes strange XML. 530 return p+1; 531 } 532 533 534 bool TiXmlBase::StringEqual( const char* p, 535 const char* tag, 536 bool ignoreCase, 537 TiXmlEncoding encoding ) 538 { 539 assert( p ); 540 assert( tag ); 541 if ( !p || !*p ) 542 { 543 assert( 0 ); 544 return false; 545 } 546 547 const char* q = p; 548 549 if ( ignoreCase ) 550 { 551 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) 552 { 553 ++q; 554 ++tag; 555 } 556 557 if ( *tag == 0 ) 558 return true; 559 } 560 else 561 { 562 while ( *q && *tag && *q == *tag ) 563 { 564 ++q; 565 ++tag; 566 } 567 568 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? 569 return true; 570 } 571 return false; 572 } 573 574 const char* TiXmlBase::ReadText( const char* p, 575 TIXML_STRING * text, 576 bool trimWhiteSpace, 577 const char* endTag, 578 bool caseInsensitive, 579 TiXmlEncoding encoding ) 580 { 581 *text = ""; 582 if ( !trimWhiteSpace // certain tags always keep whitespace 583 || !condenseWhiteSpace ) // if true, whitespace is always kept 584 { 585 // Keep all the white space. 586 while ( p && *p 587 && !StringEqual( p, endTag, caseInsensitive, encoding ) 588 ) 589 { 590 int len; 591 char cArr[4] = { 0, 0, 0, 0 }; 592 p = GetChar( p, cArr, &len, encoding ); 593 text->append( cArr, len ); 594 } 595 } 596 else 597 { 598 bool whitespace = false; 599 600 // Remove leading white space: 601 p = SkipWhiteSpace( p, encoding ); 602 while ( p && *p 603 && !StringEqual( p, endTag, caseInsensitive, encoding ) ) 604 { 605 if ( *p == '\r' || *p == '\n' ) 606 { 607 whitespace = true; 608 ++p; 609 } 610 else if ( IsWhiteSpace( *p ) ) 611 { 612 whitespace = true; 613 ++p; 614 } 615 else 616 { 617 // If we've found whitespace, add it before the 618 // new character. Any whitespace just becomes a space. 619 if ( whitespace ) 620 { 621 (*text) += ' '; 622 whitespace = false; 623 } 624 int len; 625 char cArr[4] = { 0, 0, 0, 0 }; 626 p = GetChar( p, cArr, &len, encoding ); 627 if ( len == 1 ) 628 (*text) += cArr[0]; // more efficient 629 else 630 text->append( cArr, len ); 631 } 632 } 633 } 634 if ( p && *p ) 635 p += strlen( endTag ); 636 return ( p && *p ) ? p : 0; 637 } 638 639 #ifdef TIXML_USE_STL 640 641 void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag ) 642 { 643 // The basic issue with a document is that we don't know what we're 644 // streaming. Read something presumed to be a tag (and hope), then 645 // identify it, and call the appropriate stream method on the tag. 646 // 647 // This "pre-streaming" will never read the closing ">" so the 648 // sub-tag can orient itself. 649 650 if ( !StreamTo( in, '<', tag ) ) 651 { 652 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 653 return; 654 } 655 656 while ( in->good() ) 657 { 658 int tagIndex = (int) tag->length(); 659 while ( in->good() && in->peek() != '>' ) 660 { 661 int c = in->get(); 662 if ( c <= 0 ) 663 { 664 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 665 break; 666 } 667 (*tag) += (char) c; 668 } 669 670 if ( in->good() ) 671 { 672 // We now have something we presume to be a node of 673 // some sort. Identify it, and call the node to 674 // continue streaming. 675 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); 676 677 if ( node ) 678 { 679 node->StreamIn( in, tag ); 680 bool isElement = node->ToElement() != 0; 681 delete node; 682 node = 0; 683 684 // If this is the root element, we're done. Parsing will be 685 // done by the >> operator. 686 if ( isElement ) 687 { 688 return; 689 } 690 } 691 else 692 { 693 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 694 return; 695 } 696 } 697 } 698 // We should have returned sooner. 699 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 700 } 701 702 #endif 703 704 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) 705 { 706 ClearError(); 707 708 // Parse away, at the document level. Since a document 709 // contains nothing but other tags, most of what happens 710 // here is skipping white space. 711 if ( !p || !*p ) 712 { 713 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 714 return 0; 715 } 716 717 // Note that, for a document, this needs to come 718 // before the while space skip, so that parsing 719 // starts from the pointer we are given. 720 location.Clear(); 721 if ( prevData ) 722 { 723 location.row = prevData->cursor.row; 724 location.col = prevData->cursor.col; 725 } 726 else 727 { 728 location.row = 0; 729 location.col = 0; 730 } 731 TiXmlParsingData data( p, TabSize(), location.row, location.col ); 732 location = data.Cursor(); 733 734 if ( encoding == TIXML_ENCODING_UNKNOWN ) 735 { 736 // Check for the Microsoft UTF-8 lead bytes. 737 const unsigned char* pU = (const unsigned char*)p; 738 if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 739 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 740 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 ) 741 { 742 encoding = TIXML_ENCODING_UTF8; 743 useMicrosoftBOM = true; 744 } 745 } 746 747 p = SkipWhiteSpace( p, encoding ); 748 if ( !p ) 749 { 750 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 751 return 0; 752 } 753 754 while ( p && *p ) 755 { 756 TiXmlNode* node = Identify( p, encoding ); 757 if ( node ) 758 { 759 p = node->Parse( p, &data, encoding ); 760 LinkEndChild( node ); 761 } 762 else 763 { 764 break; 765 } 766 767 // Did we get encoding info? 768 if ( encoding == TIXML_ENCODING_UNKNOWN 769 && node->ToDeclaration() ) 770 { 771 TiXmlDeclaration* dec = node->ToDeclaration(); 772 const char* enc = dec->Encoding(); 773 assert( enc ); 774 775 if ( *enc == 0 ) 776 encoding = TIXML_ENCODING_UTF8; 777 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) 778 encoding = TIXML_ENCODING_UTF8; 779 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) 780 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice 781 else 782 encoding = TIXML_ENCODING_LEGACY; 783 } 784 785 p = SkipWhiteSpace( p, encoding ); 786 } 787 788 // Was this empty? 789 if ( !firstChild ) { 790 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); 791 return 0; 792 } 793 794 // All is well. 795 return p; 796 } 797 798 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) 799 { 800 // The first error in a chain is more accurate - don't set again! 801 if ( error ) 802 return; 803 804 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); 805 error = true; 806 errorId = err; 807 errorDesc = errorString[ errorId ]; 808 809 errorLocation.Clear(); 810 if ( pError && data ) 811 { 812 data->Stamp( pError, encoding ); 813 errorLocation = data->Cursor(); 814 } 815 } 816 817 818 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) 819 { 820 TiXmlNode* returnNode = 0; 821 822 p = SkipWhiteSpace( p, encoding ); 823 if( !p || !*p || *p != '<' ) 824 { 825 return 0; 826 } 827 828 p = SkipWhiteSpace( p, encoding ); 829 830 if ( !p || !*p ) 831 { 832 return 0; 833 } 834 835 // What is this thing? 836 // - Elements start with a letter or underscore, but xml is reserved. 837 // - Comments: <!-- 838 // - Decleration: <?xml 839 // - Everthing else is unknown to tinyxml. 840 // 841 842 const char* xmlHeader = { "<?xml" }; 843 const char* commentHeader = { "<!--" }; 844 const char* dtdHeader = { "<!" }; 845 const char* cdataHeader = { "<![CDATA[" }; 846 847 if ( StringEqual( p, xmlHeader, true, encoding ) ) 848 { 849 #ifdef DEBUG_PARSER 850 TIXML_LOG( "XML parsing Declaration\n" ); 851 #endif 852 returnNode = new TiXmlDeclaration(); 853 } 854 else if ( StringEqual( p, commentHeader, false, encoding ) ) 855 { 856 #ifdef DEBUG_PARSER 857 TIXML_LOG( "XML parsing Comment\n" ); 858 #endif 859 returnNode = new TiXmlComment(); 860 } 861 else if ( StringEqual( p, cdataHeader, false, encoding ) ) 862 { 863 #ifdef DEBUG_PARSER 864 TIXML_LOG( "XML parsing CDATA\n" ); 865 #endif 866 TiXmlText* text = new TiXmlText( "" ); 867 text->SetCDATA( true ); 868 returnNode = text; 869 } 870 else if ( StringEqual( p, dtdHeader, false, encoding ) ) 871 { 872 #ifdef DEBUG_PARSER 873 TIXML_LOG( "XML parsing Unknown(1)\n" ); 874 #endif 875 returnNode = new TiXmlUnknown(); 876 } 877 else if ( IsAlpha( *(p+1), encoding ) 878 || *(p+1) == '_' ) 879 { 880 #ifdef DEBUG_PARSER 881 TIXML_LOG( "XML parsing Element\n" ); 882 #endif 883 returnNode = new TiXmlElement( "" ); 884 } 885 else 886 { 887 #ifdef DEBUG_PARSER 888 TIXML_LOG( "XML parsing Unknown(2)\n" ); 889 #endif 890 returnNode = new TiXmlUnknown(); 891 } 892 893 if ( returnNode ) 894 { 895 // Set the parent, so it can report errors 896 returnNode->parent = this; 897 } 898 return returnNode; 899 } 900 901 #ifdef TIXML_USE_STL 902 903 void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag) 904 { 905 // We're called with some amount of pre-parsing. That is, some of "this" 906 // element is in "tag". Go ahead and stream to the closing ">" 907 while( in->good() ) 908 { 909 int c = in->get(); 910 if ( c <= 0 ) 911 { 912 TiXmlDocument* document = GetDocument(); 913 if ( document ) 914 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 915 return; 916 } 917 (*tag) += (char) c ; 918 919 if ( c == '>' ) 920 break; 921 } 922 923 if ( tag->length() < 3 ) return; 924 925 // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. 926 // If not, identify and stream. 927 928 if ( tag->at( tag->length() - 1 ) == '>' 929 && tag->at( tag->length() - 2 ) == '/' ) 930 { 931 // All good! 932 return; 933 } 934 else if ( tag->at( tag->length() - 1 ) == '>' ) 935 { 936 // There is more. Could be: 937 // text 938 // cdata text (which looks like another node) 939 // closing tag 940 // another node. 941 for ( ;; ) 942 { 943 StreamWhiteSpace( in, tag ); 944 945 // Do we have text? 946 if ( in->good() && in->peek() != '<' ) 947 { 948 // Yep, text. 949 TiXmlText text( "" ); 950 text.StreamIn( in, tag ); 951 952 // What follows text is a closing tag or another node. 953 // Go around again and figure it out. 954 continue; 955 } 956 957 // We now have either a closing tag...or another node. 958 // We should be at a "<", regardless. 959 if ( !in->good() ) return; 960 assert( in->peek() == '<' ); 961 int tagIndex = (int) tag->length(); 962 963 bool closingTag = false; 964 bool firstCharFound = false; 965 966 for( ;; ) 967 { 968 if ( !in->good() ) 969 return; 970 971 int c = in->peek(); 972 if ( c <= 0 ) 973 { 974 TiXmlDocument* document = GetDocument(); 975 if ( document ) 976 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 977 return; 978 } 979 980 if ( c == '>' ) 981 break; 982 983 *tag += (char) c; 984 in->get(); 985 986 // Early out if we find the CDATA id. 987 if ( c == '[' && tag->size() >= 9 ) 988 { 989 size_t len = tag->size(); 990 const char* start = tag->c_str() + len - 9; 991 if ( strcmp( start, "<![CDATA[" ) == 0 ) { 992 assert( !closingTag ); 993 break; 994 } 995 } 996 997 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) 998 { 999 firstCharFound = true; 1000 if ( c == '/' ) 1001 closingTag = true; 1002 } 1003 } 1004 // If it was a closing tag, then read in the closing '>' to clean up the input stream. 1005 // If it was not, the streaming will be done by the tag. 1006 if ( closingTag ) 1007 { 1008 if ( !in->good() ) 1009 return; 1010 1011 int c = in->get(); 1012 if ( c <= 0 ) 1013 { 1014 TiXmlDocument* document = GetDocument(); 1015 if ( document ) 1016 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 1017 return; 1018 } 1019 assert( c == '>' ); 1020 *tag += (char) c; 1021 1022 // We are done, once we've found our closing tag. 1023 return; 1024 } 1025 else 1026 { 1027 // If not a closing tag, id it, and stream. 1028 const char* tagloc = tag->c_str() + tagIndex; 1029 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); 1030 if ( !node ) 1031 return; 1032 node->StreamIn( in, tag ); 1033 delete node; 1034 node = 0; 1035 1036 // No return: go around from the beginning: text, closing tag, or node. 1037 } 1038 } 1039 } 1040 } 1041 #endif 1042 1043 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1044 { 1045 p = SkipWhiteSpace( p, encoding ); 1046 TiXmlDocument* document = GetDocument(); 1047 1048 if ( !p || !*p ) 1049 { 1050 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); 1051 return 0; 1052 } 1053 1054 if ( data ) 1055 { 1056 data->Stamp( p, encoding ); 1057 location = data->Cursor(); 1058 } 1059 1060 if ( *p != '<' ) 1061 { 1062 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); 1063 return 0; 1064 } 1065 1066 p = SkipWhiteSpace( p+1, encoding ); 1067 1068 // Read the name. 1069 const char* pErr = p; 1070 1071 p = ReadName( p, &value, encoding ); 1072 if ( !p || !*p ) 1073 { 1074 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); 1075 return 0; 1076 } 1077 1078 TIXML_STRING endTag ("</"); 1079 endTag += value; 1080 1081 // Check for and read attributes. Also look for an empty 1082 // tag or an end tag. 1083 while ( p && *p ) 1084 { 1085 pErr = p; 1086 p = SkipWhiteSpace( p, encoding ); 1087 if ( !p || !*p ) 1088 { 1089 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 1090 return 0; 1091 } 1092 if ( *p == '/' ) 1093 { 1094 ++p; 1095 // Empty tag. 1096 if ( *p != '>' ) 1097 { 1098 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); 1099 return 0; 1100 } 1101 return (p+1); 1102 } 1103 else if ( *p == '>' ) 1104 { 1105 // Done with attributes (if there were any.) 1106 // Read the value -- which can include other 1107 // elements -- read the end tag, and return. 1108 ++p; 1109 p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. 1110 if ( !p || !*p ) { 1111 // We were looking for the end tag, but found nothing. 1112 // Fix for [ 1663758 ] Failure to report error on bad XML 1113 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 1114 return 0; 1115 } 1116 1117 // We should find the end tag now 1118 // note that: 1119 // </foo > and 1120 // </foo> 1121 // are both valid end tags. 1122 if ( StringEqual( p, endTag.c_str(), false, encoding ) ) 1123 { 1124 p += endTag.length(); 1125 p = SkipWhiteSpace( p, encoding ); 1126 if ( p && *p && *p == '>' ) { 1127 ++p; 1128 return p; 1129 } 1130 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 1131 return 0; 1132 } 1133 else 1134 { 1135 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); 1136 return 0; 1137 } 1138 } 1139 else 1140 { 1141 // Try to read an attribute: 1142 TiXmlAttribute* attrib = new TiXmlAttribute(); 1143 if ( !attrib ) 1144 { 1145 return 0; 1146 } 1147 1148 attrib->SetDocument( document ); 1149 pErr = p; 1150 p = attrib->Parse( p, data, encoding ); 1151 1152 if ( !p || !*p ) 1153 { 1154 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); 1155 delete attrib; 1156 return 0; 1157 } 1158 1159 // Handle the strange case of double attributes: 1160 #ifdef TIXML_USE_STL 1161 TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() ); 1162 #else 1163 TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); 1164 #endif 1165 if ( node ) 1166 { 1167 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); 1168 delete attrib; 1169 return 0; 1170 } 1171 1172 attributeSet.Add( attrib ); 1173 } 1174 } 1175 return p; 1176 } 1177 1178 1179 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1180 { 1181 TiXmlDocument* document = GetDocument(); 1182 1183 // Read in text and elements in any order. 1184 const char* pWithWhiteSpace = p; 1185 p = SkipWhiteSpace( p, encoding ); 1186 1187 while ( p && *p ) 1188 { 1189 if ( *p != '<' ) 1190 { 1191 // Take what we have, make a text element. 1192 TiXmlText* textNode = new TiXmlText( "" ); 1193 1194 if ( !textNode ) 1195 { 1196 return 0; 1197 } 1198 1199 if ( TiXmlBase::IsWhiteSpaceCondensed() ) 1200 { 1201 p = textNode->Parse( p, data, encoding ); 1202 } 1203 else 1204 { 1205 // Special case: we want to keep the white space 1206 // so that leading spaces aren't removed. 1207 p = textNode->Parse( pWithWhiteSpace, data, encoding ); 1208 } 1209 1210 if ( !textNode->Blank() ) 1211 LinkEndChild( textNode ); 1212 else 1213 delete textNode; 1214 } 1215 else 1216 { 1217 // We hit a '<' 1218 // Have we hit a new element or an end tag? This could also be 1219 // a TiXmlText in the "CDATA" style. 1220 if ( StringEqual( p, "</", false, encoding ) ) 1221 { 1222 return p; 1223 } 1224 else 1225 { 1226 TiXmlNode* node = Identify( p, encoding ); 1227 if ( node ) 1228 { 1229 p = node->Parse( p, data, encoding ); 1230 LinkEndChild( node ); 1231 } 1232 else 1233 { 1234 return 0; 1235 } 1236 } 1237 } 1238 pWithWhiteSpace = p; 1239 p = SkipWhiteSpace( p, encoding ); 1240 } 1241 1242 if ( !p ) 1243 { 1244 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); 1245 } 1246 return p; 1247 } 1248 1249 1250 #ifdef TIXML_USE_STL 1251 void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag ) 1252 { 1253 while ( in->good() ) 1254 { 1255 int c = in->get(); 1256 if ( c <= 0 ) 1257 { 1258 TiXmlDocument* document = GetDocument(); 1259 if ( document ) 1260 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 1261 return; 1262 } 1263 (*tag) += (char) c; 1264 1265 if ( c == '>' ) 1266 { 1267 // All is well. 1268 return; 1269 } 1270 } 1271 } 1272 #endif 1273 1274 1275 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1276 { 1277 TiXmlDocument* document = GetDocument(); 1278 p = SkipWhiteSpace( p, encoding ); 1279 1280 if ( data ) 1281 { 1282 data->Stamp( p, encoding ); 1283 location = data->Cursor(); 1284 } 1285 if ( !p || !*p || *p != '<' ) 1286 { 1287 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); 1288 return 0; 1289 } 1290 ++p; 1291 value = ""; 1292 1293 while ( p && *p && *p != '>' ) 1294 { 1295 value += *p; 1296 ++p; 1297 } 1298 1299 if ( !p ) 1300 { 1301 if ( document ) 1302 document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); 1303 } 1304 if ( p && *p == '>' ) 1305 return p+1; 1306 return p; 1307 } 1308 1309 #ifdef TIXML_USE_STL 1310 void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag ) 1311 { 1312 while ( in->good() ) 1313 { 1314 int c = in->get(); 1315 if ( c <= 0 ) 1316 { 1317 TiXmlDocument* document = GetDocument(); 1318 if ( document ) 1319 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 1320 return; 1321 } 1322 1323 (*tag) += (char) c; 1324 1325 if ( c == '>' 1326 && tag->at( tag->length() - 2 ) == '-' 1327 && tag->at( tag->length() - 3 ) == '-' ) 1328 { 1329 // All is well. 1330 return; 1331 } 1332 } 1333 } 1334 #endif 1335 1336 1337 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1338 { 1339 TiXmlDocument* document = GetDocument(); 1340 value = ""; 1341 1342 p = SkipWhiteSpace( p, encoding ); 1343 1344 if ( data ) 1345 { 1346 data->Stamp( p, encoding ); 1347 location = data->Cursor(); 1348 } 1349 const char* startTag = "<!--"; 1350 const char* endTag = "-->"; 1351 1352 if ( !StringEqual( p, startTag, false, encoding ) ) 1353 { 1354 if ( document ) 1355 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); 1356 return 0; 1357 } 1358 p += strlen( startTag ); 1359 1360 // [ 1475201 ] TinyXML parses entities in comments 1361 // Oops - ReadText doesn't work, because we don't want to parse the entities. 1362 // p = ReadText( p, &value, false, endTag, false, encoding ); 1363 // 1364 // from the XML spec: 1365 /* 1366 [Definition: Comments may appear anywhere in a document outside other markup; in addition, 1367 they may appear within the document type declaration at places allowed by the grammar. 1368 They are not part of the document's character data; an XML processor MAY, but need not, 1369 make it possible for an application to retrieve the text of comments. For compatibility, 1370 the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 1371 references MUST NOT be recognized within comments. 1372 1373 An example of a comment: 1374 1375 <!-- declarations for <head> & <body> --> 1376 */ 1377 1378 value = ""; 1379 // Keep all the white space. 1380 while ( p && *p && !StringEqual( p, endTag, false, encoding ) ) 1381 { 1382 value.append( p, 1 ); 1383 ++p; 1384 } 1385 if ( p && *p ) 1386 p += strlen( endTag ); 1387 1388 return p; 1389 } 1390 1391 1392 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1393 { 1394 p = SkipWhiteSpace( p, encoding ); 1395 if ( !p || !*p ) return 0; 1396 1397 if ( data ) 1398 { 1399 data->Stamp( p, encoding ); 1400 location = data->Cursor(); 1401 } 1402 // Read the name, the '=' and the value. 1403 const char* pErr = p; 1404 p = ReadName( p, &name, encoding ); 1405 if ( !p || !*p ) 1406 { 1407 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 1408 return 0; 1409 } 1410 p = SkipWhiteSpace( p, encoding ); 1411 if ( !p || !*p || *p != '=' ) 1412 { 1413 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 1414 return 0; 1415 } 1416 1417 ++p; // skip '=' 1418 p = SkipWhiteSpace( p, encoding ); 1419 if ( !p || !*p ) 1420 { 1421 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 1422 return 0; 1423 } 1424 1425 const char* end; 1426 const char SINGLE_QUOTE = '\''; 1427 const char DOUBLE_QUOTE = '\"'; 1428 1429 if ( *p == SINGLE_QUOTE ) 1430 { 1431 ++p; 1432 end = "\'"; // single quote in string 1433 p = ReadText( p, &value, false, end, false, encoding ); 1434 } 1435 else if ( *p == DOUBLE_QUOTE ) 1436 { 1437 ++p; 1438 end = "\""; // double quote in string 1439 p = ReadText( p, &value, false, end, false, encoding ); 1440 } 1441 else 1442 { 1443 // All attribute values should be in single or double quotes. 1444 // But this is such a common error that the parser will try 1445 // its best, even without them. 1446 value = ""; 1447 while ( p && *p // existence 1448 && !IsWhiteSpace( *p ) // whitespace 1449 && *p != '/' && *p != '>' ) // tag end 1450 { 1451 if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) { 1452 // [ 1451649 ] Attribute values with trailing quotes not handled correctly 1453 // We did not have an opening quote but seem to have a 1454 // closing one. Give up and throw an error. 1455 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 1456 return 0; 1457 } 1458 value += *p; 1459 ++p; 1460 } 1461 } 1462 return p; 1463 } 1464 1465 #ifdef TIXML_USE_STL 1466 void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag ) 1467 { 1468 while ( in->good() ) 1469 { 1470 int c = in->peek(); 1471 if ( !cdata && (c == '<' ) ) 1472 { 1473 return; 1474 } 1475 if ( c <= 0 ) 1476 { 1477 TiXmlDocument* document = GetDocument(); 1478 if ( document ) 1479 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 1480 return; 1481 } 1482 1483 (*tag) += (char) c; 1484 in->get(); // "commits" the peek made above 1485 1486 if ( cdata && c == '>' && tag->size() >= 3 ) { 1487 size_t len = tag->size(); 1488 if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) { 1489 // terminator of cdata. 1490 return; 1491 } 1492 } 1493 } 1494 } 1495 #endif 1496 1497 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 1498 { 1499 value = ""; 1500 TiXmlDocument* document = GetDocument(); 1501 1502 if ( data ) 1503 { 1504 data->Stamp( p, encoding ); 1505 location = data->Cursor(); 1506 } 1507 1508 const char* const startTag = "<![CDATA["; 1509 const char* const endTag = "]]>"; 1510 1511 if ( cdata || StringEqual( p, startTag, false, encoding ) ) 1512 { 1513 cdata = true; 1514 1515 if ( !StringEqual( p, startTag, false, encoding ) ) 1516 { 1517 if ( document ) 1518 document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); 1519 return 0; 1520 } 1521 p += strlen( startTag ); 1522 1523 // Keep all the white space, ignore the encoding, etc. 1524 while ( p && *p 1525 && !StringEqual( p, endTag, false, encoding ) 1526 ) 1527 { 1528 value += *p; 1529 ++p; 1530 } 1531 1532 TIXML_STRING dummy; 1533 p = ReadText( p, &dummy, false, endTag, false, encoding ); 1534 return p; 1535 } 1536 else 1537 { 1538 bool ignoreWhite = true; 1539 1540 const char* end = "<"; 1541 p = ReadText( p, &value, ignoreWhite, end, false, encoding ); 1542 if ( p && *p ) 1543 return p-1; // don't truncate the '<' 1544 return 0; 1545 } 1546 } 1547 1548 #ifdef TIXML_USE_STL 1549 void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag ) 1550 { 1551 while ( in->good() ) 1552 { 1553 int c = in->get(); 1554 if ( c <= 0 ) 1555 { 1556 TiXmlDocument* document = GetDocument(); 1557 if ( document ) 1558 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 1559 return; 1560 } 1561 (*tag) += (char) c; 1562 1563 if ( c == '>' ) 1564 { 1565 // All is well. 1566 return; 1567 } 1568 } 1569 } 1570 #endif 1571 1572 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) 1573 { 1574 p = SkipWhiteSpace( p, _encoding ); 1575 // Find the beginning, find the end, and look for 1576 // the stuff in-between. 1577 TiXmlDocument* document = GetDocument(); 1578 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) 1579 { 1580 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); 1581 return 0; 1582 } 1583 if ( data ) 1584 { 1585 data->Stamp( p, _encoding ); 1586 location = data->Cursor(); 1587 } 1588 p += 5; 1589 1590 version = ""; 1591 encoding = ""; 1592 standalone = ""; 1593 1594 while ( p && *p ) 1595 { 1596 if ( *p == '>' ) 1597 { 1598 ++p; 1599 return p; 1600 } 1601 1602 p = SkipWhiteSpace( p, _encoding ); 1603 if ( StringEqual( p, "version", true, _encoding ) ) 1604 { 1605 TiXmlAttribute attrib; 1606 p = attrib.Parse( p, data, _encoding ); 1607 version = attrib.Value(); 1608 } 1609 else if ( StringEqual( p, "encoding", true, _encoding ) ) 1610 { 1611 TiXmlAttribute attrib; 1612 p = attrib.Parse( p, data, _encoding ); 1613 encoding = attrib.Value(); 1614 } 1615 else if ( StringEqual( p, "standalone", true, _encoding ) ) 1616 { 1617 TiXmlAttribute attrib; 1618 p = attrib.Parse( p, data, _encoding ); 1619 standalone = attrib.Value(); 1620 } 1621 else 1622 { 1623 // Read over whatever it is. 1624 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) 1625 ++p; 1626 } 1627 } 1628 return 0; 1629 } 1630 1631 bool TiXmlText::Blank() const 1632 { 1633 for ( unsigned i=0; i<value.length(); i++ ) 1634 if ( !IsWhiteSpace( value[i] ) ) 1635 return false; 1636 return true; 1637 } 1638 1639