1 // Copyright 2007-2011 Baptiste Lepilleur 2 // Distributed under MIT license, or public domain if desired and 3 // recognized in your jurisdiction. 4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 6 #if !defined(JSON_IS_AMALGAMATION) 7 # include <json/assertions.h> 8 # include <json/reader.h> 9 # include <json/value.h> 10 # include "json_tool.h" 11 #endif // if !defined(JSON_IS_AMALGAMATION) 12 #include <utility> 13 #include <cstdio> 14 #include <cassert> 15 #include <cstring> 16 #include <stdexcept> 17 18 #if _MSC_VER >= 1400 // VC++ 8.0 19 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. 20 #endif 21 22 namespace Json { 23 24 // Implementation of class Features 25 // //////////////////////////////// 26 27 Features::Features() 28 : allowComments_( true ) 29 , strictRoot_( false ) 30 { 31 } 32 33 34 Features 35 Features::all() 36 { 37 return Features(); 38 } 39 40 41 Features 42 Features::strictMode() 43 { 44 Features features; 45 features.allowComments_ = false; 46 features.strictRoot_ = true; 47 return features; 48 } 49 50 // Implementation of class Reader 51 // //////////////////////////////// 52 53 54 static inline bool 55 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) 56 { 57 return c == c1 || c == c2 || c == c3 || c == c4; 58 } 59 60 static inline bool 61 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) 62 { 63 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; 64 } 65 66 67 static bool 68 containsNewLine( Reader::Location begin, 69 Reader::Location end ) 70 { 71 for ( ;begin < end; ++begin ) 72 if ( *begin == '\n' || *begin == '\r' ) 73 return true; 74 return false; 75 } 76 77 78 // Class Reader 79 // ////////////////////////////////////////////////////////////////// 80 81 Reader::Reader() 82 : errors_(), 83 document_(), 84 begin_(), 85 end_(), 86 current_(), 87 lastValueEnd_(), 88 lastValue_(), 89 commentsBefore_(), 90 features_( Features::all() ), 91 collectComments_() 92 { 93 } 94 95 96 Reader::Reader( const Features &features ) 97 : errors_(), 98 document_(), 99 begin_(), 100 end_(), 101 current_(), 102 lastValueEnd_(), 103 lastValue_(), 104 commentsBefore_(), 105 features_( features ), 106 collectComments_() 107 { 108 } 109 110 111 bool 112 Reader::parse( const std::string &document, 113 Value &root, 114 bool collectComments ) 115 { 116 document_ = document; 117 const char *begin = document_.c_str(); 118 const char *end = begin + document_.length(); 119 return parse( begin, end, root, collectComments ); 120 } 121 122 123 bool 124 Reader::parse( std::istream& sin, 125 Value &root, 126 bool collectComments ) 127 { 128 //std::istream_iterator<char> begin(sin); 129 //std::istream_iterator<char> end; 130 // Those would allow streamed input from a file, if parse() were a 131 // template function. 132 133 // Since std::string is reference-counted, this at least does not 134 // create an extra copy. 135 std::string doc; 136 std::getline(sin, doc, (char)EOF); 137 return parse( doc, root, collectComments ); 138 } 139 140 bool 141 Reader::parse( const char *beginDoc, const char *endDoc, 142 Value &root, 143 bool collectComments ) 144 { 145 if ( !features_.allowComments_ ) 146 { 147 collectComments = false; 148 } 149 150 begin_ = beginDoc; 151 end_ = endDoc; 152 collectComments_ = collectComments; 153 current_ = begin_; 154 lastValueEnd_ = 0; 155 lastValue_ = 0; 156 commentsBefore_ = ""; 157 errors_.clear(); 158 while ( !nodes_.empty() ) 159 nodes_.pop(); 160 nodes_.push( &root ); 161 162 bool successful = readValue(); 163 Token token; 164 skipCommentTokens( token ); 165 if ( collectComments_ && !commentsBefore_.empty() ) 166 root.setComment( commentsBefore_, commentAfter ); 167 if ( features_.strictRoot_ ) 168 { 169 if ( !root.isArray() && !root.isObject() ) 170 { 171 // Set error location to start of doc, ideally should be first token found in doc 172 token.type_ = tokenError; 173 token.start_ = beginDoc; 174 token.end_ = endDoc; 175 addError( "A valid JSON document must be either an array or an object value.", 176 token ); 177 return false; 178 } 179 } 180 return successful; 181 } 182 183 184 bool 185 Reader::readValue() 186 { 187 Token token; 188 skipCommentTokens( token ); 189 bool successful = true; 190 191 if ( collectComments_ && !commentsBefore_.empty() ) 192 { 193 currentValue().setComment( commentsBefore_, commentBefore ); 194 commentsBefore_ = ""; 195 } 196 197 198 switch ( token.type_ ) 199 { 200 case tokenObjectBegin: 201 successful = readObject( token ); 202 break; 203 case tokenArrayBegin: 204 successful = readArray( token ); 205 break; 206 case tokenNumber: 207 successful = decodeNumber( token ); 208 break; 209 case tokenString: 210 successful = decodeString( token ); 211 break; 212 case tokenTrue: 213 currentValue() = true; 214 break; 215 case tokenFalse: 216 currentValue() = false; 217 break; 218 case tokenNull: 219 currentValue() = Value(); 220 break; 221 default: 222 return addError( "Syntax error: value, object or array expected.", token ); 223 } 224 225 if ( collectComments_ ) 226 { 227 lastValueEnd_ = current_; 228 lastValue_ = ¤tValue(); 229 } 230 231 return successful; 232 } 233 234 235 void 236 Reader::skipCommentTokens( Token &token ) 237 { 238 if ( features_.allowComments_ ) 239 { 240 do 241 { 242 readToken( token ); 243 } 244 while ( token.type_ == tokenComment ); 245 } 246 else 247 { 248 readToken( token ); 249 } 250 } 251 252 253 bool 254 Reader::expectToken( TokenType type, Token &token, const char *message ) 255 { 256 readToken( token ); 257 if ( token.type_ != type ) 258 return addError( message, token ); 259 return true; 260 } 261 262 263 bool 264 Reader::readToken( Token &token ) 265 { 266 skipSpaces(); 267 token.start_ = current_; 268 Char c = getNextChar(); 269 bool ok = true; 270 switch ( c ) 271 { 272 case '{': 273 token.type_ = tokenObjectBegin; 274 break; 275 case '}': 276 token.type_ = tokenObjectEnd; 277 break; 278 case '[': 279 token.type_ = tokenArrayBegin; 280 break; 281 case ']': 282 token.type_ = tokenArrayEnd; 283 break; 284 case '"': 285 token.type_ = tokenString; 286 ok = readString(); 287 break; 288 case '/': 289 token.type_ = tokenComment; 290 ok = readComment(); 291 break; 292 case '0': 293 case '1': 294 case '2': 295 case '3': 296 case '4': 297 case '5': 298 case '6': 299 case '7': 300 case '8': 301 case '9': 302 case '-': 303 token.type_ = tokenNumber; 304 readNumber(); 305 break; 306 case 't': 307 token.type_ = tokenTrue; 308 ok = match( "rue", 3 ); 309 break; 310 case 'f': 311 token.type_ = tokenFalse; 312 ok = match( "alse", 4 ); 313 break; 314 case 'n': 315 token.type_ = tokenNull; 316 ok = match( "ull", 3 ); 317 break; 318 case ',': 319 token.type_ = tokenArraySeparator; 320 break; 321 case ':': 322 token.type_ = tokenMemberSeparator; 323 break; 324 case 0: 325 token.type_ = tokenEndOfStream; 326 break; 327 default: 328 ok = false; 329 break; 330 } 331 if ( !ok ) 332 token.type_ = tokenError; 333 token.end_ = current_; 334 return true; 335 } 336 337 338 void 339 Reader::skipSpaces() 340 { 341 while ( current_ != end_ ) 342 { 343 Char c = *current_; 344 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) 345 ++current_; 346 else 347 break; 348 } 349 } 350 351 352 bool 353 Reader::match( Location pattern, 354 int patternLength ) 355 { 356 if ( end_ - current_ < patternLength ) 357 return false; 358 int index = patternLength; 359 while ( index-- ) 360 if ( current_[index] != pattern[index] ) 361 return false; 362 current_ += patternLength; 363 return true; 364 } 365 366 367 bool 368 Reader::readComment() 369 { 370 Location commentBegin = current_ - 1; 371 Char c = getNextChar(); 372 bool successful = false; 373 if ( c == '*' ) 374 successful = readCStyleComment(); 375 else if ( c == '/' ) 376 successful = readCppStyleComment(); 377 if ( !successful ) 378 return false; 379 380 if ( collectComments_ ) 381 { 382 CommentPlacement placement = commentBefore; 383 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) 384 { 385 if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) 386 placement = commentAfterOnSameLine; 387 } 388 389 addComment( commentBegin, current_, placement ); 390 } 391 return true; 392 } 393 394 395 void 396 Reader::addComment( Location begin, 397 Location end, 398 CommentPlacement placement ) 399 { 400 assert( collectComments_ ); 401 if ( placement == commentAfterOnSameLine ) 402 { 403 assert( lastValue_ != 0 ); 404 lastValue_->setComment( std::string( begin, end ), placement ); 405 } 406 else 407 { 408 if ( !commentsBefore_.empty() ) 409 commentsBefore_ += "\n"; 410 commentsBefore_ += std::string( begin, end ); 411 } 412 } 413 414 415 bool 416 Reader::readCStyleComment() 417 { 418 while ( current_ != end_ ) 419 { 420 Char c = getNextChar(); 421 if ( c == '*' && *current_ == '/' ) 422 break; 423 } 424 return getNextChar() == '/'; 425 } 426 427 428 bool 429 Reader::readCppStyleComment() 430 { 431 while ( current_ != end_ ) 432 { 433 Char c = getNextChar(); 434 if ( c == '\r' || c == '\n' ) 435 break; 436 } 437 return true; 438 } 439 440 441 void 442 Reader::readNumber() 443 { 444 while ( current_ != end_ ) 445 { 446 if ( !(*current_ >= '0' && *current_ <= '9') && 447 !in( *current_, '.', 'e', 'E', '+', '-' ) ) 448 break; 449 ++current_; 450 } 451 } 452 453 bool 454 Reader::readString() 455 { 456 Char c = 0; 457 while ( current_ != end_ ) 458 { 459 c = getNextChar(); 460 if ( c == '\\' ) 461 getNextChar(); 462 else if ( c == '"' ) 463 break; 464 } 465 return c == '"'; 466 } 467 468 469 bool 470 Reader::readObject( Token &/*tokenStart*/ ) 471 { 472 Token tokenName; 473 std::string name; 474 currentValue() = Value( objectValue ); 475 while ( readToken( tokenName ) ) 476 { 477 bool initialTokenOk = true; 478 while ( tokenName.type_ == tokenComment && initialTokenOk ) 479 initialTokenOk = readToken( tokenName ); 480 if ( !initialTokenOk ) 481 break; 482 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object 483 return true; 484 if ( tokenName.type_ != tokenString ) 485 break; 486 487 name = ""; 488 if ( !decodeString( tokenName, name ) ) 489 return recoverFromError( tokenObjectEnd ); 490 491 Token colon; 492 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) 493 { 494 return addErrorAndRecover( "Missing ':' after object member name", 495 colon, 496 tokenObjectEnd ); 497 } 498 Value &value = currentValue()[ name ]; 499 nodes_.push( &value ); 500 bool ok = readValue(); 501 nodes_.pop(); 502 if ( !ok ) // error already set 503 return recoverFromError( tokenObjectEnd ); 504 505 Token comma; 506 if ( !readToken( comma ) 507 || ( comma.type_ != tokenObjectEnd && 508 comma.type_ != tokenArraySeparator && 509 comma.type_ != tokenComment ) ) 510 { 511 return addErrorAndRecover( "Missing ',' or '}' in object declaration", 512 comma, 513 tokenObjectEnd ); 514 } 515 bool finalizeTokenOk = true; 516 while ( comma.type_ == tokenComment && 517 finalizeTokenOk ) 518 finalizeTokenOk = readToken( comma ); 519 if ( comma.type_ == tokenObjectEnd ) 520 return true; 521 } 522 return addErrorAndRecover( "Missing '}' or object member name", 523 tokenName, 524 tokenObjectEnd ); 525 } 526 527 528 bool 529 Reader::readArray( Token &/*tokenStart*/ ) 530 { 531 currentValue() = Value( arrayValue ); 532 skipSpaces(); 533 if ( *current_ == ']' ) // empty array 534 { 535 Token endArray; 536 readToken( endArray ); 537 return true; 538 } 539 int index = 0; 540 for (;;) 541 { 542 Value &value = currentValue()[ index++ ]; 543 nodes_.push( &value ); 544 bool ok = readValue(); 545 nodes_.pop(); 546 if ( !ok ) // error already set 547 return recoverFromError( tokenArrayEnd ); 548 549 Token token; 550 // Accept Comment after last item in the array. 551 ok = readToken( token ); 552 while ( token.type_ == tokenComment && ok ) 553 { 554 ok = readToken( token ); 555 } 556 bool badTokenType = ( token.type_ != tokenArraySeparator && 557 token.type_ != tokenArrayEnd ); 558 if ( !ok || badTokenType ) 559 { 560 return addErrorAndRecover( "Missing ',' or ']' in array declaration", 561 token, 562 tokenArrayEnd ); 563 } 564 if ( token.type_ == tokenArrayEnd ) 565 break; 566 } 567 return true; 568 } 569 570 571 bool 572 Reader::decodeNumber( Token &token ) 573 { 574 bool isDouble = false; 575 for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) 576 { 577 isDouble = isDouble 578 || in( *inspect, '.', 'e', 'E', '+' ) 579 || ( *inspect == '-' && inspect != token.start_ ); 580 } 581 if ( isDouble ) 582 return decodeDouble( token ); 583 // Attempts to parse the number as an integer. If the number is 584 // larger than the maximum supported value of an integer then 585 // we decode the number as a double. 586 Location current = token.start_; 587 bool isNegative = *current == '-'; 588 if ( isNegative ) 589 ++current; 590 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) 591 : Value::maxLargestUInt; 592 Value::LargestUInt threshold = maxIntegerValue / 10; 593 Value::LargestUInt value = 0; 594 while ( current < token.end_ ) 595 { 596 Char c = *current++; 597 if ( c < '0' || c > '9' ) 598 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 599 Value::UInt digit(c - '0'); 600 if ( value >= threshold ) 601 { 602 // We've hit or exceeded the max value divided by 10 (rounded down). If 603 // a) we've only just touched the limit, b) this is the last digit, and 604 // c) it's small enough to fit in that rounding delta, we're okay. 605 // Otherwise treat this number as a double to avoid overflow. 606 if (value > threshold || 607 current != token.end_ || 608 digit > maxIntegerValue % 10) 609 { 610 return decodeDouble( token ); 611 } 612 } 613 value = value * 10 + digit; 614 } 615 if ( isNegative ) 616 currentValue() = -Value::LargestInt( value ); 617 else if ( value <= Value::LargestUInt(Value::maxInt) ) 618 currentValue() = Value::LargestInt( value ); 619 else 620 currentValue() = value; 621 return true; 622 } 623 624 625 bool 626 Reader::decodeDouble( Token &token ) 627 { 628 double value = 0; 629 const int bufferSize = 32; 630 int count; 631 int length = int(token.end_ - token.start_); 632 633 // Sanity check to avoid buffer overflow exploits. 634 if (length < 0) { 635 return addError( "Unable to parse token length", token ); 636 } 637 638 // Avoid using a string constant for the format control string given to 639 // sscanf, as this can cause hard to debug crashes on OS X. See here for more 640 // info: 641 // 642 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html 643 char format[] = "%lf"; 644 645 if ( length <= bufferSize ) 646 { 647 Char buffer[bufferSize+1]; 648 memcpy( buffer, token.start_, length ); 649 buffer[length] = 0; 650 count = sscanf( buffer, format, &value ); 651 } 652 else 653 { 654 std::string buffer( token.start_, token.end_ ); 655 count = sscanf( buffer.c_str(), format, &value ); 656 } 657 658 if ( count != 1 ) 659 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 660 currentValue() = value; 661 return true; 662 } 663 664 665 bool 666 Reader::decodeString( Token &token ) 667 { 668 std::string decoded; 669 if ( !decodeString( token, decoded ) ) 670 return false; 671 currentValue() = decoded; 672 return true; 673 } 674 675 676 bool 677 Reader::decodeString( Token &token, std::string &decoded ) 678 { 679 decoded.reserve( token.end_ - token.start_ - 2 ); 680 Location current = token.start_ + 1; // skip '"' 681 Location end = token.end_ - 1; // do not include '"' 682 while ( current != end ) 683 { 684 Char c = *current++; 685 if ( c == '"' ) 686 break; 687 else if ( c == '\\' ) 688 { 689 if ( current == end ) 690 return addError( "Empty escape sequence in string", token, current ); 691 Char escape = *current++; 692 switch ( escape ) 693 { 694 case '"': decoded += '"'; break; 695 case '/': decoded += '/'; break; 696 case '\\': decoded += '\\'; break; 697 case 'b': decoded += '\b'; break; 698 case 'f': decoded += '\f'; break; 699 case 'n': decoded += '\n'; break; 700 case 'r': decoded += '\r'; break; 701 case 't': decoded += '\t'; break; 702 case 'u': 703 { 704 unsigned int unicode; 705 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) 706 return false; 707 decoded += codePointToUTF8(unicode); 708 } 709 break; 710 default: 711 return addError( "Bad escape sequence in string", token, current ); 712 } 713 } 714 else 715 { 716 decoded += c; 717 } 718 } 719 return true; 720 } 721 722 bool 723 Reader::decodeUnicodeCodePoint( Token &token, 724 Location ¤t, 725 Location end, 726 unsigned int &unicode ) 727 { 728 729 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) 730 return false; 731 if (unicode >= 0xD800 && unicode <= 0xDBFF) 732 { 733 // surrogate pairs 734 if (end - current < 6) 735 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); 736 unsigned int surrogatePair; 737 if (*(current++) == '\\' && *(current++)== 'u') 738 { 739 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) 740 { 741 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); 742 } 743 else 744 return false; 745 } 746 else 747 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); 748 } 749 return true; 750 } 751 752 bool 753 Reader::decodeUnicodeEscapeSequence( Token &token, 754 Location ¤t, 755 Location end, 756 unsigned int &unicode ) 757 { 758 if ( end - current < 4 ) 759 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); 760 unicode = 0; 761 for ( int index =0; index < 4; ++index ) 762 { 763 Char c = *current++; 764 unicode *= 16; 765 if ( c >= '0' && c <= '9' ) 766 unicode += c - '0'; 767 else if ( c >= 'a' && c <= 'f' ) 768 unicode += c - 'a' + 10; 769 else if ( c >= 'A' && c <= 'F' ) 770 unicode += c - 'A' + 10; 771 else 772 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); 773 } 774 return true; 775 } 776 777 778 bool 779 Reader::addError( const std::string &message, 780 Token &token, 781 Location extra ) 782 { 783 ErrorInfo info; 784 info.token_ = token; 785 info.message_ = message; 786 info.extra_ = extra; 787 errors_.push_back( info ); 788 return false; 789 } 790 791 792 bool 793 Reader::recoverFromError( TokenType skipUntilToken ) 794 { 795 int errorCount = int(errors_.size()); 796 Token skip; 797 for (;;) 798 { 799 if ( !readToken(skip) ) 800 errors_.resize( errorCount ); // discard errors caused by recovery 801 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) 802 break; 803 } 804 errors_.resize( errorCount ); 805 return false; 806 } 807 808 809 bool 810 Reader::addErrorAndRecover( const std::string &message, 811 Token &token, 812 TokenType skipUntilToken ) 813 { 814 addError( message, token ); 815 return recoverFromError( skipUntilToken ); 816 } 817 818 819 Value & 820 Reader::currentValue() 821 { 822 return *(nodes_.top()); 823 } 824 825 826 Reader::Char 827 Reader::getNextChar() 828 { 829 if ( current_ == end_ ) 830 return 0; 831 return *current_++; 832 } 833 834 835 void 836 Reader::getLocationLineAndColumn( Location location, 837 int &line, 838 int &column ) const 839 { 840 Location current = begin_; 841 Location lastLineStart = current; 842 line = 0; 843 while ( current < location && current != end_ ) 844 { 845 Char c = *current++; 846 if ( c == '\r' ) 847 { 848 if ( *current == '\n' ) 849 ++current; 850 lastLineStart = current; 851 ++line; 852 } 853 else if ( c == '\n' ) 854 { 855 lastLineStart = current; 856 ++line; 857 } 858 } 859 // column & line start at 1 860 column = int(location - lastLineStart) + 1; 861 ++line; 862 } 863 864 865 std::string 866 Reader::getLocationLineAndColumn( Location location ) const 867 { 868 int line, column; 869 getLocationLineAndColumn( location, line, column ); 870 char buffer[18+16+16+1]; 871 sprintf( buffer, "Line %d, Column %d", line, column ); 872 return buffer; 873 } 874 875 876 // Deprecated. Preserved for backward compatibility 877 std::string 878 Reader::getFormatedErrorMessages() const 879 { 880 return getFormattedErrorMessages(); 881 } 882 883 884 std::string 885 Reader::getFormattedErrorMessages() const 886 { 887 std::string formattedMessage; 888 for ( Errors::const_iterator itError = errors_.begin(); 889 itError != errors_.end(); 890 ++itError ) 891 { 892 const ErrorInfo &error = *itError; 893 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; 894 formattedMessage += " " + error.message_ + "\n"; 895 if ( error.extra_ ) 896 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; 897 } 898 return formattedMessage; 899 } 900 901 902 std::istream& operator>>( std::istream &sin, Value &root ) 903 { 904 Json::Reader reader; 905 bool ok = reader.parse(sin, root, true); 906 if (!ok) { 907 fprintf( 908 stderr, 909 "Error from reader: %s", 910 reader.getFormattedErrorMessages().c_str()); 911 912 JSON_FAIL_MESSAGE("reader error"); 913 } 914 return sin; 915 } 916 917 918 } // namespace Json 919