1 // Copyright 2007-2011 Baptiste Lepilleur 2 // Distributed under MIT license, or public domain if desired and 3 // recognized in your jurisdiction. 4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE 5 6 #if !defined(JSON_IS_AMALGAMATION) 7 # include <json/assertions.h> 8 # include <json/reader.h> 9 # include <json/value.h> 10 # include "json_tool.h" 11 #endif // if !defined(JSON_IS_AMALGAMATION) 12 #include <utility> 13 #include <cstdio> 14 #include <cassert> 15 #include <cstring> 16 #include <stdexcept> 17 #ifdef __pnacl__ 18 // This file uses the following headers (at least in Reader::parse), but 19 // the upstream version doesn't include them because iostream pulls in 20 // static initializers. This breaks the PNaCl build because it uses 21 // libc++ which declares getline in <string> (as per the C++ standard) 22 // but defines it in <iostream>. The code therefore fails linking, which 23 // these includes fix. 24 #include <string> 25 #include <iostream> 26 #endif 27 28 #if _MSC_VER >= 1400 // VC++ 8.0 29 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated. 30 #endif 31 32 namespace Json { 33 34 // Implementation of class Features 35 // //////////////////////////////// 36 37 Features::Features() 38 : allowComments_( true ) 39 , strictRoot_( false ) 40 { 41 } 42 43 44 Features 45 Features::all() 46 { 47 return Features(); 48 } 49 50 51 Features 52 Features::strictMode() 53 { 54 Features features; 55 features.allowComments_ = false; 56 features.strictRoot_ = true; 57 return features; 58 } 59 60 // Implementation of class Reader 61 // //////////////////////////////// 62 63 64 static inline bool 65 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 ) 66 { 67 return c == c1 || c == c2 || c == c3 || c == c4; 68 } 69 70 static inline bool 71 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 ) 72 { 73 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5; 74 } 75 76 77 static bool 78 containsNewLine( Reader::Location begin, 79 Reader::Location end ) 80 { 81 for ( ;begin < end; ++begin ) 82 if ( *begin == '\n' || *begin == '\r' ) 83 return true; 84 return false; 85 } 86 87 88 // Class Reader 89 // ////////////////////////////////////////////////////////////////// 90 91 Reader::Reader() 92 : errors_(), 93 document_(), 94 begin_(), 95 end_(), 96 current_(), 97 lastValueEnd_(), 98 lastValue_(), 99 commentsBefore_(), 100 features_( Features::all() ), 101 collectComments_() 102 { 103 } 104 105 106 Reader::Reader( const Features &features ) 107 : errors_(), 108 document_(), 109 begin_(), 110 end_(), 111 current_(), 112 lastValueEnd_(), 113 lastValue_(), 114 commentsBefore_(), 115 features_( features ), 116 collectComments_() 117 { 118 } 119 120 121 bool 122 Reader::parse( const std::string &document, 123 Value &root, 124 bool collectComments ) 125 { 126 document_ = document; 127 const char *begin = document_.c_str(); 128 const char *end = begin + document_.length(); 129 return parse( begin, end, root, collectComments ); 130 } 131 132 133 bool 134 Reader::parse( std::istream& sin, 135 Value &root, 136 bool collectComments ) 137 { 138 //std::istream_iterator<char> begin(sin); 139 //std::istream_iterator<char> end; 140 // Those would allow streamed input from a file, if parse() were a 141 // template function. 142 143 // Since std::string is reference-counted, this at least does not 144 // create an extra copy. 145 std::string doc; 146 std::getline(sin, doc, (char)EOF); 147 return parse( doc, root, collectComments ); 148 } 149 150 bool 151 Reader::parse( const char *beginDoc, const char *endDoc, 152 Value &root, 153 bool collectComments ) 154 { 155 if ( !features_.allowComments_ ) 156 { 157 collectComments = false; 158 } 159 160 begin_ = beginDoc; 161 end_ = endDoc; 162 collectComments_ = collectComments; 163 current_ = begin_; 164 lastValueEnd_ = 0; 165 lastValue_ = 0; 166 commentsBefore_ = ""; 167 errors_.clear(); 168 while ( !nodes_.empty() ) 169 nodes_.pop(); 170 nodes_.push( &root ); 171 172 bool successful = readValue(); 173 Token token; 174 skipCommentTokens( token ); 175 if ( collectComments_ && !commentsBefore_.empty() ) 176 root.setComment( commentsBefore_, commentAfter ); 177 if ( features_.strictRoot_ ) 178 { 179 if ( !root.isArray() && !root.isObject() ) 180 { 181 // Set error location to start of doc, ideally should be first token found in doc 182 token.type_ = tokenError; 183 token.start_ = beginDoc; 184 token.end_ = endDoc; 185 addError( "A valid JSON document must be either an array or an object value.", 186 token ); 187 return false; 188 } 189 } 190 return successful; 191 } 192 193 194 bool 195 Reader::readValue() 196 { 197 Token token; 198 skipCommentTokens( token ); 199 bool successful = true; 200 201 if ( collectComments_ && !commentsBefore_.empty() ) 202 { 203 currentValue().setComment( commentsBefore_, commentBefore ); 204 commentsBefore_ = ""; 205 } 206 207 208 switch ( token.type_ ) 209 { 210 case tokenObjectBegin: 211 successful = readObject( token ); 212 break; 213 case tokenArrayBegin: 214 successful = readArray( token ); 215 break; 216 case tokenNumber: 217 successful = decodeNumber( token ); 218 break; 219 case tokenString: 220 successful = decodeString( token ); 221 break; 222 case tokenTrue: 223 currentValue() = true; 224 break; 225 case tokenFalse: 226 currentValue() = false; 227 break; 228 case tokenNull: 229 currentValue() = Value(); 230 break; 231 default: 232 return addError( "Syntax error: value, object or array expected.", token ); 233 } 234 235 if ( collectComments_ ) 236 { 237 lastValueEnd_ = current_; 238 lastValue_ = ¤tValue(); 239 } 240 241 return successful; 242 } 243 244 245 void 246 Reader::skipCommentTokens( Token &token ) 247 { 248 if ( features_.allowComments_ ) 249 { 250 do 251 { 252 readToken( token ); 253 } 254 while ( token.type_ == tokenComment ); 255 } 256 else 257 { 258 readToken( token ); 259 } 260 } 261 262 263 bool 264 Reader::expectToken( TokenType type, Token &token, const char *message ) 265 { 266 readToken( token ); 267 if ( token.type_ != type ) 268 return addError( message, token ); 269 return true; 270 } 271 272 273 bool 274 Reader::readToken( Token &token ) 275 { 276 skipSpaces(); 277 token.start_ = current_; 278 Char c = getNextChar(); 279 bool ok = true; 280 switch ( c ) 281 { 282 case '{': 283 token.type_ = tokenObjectBegin; 284 break; 285 case '}': 286 token.type_ = tokenObjectEnd; 287 break; 288 case '[': 289 token.type_ = tokenArrayBegin; 290 break; 291 case ']': 292 token.type_ = tokenArrayEnd; 293 break; 294 case '"': 295 token.type_ = tokenString; 296 ok = readString(); 297 break; 298 case '/': 299 token.type_ = tokenComment; 300 ok = readComment(); 301 break; 302 case '0': 303 case '1': 304 case '2': 305 case '3': 306 case '4': 307 case '5': 308 case '6': 309 case '7': 310 case '8': 311 case '9': 312 case '-': 313 token.type_ = tokenNumber; 314 readNumber(); 315 break; 316 case 't': 317 token.type_ = tokenTrue; 318 ok = match( "rue", 3 ); 319 break; 320 case 'f': 321 token.type_ = tokenFalse; 322 ok = match( "alse", 4 ); 323 break; 324 case 'n': 325 token.type_ = tokenNull; 326 ok = match( "ull", 3 ); 327 break; 328 case ',': 329 token.type_ = tokenArraySeparator; 330 break; 331 case ':': 332 token.type_ = tokenMemberSeparator; 333 break; 334 case 0: 335 token.type_ = tokenEndOfStream; 336 break; 337 default: 338 ok = false; 339 break; 340 } 341 if ( !ok ) 342 token.type_ = tokenError; 343 token.end_ = current_; 344 return true; 345 } 346 347 348 void 349 Reader::skipSpaces() 350 { 351 while ( current_ != end_ ) 352 { 353 Char c = *current_; 354 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) 355 ++current_; 356 else 357 break; 358 } 359 } 360 361 362 bool 363 Reader::match( Location pattern, 364 int patternLength ) 365 { 366 if ( end_ - current_ < patternLength ) 367 return false; 368 int index = patternLength; 369 while ( index-- ) 370 if ( current_[index] != pattern[index] ) 371 return false; 372 current_ += patternLength; 373 return true; 374 } 375 376 377 bool 378 Reader::readComment() 379 { 380 Location commentBegin = current_ - 1; 381 Char c = getNextChar(); 382 bool successful = false; 383 if ( c == '*' ) 384 successful = readCStyleComment(); 385 else if ( c == '/' ) 386 successful = readCppStyleComment(); 387 if ( !successful ) 388 return false; 389 390 if ( collectComments_ ) 391 { 392 CommentPlacement placement = commentBefore; 393 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) 394 { 395 if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) 396 placement = commentAfterOnSameLine; 397 } 398 399 addComment( commentBegin, current_, placement ); 400 } 401 return true; 402 } 403 404 405 void 406 Reader::addComment( Location begin, 407 Location end, 408 CommentPlacement placement ) 409 { 410 assert( collectComments_ ); 411 if ( placement == commentAfterOnSameLine ) 412 { 413 assert( lastValue_ != 0 ); 414 lastValue_->setComment( std::string( begin, end ), placement ); 415 } 416 else 417 { 418 if ( !commentsBefore_.empty() ) 419 commentsBefore_ += "\n"; 420 commentsBefore_ += std::string( begin, end ); 421 } 422 } 423 424 425 bool 426 Reader::readCStyleComment() 427 { 428 while ( current_ != end_ ) 429 { 430 Char c = getNextChar(); 431 if ( c == '*' && *current_ == '/' ) 432 break; 433 } 434 return getNextChar() == '/'; 435 } 436 437 438 bool 439 Reader::readCppStyleComment() 440 { 441 while ( current_ != end_ ) 442 { 443 Char c = getNextChar(); 444 if ( c == '\r' || c == '\n' ) 445 break; 446 } 447 return true; 448 } 449 450 451 void 452 Reader::readNumber() 453 { 454 while ( current_ != end_ ) 455 { 456 if ( !(*current_ >= '0' && *current_ <= '9') && 457 !in( *current_, '.', 'e', 'E', '+', '-' ) ) 458 break; 459 ++current_; 460 } 461 } 462 463 bool 464 Reader::readString() 465 { 466 Char c = 0; 467 while ( current_ != end_ ) 468 { 469 c = getNextChar(); 470 if ( c == '\\' ) 471 getNextChar(); 472 else if ( c == '"' ) 473 break; 474 } 475 return c == '"'; 476 } 477 478 479 bool 480 Reader::readObject( Token &/*tokenStart*/ ) 481 { 482 Token tokenName; 483 std::string name; 484 currentValue() = Value( objectValue ); 485 while ( readToken( tokenName ) ) 486 { 487 bool initialTokenOk = true; 488 while ( tokenName.type_ == tokenComment && initialTokenOk ) 489 initialTokenOk = readToken( tokenName ); 490 if ( !initialTokenOk ) 491 break; 492 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object 493 return true; 494 if ( tokenName.type_ != tokenString ) 495 break; 496 497 name = ""; 498 if ( !decodeString( tokenName, name ) ) 499 return recoverFromError( tokenObjectEnd ); 500 501 Token colon; 502 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) 503 { 504 return addErrorAndRecover( "Missing ':' after object member name", 505 colon, 506 tokenObjectEnd ); 507 } 508 Value &value = currentValue()[ name ]; 509 nodes_.push( &value ); 510 bool ok = readValue(); 511 nodes_.pop(); 512 if ( !ok ) // error already set 513 return recoverFromError( tokenObjectEnd ); 514 515 Token comma; 516 if ( !readToken( comma ) 517 || ( comma.type_ != tokenObjectEnd && 518 comma.type_ != tokenArraySeparator && 519 comma.type_ != tokenComment ) ) 520 { 521 return addErrorAndRecover( "Missing ',' or '}' in object declaration", 522 comma, 523 tokenObjectEnd ); 524 } 525 bool finalizeTokenOk = true; 526 while ( comma.type_ == tokenComment && 527 finalizeTokenOk ) 528 finalizeTokenOk = readToken( comma ); 529 if ( comma.type_ == tokenObjectEnd ) 530 return true; 531 } 532 return addErrorAndRecover( "Missing '}' or object member name", 533 tokenName, 534 tokenObjectEnd ); 535 } 536 537 538 bool 539 Reader::readArray( Token &/*tokenStart*/ ) 540 { 541 currentValue() = Value( arrayValue ); 542 skipSpaces(); 543 if ( *current_ == ']' ) // empty array 544 { 545 Token endArray; 546 readToken( endArray ); 547 return true; 548 } 549 int index = 0; 550 for (;;) 551 { 552 Value &value = currentValue()[ index++ ]; 553 nodes_.push( &value ); 554 bool ok = readValue(); 555 nodes_.pop(); 556 if ( !ok ) // error already set 557 return recoverFromError( tokenArrayEnd ); 558 559 Token token; 560 // Accept Comment after last item in the array. 561 ok = readToken( token ); 562 while ( token.type_ == tokenComment && ok ) 563 { 564 ok = readToken( token ); 565 } 566 bool badTokenType = ( token.type_ != tokenArraySeparator && 567 token.type_ != tokenArrayEnd ); 568 if ( !ok || badTokenType ) 569 { 570 return addErrorAndRecover( "Missing ',' or ']' in array declaration", 571 token, 572 tokenArrayEnd ); 573 } 574 if ( token.type_ == tokenArrayEnd ) 575 break; 576 } 577 return true; 578 } 579 580 581 bool 582 Reader::decodeNumber( Token &token ) 583 { 584 bool isDouble = false; 585 for ( Location inspect = token.start_; inspect != token.end_; ++inspect ) 586 { 587 isDouble = isDouble 588 || in( *inspect, '.', 'e', 'E', '+' ) 589 || ( *inspect == '-' && inspect != token.start_ ); 590 } 591 if ( isDouble ) 592 return decodeDouble( token ); 593 // Attempts to parse the number as an integer. If the number is 594 // larger than the maximum supported value of an integer then 595 // we decode the number as a double. 596 Location current = token.start_; 597 bool isNegative = *current == '-'; 598 if ( isNegative ) 599 ++current; 600 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt) 601 : Value::maxLargestUInt; 602 Value::LargestUInt threshold = maxIntegerValue / 10; 603 Value::LargestUInt value = 0; 604 while ( current < token.end_ ) 605 { 606 Char c = *current++; 607 if ( c < '0' || c > '9' ) 608 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 609 Value::UInt digit(c - '0'); 610 if ( value >= threshold ) 611 { 612 // We've hit or exceeded the max value divided by 10 (rounded down). If 613 // a) we've only just touched the limit, b) this is the last digit, and 614 // c) it's small enough to fit in that rounding delta, we're okay. 615 // Otherwise treat this number as a double to avoid overflow. 616 if (value > threshold || 617 current != token.end_ || 618 digit > maxIntegerValue % 10) 619 { 620 return decodeDouble( token ); 621 } 622 } 623 value = value * 10 + digit; 624 } 625 if ( isNegative ) 626 currentValue() = -Value::LargestInt( value ); 627 else if ( value <= Value::LargestUInt(Value::maxInt) ) 628 currentValue() = Value::LargestInt( value ); 629 else 630 currentValue() = value; 631 return true; 632 } 633 634 635 bool 636 Reader::decodeDouble( Token &token ) 637 { 638 double value = 0; 639 const int bufferSize = 32; 640 int count; 641 int length = int(token.end_ - token.start_); 642 643 // Sanity check to avoid buffer overflow exploits. 644 if (length < 0) { 645 return addError( "Unable to parse token length", token ); 646 } 647 648 // Avoid using a string constant for the format control string given to 649 // sscanf, as this can cause hard to debug crashes on OS X. See here for more 650 // info: 651 // 652 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html 653 char format[] = "%lf"; 654 655 if ( length <= bufferSize ) 656 { 657 Char buffer[bufferSize+1]; 658 memcpy( buffer, token.start_, length ); 659 buffer[length] = 0; 660 count = sscanf( buffer, format, &value ); 661 } 662 else 663 { 664 std::string buffer( token.start_, token.end_ ); 665 count = sscanf( buffer.c_str(), format, &value ); 666 } 667 668 if ( count != 1 ) 669 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token ); 670 currentValue() = value; 671 return true; 672 } 673 674 675 bool 676 Reader::decodeString( Token &token ) 677 { 678 std::string decoded; 679 if ( !decodeString( token, decoded ) ) 680 return false; 681 currentValue() = decoded; 682 return true; 683 } 684 685 686 bool 687 Reader::decodeString( Token &token, std::string &decoded ) 688 { 689 decoded.reserve( token.end_ - token.start_ - 2 ); 690 Location current = token.start_ + 1; // skip '"' 691 Location end = token.end_ - 1; // do not include '"' 692 while ( current != end ) 693 { 694 Char c = *current++; 695 if ( c == '"' ) 696 break; 697 else if ( c == '\\' ) 698 { 699 if ( current == end ) 700 return addError( "Empty escape sequence in string", token, current ); 701 Char escape = *current++; 702 switch ( escape ) 703 { 704 case '"': decoded += '"'; break; 705 case '/': decoded += '/'; break; 706 case '\\': decoded += '\\'; break; 707 case 'b': decoded += '\b'; break; 708 case 'f': decoded += '\f'; break; 709 case 'n': decoded += '\n'; break; 710 case 'r': decoded += '\r'; break; 711 case 't': decoded += '\t'; break; 712 case 'u': 713 { 714 unsigned int unicode; 715 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) 716 return false; 717 decoded += codePointToUTF8(unicode); 718 } 719 break; 720 default: 721 return addError( "Bad escape sequence in string", token, current ); 722 } 723 } 724 else 725 { 726 decoded += c; 727 } 728 } 729 return true; 730 } 731 732 bool 733 Reader::decodeUnicodeCodePoint( Token &token, 734 Location ¤t, 735 Location end, 736 unsigned int &unicode ) 737 { 738 739 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) 740 return false; 741 if (unicode >= 0xD800 && unicode <= 0xDBFF) 742 { 743 // surrogate pairs 744 if (end - current < 6) 745 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); 746 unsigned int surrogatePair; 747 if (*(current++) == '\\' && *(current++)== 'u') 748 { 749 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) 750 { 751 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); 752 } 753 else 754 return false; 755 } 756 else 757 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); 758 } 759 return true; 760 } 761 762 bool 763 Reader::decodeUnicodeEscapeSequence( Token &token, 764 Location ¤t, 765 Location end, 766 unsigned int &unicode ) 767 { 768 if ( end - current < 4 ) 769 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); 770 unicode = 0; 771 for ( int index =0; index < 4; ++index ) 772 { 773 Char c = *current++; 774 unicode *= 16; 775 if ( c >= '0' && c <= '9' ) 776 unicode += c - '0'; 777 else if ( c >= 'a' && c <= 'f' ) 778 unicode += c - 'a' + 10; 779 else if ( c >= 'A' && c <= 'F' ) 780 unicode += c - 'A' + 10; 781 else 782 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); 783 } 784 return true; 785 } 786 787 788 bool 789 Reader::addError( const std::string &message, 790 Token &token, 791 Location extra ) 792 { 793 ErrorInfo info; 794 info.token_ = token; 795 info.message_ = message; 796 info.extra_ = extra; 797 errors_.push_back( info ); 798 return false; 799 } 800 801 802 bool 803 Reader::recoverFromError( TokenType skipUntilToken ) 804 { 805 int errorCount = int(errors_.size()); 806 Token skip; 807 for (;;) 808 { 809 if ( !readToken(skip) ) 810 errors_.resize( errorCount ); // discard errors caused by recovery 811 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) 812 break; 813 } 814 errors_.resize( errorCount ); 815 return false; 816 } 817 818 819 bool 820 Reader::addErrorAndRecover( const std::string &message, 821 Token &token, 822 TokenType skipUntilToken ) 823 { 824 addError( message, token ); 825 return recoverFromError( skipUntilToken ); 826 } 827 828 829 Value & 830 Reader::currentValue() 831 { 832 return *(nodes_.top()); 833 } 834 835 836 Reader::Char 837 Reader::getNextChar() 838 { 839 if ( current_ == end_ ) 840 return 0; 841 return *current_++; 842 } 843 844 845 void 846 Reader::getLocationLineAndColumn( Location location, 847 int &line, 848 int &column ) const 849 { 850 Location current = begin_; 851 Location lastLineStart = current; 852 line = 0; 853 while ( current < location && current != end_ ) 854 { 855 Char c = *current++; 856 if ( c == '\r' ) 857 { 858 if ( *current == '\n' ) 859 ++current; 860 lastLineStart = current; 861 ++line; 862 } 863 else if ( c == '\n' ) 864 { 865 lastLineStart = current; 866 ++line; 867 } 868 } 869 // column & line start at 1 870 column = int(location - lastLineStart) + 1; 871 ++line; 872 } 873 874 875 std::string 876 Reader::getLocationLineAndColumn( Location location ) const 877 { 878 int line, column; 879 getLocationLineAndColumn( location, line, column ); 880 char buffer[18+16+16+1]; 881 sprintf( buffer, "Line %d, Column %d", line, column ); 882 return buffer; 883 } 884 885 886 // Deprecated. Preserved for backward compatibility 887 std::string 888 Reader::getFormatedErrorMessages() const 889 { 890 return getFormattedErrorMessages(); 891 } 892 893 894 std::string 895 Reader::getFormattedErrorMessages() const 896 { 897 std::string formattedMessage; 898 for ( Errors::const_iterator itError = errors_.begin(); 899 itError != errors_.end(); 900 ++itError ) 901 { 902 const ErrorInfo &error = *itError; 903 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n"; 904 formattedMessage += " " + error.message_ + "\n"; 905 if ( error.extra_ ) 906 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n"; 907 } 908 return formattedMessage; 909 } 910 911 912 std::istream& operator>>( std::istream &sin, Value &root ) 913 { 914 Json::Reader reader; 915 bool ok = reader.parse(sin, root, true); 916 if (!ok) { 917 fprintf( 918 stderr, 919 "Error from reader: %s", 920 reader.getFormattedErrorMessages().c_str()); 921 922 JSON_FAIL_MESSAGE("reader error"); 923 } 924 return sin; 925 } 926 927 928 } // namespace Json 929