1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/json_parser.h" 6 7 #include <cmath> 8 9 #include "base/logging.h" 10 #include "base/macros.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_piece.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/stringprintf.h" 16 #include "base/strings/utf_string_conversion_utils.h" 17 #include "base/strings/utf_string_conversions.h" 18 #include "base/third_party/icu/icu_utf.h" 19 #include "base/values.h" 20 21 namespace base { 22 namespace internal { 23 24 namespace { 25 26 const int kStackMaxDepth = 100; 27 28 const int32_t kExtendedASCIIStart = 0x80; 29 30 // This and the class below are used to own the JSON input string for when 31 // string tokens are stored as StringPiece instead of std::string. This 32 // optimization avoids about 2/3rds of string memory copies. The constructor 33 // takes ownership of the input string. The real root value is Swap()ed into 34 // the new instance. 35 class DictionaryHiddenRootValue : public DictionaryValue { 36 public: 37 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) { 38 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); 39 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); 40 } 41 42 void Swap(DictionaryValue* other) override { 43 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; 44 45 // First deep copy to convert JSONStringValue to std::string and swap that 46 // copy with |other|, which contains the new contents of |this|. 47 scoped_ptr<DictionaryValue> copy(DeepCopy()); 48 copy->Swap(other); 49 50 // Then erase the contents of the current dictionary and swap in the 51 // new contents, originally from |other|. 52 Clear(); 53 json_.reset(); 54 DictionaryValue::Swap(copy.get()); 55 } 56 57 // Not overriding DictionaryValue::Remove because it just calls through to 58 // the method below. 59 60 bool RemoveWithoutPathExpansion(const std::string& key, 61 scoped_ptr<Value>* out) override { 62 // If the caller won't take ownership of the removed value, just call up. 63 if (!out) 64 return DictionaryValue::RemoveWithoutPathExpansion(key, out); 65 66 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; 67 68 // Otherwise, remove the value while its still "owned" by this and copy it 69 // to convert any JSONStringValues to std::string. 70 scoped_ptr<Value> out_owned; 71 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) 72 return false; 73 74 out->reset(out_owned->DeepCopy()); 75 76 return true; 77 } 78 79 private: 80 scoped_ptr<std::string> json_; 81 82 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); 83 }; 84 85 class ListHiddenRootValue : public ListValue { 86 public: 87 ListHiddenRootValue(std::string* json, Value* root) : json_(json) { 88 DCHECK(root->IsType(Value::TYPE_LIST)); 89 ListValue::Swap(static_cast<ListValue*>(root)); 90 } 91 92 void Swap(ListValue* other) override { 93 DVLOG(1) << "Swap()ing a ListValue inefficiently."; 94 95 // First deep copy to convert JSONStringValue to std::string and swap that 96 // copy with |other|, which contains the new contents of |this|. 97 scoped_ptr<ListValue> copy(DeepCopy()); 98 copy->Swap(other); 99 100 // Then erase the contents of the current list and swap in the new contents, 101 // originally from |other|. 102 Clear(); 103 json_.reset(); 104 ListValue::Swap(copy.get()); 105 } 106 107 bool Remove(size_t index, scoped_ptr<Value>* out) override { 108 // If the caller won't take ownership of the removed value, just call up. 109 if (!out) 110 return ListValue::Remove(index, out); 111 112 DVLOG(1) << "Remove()ing from a ListValue inefficiently."; 113 114 // Otherwise, remove the value while its still "owned" by this and copy it 115 // to convert any JSONStringValues to std::string. 116 scoped_ptr<Value> out_owned; 117 if (!ListValue::Remove(index, &out_owned)) 118 return false; 119 120 out->reset(out_owned->DeepCopy()); 121 122 return true; 123 } 124 125 private: 126 scoped_ptr<std::string> json_; 127 128 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); 129 }; 130 131 // A variant on StringValue that uses StringPiece instead of copying the string 132 // into the Value. This can only be stored in a child of hidden root (above), 133 // otherwise the referenced string will not be guaranteed to outlive it. 134 class JSONStringValue : public Value { 135 public: 136 explicit JSONStringValue(const StringPiece& piece) 137 : Value(TYPE_STRING), 138 string_piece_(piece) { 139 } 140 141 // Overridden from Value: 142 bool GetAsString(std::string* out_value) const override { 143 string_piece_.CopyToString(out_value); 144 return true; 145 } 146 bool GetAsString(string16* out_value) const override { 147 *out_value = UTF8ToUTF16(string_piece_); 148 return true; 149 } 150 Value* DeepCopy() const override { 151 return new StringValue(string_piece_.as_string()); 152 } 153 bool Equals(const Value* other) const override { 154 std::string other_string; 155 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && 156 StringPiece(other_string) == string_piece_; 157 } 158 159 private: 160 // The location in the original input stream. 161 StringPiece string_piece_; 162 163 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); 164 }; 165 166 // Simple class that checks for maximum recursion/"stack overflow." 167 class StackMarker { 168 public: 169 explicit StackMarker(int* depth) : depth_(depth) { 170 ++(*depth_); 171 DCHECK_LE(*depth_, kStackMaxDepth); 172 } 173 ~StackMarker() { 174 --(*depth_); 175 } 176 177 bool IsTooDeep() const { 178 return *depth_ >= kStackMaxDepth; 179 } 180 181 private: 182 int* const depth_; 183 184 DISALLOW_COPY_AND_ASSIGN(StackMarker); 185 }; 186 187 } // namespace 188 189 JSONParser::JSONParser(int options) 190 : options_(options), 191 start_pos_(NULL), 192 pos_(NULL), 193 end_pos_(NULL), 194 index_(0), 195 stack_depth_(0), 196 line_number_(0), 197 index_last_line_(0), 198 error_code_(JSONReader::JSON_NO_ERROR), 199 error_line_(0), 200 error_column_(0) { 201 } 202 203 JSONParser::~JSONParser() { 204 } 205 206 Value* JSONParser::Parse(const StringPiece& input) { 207 scoped_ptr<std::string> input_copy; 208 // If the children of a JSON root can be detached, then hidden roots cannot 209 // be used, so do not bother copying the input because StringPiece will not 210 // be used anywhere. 211 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 212 input_copy.reset(new std::string(input.as_string())); 213 start_pos_ = input_copy->data(); 214 } else { 215 start_pos_ = input.data(); 216 } 217 pos_ = start_pos_; 218 end_pos_ = start_pos_ + input.length(); 219 index_ = 0; 220 line_number_ = 1; 221 index_last_line_ = 0; 222 223 error_code_ = JSONReader::JSON_NO_ERROR; 224 error_line_ = 0; 225 error_column_ = 0; 226 227 // When the input JSON string starts with a UTF-8 Byte-Order-Mark 228 // <0xEF 0xBB 0xBF>, advance the start position to avoid the 229 // ParseNextToken function mis-treating a Unicode BOM as an invalid 230 // character and returning NULL. 231 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF && 232 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB && 233 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) { 234 NextNChars(3); 235 } 236 237 // Parse the first and any nested tokens. 238 scoped_ptr<Value> root(ParseNextToken()); 239 if (!root.get()) 240 return NULL; 241 242 // Make sure the input stream is at an end. 243 if (GetNextToken() != T_END_OF_INPUT) { 244 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { 245 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); 246 return NULL; 247 } 248 } 249 250 // Dictionaries and lists can contain JSONStringValues, so wrap them in a 251 // hidden root. 252 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 253 if (root->IsType(Value::TYPE_DICTIONARY)) { 254 return new DictionaryHiddenRootValue(input_copy.release(), root.get()); 255 } else if (root->IsType(Value::TYPE_LIST)) { 256 return new ListHiddenRootValue(input_copy.release(), root.get()); 257 } else if (root->IsType(Value::TYPE_STRING)) { 258 // A string type could be a JSONStringValue, but because there's no 259 // corresponding HiddenRootValue, the memory will be lost. Deep copy to 260 // preserve it. 261 return root->DeepCopy(); 262 } 263 } 264 265 // All other values can be returned directly. 266 return root.release(); 267 } 268 269 JSONReader::JsonParseError JSONParser::error_code() const { 270 return error_code_; 271 } 272 273 std::string JSONParser::GetErrorMessage() const { 274 return FormatErrorMessage(error_line_, error_column_, 275 JSONReader::ErrorCodeToString(error_code_)); 276 } 277 278 int JSONParser::error_line() const { 279 return error_line_; 280 } 281 282 int JSONParser::error_column() const { 283 return error_column_; 284 } 285 286 // StringBuilder /////////////////////////////////////////////////////////////// 287 288 JSONParser::StringBuilder::StringBuilder() 289 : pos_(NULL), 290 length_(0), 291 string_(NULL) { 292 } 293 294 JSONParser::StringBuilder::StringBuilder(const char* pos) 295 : pos_(pos), 296 length_(0), 297 string_(NULL) { 298 } 299 300 void JSONParser::StringBuilder::Swap(StringBuilder* other) { 301 std::swap(other->string_, string_); 302 std::swap(other->pos_, pos_); 303 std::swap(other->length_, length_); 304 } 305 306 JSONParser::StringBuilder::~StringBuilder() { 307 delete string_; 308 } 309 310 void JSONParser::StringBuilder::Append(const char& c) { 311 DCHECK_GE(c, 0); 312 DCHECK_LT(c, 128); 313 314 if (string_) 315 string_->push_back(c); 316 else 317 ++length_; 318 } 319 320 void JSONParser::StringBuilder::AppendString(const std::string& str) { 321 DCHECK(string_); 322 string_->append(str); 323 } 324 325 void JSONParser::StringBuilder::Convert() { 326 if (string_) 327 return; 328 string_ = new std::string(pos_, length_); 329 } 330 331 bool JSONParser::StringBuilder::CanBeStringPiece() const { 332 return !string_; 333 } 334 335 StringPiece JSONParser::StringBuilder::AsStringPiece() { 336 if (string_) 337 return StringPiece(); 338 return StringPiece(pos_, length_); 339 } 340 341 const std::string& JSONParser::StringBuilder::AsString() { 342 if (!string_) 343 Convert(); 344 return *string_; 345 } 346 347 // JSONParser private ////////////////////////////////////////////////////////// 348 349 inline bool JSONParser::CanConsume(int length) { 350 return pos_ + length <= end_pos_; 351 } 352 353 const char* JSONParser::NextChar() { 354 DCHECK(CanConsume(1)); 355 ++index_; 356 ++pos_; 357 return pos_; 358 } 359 360 void JSONParser::NextNChars(int n) { 361 DCHECK(CanConsume(n)); 362 index_ += n; 363 pos_ += n; 364 } 365 366 JSONParser::Token JSONParser::GetNextToken() { 367 EatWhitespaceAndComments(); 368 if (!CanConsume(1)) 369 return T_END_OF_INPUT; 370 371 switch (*pos_) { 372 case '{': 373 return T_OBJECT_BEGIN; 374 case '}': 375 return T_OBJECT_END; 376 case '[': 377 return T_ARRAY_BEGIN; 378 case ']': 379 return T_ARRAY_END; 380 case '"': 381 return T_STRING; 382 case '0': 383 case '1': 384 case '2': 385 case '3': 386 case '4': 387 case '5': 388 case '6': 389 case '7': 390 case '8': 391 case '9': 392 case '-': 393 return T_NUMBER; 394 case 't': 395 return T_BOOL_TRUE; 396 case 'f': 397 return T_BOOL_FALSE; 398 case 'n': 399 return T_NULL; 400 case ',': 401 return T_LIST_SEPARATOR; 402 case ':': 403 return T_OBJECT_PAIR_SEPARATOR; 404 default: 405 return T_INVALID_TOKEN; 406 } 407 } 408 409 void JSONParser::EatWhitespaceAndComments() { 410 while (pos_ < end_pos_) { 411 switch (*pos_) { 412 case '\r': 413 case '\n': 414 index_last_line_ = index_; 415 // Don't increment line_number_ twice for "\r\n". 416 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) 417 ++line_number_; 418 // Fall through. 419 case ' ': 420 case '\t': 421 NextChar(); 422 break; 423 case '/': 424 if (!EatComment()) 425 return; 426 break; 427 default: 428 return; 429 } 430 } 431 } 432 433 bool JSONParser::EatComment() { 434 if (*pos_ != '/' || !CanConsume(1)) 435 return false; 436 437 char next_char = *NextChar(); 438 if (next_char == '/') { 439 // Single line comment, read to newline. 440 while (CanConsume(1)) { 441 next_char = *NextChar(); 442 if (next_char == '\n' || next_char == '\r') 443 return true; 444 } 445 } else if (next_char == '*') { 446 char previous_char = '\0'; 447 // Block comment, read until end marker. 448 while (CanConsume(1)) { 449 next_char = *NextChar(); 450 if (previous_char == '*' && next_char == '/') { 451 // EatWhitespaceAndComments will inspect pos_, which will still be on 452 // the last / of the comment, so advance once more (which may also be 453 // end of input). 454 NextChar(); 455 return true; 456 } 457 previous_char = next_char; 458 } 459 460 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. 461 } 462 463 return false; 464 } 465 466 Value* JSONParser::ParseNextToken() { 467 return ParseToken(GetNextToken()); 468 } 469 470 Value* JSONParser::ParseToken(Token token) { 471 switch (token) { 472 case T_OBJECT_BEGIN: 473 return ConsumeDictionary(); 474 case T_ARRAY_BEGIN: 475 return ConsumeList(); 476 case T_STRING: 477 return ConsumeString(); 478 case T_NUMBER: 479 return ConsumeNumber(); 480 case T_BOOL_TRUE: 481 case T_BOOL_FALSE: 482 case T_NULL: 483 return ConsumeLiteral(); 484 default: 485 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 486 return NULL; 487 } 488 } 489 490 Value* JSONParser::ConsumeDictionary() { 491 if (*pos_ != '{') { 492 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 493 return NULL; 494 } 495 496 StackMarker depth_check(&stack_depth_); 497 if (depth_check.IsTooDeep()) { 498 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 499 return NULL; 500 } 501 502 scoped_ptr<DictionaryValue> dict(new DictionaryValue); 503 504 NextChar(); 505 Token token = GetNextToken(); 506 while (token != T_OBJECT_END) { 507 if (token != T_STRING) { 508 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); 509 return NULL; 510 } 511 512 // First consume the key. 513 StringBuilder key; 514 if (!ConsumeStringRaw(&key)) { 515 return NULL; 516 } 517 518 // Read the separator. 519 NextChar(); 520 token = GetNextToken(); 521 if (token != T_OBJECT_PAIR_SEPARATOR) { 522 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 523 return NULL; 524 } 525 526 // The next token is the value. Ownership transfers to |dict|. 527 NextChar(); 528 Value* value = ParseNextToken(); 529 if (!value) { 530 // ReportError from deeper level. 531 return NULL; 532 } 533 534 dict->SetWithoutPathExpansion(key.AsString(), value); 535 536 NextChar(); 537 token = GetNextToken(); 538 if (token == T_LIST_SEPARATOR) { 539 NextChar(); 540 token = GetNextToken(); 541 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 542 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 543 return NULL; 544 } 545 } else if (token != T_OBJECT_END) { 546 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 547 return NULL; 548 } 549 } 550 551 return dict.release(); 552 } 553 554 Value* JSONParser::ConsumeList() { 555 if (*pos_ != '[') { 556 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 557 return NULL; 558 } 559 560 StackMarker depth_check(&stack_depth_); 561 if (depth_check.IsTooDeep()) { 562 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 563 return NULL; 564 } 565 566 scoped_ptr<ListValue> list(new ListValue); 567 568 NextChar(); 569 Token token = GetNextToken(); 570 while (token != T_ARRAY_END) { 571 Value* item = ParseToken(token); 572 if (!item) { 573 // ReportError from deeper level. 574 return NULL; 575 } 576 577 list->Append(item); 578 579 NextChar(); 580 token = GetNextToken(); 581 if (token == T_LIST_SEPARATOR) { 582 NextChar(); 583 token = GetNextToken(); 584 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 585 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 586 return NULL; 587 } 588 } else if (token != T_ARRAY_END) { 589 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 590 return NULL; 591 } 592 } 593 594 return list.release(); 595 } 596 597 Value* JSONParser::ConsumeString() { 598 StringBuilder string; 599 if (!ConsumeStringRaw(&string)) 600 return NULL; 601 602 // Create the Value representation, using a hidden root, if configured 603 // to do so, and if the string can be represented by StringPiece. 604 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { 605 return new JSONStringValue(string.AsStringPiece()); 606 } else { 607 if (string.CanBeStringPiece()) 608 string.Convert(); 609 return new StringValue(string.AsString()); 610 } 611 } 612 613 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { 614 if (*pos_ != '"') { 615 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 616 return false; 617 } 618 619 // StringBuilder will internally build a StringPiece unless a UTF-16 620 // conversion occurs, at which point it will perform a copy into a 621 // std::string. 622 StringBuilder string(NextChar()); 623 624 int length = end_pos_ - start_pos_; 625 int32_t next_char = 0; 626 627 while (CanConsume(1)) { 628 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. 629 CBU8_NEXT(start_pos_, index_, length, next_char); 630 if (next_char < 0 || !IsValidCharacter(next_char)) { 631 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); 632 return false; 633 } 634 635 // If this character is an escape sequence... 636 if (next_char == '\\') { 637 // The input string will be adjusted (either by combining the two 638 // characters of an encoded escape sequence, or with a UTF conversion), 639 // so using StringPiece isn't possible -- force a conversion. 640 string.Convert(); 641 642 if (!CanConsume(1)) { 643 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 644 return false; 645 } 646 647 switch (*NextChar()) { 648 // Allowed esape sequences: 649 case 'x': { // UTF-8 sequence. 650 // UTF-8 \x escape sequences are not allowed in the spec, but they 651 // are supported here for backwards-compatiblity with the old parser. 652 if (!CanConsume(2)) { 653 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); 654 return false; 655 } 656 657 int hex_digit = 0; 658 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { 659 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 660 return false; 661 } 662 NextChar(); 663 664 if (hex_digit < kExtendedASCIIStart) 665 string.Append(static_cast<char>(hex_digit)); 666 else 667 DecodeUTF8(hex_digit, &string); 668 break; 669 } 670 case 'u': { // UTF-16 sequence. 671 // UTF units are of the form \uXXXX. 672 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. 673 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 674 return false; 675 } 676 677 // Skip the 'u'. 678 NextChar(); 679 680 std::string utf8_units; 681 if (!DecodeUTF16(&utf8_units)) { 682 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 683 return false; 684 } 685 686 string.AppendString(utf8_units); 687 break; 688 } 689 case '"': 690 string.Append('"'); 691 break; 692 case '\\': 693 string.Append('\\'); 694 break; 695 case '/': 696 string.Append('/'); 697 break; 698 case 'b': 699 string.Append('\b'); 700 break; 701 case 'f': 702 string.Append('\f'); 703 break; 704 case 'n': 705 string.Append('\n'); 706 break; 707 case 'r': 708 string.Append('\r'); 709 break; 710 case 't': 711 string.Append('\t'); 712 break; 713 case 'v': // Not listed as valid escape sequence in the RFC. 714 string.Append('\v'); 715 break; 716 // All other escape squences are illegal. 717 default: 718 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 719 return false; 720 } 721 } else if (next_char == '"') { 722 --index_; // Rewind by one because of CBU8_NEXT. 723 out->Swap(&string); 724 return true; 725 } else { 726 if (next_char < kExtendedASCIIStart) 727 string.Append(static_cast<char>(next_char)); 728 else 729 DecodeUTF8(next_char, &string); 730 } 731 } 732 733 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 734 return false; 735 } 736 737 // Entry is at the first X in \uXXXX. 738 bool JSONParser::DecodeUTF16(std::string* dest_string) { 739 if (!CanConsume(4)) 740 return false; 741 742 // This is a 32-bit field because the shift operations in the 743 // conversion process below cause MSVC to error about "data loss." 744 // This only stores UTF-16 code units, though. 745 // Consume the UTF-16 code unit, which may be a high surrogate. 746 int code_unit16_high = 0; 747 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) 748 return false; 749 750 // Only add 3, not 4, because at the end of this iteration, the parser has 751 // finished working with the last digit of the UTF sequence, meaning that 752 // the next iteration will advance to the next byte. 753 NextNChars(3); 754 755 // Used to convert the UTF-16 code units to a code point and then to a UTF-8 756 // code unit sequence. 757 char code_unit8[8] = { 0 }; 758 size_t offset = 0; 759 760 // If this is a high surrogate, consume the next code unit to get the 761 // low surrogate. 762 if (CBU16_IS_SURROGATE(code_unit16_high)) { 763 // Make sure this is the high surrogate. If not, it's an encoding 764 // error. 765 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) 766 return false; 767 768 // Make sure that the token has more characters to consume the 769 // lower surrogate. 770 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. 771 return false; 772 if (*NextChar() != '\\' || *NextChar() != 'u') 773 return false; 774 775 NextChar(); // Read past 'u'. 776 int code_unit16_low = 0; 777 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) 778 return false; 779 780 NextNChars(3); 781 782 if (!CBU16_IS_TRAIL(code_unit16_low)) { 783 return false; 784 } 785 786 uint32_t code_point = 787 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); 788 if (!IsValidCharacter(code_point)) 789 return false; 790 791 offset = 0; 792 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); 793 } else { 794 // Not a surrogate. 795 DCHECK(CBU16_IS_SINGLE(code_unit16_high)); 796 if (!IsValidCharacter(code_unit16_high)) 797 return false; 798 799 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); 800 } 801 802 dest_string->append(code_unit8); 803 return true; 804 } 805 806 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) { 807 DCHECK(IsValidCharacter(point)); 808 809 // Anything outside of the basic ASCII plane will need to be decoded from 810 // int32_t to a multi-byte sequence. 811 if (point < kExtendedASCIIStart) { 812 dest->Append(static_cast<char>(point)); 813 } else { 814 char utf8_units[4] = { 0 }; 815 int offset = 0; 816 CBU8_APPEND_UNSAFE(utf8_units, offset, point); 817 dest->Convert(); 818 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be 819 // zero terminated at this point. |offset| contains the correct length. 820 dest->AppendString(std::string(utf8_units, offset)); 821 } 822 } 823 824 Value* JSONParser::ConsumeNumber() { 825 const char* num_start = pos_; 826 const int start_index = index_; 827 int end_index = start_index; 828 829 if (*pos_ == '-') 830 NextChar(); 831 832 if (!ReadInt(false)) { 833 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 834 return NULL; 835 } 836 end_index = index_; 837 838 // The optional fraction part. 839 if (*pos_ == '.') { 840 if (!CanConsume(1)) { 841 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 842 return NULL; 843 } 844 NextChar(); 845 if (!ReadInt(true)) { 846 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 847 return NULL; 848 } 849 end_index = index_; 850 } 851 852 // Optional exponent part. 853 if (*pos_ == 'e' || *pos_ == 'E') { 854 NextChar(); 855 if (*pos_ == '-' || *pos_ == '+') 856 NextChar(); 857 if (!ReadInt(true)) { 858 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 859 return NULL; 860 } 861 end_index = index_; 862 } 863 864 // ReadInt is greedy because numbers have no easily detectable sentinel, 865 // so save off where the parser should be on exit (see Consume invariant at 866 // the top of the header), then make sure the next token is one which is 867 // valid. 868 const char* exit_pos = pos_ - 1; 869 int exit_index = index_ - 1; 870 871 switch (GetNextToken()) { 872 case T_OBJECT_END: 873 case T_ARRAY_END: 874 case T_LIST_SEPARATOR: 875 case T_END_OF_INPUT: 876 break; 877 default: 878 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 879 return NULL; 880 } 881 882 pos_ = exit_pos; 883 index_ = exit_index; 884 885 StringPiece num_string(num_start, end_index - start_index); 886 887 int num_int; 888 if (StringToInt(num_string, &num_int)) 889 return new FundamentalValue(num_int); 890 891 double num_double; 892 if (StringToDouble(num_string.as_string(), &num_double) && 893 std::isfinite(num_double)) { 894 return new FundamentalValue(num_double); 895 } 896 897 return NULL; 898 } 899 900 bool JSONParser::ReadInt(bool allow_leading_zeros) { 901 char first = *pos_; 902 int len = 0; 903 904 char c = first; 905 while (CanConsume(1) && IsAsciiDigit(c)) { 906 c = *NextChar(); 907 ++len; 908 } 909 910 if (len == 0) 911 return false; 912 913 if (!allow_leading_zeros && len > 1 && first == '0') 914 return false; 915 916 return true; 917 } 918 919 Value* JSONParser::ConsumeLiteral() { 920 switch (*pos_) { 921 case 't': { 922 const char kTrueLiteral[] = "true"; 923 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); 924 if (!CanConsume(kTrueLen - 1) || 925 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { 926 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 927 return NULL; 928 } 929 NextNChars(kTrueLen - 1); 930 return new FundamentalValue(true); 931 } 932 case 'f': { 933 const char kFalseLiteral[] = "false"; 934 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); 935 if (!CanConsume(kFalseLen - 1) || 936 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { 937 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 938 return NULL; 939 } 940 NextNChars(kFalseLen - 1); 941 return new FundamentalValue(false); 942 } 943 case 'n': { 944 const char kNullLiteral[] = "null"; 945 const int kNullLen = static_cast<int>(strlen(kNullLiteral)); 946 if (!CanConsume(kNullLen - 1) || 947 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { 948 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 949 return NULL; 950 } 951 NextNChars(kNullLen - 1); 952 return Value::CreateNullValue().release(); 953 } 954 default: 955 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 956 return NULL; 957 } 958 } 959 960 // static 961 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { 962 return strncmp(one, two, len) == 0; 963 } 964 965 void JSONParser::ReportError(JSONReader::JsonParseError code, 966 int column_adjust) { 967 error_code_ = code; 968 error_line_ = line_number_; 969 error_column_ = index_ - index_last_line_ + column_adjust; 970 } 971 972 // static 973 std::string JSONParser::FormatErrorMessage(int line, int column, 974 const std::string& description) { 975 if (line || column) { 976 return StringPrintf("Line: %i, column: %i, %s", 977 line, column, description.c_str()); 978 } 979 return description; 980 } 981 982 } // namespace internal 983 } // namespace base 984