1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/json_parser.h" 6 7 #include <cmath> 8 #include <utility> 9 10 #include "base/logging.h" 11 #include "base/macros.h" 12 #include "base/memory/ptr_util.h" 13 #include "base/strings/string_number_conversions.h" 14 #include "base/strings/string_piece.h" 15 #include "base/strings/string_util.h" 16 #include "base/strings/stringprintf.h" 17 #include "base/strings/utf_string_conversion_utils.h" 18 #include "base/strings/utf_string_conversions.h" 19 #include "base/third_party/icu/icu_utf.h" 20 #include "base/values.h" 21 22 namespace base { 23 namespace internal { 24 25 namespace { 26 27 const int kStackMaxDepth = 100; 28 29 const int32_t kExtendedASCIIStart = 0x80; 30 31 // DictionaryHiddenRootValue and ListHiddenRootValue are used in conjunction 32 // with JSONStringValue as an optimization for reducing the number of string 33 // copies. When this optimization is active, the parser uses a hidden root to 34 // keep the original JSON input string live and creates JSONStringValue children 35 // holding StringPiece references to the input string, avoiding about 2/3rds of 36 // string memory copies. The real root value is Swap()ed into the new instance. 37 class DictionaryHiddenRootValue : public DictionaryValue { 38 public: 39 DictionaryHiddenRootValue(std::unique_ptr<std::string> json, 40 std::unique_ptr<Value> root) 41 : json_(std::move(json)) { 42 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); 43 DictionaryValue::Swap(static_cast<DictionaryValue*>(root.get())); 44 } 45 46 void Swap(DictionaryValue* other) override { 47 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; 48 49 // First deep copy to convert JSONStringValue to std::string and swap that 50 // copy with |other|, which contains the new contents of |this|. 51 std::unique_ptr<DictionaryValue> copy(CreateDeepCopy()); 52 copy->Swap(other); 53 54 // Then erase the contents of the current dictionary and swap in the 55 // new contents, originally from |other|. 56 Clear(); 57 json_.reset(); 58 DictionaryValue::Swap(copy.get()); 59 } 60 61 // Not overriding DictionaryValue::Remove because it just calls through to 62 // the method below. 63 64 bool RemoveWithoutPathExpansion(const std::string& key, 65 std::unique_ptr<Value>* out) override { 66 // If the caller won't take ownership of the removed value, just call up. 67 if (!out) 68 return DictionaryValue::RemoveWithoutPathExpansion(key, out); 69 70 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; 71 72 // Otherwise, remove the value while its still "owned" by this and copy it 73 // to convert any JSONStringValues to std::string. 74 std::unique_ptr<Value> out_owned; 75 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) 76 return false; 77 78 *out = out_owned->CreateDeepCopy(); 79 80 return true; 81 } 82 83 private: 84 std::unique_ptr<std::string> json_; 85 86 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); 87 }; 88 89 class ListHiddenRootValue : public ListValue { 90 public: 91 ListHiddenRootValue(std::unique_ptr<std::string> json, 92 std::unique_ptr<Value> root) 93 : json_(std::move(json)) { 94 DCHECK(root->IsType(Value::TYPE_LIST)); 95 ListValue::Swap(static_cast<ListValue*>(root.get())); 96 } 97 98 void Swap(ListValue* other) override { 99 DVLOG(1) << "Swap()ing a ListValue inefficiently."; 100 101 // First deep copy to convert JSONStringValue to std::string and swap that 102 // copy with |other|, which contains the new contents of |this|. 103 std::unique_ptr<ListValue> copy(CreateDeepCopy()); 104 copy->Swap(other); 105 106 // Then erase the contents of the current list and swap in the new contents, 107 // originally from |other|. 108 Clear(); 109 json_.reset(); 110 ListValue::Swap(copy.get()); 111 } 112 113 bool Remove(size_t index, std::unique_ptr<Value>* out) override { 114 // If the caller won't take ownership of the removed value, just call up. 115 if (!out) 116 return ListValue::Remove(index, out); 117 118 DVLOG(1) << "Remove()ing from a ListValue inefficiently."; 119 120 // Otherwise, remove the value while its still "owned" by this and copy it 121 // to convert any JSONStringValues to std::string. 122 std::unique_ptr<Value> out_owned; 123 if (!ListValue::Remove(index, &out_owned)) 124 return false; 125 126 *out = out_owned->CreateDeepCopy(); 127 128 return true; 129 } 130 131 private: 132 std::unique_ptr<std::string> json_; 133 134 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); 135 }; 136 137 // A variant on StringValue that uses StringPiece instead of copying the string 138 // into the Value. This can only be stored in a child of hidden root (above), 139 // otherwise the referenced string will not be guaranteed to outlive it. 140 class JSONStringValue : public Value { 141 public: 142 explicit JSONStringValue(StringPiece piece) 143 : Value(TYPE_STRING), string_piece_(piece) {} 144 145 // Overridden from Value: 146 bool GetAsString(std::string* out_value) const override { 147 string_piece_.CopyToString(out_value); 148 return true; 149 } 150 bool GetAsString(string16* out_value) const override { 151 *out_value = UTF8ToUTF16(string_piece_); 152 return true; 153 } 154 Value* DeepCopy() const override { 155 return new StringValue(string_piece_.as_string()); 156 } 157 bool Equals(const Value* other) const override { 158 std::string other_string; 159 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && 160 StringPiece(other_string) == string_piece_; 161 } 162 163 private: 164 // The location in the original input stream. 165 StringPiece string_piece_; 166 167 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); 168 }; 169 170 // Simple class that checks for maximum recursion/"stack overflow." 171 class StackMarker { 172 public: 173 explicit StackMarker(int* depth) : depth_(depth) { 174 ++(*depth_); 175 DCHECK_LE(*depth_, kStackMaxDepth); 176 } 177 ~StackMarker() { 178 --(*depth_); 179 } 180 181 bool IsTooDeep() const { 182 return *depth_ >= kStackMaxDepth; 183 } 184 185 private: 186 int* const depth_; 187 188 DISALLOW_COPY_AND_ASSIGN(StackMarker); 189 }; 190 191 } // namespace 192 193 JSONParser::JSONParser(int options) 194 : options_(options), 195 start_pos_(nullptr), 196 pos_(nullptr), 197 end_pos_(nullptr), 198 index_(0), 199 stack_depth_(0), 200 line_number_(0), 201 index_last_line_(0), 202 error_code_(JSONReader::JSON_NO_ERROR), 203 error_line_(0), 204 error_column_(0) { 205 } 206 207 JSONParser::~JSONParser() { 208 } 209 210 std::unique_ptr<Value> JSONParser::Parse(StringPiece input) { 211 std::unique_ptr<std::string> input_copy; 212 // If the children of a JSON root can be detached, then hidden roots cannot 213 // be used, so do not bother copying the input because StringPiece will not 214 // be used anywhere. 215 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 216 input_copy = MakeUnique<std::string>(input.as_string()); 217 start_pos_ = input_copy->data(); 218 } else { 219 start_pos_ = input.data(); 220 } 221 pos_ = start_pos_; 222 end_pos_ = start_pos_ + input.length(); 223 index_ = 0; 224 line_number_ = 1; 225 index_last_line_ = 0; 226 227 error_code_ = JSONReader::JSON_NO_ERROR; 228 error_line_ = 0; 229 error_column_ = 0; 230 231 // When the input JSON string starts with a UTF-8 Byte-Order-Mark 232 // <0xEF 0xBB 0xBF>, advance the start position to avoid the 233 // ParseNextToken function mis-treating a Unicode BOM as an invalid 234 // character and returning NULL. 235 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF && 236 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB && 237 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) { 238 NextNChars(3); 239 } 240 241 // Parse the first and any nested tokens. 242 std::unique_ptr<Value> root(ParseNextToken()); 243 if (!root) 244 return nullptr; 245 246 // Make sure the input stream is at an end. 247 if (GetNextToken() != T_END_OF_INPUT) { 248 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { 249 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); 250 return nullptr; 251 } 252 } 253 254 // Dictionaries and lists can contain JSONStringValues, so wrap them in a 255 // hidden root. 256 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 257 if (root->IsType(Value::TYPE_DICTIONARY)) { 258 return MakeUnique<DictionaryHiddenRootValue>(std::move(input_copy), 259 std::move(root)); 260 } 261 if (root->IsType(Value::TYPE_LIST)) { 262 return MakeUnique<ListHiddenRootValue>(std::move(input_copy), 263 std::move(root)); 264 } 265 if (root->IsType(Value::TYPE_STRING)) { 266 // A string type could be a JSONStringValue, but because there's no 267 // corresponding HiddenRootValue, the memory will be lost. Deep copy to 268 // preserve it. 269 return root->CreateDeepCopy(); 270 } 271 } 272 273 // All other values can be returned directly. 274 return root; 275 } 276 277 JSONReader::JsonParseError JSONParser::error_code() const { 278 return error_code_; 279 } 280 281 std::string JSONParser::GetErrorMessage() const { 282 return FormatErrorMessage(error_line_, error_column_, 283 JSONReader::ErrorCodeToString(error_code_)); 284 } 285 286 int JSONParser::error_line() const { 287 return error_line_; 288 } 289 290 int JSONParser::error_column() const { 291 return error_column_; 292 } 293 294 // StringBuilder /////////////////////////////////////////////////////////////// 295 296 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {} 297 298 JSONParser::StringBuilder::StringBuilder(const char* pos) 299 : pos_(pos), 300 length_(0), 301 string_(nullptr) { 302 } 303 304 void JSONParser::StringBuilder::Swap(StringBuilder* other) { 305 std::swap(other->string_, string_); 306 std::swap(other->pos_, pos_); 307 std::swap(other->length_, length_); 308 } 309 310 JSONParser::StringBuilder::~StringBuilder() { 311 delete string_; 312 } 313 314 void JSONParser::StringBuilder::Append(const char& c) { 315 DCHECK_GE(c, 0); 316 DCHECK_LT(static_cast<unsigned char>(c), 128); 317 318 if (string_) 319 string_->push_back(c); 320 else 321 ++length_; 322 } 323 324 void JSONParser::StringBuilder::AppendString(const std::string& str) { 325 DCHECK(string_); 326 string_->append(str); 327 } 328 329 void JSONParser::StringBuilder::Convert() { 330 if (string_) 331 return; 332 string_ = new std::string(pos_, length_); 333 } 334 335 bool JSONParser::StringBuilder::CanBeStringPiece() const { 336 return !string_; 337 } 338 339 StringPiece JSONParser::StringBuilder::AsStringPiece() { 340 if (string_) 341 return StringPiece(); 342 return StringPiece(pos_, length_); 343 } 344 345 const std::string& JSONParser::StringBuilder::AsString() { 346 if (!string_) 347 Convert(); 348 return *string_; 349 } 350 351 // JSONParser private ////////////////////////////////////////////////////////// 352 353 inline bool JSONParser::CanConsume(int length) { 354 return pos_ + length <= end_pos_; 355 } 356 357 const char* JSONParser::NextChar() { 358 DCHECK(CanConsume(1)); 359 ++index_; 360 ++pos_; 361 return pos_; 362 } 363 364 void JSONParser::NextNChars(int n) { 365 DCHECK(CanConsume(n)); 366 index_ += n; 367 pos_ += n; 368 } 369 370 JSONParser::Token JSONParser::GetNextToken() { 371 EatWhitespaceAndComments(); 372 if (!CanConsume(1)) 373 return T_END_OF_INPUT; 374 375 switch (*pos_) { 376 case '{': 377 return T_OBJECT_BEGIN; 378 case '}': 379 return T_OBJECT_END; 380 case '[': 381 return T_ARRAY_BEGIN; 382 case ']': 383 return T_ARRAY_END; 384 case '"': 385 return T_STRING; 386 case '0': 387 case '1': 388 case '2': 389 case '3': 390 case '4': 391 case '5': 392 case '6': 393 case '7': 394 case '8': 395 case '9': 396 case '-': 397 return T_NUMBER; 398 case 't': 399 return T_BOOL_TRUE; 400 case 'f': 401 return T_BOOL_FALSE; 402 case 'n': 403 return T_NULL; 404 case ',': 405 return T_LIST_SEPARATOR; 406 case ':': 407 return T_OBJECT_PAIR_SEPARATOR; 408 default: 409 return T_INVALID_TOKEN; 410 } 411 } 412 413 void JSONParser::EatWhitespaceAndComments() { 414 while (pos_ < end_pos_) { 415 switch (*pos_) { 416 case '\r': 417 case '\n': 418 index_last_line_ = index_; 419 // Don't increment line_number_ twice for "\r\n". 420 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) 421 ++line_number_; 422 // Fall through. 423 case ' ': 424 case '\t': 425 NextChar(); 426 break; 427 case '/': 428 if (!EatComment()) 429 return; 430 break; 431 default: 432 return; 433 } 434 } 435 } 436 437 bool JSONParser::EatComment() { 438 if (*pos_ != '/' || !CanConsume(1)) 439 return false; 440 441 char next_char = *NextChar(); 442 if (next_char == '/') { 443 // Single line comment, read to newline. 444 while (CanConsume(1)) { 445 next_char = *NextChar(); 446 if (next_char == '\n' || next_char == '\r') 447 return true; 448 } 449 } else if (next_char == '*') { 450 char previous_char = '\0'; 451 // Block comment, read until end marker. 452 while (CanConsume(1)) { 453 next_char = *NextChar(); 454 if (previous_char == '*' && next_char == '/') { 455 // EatWhitespaceAndComments will inspect pos_, which will still be on 456 // the last / of the comment, so advance once more (which may also be 457 // end of input). 458 NextChar(); 459 return true; 460 } 461 previous_char = next_char; 462 } 463 464 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. 465 } 466 467 return false; 468 } 469 470 Value* JSONParser::ParseNextToken() { 471 return ParseToken(GetNextToken()); 472 } 473 474 Value* JSONParser::ParseToken(Token token) { 475 switch (token) { 476 case T_OBJECT_BEGIN: 477 return ConsumeDictionary(); 478 case T_ARRAY_BEGIN: 479 return ConsumeList(); 480 case T_STRING: 481 return ConsumeString(); 482 case T_NUMBER: 483 return ConsumeNumber(); 484 case T_BOOL_TRUE: 485 case T_BOOL_FALSE: 486 case T_NULL: 487 return ConsumeLiteral(); 488 default: 489 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 490 return nullptr; 491 } 492 } 493 494 Value* JSONParser::ConsumeDictionary() { 495 if (*pos_ != '{') { 496 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 497 return nullptr; 498 } 499 500 StackMarker depth_check(&stack_depth_); 501 if (depth_check.IsTooDeep()) { 502 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 503 return nullptr; 504 } 505 506 std::unique_ptr<DictionaryValue> dict(new DictionaryValue); 507 508 NextChar(); 509 Token token = GetNextToken(); 510 while (token != T_OBJECT_END) { 511 if (token != T_STRING) { 512 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); 513 return nullptr; 514 } 515 516 // First consume the key. 517 StringBuilder key; 518 if (!ConsumeStringRaw(&key)) { 519 return nullptr; 520 } 521 522 // Read the separator. 523 NextChar(); 524 token = GetNextToken(); 525 if (token != T_OBJECT_PAIR_SEPARATOR) { 526 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 527 return nullptr; 528 } 529 530 // The next token is the value. Ownership transfers to |dict|. 531 NextChar(); 532 Value* value = ParseNextToken(); 533 if (!value) { 534 // ReportError from deeper level. 535 return nullptr; 536 } 537 538 dict->SetWithoutPathExpansion(key.AsString(), value); 539 540 NextChar(); 541 token = GetNextToken(); 542 if (token == T_LIST_SEPARATOR) { 543 NextChar(); 544 token = GetNextToken(); 545 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 546 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 547 return nullptr; 548 } 549 } else if (token != T_OBJECT_END) { 550 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 551 return nullptr; 552 } 553 } 554 555 return dict.release(); 556 } 557 558 Value* JSONParser::ConsumeList() { 559 if (*pos_ != '[') { 560 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 561 return nullptr; 562 } 563 564 StackMarker depth_check(&stack_depth_); 565 if (depth_check.IsTooDeep()) { 566 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 567 return nullptr; 568 } 569 570 std::unique_ptr<ListValue> list(new ListValue); 571 572 NextChar(); 573 Token token = GetNextToken(); 574 while (token != T_ARRAY_END) { 575 Value* item = ParseToken(token); 576 if (!item) { 577 // ReportError from deeper level. 578 return nullptr; 579 } 580 581 list->Append(item); 582 583 NextChar(); 584 token = GetNextToken(); 585 if (token == T_LIST_SEPARATOR) { 586 NextChar(); 587 token = GetNextToken(); 588 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 589 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 590 return nullptr; 591 } 592 } else if (token != T_ARRAY_END) { 593 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 594 return nullptr; 595 } 596 } 597 598 return list.release(); 599 } 600 601 Value* JSONParser::ConsumeString() { 602 StringBuilder string; 603 if (!ConsumeStringRaw(&string)) 604 return nullptr; 605 606 // Create the Value representation, using a hidden root, if configured 607 // to do so, and if the string can be represented by StringPiece. 608 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) 609 return new JSONStringValue(string.AsStringPiece()); 610 611 if (string.CanBeStringPiece()) 612 string.Convert(); 613 return new StringValue(string.AsString()); 614 } 615 616 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { 617 if (*pos_ != '"') { 618 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 619 return false; 620 } 621 622 // StringBuilder will internally build a StringPiece unless a UTF-16 623 // conversion occurs, at which point it will perform a copy into a 624 // std::string. 625 StringBuilder string(NextChar()); 626 627 int length = end_pos_ - start_pos_; 628 int32_t next_char = 0; 629 630 while (CanConsume(1)) { 631 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. 632 CBU8_NEXT(start_pos_, index_, length, next_char); 633 if (next_char < 0 || !IsValidCharacter(next_char)) { 634 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); 635 return false; 636 } 637 638 if (next_char == '"') { 639 --index_; // Rewind by one because of CBU8_NEXT. 640 out->Swap(&string); 641 return true; 642 } 643 644 // If this character is not an escape sequence... 645 if (next_char != '\\') { 646 if (next_char < kExtendedASCIIStart) 647 string.Append(static_cast<char>(next_char)); 648 else 649 DecodeUTF8(next_char, &string); 650 } else { 651 // And if it is an escape sequence, the input string will be adjusted 652 // (either by combining the two characters of an encoded escape sequence, 653 // or with a UTF conversion), so using StringPiece isn't possible -- force 654 // a conversion. 655 string.Convert(); 656 657 if (!CanConsume(1)) { 658 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 659 return false; 660 } 661 662 switch (*NextChar()) { 663 // Allowed esape sequences: 664 case 'x': { // UTF-8 sequence. 665 // UTF-8 \x escape sequences are not allowed in the spec, but they 666 // are supported here for backwards-compatiblity with the old parser. 667 if (!CanConsume(2)) { 668 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); 669 return false; 670 } 671 672 int hex_digit = 0; 673 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { 674 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 675 return false; 676 } 677 NextChar(); 678 679 if (hex_digit < kExtendedASCIIStart) 680 string.Append(static_cast<char>(hex_digit)); 681 else 682 DecodeUTF8(hex_digit, &string); 683 break; 684 } 685 case 'u': { // UTF-16 sequence. 686 // UTF units are of the form \uXXXX. 687 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. 688 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 689 return false; 690 } 691 692 // Skip the 'u'. 693 NextChar(); 694 695 std::string utf8_units; 696 if (!DecodeUTF16(&utf8_units)) { 697 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 698 return false; 699 } 700 701 string.AppendString(utf8_units); 702 break; 703 } 704 case '"': 705 string.Append('"'); 706 break; 707 case '\\': 708 string.Append('\\'); 709 break; 710 case '/': 711 string.Append('/'); 712 break; 713 case 'b': 714 string.Append('\b'); 715 break; 716 case 'f': 717 string.Append('\f'); 718 break; 719 case 'n': 720 string.Append('\n'); 721 break; 722 case 'r': 723 string.Append('\r'); 724 break; 725 case 't': 726 string.Append('\t'); 727 break; 728 case 'v': // Not listed as valid escape sequence in the RFC. 729 string.Append('\v'); 730 break; 731 // All other escape squences are illegal. 732 default: 733 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 734 return false; 735 } 736 } 737 } 738 739 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 740 return false; 741 } 742 743 // Entry is at the first X in \uXXXX. 744 bool JSONParser::DecodeUTF16(std::string* dest_string) { 745 if (!CanConsume(4)) 746 return false; 747 748 // This is a 32-bit field because the shift operations in the 749 // conversion process below cause MSVC to error about "data loss." 750 // This only stores UTF-16 code units, though. 751 // Consume the UTF-16 code unit, which may be a high surrogate. 752 int code_unit16_high = 0; 753 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) 754 return false; 755 756 // Only add 3, not 4, because at the end of this iteration, the parser has 757 // finished working with the last digit of the UTF sequence, meaning that 758 // the next iteration will advance to the next byte. 759 NextNChars(3); 760 761 // Used to convert the UTF-16 code units to a code point and then to a UTF-8 762 // code unit sequence. 763 char code_unit8[8] = { 0 }; 764 size_t offset = 0; 765 766 // If this is a high surrogate, consume the next code unit to get the 767 // low surrogate. 768 if (CBU16_IS_SURROGATE(code_unit16_high)) { 769 // Make sure this is the high surrogate. If not, it's an encoding 770 // error. 771 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) 772 return false; 773 774 // Make sure that the token has more characters to consume the 775 // lower surrogate. 776 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. 777 return false; 778 if (*NextChar() != '\\' || *NextChar() != 'u') 779 return false; 780 781 NextChar(); // Read past 'u'. 782 int code_unit16_low = 0; 783 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) 784 return false; 785 786 NextNChars(3); 787 788 if (!CBU16_IS_TRAIL(code_unit16_low)) { 789 return false; 790 } 791 792 uint32_t code_point = 793 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); 794 if (!IsValidCharacter(code_point)) 795 return false; 796 797 offset = 0; 798 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); 799 } else { 800 // Not a surrogate. 801 DCHECK(CBU16_IS_SINGLE(code_unit16_high)); 802 if (!IsValidCharacter(code_unit16_high)) 803 return false; 804 805 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); 806 } 807 808 dest_string->append(code_unit8); 809 return true; 810 } 811 812 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) { 813 DCHECK(IsValidCharacter(point)); 814 815 // Anything outside of the basic ASCII plane will need to be decoded from 816 // int32_t to a multi-byte sequence. 817 if (point < kExtendedASCIIStart) { 818 dest->Append(static_cast<char>(point)); 819 } else { 820 char utf8_units[4] = { 0 }; 821 int offset = 0; 822 CBU8_APPEND_UNSAFE(utf8_units, offset, point); 823 dest->Convert(); 824 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be 825 // zero terminated at this point. |offset| contains the correct length. 826 dest->AppendString(std::string(utf8_units, offset)); 827 } 828 } 829 830 Value* JSONParser::ConsumeNumber() { 831 const char* num_start = pos_; 832 const int start_index = index_; 833 int end_index = start_index; 834 835 if (*pos_ == '-') 836 NextChar(); 837 838 if (!ReadInt(false)) { 839 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 840 return nullptr; 841 } 842 end_index = index_; 843 844 // The optional fraction part. 845 if (*pos_ == '.') { 846 if (!CanConsume(1)) { 847 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 848 return nullptr; 849 } 850 NextChar(); 851 if (!ReadInt(true)) { 852 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 853 return nullptr; 854 } 855 end_index = index_; 856 } 857 858 // Optional exponent part. 859 if (*pos_ == 'e' || *pos_ == 'E') { 860 NextChar(); 861 if (*pos_ == '-' || *pos_ == '+') 862 NextChar(); 863 if (!ReadInt(true)) { 864 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 865 return nullptr; 866 } 867 end_index = index_; 868 } 869 870 // ReadInt is greedy because numbers have no easily detectable sentinel, 871 // so save off where the parser should be on exit (see Consume invariant at 872 // the top of the header), then make sure the next token is one which is 873 // valid. 874 const char* exit_pos = pos_ - 1; 875 int exit_index = index_ - 1; 876 877 switch (GetNextToken()) { 878 case T_OBJECT_END: 879 case T_ARRAY_END: 880 case T_LIST_SEPARATOR: 881 case T_END_OF_INPUT: 882 break; 883 default: 884 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 885 return nullptr; 886 } 887 888 pos_ = exit_pos; 889 index_ = exit_index; 890 891 StringPiece num_string(num_start, end_index - start_index); 892 893 int num_int; 894 if (StringToInt(num_string, &num_int)) 895 return new FundamentalValue(num_int); 896 897 double num_double; 898 if (StringToDouble(num_string.as_string(), &num_double) && 899 std::isfinite(num_double)) { 900 return new FundamentalValue(num_double); 901 } 902 903 return nullptr; 904 } 905 906 bool JSONParser::ReadInt(bool allow_leading_zeros) { 907 char first = *pos_; 908 int len = 0; 909 910 char c = first; 911 while (CanConsume(1) && IsAsciiDigit(c)) { 912 c = *NextChar(); 913 ++len; 914 } 915 916 if (len == 0) 917 return false; 918 919 if (!allow_leading_zeros && len > 1 && first == '0') 920 return false; 921 922 return true; 923 } 924 925 Value* JSONParser::ConsumeLiteral() { 926 switch (*pos_) { 927 case 't': { 928 const char kTrueLiteral[] = "true"; 929 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); 930 if (!CanConsume(kTrueLen - 1) || 931 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { 932 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 933 return nullptr; 934 } 935 NextNChars(kTrueLen - 1); 936 return new FundamentalValue(true); 937 } 938 case 'f': { 939 const char kFalseLiteral[] = "false"; 940 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); 941 if (!CanConsume(kFalseLen - 1) || 942 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { 943 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 944 return nullptr; 945 } 946 NextNChars(kFalseLen - 1); 947 return new FundamentalValue(false); 948 } 949 case 'n': { 950 const char kNullLiteral[] = "null"; 951 const int kNullLen = static_cast<int>(strlen(kNullLiteral)); 952 if (!CanConsume(kNullLen - 1) || 953 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { 954 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 955 return nullptr; 956 } 957 NextNChars(kNullLen - 1); 958 return Value::CreateNullValue().release(); 959 } 960 default: 961 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 962 return nullptr; 963 } 964 } 965 966 // static 967 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { 968 return strncmp(one, two, len) == 0; 969 } 970 971 void JSONParser::ReportError(JSONReader::JsonParseError code, 972 int column_adjust) { 973 error_code_ = code; 974 error_line_ = line_number_; 975 error_column_ = index_ - index_last_line_ + column_adjust; 976 } 977 978 // static 979 std::string JSONParser::FormatErrorMessage(int line, int column, 980 const std::string& description) { 981 if (line || column) { 982 return StringPrintf("Line: %i, column: %i, %s", 983 line, column, description.c_str()); 984 } 985 return description; 986 } 987 988 } // namespace internal 989 } // namespace base 990