1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/json_parser.h" 6 7 #include <cmath> 8 9 #include "base/logging.h" 10 #include "base/macros.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_piece.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/stringprintf.h" 16 #include "base/strings/utf_string_conversion_utils.h" 17 #include "base/third_party/icu/icu_utf.h" 18 #include "base/values.h" 19 20 namespace base { 21 namespace internal { 22 23 namespace { 24 25 const int kStackMaxDepth = 100; 26 27 const int32_t kExtendedASCIIStart = 0x80; 28 29 // This and the class below are used to own the JSON input string for when 30 // string tokens are stored as StringPiece instead of std::string. This 31 // optimization avoids about 2/3rds of string memory copies. The constructor 32 // takes ownership of the input string. The real root value is Swap()ed into 33 // the new instance. 34 class DictionaryHiddenRootValue : public DictionaryValue { 35 public: 36 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) { 37 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); 38 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); 39 } 40 41 void Swap(DictionaryValue* other) override { 42 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; 43 44 // First deep copy to convert JSONStringValue to std::string and swap that 45 // copy with |other|, which contains the new contents of |this|. 46 scoped_ptr<DictionaryValue> copy(DeepCopy()); 47 copy->Swap(other); 48 49 // Then erase the contents of the current dictionary and swap in the 50 // new contents, originally from |other|. 51 Clear(); 52 json_.reset(); 53 DictionaryValue::Swap(copy.get()); 54 } 55 56 // Not overriding DictionaryValue::Remove because it just calls through to 57 // the method below. 58 59 bool RemoveWithoutPathExpansion(const std::string& key, 60 scoped_ptr<Value>* out) override { 61 // If the caller won't take ownership of the removed value, just call up. 62 if (!out) 63 return DictionaryValue::RemoveWithoutPathExpansion(key, out); 64 65 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; 66 67 // Otherwise, remove the value while its still "owned" by this and copy it 68 // to convert any JSONStringValues to std::string. 69 scoped_ptr<Value> out_owned; 70 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) 71 return false; 72 73 out->reset(out_owned->DeepCopy()); 74 75 return true; 76 } 77 78 private: 79 scoped_ptr<std::string> json_; 80 81 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); 82 }; 83 84 class ListHiddenRootValue : public ListValue { 85 public: 86 ListHiddenRootValue(std::string* json, Value* root) : json_(json) { 87 DCHECK(root->IsType(Value::TYPE_LIST)); 88 ListValue::Swap(static_cast<ListValue*>(root)); 89 } 90 91 void Swap(ListValue* other) override { 92 DVLOG(1) << "Swap()ing a ListValue inefficiently."; 93 94 // First deep copy to convert JSONStringValue to std::string and swap that 95 // copy with |other|, which contains the new contents of |this|. 96 scoped_ptr<ListValue> copy(DeepCopy()); 97 copy->Swap(other); 98 99 // Then erase the contents of the current list and swap in the new contents, 100 // originally from |other|. 101 Clear(); 102 json_.reset(); 103 ListValue::Swap(copy.get()); 104 } 105 106 bool Remove(size_t index, scoped_ptr<Value>* out) override { 107 // If the caller won't take ownership of the removed value, just call up. 108 if (!out) 109 return ListValue::Remove(index, out); 110 111 DVLOG(1) << "Remove()ing from a ListValue inefficiently."; 112 113 // Otherwise, remove the value while its still "owned" by this and copy it 114 // to convert any JSONStringValues to std::string. 115 scoped_ptr<Value> out_owned; 116 if (!ListValue::Remove(index, &out_owned)) 117 return false; 118 119 out->reset(out_owned->DeepCopy()); 120 121 return true; 122 } 123 124 private: 125 scoped_ptr<std::string> json_; 126 127 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); 128 }; 129 130 // A variant on StringValue that uses StringPiece instead of copying the string 131 // into the Value. This can only be stored in a child of hidden root (above), 132 // otherwise the referenced string will not be guaranteed to outlive it. 133 class JSONStringValue : public Value { 134 public: 135 explicit JSONStringValue(const StringPiece& piece) 136 : Value(TYPE_STRING), 137 string_piece_(piece) { 138 } 139 140 // Overridden from Value: 141 bool GetAsString(std::string* out_value) const override { 142 string_piece_.CopyToString(out_value); 143 return true; 144 } 145 Value* DeepCopy() const override { 146 return new StringValue(string_piece_.as_string()); 147 } 148 bool Equals(const Value* other) const override { 149 std::string other_string; 150 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && 151 StringPiece(other_string) == string_piece_; 152 } 153 154 private: 155 // The location in the original input stream. 156 StringPiece string_piece_; 157 158 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); 159 }; 160 161 // Simple class that checks for maximum recursion/"stack overflow." 162 class StackMarker { 163 public: 164 explicit StackMarker(int* depth) : depth_(depth) { 165 ++(*depth_); 166 DCHECK_LE(*depth_, kStackMaxDepth); 167 } 168 ~StackMarker() { 169 --(*depth_); 170 } 171 172 bool IsTooDeep() const { 173 return *depth_ >= kStackMaxDepth; 174 } 175 176 private: 177 int* const depth_; 178 179 DISALLOW_COPY_AND_ASSIGN(StackMarker); 180 }; 181 182 } // namespace 183 184 JSONParser::JSONParser(int options) 185 : options_(options), 186 start_pos_(NULL), 187 pos_(NULL), 188 end_pos_(NULL), 189 index_(0), 190 stack_depth_(0), 191 line_number_(0), 192 index_last_line_(0), 193 error_code_(JSONReader::JSON_NO_ERROR), 194 error_line_(0), 195 error_column_(0) { 196 } 197 198 JSONParser::~JSONParser() { 199 } 200 201 Value* JSONParser::Parse(const StringPiece& input) { 202 scoped_ptr<std::string> input_copy; 203 // If the children of a JSON root can be detached, then hidden roots cannot 204 // be used, so do not bother copying the input because StringPiece will not 205 // be used anywhere. 206 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 207 input_copy.reset(new std::string(input.as_string())); 208 start_pos_ = input_copy->data(); 209 } else { 210 start_pos_ = input.data(); 211 } 212 pos_ = start_pos_; 213 end_pos_ = start_pos_ + input.length(); 214 index_ = 0; 215 line_number_ = 1; 216 index_last_line_ = 0; 217 218 error_code_ = JSONReader::JSON_NO_ERROR; 219 error_line_ = 0; 220 error_column_ = 0; 221 222 // When the input JSON string starts with a UTF-8 Byte-Order-Mark 223 // <0xEF 0xBB 0xBF>, advance the start position to avoid the 224 // ParseNextToken function mis-treating a Unicode BOM as an invalid 225 // character and returning NULL. 226 if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF && 227 static_cast<uint8_t>(*(pos_ + 1)) == 0xBB && 228 static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) { 229 NextNChars(3); 230 } 231 232 // Parse the first and any nested tokens. 233 scoped_ptr<Value> root(ParseNextToken()); 234 if (!root.get()) 235 return NULL; 236 237 // Make sure the input stream is at an end. 238 if (GetNextToken() != T_END_OF_INPUT) { 239 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { 240 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); 241 return NULL; 242 } 243 } 244 245 // Dictionaries and lists can contain JSONStringValues, so wrap them in a 246 // hidden root. 247 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 248 if (root->IsType(Value::TYPE_DICTIONARY)) { 249 return new DictionaryHiddenRootValue(input_copy.release(), root.get()); 250 } else if (root->IsType(Value::TYPE_LIST)) { 251 return new ListHiddenRootValue(input_copy.release(), root.get()); 252 } else if (root->IsType(Value::TYPE_STRING)) { 253 // A string type could be a JSONStringValue, but because there's no 254 // corresponding HiddenRootValue, the memory will be lost. Deep copy to 255 // preserve it. 256 return root->DeepCopy(); 257 } 258 } 259 260 // All other values can be returned directly. 261 return root.release(); 262 } 263 264 JSONReader::JsonParseError JSONParser::error_code() const { 265 return error_code_; 266 } 267 268 std::string JSONParser::GetErrorMessage() const { 269 return FormatErrorMessage(error_line_, error_column_, 270 JSONReader::ErrorCodeToString(error_code_)); 271 } 272 273 int JSONParser::error_line() const { 274 return error_line_; 275 } 276 277 int JSONParser::error_column() const { 278 return error_column_; 279 } 280 281 // StringBuilder /////////////////////////////////////////////////////////////// 282 283 JSONParser::StringBuilder::StringBuilder() 284 : pos_(NULL), 285 length_(0), 286 string_(NULL) { 287 } 288 289 JSONParser::StringBuilder::StringBuilder(const char* pos) 290 : pos_(pos), 291 length_(0), 292 string_(NULL) { 293 } 294 295 void JSONParser::StringBuilder::Swap(StringBuilder* other) { 296 std::swap(other->string_, string_); 297 std::swap(other->pos_, pos_); 298 std::swap(other->length_, length_); 299 } 300 301 JSONParser::StringBuilder::~StringBuilder() { 302 delete string_; 303 } 304 305 void JSONParser::StringBuilder::Append(const char& c) { 306 DCHECK_GE(c, 0); 307 DCHECK_LT(c, 128); 308 309 if (string_) 310 string_->push_back(c); 311 else 312 ++length_; 313 } 314 315 void JSONParser::StringBuilder::AppendString(const std::string& str) { 316 DCHECK(string_); 317 string_->append(str); 318 } 319 320 void JSONParser::StringBuilder::Convert() { 321 if (string_) 322 return; 323 string_ = new std::string(pos_, length_); 324 } 325 326 bool JSONParser::StringBuilder::CanBeStringPiece() const { 327 return !string_; 328 } 329 330 StringPiece JSONParser::StringBuilder::AsStringPiece() { 331 if (string_) 332 return StringPiece(); 333 return StringPiece(pos_, length_); 334 } 335 336 const std::string& JSONParser::StringBuilder::AsString() { 337 if (!string_) 338 Convert(); 339 return *string_; 340 } 341 342 // JSONParser private ////////////////////////////////////////////////////////// 343 344 inline bool JSONParser::CanConsume(int length) { 345 return pos_ + length <= end_pos_; 346 } 347 348 const char* JSONParser::NextChar() { 349 DCHECK(CanConsume(1)); 350 ++index_; 351 ++pos_; 352 return pos_; 353 } 354 355 void JSONParser::NextNChars(int n) { 356 DCHECK(CanConsume(n)); 357 index_ += n; 358 pos_ += n; 359 } 360 361 JSONParser::Token JSONParser::GetNextToken() { 362 EatWhitespaceAndComments(); 363 if (!CanConsume(1)) 364 return T_END_OF_INPUT; 365 366 switch (*pos_) { 367 case '{': 368 return T_OBJECT_BEGIN; 369 case '}': 370 return T_OBJECT_END; 371 case '[': 372 return T_ARRAY_BEGIN; 373 case ']': 374 return T_ARRAY_END; 375 case '"': 376 return T_STRING; 377 case '0': 378 case '1': 379 case '2': 380 case '3': 381 case '4': 382 case '5': 383 case '6': 384 case '7': 385 case '8': 386 case '9': 387 case '-': 388 return T_NUMBER; 389 case 't': 390 return T_BOOL_TRUE; 391 case 'f': 392 return T_BOOL_FALSE; 393 case 'n': 394 return T_NULL; 395 case ',': 396 return T_LIST_SEPARATOR; 397 case ':': 398 return T_OBJECT_PAIR_SEPARATOR; 399 default: 400 return T_INVALID_TOKEN; 401 } 402 } 403 404 void JSONParser::EatWhitespaceAndComments() { 405 while (pos_ < end_pos_) { 406 switch (*pos_) { 407 case '\r': 408 case '\n': 409 index_last_line_ = index_; 410 // Don't increment line_number_ twice for "\r\n". 411 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) 412 ++line_number_; 413 // Fall through. 414 case ' ': 415 case '\t': 416 NextChar(); 417 break; 418 case '/': 419 if (!EatComment()) 420 return; 421 break; 422 default: 423 return; 424 } 425 } 426 } 427 428 bool JSONParser::EatComment() { 429 if (*pos_ != '/' || !CanConsume(1)) 430 return false; 431 432 char next_char = *NextChar(); 433 if (next_char == '/') { 434 // Single line comment, read to newline. 435 while (CanConsume(1)) { 436 next_char = *NextChar(); 437 if (next_char == '\n' || next_char == '\r') 438 return true; 439 } 440 } else if (next_char == '*') { 441 char previous_char = '\0'; 442 // Block comment, read until end marker. 443 while (CanConsume(1)) { 444 next_char = *NextChar(); 445 if (previous_char == '*' && next_char == '/') { 446 // EatWhitespaceAndComments will inspect pos_, which will still be on 447 // the last / of the comment, so advance once more (which may also be 448 // end of input). 449 NextChar(); 450 return true; 451 } 452 previous_char = next_char; 453 } 454 455 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. 456 } 457 458 return false; 459 } 460 461 Value* JSONParser::ParseNextToken() { 462 return ParseToken(GetNextToken()); 463 } 464 465 Value* JSONParser::ParseToken(Token token) { 466 switch (token) { 467 case T_OBJECT_BEGIN: 468 return ConsumeDictionary(); 469 case T_ARRAY_BEGIN: 470 return ConsumeList(); 471 case T_STRING: 472 return ConsumeString(); 473 case T_NUMBER: 474 return ConsumeNumber(); 475 case T_BOOL_TRUE: 476 case T_BOOL_FALSE: 477 case T_NULL: 478 return ConsumeLiteral(); 479 default: 480 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 481 return NULL; 482 } 483 } 484 485 Value* JSONParser::ConsumeDictionary() { 486 if (*pos_ != '{') { 487 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 488 return NULL; 489 } 490 491 StackMarker depth_check(&stack_depth_); 492 if (depth_check.IsTooDeep()) { 493 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 494 return NULL; 495 } 496 497 scoped_ptr<DictionaryValue> dict(new DictionaryValue); 498 499 NextChar(); 500 Token token = GetNextToken(); 501 while (token != T_OBJECT_END) { 502 if (token != T_STRING) { 503 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); 504 return NULL; 505 } 506 507 // First consume the key. 508 StringBuilder key; 509 if (!ConsumeStringRaw(&key)) { 510 return NULL; 511 } 512 513 // Read the separator. 514 NextChar(); 515 token = GetNextToken(); 516 if (token != T_OBJECT_PAIR_SEPARATOR) { 517 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 518 return NULL; 519 } 520 521 // The next token is the value. Ownership transfers to |dict|. 522 NextChar(); 523 Value* value = ParseNextToken(); 524 if (!value) { 525 // ReportError from deeper level. 526 return NULL; 527 } 528 529 dict->SetWithoutPathExpansion(key.AsString(), value); 530 531 NextChar(); 532 token = GetNextToken(); 533 if (token == T_LIST_SEPARATOR) { 534 NextChar(); 535 token = GetNextToken(); 536 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 537 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 538 return NULL; 539 } 540 } else if (token != T_OBJECT_END) { 541 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 542 return NULL; 543 } 544 } 545 546 return dict.release(); 547 } 548 549 Value* JSONParser::ConsumeList() { 550 if (*pos_ != '[') { 551 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 552 return NULL; 553 } 554 555 StackMarker depth_check(&stack_depth_); 556 if (depth_check.IsTooDeep()) { 557 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 558 return NULL; 559 } 560 561 scoped_ptr<ListValue> list(new ListValue); 562 563 NextChar(); 564 Token token = GetNextToken(); 565 while (token != T_ARRAY_END) { 566 Value* item = ParseToken(token); 567 if (!item) { 568 // ReportError from deeper level. 569 return NULL; 570 } 571 572 list->Append(item); 573 574 NextChar(); 575 token = GetNextToken(); 576 if (token == T_LIST_SEPARATOR) { 577 NextChar(); 578 token = GetNextToken(); 579 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 580 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 581 return NULL; 582 } 583 } else if (token != T_ARRAY_END) { 584 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 585 return NULL; 586 } 587 } 588 589 return list.release(); 590 } 591 592 Value* JSONParser::ConsumeString() { 593 StringBuilder string; 594 if (!ConsumeStringRaw(&string)) 595 return NULL; 596 597 // Create the Value representation, using a hidden root, if configured 598 // to do so, and if the string can be represented by StringPiece. 599 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { 600 return new JSONStringValue(string.AsStringPiece()); 601 } else { 602 if (string.CanBeStringPiece()) 603 string.Convert(); 604 return new StringValue(string.AsString()); 605 } 606 } 607 608 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { 609 if (*pos_ != '"') { 610 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 611 return false; 612 } 613 614 // StringBuilder will internally build a StringPiece unless a UTF-16 615 // conversion occurs, at which point it will perform a copy into a 616 // std::string. 617 StringBuilder string(NextChar()); 618 619 int length = end_pos_ - start_pos_; 620 int32_t next_char = 0; 621 622 while (CanConsume(1)) { 623 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. 624 CBU8_NEXT(start_pos_, index_, length, next_char); 625 if (next_char < 0 || !IsValidCharacter(next_char)) { 626 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); 627 return false; 628 } 629 630 // If this character is an escape sequence... 631 if (next_char == '\\') { 632 // The input string will be adjusted (either by combining the two 633 // characters of an encoded escape sequence, or with a UTF conversion), 634 // so using StringPiece isn't possible -- force a conversion. 635 string.Convert(); 636 637 if (!CanConsume(1)) { 638 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 639 return false; 640 } 641 642 switch (*NextChar()) { 643 // Allowed esape sequences: 644 case 'x': { // UTF-8 sequence. 645 // UTF-8 \x escape sequences are not allowed in the spec, but they 646 // are supported here for backwards-compatiblity with the old parser. 647 if (!CanConsume(2)) { 648 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); 649 return false; 650 } 651 652 int hex_digit = 0; 653 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { 654 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 655 return false; 656 } 657 NextChar(); 658 659 if (hex_digit < kExtendedASCIIStart) 660 string.Append(static_cast<char>(hex_digit)); 661 else 662 DecodeUTF8(hex_digit, &string); 663 break; 664 } 665 case 'u': { // UTF-16 sequence. 666 // UTF units are of the form \uXXXX. 667 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. 668 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 669 return false; 670 } 671 672 // Skip the 'u'. 673 NextChar(); 674 675 std::string utf8_units; 676 if (!DecodeUTF16(&utf8_units)) { 677 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 678 return false; 679 } 680 681 string.AppendString(utf8_units); 682 break; 683 } 684 case '"': 685 string.Append('"'); 686 break; 687 case '\\': 688 string.Append('\\'); 689 break; 690 case '/': 691 string.Append('/'); 692 break; 693 case 'b': 694 string.Append('\b'); 695 break; 696 case 'f': 697 string.Append('\f'); 698 break; 699 case 'n': 700 string.Append('\n'); 701 break; 702 case 'r': 703 string.Append('\r'); 704 break; 705 case 't': 706 string.Append('\t'); 707 break; 708 case 'v': // Not listed as valid escape sequence in the RFC. 709 string.Append('\v'); 710 break; 711 // All other escape squences are illegal. 712 default: 713 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 714 return false; 715 } 716 } else if (next_char == '"') { 717 --index_; // Rewind by one because of CBU8_NEXT. 718 out->Swap(&string); 719 return true; 720 } else { 721 if (next_char < kExtendedASCIIStart) 722 string.Append(static_cast<char>(next_char)); 723 else 724 DecodeUTF8(next_char, &string); 725 } 726 } 727 728 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 729 return false; 730 } 731 732 // Entry is at the first X in \uXXXX. 733 bool JSONParser::DecodeUTF16(std::string* dest_string) { 734 if (!CanConsume(4)) 735 return false; 736 737 // This is a 32-bit field because the shift operations in the 738 // conversion process below cause MSVC to error about "data loss." 739 // This only stores UTF-16 code units, though. 740 // Consume the UTF-16 code unit, which may be a high surrogate. 741 int code_unit16_high = 0; 742 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) 743 return false; 744 745 // Only add 3, not 4, because at the end of this iteration, the parser has 746 // finished working with the last digit of the UTF sequence, meaning that 747 // the next iteration will advance to the next byte. 748 NextNChars(3); 749 750 // Used to convert the UTF-16 code units to a code point and then to a UTF-8 751 // code unit sequence. 752 char code_unit8[8] = { 0 }; 753 size_t offset = 0; 754 755 // If this is a high surrogate, consume the next code unit to get the 756 // low surrogate. 757 if (CBU16_IS_SURROGATE(code_unit16_high)) { 758 // Make sure this is the high surrogate. If not, it's an encoding 759 // error. 760 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) 761 return false; 762 763 // Make sure that the token has more characters to consume the 764 // lower surrogate. 765 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. 766 return false; 767 if (*NextChar() != '\\' || *NextChar() != 'u') 768 return false; 769 770 NextChar(); // Read past 'u'. 771 int code_unit16_low = 0; 772 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) 773 return false; 774 775 NextNChars(3); 776 777 if (!CBU16_IS_TRAIL(code_unit16_low)) { 778 return false; 779 } 780 781 uint32_t code_point = 782 CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low); 783 if (!IsValidCharacter(code_point)) 784 return false; 785 786 offset = 0; 787 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); 788 } else { 789 // Not a surrogate. 790 DCHECK(CBU16_IS_SINGLE(code_unit16_high)); 791 if (!IsValidCharacter(code_unit16_high)) 792 return false; 793 794 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); 795 } 796 797 dest_string->append(code_unit8); 798 return true; 799 } 800 801 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) { 802 DCHECK(IsValidCharacter(point)); 803 804 // Anything outside of the basic ASCII plane will need to be decoded from 805 // int32_t to a multi-byte sequence. 806 if (point < kExtendedASCIIStart) { 807 dest->Append(static_cast<char>(point)); 808 } else { 809 char utf8_units[4] = { 0 }; 810 int offset = 0; 811 CBU8_APPEND_UNSAFE(utf8_units, offset, point); 812 dest->Convert(); 813 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be 814 // zero terminated at this point. |offset| contains the correct length. 815 dest->AppendString(std::string(utf8_units, offset)); 816 } 817 } 818 819 Value* JSONParser::ConsumeNumber() { 820 const char* num_start = pos_; 821 const int start_index = index_; 822 int end_index = start_index; 823 824 if (*pos_ == '-') 825 NextChar(); 826 827 if (!ReadInt(false)) { 828 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 829 return NULL; 830 } 831 end_index = index_; 832 833 // The optional fraction part. 834 if (*pos_ == '.') { 835 if (!CanConsume(1)) { 836 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 837 return NULL; 838 } 839 NextChar(); 840 if (!ReadInt(true)) { 841 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 842 return NULL; 843 } 844 end_index = index_; 845 } 846 847 // Optional exponent part. 848 if (*pos_ == 'e' || *pos_ == 'E') { 849 NextChar(); 850 if (*pos_ == '-' || *pos_ == '+') 851 NextChar(); 852 if (!ReadInt(true)) { 853 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 854 return NULL; 855 } 856 end_index = index_; 857 } 858 859 // ReadInt is greedy because numbers have no easily detectable sentinel, 860 // so save off where the parser should be on exit (see Consume invariant at 861 // the top of the header), then make sure the next token is one which is 862 // valid. 863 const char* exit_pos = pos_ - 1; 864 int exit_index = index_ - 1; 865 866 switch (GetNextToken()) { 867 case T_OBJECT_END: 868 case T_ARRAY_END: 869 case T_LIST_SEPARATOR: 870 case T_END_OF_INPUT: 871 break; 872 default: 873 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 874 return NULL; 875 } 876 877 pos_ = exit_pos; 878 index_ = exit_index; 879 880 StringPiece num_string(num_start, end_index - start_index); 881 882 int num_int; 883 if (StringToInt(num_string, &num_int)) 884 return new FundamentalValue(num_int); 885 886 double num_double; 887 if (StringToDouble(num_string.as_string(), &num_double) && 888 std::isfinite(num_double)) { 889 return new FundamentalValue(num_double); 890 } 891 892 return NULL; 893 } 894 895 bool JSONParser::ReadInt(bool allow_leading_zeros) { 896 char first = *pos_; 897 int len = 0; 898 899 char c = first; 900 while (CanConsume(1) && std::isdigit(c)) { 901 c = *NextChar(); 902 ++len; 903 } 904 905 if (len == 0) 906 return false; 907 908 if (!allow_leading_zeros && len > 1 && first == '0') 909 return false; 910 911 return true; 912 } 913 914 Value* JSONParser::ConsumeLiteral() { 915 switch (*pos_) { 916 case 't': { 917 const char kTrueLiteral[] = "true"; 918 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); 919 if (!CanConsume(kTrueLen - 1) || 920 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { 921 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 922 return NULL; 923 } 924 NextNChars(kTrueLen - 1); 925 return new FundamentalValue(true); 926 } 927 case 'f': { 928 const char kFalseLiteral[] = "false"; 929 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); 930 if (!CanConsume(kFalseLen - 1) || 931 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { 932 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 933 return NULL; 934 } 935 NextNChars(kFalseLen - 1); 936 return new FundamentalValue(false); 937 } 938 case 'n': { 939 const char kNullLiteral[] = "null"; 940 const int kNullLen = static_cast<int>(strlen(kNullLiteral)); 941 if (!CanConsume(kNullLen - 1) || 942 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { 943 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 944 return NULL; 945 } 946 NextNChars(kNullLen - 1); 947 return Value::CreateNullValue().release(); 948 } 949 default: 950 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 951 return NULL; 952 } 953 } 954 955 // static 956 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { 957 return strncmp(one, two, len) == 0; 958 } 959 960 void JSONParser::ReportError(JSONReader::JsonParseError code, 961 int column_adjust) { 962 error_code_ = code; 963 error_line_ = line_number_; 964 error_column_ = index_ - index_last_line_ + column_adjust; 965 } 966 967 // static 968 std::string JSONParser::FormatErrorMessage(int line, int column, 969 const std::string& description) { 970 if (line || column) { 971 return StringPrintf("Line: %i, column: %i, %s", 972 line, column, description.c_str()); 973 } 974 return description; 975 } 976 977 } // namespace internal 978 } // namespace base 979