1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/json_parser.h" 6 7 #include "base/float_util.h" 8 #include "base/logging.h" 9 #include "base/memory/scoped_ptr.h" 10 #include "base/strings/string_number_conversions.h" 11 #include "base/strings/string_piece.h" 12 #include "base/strings/string_util.h" 13 #include "base/strings/stringprintf.h" 14 #include "base/strings/utf_string_conversion_utils.h" 15 #include "base/strings/utf_string_conversions.h" 16 #include "base/third_party/icu/icu_utf.h" 17 #include "base/values.h" 18 19 namespace base { 20 namespace internal { 21 22 namespace { 23 24 const int kStackMaxDepth = 100; 25 26 const int32 kExtendedASCIIStart = 0x80; 27 28 // This and the class below are used to own the JSON input string for when 29 // string tokens are stored as StringPiece instead of std::string. This 30 // optimization avoids about 2/3rds of string memory copies. The constructor 31 // takes ownership of the input string. The real root value is Swap()ed into 32 // the new instance. 33 class DictionaryHiddenRootValue : public base::DictionaryValue { 34 public: 35 DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) { 36 DCHECK(root->IsType(Value::TYPE_DICTIONARY)); 37 DictionaryValue::Swap(static_cast<DictionaryValue*>(root)); 38 } 39 40 virtual void Swap(DictionaryValue* other) OVERRIDE { 41 DVLOG(1) << "Swap()ing a DictionaryValue inefficiently."; 42 43 // First deep copy to convert JSONStringValue to std::string and swap that 44 // copy with |other|, which contains the new contents of |this|. 45 scoped_ptr<base::DictionaryValue> copy(DeepCopy()); 46 copy->Swap(other); 47 48 // Then erase the contents of the current dictionary and swap in the 49 // new contents, originally from |other|. 50 Clear(); 51 json_.reset(); 52 DictionaryValue::Swap(copy.get()); 53 } 54 55 // Not overriding DictionaryValue::Remove because it just calls through to 56 // the method below. 57 58 virtual bool RemoveWithoutPathExpansion(const std::string& key, 59 scoped_ptr<Value>* out) OVERRIDE { 60 // If the caller won't take ownership of the removed value, just call up. 61 if (!out) 62 return DictionaryValue::RemoveWithoutPathExpansion(key, out); 63 64 DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently."; 65 66 // Otherwise, remove the value while its still "owned" by this and copy it 67 // to convert any JSONStringValues to std::string. 68 scoped_ptr<Value> out_owned; 69 if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned)) 70 return false; 71 72 out->reset(out_owned->DeepCopy()); 73 74 return true; 75 } 76 77 private: 78 scoped_ptr<std::string> json_; 79 80 DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue); 81 }; 82 83 class ListHiddenRootValue : public base::ListValue { 84 public: 85 ListHiddenRootValue(std::string* json, Value* root) : json_(json) { 86 DCHECK(root->IsType(Value::TYPE_LIST)); 87 ListValue::Swap(static_cast<ListValue*>(root)); 88 } 89 90 virtual void Swap(ListValue* other) OVERRIDE { 91 DVLOG(1) << "Swap()ing a ListValue inefficiently."; 92 93 // First deep copy to convert JSONStringValue to std::string and swap that 94 // copy with |other|, which contains the new contents of |this|. 95 scoped_ptr<base::ListValue> copy(DeepCopy()); 96 copy->Swap(other); 97 98 // Then erase the contents of the current list and swap in the new contents, 99 // originally from |other|. 100 Clear(); 101 json_.reset(); 102 ListValue::Swap(copy.get()); 103 } 104 105 virtual bool Remove(size_t index, scoped_ptr<Value>* out) OVERRIDE { 106 // If the caller won't take ownership of the removed value, just call up. 107 if (!out) 108 return ListValue::Remove(index, out); 109 110 DVLOG(1) << "Remove()ing from a ListValue inefficiently."; 111 112 // Otherwise, remove the value while its still "owned" by this and copy it 113 // to convert any JSONStringValues to std::string. 114 scoped_ptr<Value> out_owned; 115 if (!ListValue::Remove(index, &out_owned)) 116 return false; 117 118 out->reset(out_owned->DeepCopy()); 119 120 return true; 121 } 122 123 private: 124 scoped_ptr<std::string> json_; 125 126 DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue); 127 }; 128 129 // A variant on StringValue that uses StringPiece instead of copying the string 130 // into the Value. This can only be stored in a child of hidden root (above), 131 // otherwise the referenced string will not be guaranteed to outlive it. 132 class JSONStringValue : public base::Value { 133 public: 134 explicit JSONStringValue(const base::StringPiece& piece) 135 : Value(TYPE_STRING), 136 string_piece_(piece) { 137 } 138 139 // Overridden from base::Value: 140 virtual bool GetAsString(std::string* out_value) const OVERRIDE { 141 string_piece_.CopyToString(out_value); 142 return true; 143 } 144 virtual bool GetAsString(string16* out_value) const OVERRIDE { 145 *out_value = UTF8ToUTF16(string_piece_); 146 return true; 147 } 148 virtual Value* DeepCopy() const OVERRIDE { 149 return new StringValue(string_piece_.as_string()); 150 } 151 virtual bool Equals(const Value* other) const OVERRIDE { 152 std::string other_string; 153 return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) && 154 StringPiece(other_string) == string_piece_; 155 } 156 157 private: 158 // The location in the original input stream. 159 base::StringPiece string_piece_; 160 161 DISALLOW_COPY_AND_ASSIGN(JSONStringValue); 162 }; 163 164 // Simple class that checks for maximum recursion/"stack overflow." 165 class StackMarker { 166 public: 167 explicit StackMarker(int* depth) : depth_(depth) { 168 ++(*depth_); 169 DCHECK_LE(*depth_, kStackMaxDepth); 170 } 171 ~StackMarker() { 172 --(*depth_); 173 } 174 175 bool IsTooDeep() const { 176 return *depth_ >= kStackMaxDepth; 177 } 178 179 private: 180 int* const depth_; 181 182 DISALLOW_COPY_AND_ASSIGN(StackMarker); 183 }; 184 185 } // namespace 186 187 JSONParser::JSONParser(int options) 188 : options_(options), 189 start_pos_(NULL), 190 pos_(NULL), 191 end_pos_(NULL), 192 index_(0), 193 stack_depth_(0), 194 line_number_(0), 195 index_last_line_(0), 196 error_code_(JSONReader::JSON_NO_ERROR), 197 error_line_(0), 198 error_column_(0) { 199 } 200 201 JSONParser::~JSONParser() { 202 } 203 204 Value* JSONParser::Parse(const StringPiece& input) { 205 scoped_ptr<std::string> input_copy; 206 // If the children of a JSON root can be detached, then hidden roots cannot 207 // be used, so do not bother copying the input because StringPiece will not 208 // be used anywhere. 209 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 210 input_copy.reset(new std::string(input.as_string())); 211 start_pos_ = input_copy->data(); 212 } else { 213 start_pos_ = input.data(); 214 } 215 pos_ = start_pos_; 216 end_pos_ = start_pos_ + input.length(); 217 index_ = 0; 218 line_number_ = 1; 219 index_last_line_ = 0; 220 221 error_code_ = JSONReader::JSON_NO_ERROR; 222 error_line_ = 0; 223 error_column_ = 0; 224 225 // When the input JSON string starts with a UTF-8 Byte-Order-Mark 226 // <0xEF 0xBB 0xBF>, advance the start position to avoid the 227 // ParseNextToken function mis-treating a Unicode BOM as an invalid 228 // character and returning NULL. 229 if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF && 230 static_cast<uint8>(*(pos_ + 1)) == 0xBB && 231 static_cast<uint8>(*(pos_ + 2)) == 0xBF) { 232 NextNChars(3); 233 } 234 235 // Parse the first and any nested tokens. 236 scoped_ptr<Value> root(ParseNextToken()); 237 if (!root.get()) 238 return NULL; 239 240 // Make sure the input stream is at an end. 241 if (GetNextToken() != T_END_OF_INPUT) { 242 if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) { 243 ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1); 244 return NULL; 245 } 246 } 247 248 // Dictionaries and lists can contain JSONStringValues, so wrap them in a 249 // hidden root. 250 if (!(options_ & JSON_DETACHABLE_CHILDREN)) { 251 if (root->IsType(Value::TYPE_DICTIONARY)) { 252 return new DictionaryHiddenRootValue(input_copy.release(), root.get()); 253 } else if (root->IsType(Value::TYPE_LIST)) { 254 return new ListHiddenRootValue(input_copy.release(), root.get()); 255 } else if (root->IsType(Value::TYPE_STRING)) { 256 // A string type could be a JSONStringValue, but because there's no 257 // corresponding HiddenRootValue, the memory will be lost. Deep copy to 258 // preserve it. 259 return root->DeepCopy(); 260 } 261 } 262 263 // All other values can be returned directly. 264 return root.release(); 265 } 266 267 JSONReader::JsonParseError JSONParser::error_code() const { 268 return error_code_; 269 } 270 271 std::string JSONParser::GetErrorMessage() const { 272 return FormatErrorMessage(error_line_, error_column_, 273 JSONReader::ErrorCodeToString(error_code_)); 274 } 275 276 // StringBuilder /////////////////////////////////////////////////////////////// 277 278 JSONParser::StringBuilder::StringBuilder() 279 : pos_(NULL), 280 length_(0), 281 string_(NULL) { 282 } 283 284 JSONParser::StringBuilder::StringBuilder(const char* pos) 285 : pos_(pos), 286 length_(0), 287 string_(NULL) { 288 } 289 290 void JSONParser::StringBuilder::Swap(StringBuilder* other) { 291 std::swap(other->string_, string_); 292 std::swap(other->pos_, pos_); 293 std::swap(other->length_, length_); 294 } 295 296 JSONParser::StringBuilder::~StringBuilder() { 297 delete string_; 298 } 299 300 void JSONParser::StringBuilder::Append(const char& c) { 301 DCHECK_GE(c, 0); 302 DCHECK_LT(c, 128); 303 304 if (string_) 305 string_->push_back(c); 306 else 307 ++length_; 308 } 309 310 void JSONParser::StringBuilder::AppendString(const std::string& str) { 311 DCHECK(string_); 312 string_->append(str); 313 } 314 315 void JSONParser::StringBuilder::Convert() { 316 if (string_) 317 return; 318 string_ = new std::string(pos_, length_); 319 } 320 321 bool JSONParser::StringBuilder::CanBeStringPiece() const { 322 return !string_; 323 } 324 325 StringPiece JSONParser::StringBuilder::AsStringPiece() { 326 if (string_) 327 return StringPiece(); 328 return StringPiece(pos_, length_); 329 } 330 331 const std::string& JSONParser::StringBuilder::AsString() { 332 if (!string_) 333 Convert(); 334 return *string_; 335 } 336 337 // JSONParser private ////////////////////////////////////////////////////////// 338 339 inline bool JSONParser::CanConsume(int length) { 340 return pos_ + length <= end_pos_; 341 } 342 343 const char* JSONParser::NextChar() { 344 DCHECK(CanConsume(1)); 345 ++index_; 346 ++pos_; 347 return pos_; 348 } 349 350 void JSONParser::NextNChars(int n) { 351 DCHECK(CanConsume(n)); 352 index_ += n; 353 pos_ += n; 354 } 355 356 JSONParser::Token JSONParser::GetNextToken() { 357 EatWhitespaceAndComments(); 358 if (!CanConsume(1)) 359 return T_END_OF_INPUT; 360 361 switch (*pos_) { 362 case '{': 363 return T_OBJECT_BEGIN; 364 case '}': 365 return T_OBJECT_END; 366 case '[': 367 return T_ARRAY_BEGIN; 368 case ']': 369 return T_ARRAY_END; 370 case '"': 371 return T_STRING; 372 case '0': 373 case '1': 374 case '2': 375 case '3': 376 case '4': 377 case '5': 378 case '6': 379 case '7': 380 case '8': 381 case '9': 382 case '-': 383 return T_NUMBER; 384 case 't': 385 return T_BOOL_TRUE; 386 case 'f': 387 return T_BOOL_FALSE; 388 case 'n': 389 return T_NULL; 390 case ',': 391 return T_LIST_SEPARATOR; 392 case ':': 393 return T_OBJECT_PAIR_SEPARATOR; 394 default: 395 return T_INVALID_TOKEN; 396 } 397 } 398 399 void JSONParser::EatWhitespaceAndComments() { 400 while (pos_ < end_pos_) { 401 switch (*pos_) { 402 case '\r': 403 case '\n': 404 index_last_line_ = index_; 405 // Don't increment line_number_ twice for "\r\n". 406 if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r')) 407 ++line_number_; 408 // Fall through. 409 case ' ': 410 case '\t': 411 NextChar(); 412 break; 413 case '/': 414 if (!EatComment()) 415 return; 416 break; 417 default: 418 return; 419 } 420 } 421 } 422 423 bool JSONParser::EatComment() { 424 if (*pos_ != '/' || !CanConsume(1)) 425 return false; 426 427 char next_char = *NextChar(); 428 if (next_char == '/') { 429 // Single line comment, read to newline. 430 while (CanConsume(1)) { 431 char next_char = *NextChar(); 432 if (next_char == '\n' || next_char == '\r') 433 return true; 434 } 435 } else if (next_char == '*') { 436 char previous_char = '\0'; 437 // Block comment, read until end marker. 438 while (CanConsume(1)) { 439 next_char = *NextChar(); 440 if (previous_char == '*' && next_char == '/') { 441 // EatWhitespaceAndComments will inspect pos_, which will still be on 442 // the last / of the comment, so advance once more (which may also be 443 // end of input). 444 NextChar(); 445 return true; 446 } 447 previous_char = next_char; 448 } 449 450 // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT. 451 } 452 453 return false; 454 } 455 456 Value* JSONParser::ParseNextToken() { 457 return ParseToken(GetNextToken()); 458 } 459 460 Value* JSONParser::ParseToken(Token token) { 461 switch (token) { 462 case T_OBJECT_BEGIN: 463 return ConsumeDictionary(); 464 case T_ARRAY_BEGIN: 465 return ConsumeList(); 466 case T_STRING: 467 return ConsumeString(); 468 case T_NUMBER: 469 return ConsumeNumber(); 470 case T_BOOL_TRUE: 471 case T_BOOL_FALSE: 472 case T_NULL: 473 return ConsumeLiteral(); 474 default: 475 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 476 return NULL; 477 } 478 } 479 480 Value* JSONParser::ConsumeDictionary() { 481 if (*pos_ != '{') { 482 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 483 return NULL; 484 } 485 486 StackMarker depth_check(&stack_depth_); 487 if (depth_check.IsTooDeep()) { 488 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 489 return NULL; 490 } 491 492 scoped_ptr<DictionaryValue> dict(new DictionaryValue); 493 494 NextChar(); 495 Token token = GetNextToken(); 496 while (token != T_OBJECT_END) { 497 if (token != T_STRING) { 498 ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1); 499 return NULL; 500 } 501 502 // First consume the key. 503 StringBuilder key; 504 if (!ConsumeStringRaw(&key)) { 505 return NULL; 506 } 507 508 // Read the separator. 509 NextChar(); 510 token = GetNextToken(); 511 if (token != T_OBJECT_PAIR_SEPARATOR) { 512 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 513 return NULL; 514 } 515 516 // The next token is the value. Ownership transfers to |dict|. 517 NextChar(); 518 Value* value = ParseNextToken(); 519 if (!value) { 520 // ReportError from deeper level. 521 return NULL; 522 } 523 524 dict->SetWithoutPathExpansion(key.AsString(), value); 525 526 NextChar(); 527 token = GetNextToken(); 528 if (token == T_LIST_SEPARATOR) { 529 NextChar(); 530 token = GetNextToken(); 531 if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 532 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 533 return NULL; 534 } 535 } else if (token != T_OBJECT_END) { 536 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 537 return NULL; 538 } 539 } 540 541 return dict.release(); 542 } 543 544 Value* JSONParser::ConsumeList() { 545 if (*pos_ != '[') { 546 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 547 return NULL; 548 } 549 550 StackMarker depth_check(&stack_depth_); 551 if (depth_check.IsTooDeep()) { 552 ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1); 553 return NULL; 554 } 555 556 scoped_ptr<ListValue> list(new ListValue); 557 558 NextChar(); 559 Token token = GetNextToken(); 560 while (token != T_ARRAY_END) { 561 Value* item = ParseToken(token); 562 if (!item) { 563 // ReportError from deeper level. 564 return NULL; 565 } 566 567 list->Append(item); 568 569 NextChar(); 570 token = GetNextToken(); 571 if (token == T_LIST_SEPARATOR) { 572 NextChar(); 573 token = GetNextToken(); 574 if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) { 575 ReportError(JSONReader::JSON_TRAILING_COMMA, 1); 576 return NULL; 577 } 578 } else if (token != T_ARRAY_END) { 579 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 580 return NULL; 581 } 582 } 583 584 return list.release(); 585 } 586 587 Value* JSONParser::ConsumeString() { 588 StringBuilder string; 589 if (!ConsumeStringRaw(&string)) 590 return NULL; 591 592 // Create the Value representation, using a hidden root, if configured 593 // to do so, and if the string can be represented by StringPiece. 594 if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) { 595 return new JSONStringValue(string.AsStringPiece()); 596 } else { 597 if (string.CanBeStringPiece()) 598 string.Convert(); 599 return new StringValue(string.AsString()); 600 } 601 } 602 603 bool JSONParser::ConsumeStringRaw(StringBuilder* out) { 604 if (*pos_ != '"') { 605 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 606 return false; 607 } 608 609 // StringBuilder will internally build a StringPiece unless a UTF-16 610 // conversion occurs, at which point it will perform a copy into a 611 // std::string. 612 StringBuilder string(NextChar()); 613 614 int length = end_pos_ - start_pos_; 615 int32 next_char = 0; 616 617 while (CanConsume(1)) { 618 pos_ = start_pos_ + index_; // CBU8_NEXT is postcrement. 619 CBU8_NEXT(start_pos_, index_, length, next_char); 620 if (next_char < 0 || !IsValidCharacter(next_char)) { 621 ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1); 622 return false; 623 } 624 625 // If this character is an escape sequence... 626 if (next_char == '\\') { 627 // The input string will be adjusted (either by combining the two 628 // characters of an encoded escape sequence, or with a UTF conversion), 629 // so using StringPiece isn't possible -- force a conversion. 630 string.Convert(); 631 632 if (!CanConsume(1)) { 633 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 634 return false; 635 } 636 637 switch (*NextChar()) { 638 // Allowed esape sequences: 639 case 'x': { // UTF-8 sequence. 640 // UTF-8 \x escape sequences are not allowed in the spec, but they 641 // are supported here for backwards-compatiblity with the old parser. 642 if (!CanConsume(2)) { 643 ReportError(JSONReader::JSON_INVALID_ESCAPE, 1); 644 return false; 645 } 646 647 int hex_digit = 0; 648 if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) { 649 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 650 return false; 651 } 652 NextChar(); 653 654 if (hex_digit < kExtendedASCIIStart) 655 string.Append(hex_digit); 656 else 657 DecodeUTF8(hex_digit, &string); 658 break; 659 } 660 case 'u': { // UTF-16 sequence. 661 // UTF units are of the form \uXXXX. 662 if (!CanConsume(5)) { // 5 being 'u' and four HEX digits. 663 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 664 return false; 665 } 666 667 // Skip the 'u'. 668 NextChar(); 669 670 std::string utf8_units; 671 if (!DecodeUTF16(&utf8_units)) { 672 ReportError(JSONReader::JSON_INVALID_ESCAPE, -1); 673 return false; 674 } 675 676 string.AppendString(utf8_units); 677 break; 678 } 679 case '"': 680 string.Append('"'); 681 break; 682 case '\\': 683 string.Append('\\'); 684 break; 685 case '/': 686 string.Append('/'); 687 break; 688 case 'b': 689 string.Append('\b'); 690 break; 691 case 'f': 692 string.Append('\f'); 693 break; 694 case 'n': 695 string.Append('\n'); 696 break; 697 case 'r': 698 string.Append('\r'); 699 break; 700 case 't': 701 string.Append('\t'); 702 break; 703 case 'v': // Not listed as valid escape sequence in the RFC. 704 string.Append('\v'); 705 break; 706 // All other escape squences are illegal. 707 default: 708 ReportError(JSONReader::JSON_INVALID_ESCAPE, 0); 709 return false; 710 } 711 } else if (next_char == '"') { 712 --index_; // Rewind by one because of CBU8_NEXT. 713 out->Swap(&string); 714 return true; 715 } else { 716 if (next_char < kExtendedASCIIStart) 717 string.Append(next_char); 718 else 719 DecodeUTF8(next_char, &string); 720 } 721 } 722 723 ReportError(JSONReader::JSON_SYNTAX_ERROR, 0); 724 return false; 725 } 726 727 // Entry is at the first X in \uXXXX. 728 bool JSONParser::DecodeUTF16(std::string* dest_string) { 729 if (!CanConsume(4)) 730 return false; 731 732 // This is a 32-bit field because the shift operations in the 733 // conversion process below cause MSVC to error about "data loss." 734 // This only stores UTF-16 code units, though. 735 // Consume the UTF-16 code unit, which may be a high surrogate. 736 int code_unit16_high = 0; 737 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high)) 738 return false; 739 740 // Only add 3, not 4, because at the end of this iteration, the parser has 741 // finished working with the last digit of the UTF sequence, meaning that 742 // the next iteration will advance to the next byte. 743 NextNChars(3); 744 745 // Used to convert the UTF-16 code units to a code point and then to a UTF-8 746 // code unit sequence. 747 char code_unit8[8] = { 0 }; 748 size_t offset = 0; 749 750 // If this is a high surrogate, consume the next code unit to get the 751 // low surrogate. 752 if (CBU16_IS_SURROGATE(code_unit16_high)) { 753 // Make sure this is the high surrogate. If not, it's an encoding 754 // error. 755 if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high)) 756 return false; 757 758 // Make sure that the token has more characters to consume the 759 // lower surrogate. 760 if (!CanConsume(6)) // 6 being '\' 'u' and four HEX digits. 761 return false; 762 if (*NextChar() != '\\' || *NextChar() != 'u') 763 return false; 764 765 NextChar(); // Read past 'u'. 766 int code_unit16_low = 0; 767 if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low)) 768 return false; 769 770 NextNChars(3); 771 772 if (!CBU16_IS_TRAIL(code_unit16_low)) { 773 return false; 774 } 775 776 uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high, 777 code_unit16_low); 778 offset = 0; 779 CBU8_APPEND_UNSAFE(code_unit8, offset, code_point); 780 } else { 781 // Not a surrogate. 782 DCHECK(CBU16_IS_SINGLE(code_unit16_high)); 783 CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high); 784 } 785 786 dest_string->append(code_unit8); 787 return true; 788 } 789 790 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) { 791 // Anything outside of the basic ASCII plane will need to be decoded from 792 // int32 to a multi-byte sequence. 793 if (point < kExtendedASCIIStart) { 794 dest->Append(point); 795 } else { 796 char utf8_units[4] = { 0 }; 797 int offset = 0; 798 CBU8_APPEND_UNSAFE(utf8_units, offset, point); 799 dest->Convert(); 800 // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be 801 // zero terminated at this point. |offset| contains the correct length. 802 dest->AppendString(std::string(utf8_units, offset)); 803 } 804 } 805 806 Value* JSONParser::ConsumeNumber() { 807 const char* num_start = pos_; 808 const int start_index = index_; 809 int end_index = start_index; 810 811 if (*pos_ == '-') 812 NextChar(); 813 814 if (!ReadInt(false)) { 815 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 816 return NULL; 817 } 818 end_index = index_; 819 820 // The optional fraction part. 821 if (*pos_ == '.') { 822 if (!CanConsume(1)) { 823 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 824 return NULL; 825 } 826 NextChar(); 827 if (!ReadInt(true)) { 828 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 829 return NULL; 830 } 831 end_index = index_; 832 } 833 834 // Optional exponent part. 835 if (*pos_ == 'e' || *pos_ == 'E') { 836 NextChar(); 837 if (*pos_ == '-' || *pos_ == '+') 838 NextChar(); 839 if (!ReadInt(true)) { 840 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 841 return NULL; 842 } 843 end_index = index_; 844 } 845 846 // ReadInt is greedy because numbers have no easily detectable sentinel, 847 // so save off where the parser should be on exit (see Consume invariant at 848 // the top of the header), then make sure the next token is one which is 849 // valid. 850 const char* exit_pos = pos_ - 1; 851 int exit_index = index_ - 1; 852 853 switch (GetNextToken()) { 854 case T_OBJECT_END: 855 case T_ARRAY_END: 856 case T_LIST_SEPARATOR: 857 case T_END_OF_INPUT: 858 break; 859 default: 860 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 861 return NULL; 862 } 863 864 pos_ = exit_pos; 865 index_ = exit_index; 866 867 StringPiece num_string(num_start, end_index - start_index); 868 869 int num_int; 870 if (StringToInt(num_string, &num_int)) 871 return new FundamentalValue(num_int); 872 873 double num_double; 874 if (base::StringToDouble(num_string.as_string(), &num_double) && 875 IsFinite(num_double)) { 876 return new FundamentalValue(num_double); 877 } 878 879 return NULL; 880 } 881 882 bool JSONParser::ReadInt(bool allow_leading_zeros) { 883 char first = *pos_; 884 int len = 0; 885 886 char c = first; 887 while (CanConsume(1) && IsAsciiDigit(c)) { 888 c = *NextChar(); 889 ++len; 890 } 891 892 if (len == 0) 893 return false; 894 895 if (!allow_leading_zeros && len > 1 && first == '0') 896 return false; 897 898 return true; 899 } 900 901 Value* JSONParser::ConsumeLiteral() { 902 switch (*pos_) { 903 case 't': { 904 const char* kTrueLiteral = "true"; 905 const int kTrueLen = static_cast<int>(strlen(kTrueLiteral)); 906 if (!CanConsume(kTrueLen - 1) || 907 !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) { 908 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 909 return NULL; 910 } 911 NextNChars(kTrueLen - 1); 912 return new FundamentalValue(true); 913 } 914 case 'f': { 915 const char* kFalseLiteral = "false"; 916 const int kFalseLen = static_cast<int>(strlen(kFalseLiteral)); 917 if (!CanConsume(kFalseLen - 1) || 918 !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) { 919 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 920 return NULL; 921 } 922 NextNChars(kFalseLen - 1); 923 return new FundamentalValue(false); 924 } 925 case 'n': { 926 const char* kNullLiteral = "null"; 927 const int kNullLen = static_cast<int>(strlen(kNullLiteral)); 928 if (!CanConsume(kNullLen - 1) || 929 !StringsAreEqual(pos_, kNullLiteral, kNullLen)) { 930 ReportError(JSONReader::JSON_SYNTAX_ERROR, 1); 931 return NULL; 932 } 933 NextNChars(kNullLen - 1); 934 return Value::CreateNullValue(); 935 } 936 default: 937 ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1); 938 return NULL; 939 } 940 } 941 942 // static 943 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) { 944 return strncmp(one, two, len) == 0; 945 } 946 947 void JSONParser::ReportError(JSONReader::JsonParseError code, 948 int column_adjust) { 949 error_code_ = code; 950 error_line_ = line_number_; 951 error_column_ = index_ - index_last_line_ + column_adjust; 952 } 953 954 // static 955 std::string JSONParser::FormatErrorMessage(int line, int column, 956 const std::string& description) { 957 if (line || column) { 958 return StringPrintf("Line: %i, column: %i, %s", 959 line, column, description.c_str()); 960 } 961 return description; 962 } 963 964 } // namespace internal 965 } // namespace base 966