1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_PARSING_JSON_PARSER_H_ 6 #define V8_PARSING_JSON_PARSER_H_ 7 8 #include "src/char-predicates.h" 9 #include "src/conversions.h" 10 #include "src/debug/debug.h" 11 #include "src/factory.h" 12 #include "src/messages.h" 13 #include "src/parsing/scanner.h" 14 #include "src/parsing/token.h" 15 #include "src/transitions.h" 16 #include "src/types.h" 17 18 namespace v8 { 19 namespace internal { 20 21 enum ParseElementResult { kElementFound, kElementNotFound, kNullHandle }; 22 23 24 // A simple json parser. 25 template <bool seq_one_byte> 26 class JsonParser BASE_EMBEDDED { 27 public: 28 MUST_USE_RESULT static MaybeHandle<Object> Parse(Handle<String> source) { 29 return JsonParser(source).ParseJson(); 30 } 31 32 static const int kEndOfString = -1; 33 34 private: 35 explicit JsonParser(Handle<String> source) 36 : source_(source), 37 source_length_(source->length()), 38 isolate_(source->map()->GetHeap()->isolate()), 39 factory_(isolate_->factory()), 40 object_constructor_(isolate_->native_context()->object_function(), 41 isolate_), 42 position_(-1) { 43 source_ = String::Flatten(source_); 44 pretenure_ = (source_length_ >= kPretenureTreshold) ? TENURED : NOT_TENURED; 45 46 // Optimized fast case where we only have Latin1 characters. 47 if (seq_one_byte) { 48 seq_source_ = Handle<SeqOneByteString>::cast(source_); 49 } 50 } 51 52 // Parse a string containing a single JSON value. 53 MaybeHandle<Object> ParseJson(); 54 55 inline void Advance() { 56 position_++; 57 if (position_ >= source_length_) { 58 c0_ = kEndOfString; 59 } else if (seq_one_byte) { 60 c0_ = seq_source_->SeqOneByteStringGet(position_); 61 } else { 62 c0_ = source_->Get(position_); 63 } 64 } 65 66 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 67 // section 15.12.1.1. The only allowed whitespace characters between tokens 68 // are tab, carriage-return, newline and space. 69 70 inline void AdvanceSkipWhitespace() { 71 do { 72 Advance(); 73 } while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r'); 74 } 75 76 inline void SkipWhitespace() { 77 while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n' || c0_ == '\r') { 78 Advance(); 79 } 80 } 81 82 inline uc32 AdvanceGetChar() { 83 Advance(); 84 return c0_; 85 } 86 87 // Checks that current charater is c. 88 // If so, then consume c and skip whitespace. 89 inline bool MatchSkipWhiteSpace(uc32 c) { 90 if (c0_ == c) { 91 AdvanceSkipWhitespace(); 92 return true; 93 } 94 return false; 95 } 96 97 // A JSON string (production JSONString) is subset of valid JavaScript string 98 // literals. The string must only be double-quoted (not single-quoted), and 99 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 100 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 101 Handle<String> ParseJsonString() { 102 return ScanJsonString<false>(); 103 } 104 105 bool ParseJsonString(Handle<String> expected) { 106 int length = expected->length(); 107 if (source_->length() - position_ - 1 > length) { 108 DisallowHeapAllocation no_gc; 109 String::FlatContent content = expected->GetFlatContent(); 110 if (content.IsOneByte()) { 111 DCHECK_EQ('"', c0_); 112 const uint8_t* input_chars = seq_source_->GetChars() + position_ + 1; 113 const uint8_t* expected_chars = content.ToOneByteVector().start(); 114 for (int i = 0; i < length; i++) { 115 uint8_t c0 = input_chars[i]; 116 if (c0 != expected_chars[i] || c0 == '"' || c0 < 0x20 || c0 == '\\') { 117 return false; 118 } 119 } 120 if (input_chars[length] == '"') { 121 position_ = position_ + length + 1; 122 AdvanceSkipWhitespace(); 123 return true; 124 } 125 } 126 } 127 return false; 128 } 129 130 Handle<String> ParseJsonInternalizedString() { 131 return ScanJsonString<true>(); 132 } 133 134 template <bool is_internalized> 135 Handle<String> ScanJsonString(); 136 // Creates a new string and copies prefix[start..end] into the beginning 137 // of it. Then scans the rest of the string, adding characters after the 138 // prefix. Called by ScanJsonString when reaching a '\' or non-Latin1 char. 139 template <typename StringType, typename SinkChar> 140 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); 141 142 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 143 // decimal number literals. 144 // It includes an optional minus sign, must have at least one 145 // digit before and after a decimal point, may not have prefixed zeros (unless 146 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 147 // Hexadecimal and octal numbers are not allowed. 148 Handle<Object> ParseJsonNumber(); 149 150 // Parse a single JSON value from input (grammar production JSONValue). 151 // A JSON value is either a (double-quoted) string literal, a number literal, 152 // one of "true", "false", or "null", or an object or array literal. 153 Handle<Object> ParseJsonValue(); 154 155 // Parse a JSON object literal (grammar production JSONObject). 156 // An object literal is a squiggly-braced and comma separated sequence 157 // (possibly empty) of key/value pairs, where the key is a JSON string 158 // literal, the value is a JSON value, and the two are separated by a colon. 159 // A JSON array doesn't allow numbers and identifiers as keys, like a 160 // JavaScript array. 161 Handle<Object> ParseJsonObject(); 162 163 // Helper for ParseJsonObject. Parses the form "123": obj, which is recorded 164 // as an element, not a property. 165 ParseElementResult ParseElement(Handle<JSObject> json_object); 166 167 // Parses a JSON array literal (grammar production JSONArray). An array 168 // literal is a square-bracketed and comma separated sequence (possibly empty) 169 // of JSON values. 170 // A JSON array doesn't allow leaving out values from the sequence, nor does 171 // it allow a terminal comma, like a JavaScript array does. 172 Handle<Object> ParseJsonArray(); 173 174 175 // Mark that a parsing error has happened at the current token, and 176 // return a null handle. Primarily for readability. 177 inline Handle<Object> ReportUnexpectedCharacter() { 178 return Handle<Object>::null(); 179 } 180 181 inline Isolate* isolate() { return isolate_; } 182 inline Factory* factory() { return factory_; } 183 inline Handle<JSFunction> object_constructor() { return object_constructor_; } 184 185 static const int kInitialSpecialStringLength = 32; 186 static const int kPretenureTreshold = 100 * 1024; 187 188 189 private: 190 Zone* zone() { return &zone_; } 191 192 void CommitStateToJsonObject(Handle<JSObject> json_object, Handle<Map> map, 193 ZoneList<Handle<Object> >* properties); 194 195 Handle<String> source_; 196 int source_length_; 197 Handle<SeqOneByteString> seq_source_; 198 199 PretenureFlag pretenure_; 200 Isolate* isolate_; 201 Factory* factory_; 202 Zone zone_; 203 Handle<JSFunction> object_constructor_; 204 uc32 c0_; 205 int position_; 206 }; 207 208 template <bool seq_one_byte> 209 MaybeHandle<Object> JsonParser<seq_one_byte>::ParseJson() { 210 // Advance to the first character (possibly EOS) 211 AdvanceSkipWhitespace(); 212 Handle<Object> result = ParseJsonValue(); 213 if (result.is_null() || c0_ != kEndOfString) { 214 // Some exception (for example stack overflow) is already pending. 215 if (isolate_->has_pending_exception()) return Handle<Object>::null(); 216 217 // Parse failed. Current character is the unexpected token. 218 Factory* factory = this->factory(); 219 MessageTemplate::Template message; 220 Handle<String> argument; 221 222 switch (c0_) { 223 case kEndOfString: 224 message = MessageTemplate::kUnexpectedEOS; 225 break; 226 case '-': 227 case '0': 228 case '1': 229 case '2': 230 case '3': 231 case '4': 232 case '5': 233 case '6': 234 case '7': 235 case '8': 236 case '9': 237 message = MessageTemplate::kUnexpectedTokenNumber; 238 break; 239 case '"': 240 message = MessageTemplate::kUnexpectedTokenString; 241 break; 242 default: 243 message = MessageTemplate::kUnexpectedToken; 244 argument = factory->LookupSingleCharacterStringFromCode(c0_); 245 break; 246 } 247 248 Handle<Script> script(factory->NewScript(source_)); 249 // We should sent compile error event because we compile JSON object in 250 // separated source file. 251 isolate()->debug()->OnCompileError(script); 252 MessageLocation location(script, position_, position_ + 1); 253 Handle<Object> error = factory->NewSyntaxError(message, argument); 254 return isolate()->template Throw<Object>(error, &location); 255 } 256 return result; 257 } 258 259 260 // Parse any JSON value. 261 template <bool seq_one_byte> 262 Handle<Object> JsonParser<seq_one_byte>::ParseJsonValue() { 263 StackLimitCheck stack_check(isolate_); 264 if (stack_check.HasOverflowed()) { 265 isolate_->StackOverflow(); 266 return Handle<Object>::null(); 267 } 268 269 if (stack_check.InterruptRequested()) { 270 ExecutionAccess access(isolate_); 271 // Avoid blocking GC in long running parser (v8:3974). 272 isolate_->stack_guard()->HandleGCInterrupt(); 273 } 274 275 if (c0_ == '"') return ParseJsonString(); 276 if ((c0_ >= '0' && c0_ <= '9') || c0_ == '-') return ParseJsonNumber(); 277 if (c0_ == '{') return ParseJsonObject(); 278 if (c0_ == '[') return ParseJsonArray(); 279 if (c0_ == 'f') { 280 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && 281 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { 282 AdvanceSkipWhitespace(); 283 return factory()->false_value(); 284 } 285 return ReportUnexpectedCharacter(); 286 } 287 if (c0_ == 't') { 288 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && 289 AdvanceGetChar() == 'e') { 290 AdvanceSkipWhitespace(); 291 return factory()->true_value(); 292 } 293 return ReportUnexpectedCharacter(); 294 } 295 if (c0_ == 'n') { 296 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && 297 AdvanceGetChar() == 'l') { 298 AdvanceSkipWhitespace(); 299 return factory()->null_value(); 300 } 301 return ReportUnexpectedCharacter(); 302 } 303 return ReportUnexpectedCharacter(); 304 } 305 306 307 template <bool seq_one_byte> 308 ParseElementResult JsonParser<seq_one_byte>::ParseElement( 309 Handle<JSObject> json_object) { 310 uint32_t index = 0; 311 // Maybe an array index, try to parse it. 312 if (c0_ == '0') { 313 // With a leading zero, the string has to be "0" only to be an index. 314 Advance(); 315 } else { 316 do { 317 int d = c0_ - '0'; 318 if (index > 429496729U - ((d + 3) >> 3)) break; 319 index = (index * 10) + d; 320 Advance(); 321 } while (IsDecimalDigit(c0_)); 322 } 323 324 if (c0_ == '"') { 325 // Successfully parsed index, parse and store element. 326 AdvanceSkipWhitespace(); 327 328 if (c0_ == ':') { 329 AdvanceSkipWhitespace(); 330 Handle<Object> value = ParseJsonValue(); 331 if (!value.is_null()) { 332 JSObject::SetOwnElementIgnoreAttributes(json_object, index, value, NONE) 333 .Assert(); 334 return kElementFound; 335 } else { 336 return kNullHandle; 337 } 338 } 339 } 340 return kElementNotFound; 341 } 342 343 // Parse a JSON object. Position must be right at '{'. 344 template <bool seq_one_byte> 345 Handle<Object> JsonParser<seq_one_byte>::ParseJsonObject() { 346 HandleScope scope(isolate()); 347 Handle<JSObject> json_object = 348 factory()->NewJSObject(object_constructor(), pretenure_); 349 Handle<Map> map(json_object->map()); 350 int descriptor = 0; 351 ZoneList<Handle<Object> > properties(8, zone()); 352 DCHECK_EQ(c0_, '{'); 353 354 bool transitioning = true; 355 356 AdvanceSkipWhitespace(); 357 if (c0_ != '}') { 358 do { 359 if (c0_ != '"') return ReportUnexpectedCharacter(); 360 361 int start_position = position_; 362 Advance(); 363 364 if (IsDecimalDigit(c0_)) { 365 ParseElementResult element_result = ParseElement(json_object); 366 if (element_result == kNullHandle) return Handle<Object>::null(); 367 if (element_result == kElementFound) continue; 368 } 369 // Not an index, fallback to the slow path. 370 371 position_ = start_position; 372 #ifdef DEBUG 373 c0_ = '"'; 374 #endif 375 376 Handle<String> key; 377 Handle<Object> value; 378 379 // Try to follow existing transitions as long as possible. Once we stop 380 // transitioning, no transition can be found anymore. 381 DCHECK(transitioning); 382 // First check whether there is a single expected transition. If so, try 383 // to parse it first. 384 bool follow_expected = false; 385 Handle<Map> target; 386 if (seq_one_byte) { 387 key = TransitionArray::ExpectedTransitionKey(map); 388 follow_expected = !key.is_null() && ParseJsonString(key); 389 } 390 // If the expected transition hits, follow it. 391 if (follow_expected) { 392 target = TransitionArray::ExpectedTransitionTarget(map); 393 } else { 394 // If the expected transition failed, parse an internalized string and 395 // try to find a matching transition. 396 key = ParseJsonInternalizedString(); 397 if (key.is_null()) return ReportUnexpectedCharacter(); 398 399 target = TransitionArray::FindTransitionToField(map, key); 400 // If a transition was found, follow it and continue. 401 transitioning = !target.is_null(); 402 } 403 if (c0_ != ':') return ReportUnexpectedCharacter(); 404 405 AdvanceSkipWhitespace(); 406 value = ParseJsonValue(); 407 if (value.is_null()) return ReportUnexpectedCharacter(); 408 409 if (transitioning) { 410 PropertyDetails details = 411 target->instance_descriptors()->GetDetails(descriptor); 412 Representation expected_representation = details.representation(); 413 414 if (value->FitsRepresentation(expected_representation)) { 415 if (expected_representation.IsHeapObject() && 416 !target->instance_descriptors() 417 ->GetFieldType(descriptor) 418 ->NowContains(value)) { 419 Handle<HeapType> value_type( 420 value->OptimalType(isolate(), expected_representation)); 421 Map::GeneralizeFieldType(target, descriptor, 422 expected_representation, value_type); 423 } 424 DCHECK(target->instance_descriptors() 425 ->GetFieldType(descriptor) 426 ->NowContains(value)); 427 properties.Add(value, zone()); 428 map = target; 429 descriptor++; 430 continue; 431 } else { 432 transitioning = false; 433 } 434 } 435 436 DCHECK(!transitioning); 437 438 // Commit the intermediate state to the object and stop transitioning. 439 CommitStateToJsonObject(json_object, map, &properties); 440 441 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, value) 442 .Check(); 443 } while (transitioning && MatchSkipWhiteSpace(',')); 444 445 // If we transitioned until the very end, transition the map now. 446 if (transitioning) { 447 CommitStateToJsonObject(json_object, map, &properties); 448 } else { 449 while (MatchSkipWhiteSpace(',')) { 450 HandleScope local_scope(isolate()); 451 if (c0_ != '"') return ReportUnexpectedCharacter(); 452 453 int start_position = position_; 454 Advance(); 455 456 if (IsDecimalDigit(c0_)) { 457 ParseElementResult element_result = ParseElement(json_object); 458 if (element_result == kNullHandle) return Handle<Object>::null(); 459 if (element_result == kElementFound) continue; 460 } 461 // Not an index, fallback to the slow path. 462 463 position_ = start_position; 464 #ifdef DEBUG 465 c0_ = '"'; 466 #endif 467 468 Handle<String> key; 469 Handle<Object> value; 470 471 key = ParseJsonInternalizedString(); 472 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); 473 474 AdvanceSkipWhitespace(); 475 value = ParseJsonValue(); 476 if (value.is_null()) return ReportUnexpectedCharacter(); 477 478 JSObject::DefinePropertyOrElementIgnoreAttributes(json_object, key, 479 value).Check(); 480 } 481 } 482 483 if (c0_ != '}') { 484 return ReportUnexpectedCharacter(); 485 } 486 } 487 AdvanceSkipWhitespace(); 488 return scope.CloseAndEscape(json_object); 489 } 490 491 492 template <bool seq_one_byte> 493 void JsonParser<seq_one_byte>::CommitStateToJsonObject( 494 Handle<JSObject> json_object, Handle<Map> map, 495 ZoneList<Handle<Object> >* properties) { 496 JSObject::AllocateStorageForMap(json_object, map); 497 DCHECK(!json_object->map()->is_dictionary_map()); 498 499 DisallowHeapAllocation no_gc; 500 501 int length = properties->length(); 502 for (int i = 0; i < length; i++) { 503 Handle<Object> value = (*properties)[i]; 504 json_object->WriteToField(i, *value); 505 } 506 } 507 508 509 // Parse a JSON array. Position must be right at '['. 510 template <bool seq_one_byte> 511 Handle<Object> JsonParser<seq_one_byte>::ParseJsonArray() { 512 HandleScope scope(isolate()); 513 ZoneList<Handle<Object> > elements(4, zone()); 514 DCHECK_EQ(c0_, '['); 515 516 AdvanceSkipWhitespace(); 517 if (c0_ != ']') { 518 do { 519 Handle<Object> element = ParseJsonValue(); 520 if (element.is_null()) return ReportUnexpectedCharacter(); 521 elements.Add(element, zone()); 522 } while (MatchSkipWhiteSpace(',')); 523 if (c0_ != ']') { 524 return ReportUnexpectedCharacter(); 525 } 526 } 527 AdvanceSkipWhitespace(); 528 // Allocate a fixed array with all the elements. 529 Handle<FixedArray> fast_elements = 530 factory()->NewFixedArray(elements.length(), pretenure_); 531 for (int i = 0, n = elements.length(); i < n; i++) { 532 fast_elements->set(i, *elements[i]); 533 } 534 Handle<Object> json_array = factory()->NewJSArrayWithElements( 535 fast_elements, FAST_ELEMENTS, Strength::WEAK, pretenure_); 536 return scope.CloseAndEscape(json_array); 537 } 538 539 540 template <bool seq_one_byte> 541 Handle<Object> JsonParser<seq_one_byte>::ParseJsonNumber() { 542 bool negative = false; 543 int beg_pos = position_; 544 if (c0_ == '-') { 545 Advance(); 546 negative = true; 547 } 548 if (c0_ == '0') { 549 Advance(); 550 // Prefix zero is only allowed if it's the only digit before 551 // a decimal point or exponent. 552 if (IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); 553 } else { 554 int i = 0; 555 int digits = 0; 556 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); 557 do { 558 i = i * 10 + c0_ - '0'; 559 digits++; 560 Advance(); 561 } while (IsDecimalDigit(c0_)); 562 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { 563 SkipWhitespace(); 564 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); 565 } 566 } 567 if (c0_ == '.') { 568 Advance(); 569 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); 570 do { 571 Advance(); 572 } while (IsDecimalDigit(c0_)); 573 } 574 if (AsciiAlphaToLower(c0_) == 'e') { 575 Advance(); 576 if (c0_ == '-' || c0_ == '+') Advance(); 577 if (!IsDecimalDigit(c0_)) return ReportUnexpectedCharacter(); 578 do { 579 Advance(); 580 } while (IsDecimalDigit(c0_)); 581 } 582 int length = position_ - beg_pos; 583 double number; 584 if (seq_one_byte) { 585 Vector<const uint8_t> chars(seq_source_->GetChars() + beg_pos, length); 586 number = StringToDouble(isolate()->unicode_cache(), chars, 587 NO_FLAGS, // Hex, octal or trailing junk. 588 std::numeric_limits<double>::quiet_NaN()); 589 } else { 590 Vector<uint8_t> buffer = Vector<uint8_t>::New(length); 591 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); 592 Vector<const uint8_t> result = 593 Vector<const uint8_t>(buffer.start(), length); 594 number = StringToDouble(isolate()->unicode_cache(), 595 result, 596 NO_FLAGS, // Hex, octal or trailing junk. 597 0.0); 598 buffer.Dispose(); 599 } 600 SkipWhitespace(); 601 return factory()->NewNumber(number, pretenure_); 602 } 603 604 605 template <typename StringType> 606 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); 607 608 template <> 609 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { 610 seq_str->SeqTwoByteStringSet(i, c); 611 } 612 613 template <> 614 inline void SeqStringSet(Handle<SeqOneByteString> seq_str, int i, uc32 c) { 615 seq_str->SeqOneByteStringSet(i, c); 616 } 617 618 template <typename StringType> 619 inline Handle<StringType> NewRawString(Factory* factory, 620 int length, 621 PretenureFlag pretenure); 622 623 template <> 624 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, 625 int length, 626 PretenureFlag pretenure) { 627 return factory->NewRawTwoByteString(length, pretenure).ToHandleChecked(); 628 } 629 630 template <> 631 inline Handle<SeqOneByteString> NewRawString(Factory* factory, 632 int length, 633 PretenureFlag pretenure) { 634 return factory->NewRawOneByteString(length, pretenure).ToHandleChecked(); 635 } 636 637 638 // Scans the rest of a JSON string starting from position_ and writes 639 // prefix[start..end] along with the scanned characters into a 640 // sequential string of type StringType. 641 template <bool seq_one_byte> 642 template <typename StringType, typename SinkChar> 643 Handle<String> JsonParser<seq_one_byte>::SlowScanJsonString( 644 Handle<String> prefix, int start, int end) { 645 int count = end - start; 646 int max_length = count + source_length_ - position_; 647 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); 648 Handle<StringType> seq_string = 649 NewRawString<StringType>(factory(), length, pretenure_); 650 // Copy prefix into seq_str. 651 SinkChar* dest = seq_string->GetChars(); 652 String::WriteToFlat(*prefix, dest, start, end); 653 654 while (c0_ != '"') { 655 // Check for control character (0x00-0x1f) or unterminated string (<0). 656 if (c0_ < 0x20) return Handle<String>::null(); 657 if (count >= length) { 658 // We need to create a longer sequential string for the result. 659 return SlowScanJsonString<StringType, SinkChar>(seq_string, 0, count); 660 } 661 if (c0_ != '\\') { 662 // If the sink can contain UC16 characters, or source_ contains only 663 // Latin1 characters, there's no need to test whether we can store the 664 // character. Otherwise check whether the UC16 source character can fit 665 // in the Latin1 sink. 666 if (sizeof(SinkChar) == kUC16Size || seq_one_byte || 667 c0_ <= String::kMaxOneByteCharCode) { 668 SeqStringSet(seq_string, count++, c0_); 669 Advance(); 670 } else { 671 // StringType is SeqOneByteString and we just read a non-Latin1 char. 672 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 0, count); 673 } 674 } else { 675 Advance(); // Advance past the \. 676 switch (c0_) { 677 case '"': 678 case '\\': 679 case '/': 680 SeqStringSet(seq_string, count++, c0_); 681 break; 682 case 'b': 683 SeqStringSet(seq_string, count++, '\x08'); 684 break; 685 case 'f': 686 SeqStringSet(seq_string, count++, '\x0c'); 687 break; 688 case 'n': 689 SeqStringSet(seq_string, count++, '\x0a'); 690 break; 691 case 'r': 692 SeqStringSet(seq_string, count++, '\x0d'); 693 break; 694 case 't': 695 SeqStringSet(seq_string, count++, '\x09'); 696 break; 697 case 'u': { 698 uc32 value = 0; 699 for (int i = 0; i < 4; i++) { 700 Advance(); 701 int digit = HexValue(c0_); 702 if (digit < 0) { 703 return Handle<String>::null(); 704 } 705 value = value * 16 + digit; 706 } 707 if (sizeof(SinkChar) == kUC16Size || 708 value <= String::kMaxOneByteCharCode) { 709 SeqStringSet(seq_string, count++, value); 710 break; 711 } else { 712 // StringType is SeqOneByteString and we just read a non-Latin1 713 // char. 714 position_ -= 6; // Rewind position_ to \ in \uxxxx. 715 Advance(); 716 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_string, 717 0, 718 count); 719 } 720 } 721 default: 722 return Handle<String>::null(); 723 } 724 Advance(); 725 } 726 } 727 728 DCHECK_EQ('"', c0_); 729 // Advance past the last '"'. 730 AdvanceSkipWhitespace(); 731 732 // Shrink seq_string length to count and return. 733 return SeqString::Truncate(seq_string, count); 734 } 735 736 737 template <bool seq_one_byte> 738 template <bool is_internalized> 739 Handle<String> JsonParser<seq_one_byte>::ScanJsonString() { 740 DCHECK_EQ('"', c0_); 741 Advance(); 742 if (c0_ == '"') { 743 AdvanceSkipWhitespace(); 744 return factory()->empty_string(); 745 } 746 747 if (seq_one_byte && is_internalized) { 748 // Fast path for existing internalized strings. If the the string being 749 // parsed is not a known internalized string, contains backslashes or 750 // unexpectedly reaches the end of string, return with an empty handle. 751 uint32_t running_hash = isolate()->heap()->HashSeed(); 752 int position = position_; 753 uc32 c0 = c0_; 754 do { 755 if (c0 == '\\') { 756 c0_ = c0; 757 int beg_pos = position_; 758 position_ = position; 759 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, 760 beg_pos, 761 position_); 762 } 763 if (c0 < 0x20) return Handle<String>::null(); 764 running_hash = StringHasher::AddCharacterCore(running_hash, 765 static_cast<uint16_t>(c0)); 766 position++; 767 if (position >= source_length_) return Handle<String>::null(); 768 c0 = seq_source_->SeqOneByteStringGet(position); 769 } while (c0 != '"'); 770 int length = position - position_; 771 uint32_t hash = (length <= String::kMaxHashCalcLength) 772 ? StringHasher::GetHashCore(running_hash) 773 : static_cast<uint32_t>(length); 774 Vector<const uint8_t> string_vector( 775 seq_source_->GetChars() + position_, length); 776 StringTable* string_table = isolate()->heap()->string_table(); 777 uint32_t capacity = string_table->Capacity(); 778 uint32_t entry = StringTable::FirstProbe(hash, capacity); 779 uint32_t count = 1; 780 Handle<String> result; 781 while (true) { 782 Object* element = string_table->KeyAt(entry); 783 if (element == isolate()->heap()->undefined_value()) { 784 // Lookup failure. 785 result = factory()->InternalizeOneByteString( 786 seq_source_, position_, length); 787 break; 788 } 789 if (element != isolate()->heap()->the_hole_value() && 790 String::cast(element)->IsOneByteEqualTo(string_vector)) { 791 result = Handle<String>(String::cast(element), isolate()); 792 #ifdef DEBUG 793 uint32_t hash_field = 794 (hash << String::kHashShift) | String::kIsNotArrayIndexMask; 795 DCHECK_EQ(static_cast<int>(result->Hash()), 796 static_cast<int>(hash_field >> String::kHashShift)); 797 #endif 798 break; 799 } 800 entry = StringTable::NextProbe(entry, count++, capacity); 801 } 802 position_ = position; 803 // Advance past the last '"'. 804 AdvanceSkipWhitespace(); 805 return result; 806 } 807 808 int beg_pos = position_; 809 // Fast case for Latin1 only without escape characters. 810 do { 811 // Check for control character (0x00-0x1f) or unterminated string (<0). 812 if (c0_ < 0x20) return Handle<String>::null(); 813 if (c0_ != '\\') { 814 if (seq_one_byte || c0_ <= String::kMaxOneByteCharCode) { 815 Advance(); 816 } else { 817 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, 818 beg_pos, 819 position_); 820 } 821 } else { 822 return SlowScanJsonString<SeqOneByteString, uint8_t>(source_, 823 beg_pos, 824 position_); 825 } 826 } while (c0_ != '"'); 827 int length = position_ - beg_pos; 828 Handle<String> result = 829 factory()->NewRawOneByteString(length, pretenure_).ToHandleChecked(); 830 uint8_t* dest = SeqOneByteString::cast(*result)->GetChars(); 831 String::WriteToFlat(*source_, dest, beg_pos, position_); 832 833 DCHECK_EQ('"', c0_); 834 // Advance past the last '"'. 835 AdvanceSkipWhitespace(); 836 return result; 837 } 838 839 } // namespace internal 840 } // namespace v8 841 842 #endif // V8_PARSING_JSON_PARSER_H_ 843