1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Redistribution and use in source and binary forms, with or without 3 // modification, are permitted provided that the following conditions are 4 // met: 5 // 6 // * Redistributions of source code must retain the above copyright 7 // notice, this list of conditions and the following disclaimer. 8 // * Redistributions in binary form must reproduce the above 9 // copyright notice, this list of conditions and the following 10 // disclaimer in the documentation and/or other materials provided 11 // with the distribution. 12 // * Neither the name of Google Inc. nor the names of its 13 // contributors may be used to endorse or promote products derived 14 // from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28 #ifndef V8_JSON_PARSER_H_ 29 #define V8_JSON_PARSER_H_ 30 31 #include "v8.h" 32 33 #include "char-predicates-inl.h" 34 #include "v8conversions.h" 35 #include "messages.h" 36 #include "spaces-inl.h" 37 #include "token.h" 38 39 namespace v8 { 40 namespace internal { 41 42 // A simple json parser. 43 template <bool seq_ascii> 44 class JsonParser BASE_EMBEDDED { 45 public: 46 static Handle<Object> Parse(Handle<String> source) { 47 return JsonParser().ParseJson(source); 48 } 49 50 static const int kEndOfString = -1; 51 52 private: 53 // Parse a string containing a single JSON value. 54 Handle<Object> ParseJson(Handle<String> source); 55 56 inline void Advance() { 57 position_++; 58 if (position_ >= source_length_) { 59 c0_ = kEndOfString; 60 } else if (seq_ascii) { 61 c0_ = seq_source_->SeqAsciiStringGet(position_); 62 } else { 63 c0_ = source_->Get(position_); 64 } 65 } 66 67 // The JSON lexical grammar is specified in the ECMAScript 5 standard, 68 // section 15.12.1.1. The only allowed whitespace characters between tokens 69 // are tab, carriage-return, newline and space. 70 71 inline void AdvanceSkipWhitespace() { 72 do { 73 Advance(); 74 } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' '); 75 } 76 77 inline void SkipWhitespace() { 78 while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') { 79 Advance(); 80 } 81 } 82 83 inline uc32 AdvanceGetChar() { 84 Advance(); 85 return c0_; 86 } 87 88 // Checks that current charater is c. 89 // If so, then consume c and skip whitespace. 90 inline bool MatchSkipWhiteSpace(uc32 c) { 91 if (c0_ == c) { 92 AdvanceSkipWhitespace(); 93 return true; 94 } 95 return false; 96 } 97 98 // A JSON string (production JSONString) is subset of valid JavaScript string 99 // literals. The string must only be double-quoted (not single-quoted), and 100 // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and 101 // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. 102 Handle<String> ParseJsonString() { 103 return ScanJsonString<false>(); 104 } 105 Handle<String> ParseJsonSymbol() { 106 return ScanJsonString<true>(); 107 } 108 template <bool is_symbol> 109 Handle<String> ScanJsonString(); 110 // Creates a new string and copies prefix[start..end] into the beginning 111 // of it. Then scans the rest of the string, adding characters after the 112 // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char. 113 template <typename StringType, typename SinkChar> 114 Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end); 115 116 // A JSON number (production JSONNumber) is a subset of the valid JavaScript 117 // decimal number literals. 118 // It includes an optional minus sign, must have at least one 119 // digit before and after a decimal point, may not have prefixed zeros (unless 120 // the integer part is zero), and may include an exponent part (e.g., "e-10"). 121 // Hexadecimal and octal numbers are not allowed. 122 Handle<Object> ParseJsonNumber(); 123 124 // Parse a single JSON value from input (grammar production JSONValue). 125 // A JSON value is either a (double-quoted) string literal, a number literal, 126 // one of "true", "false", or "null", or an object or array literal. 127 Handle<Object> ParseJsonValue(); 128 129 // Parse a JSON object literal (grammar production JSONObject). 130 // An object literal is a squiggly-braced and comma separated sequence 131 // (possibly empty) of key/value pairs, where the key is a JSON string 132 // literal, the value is a JSON value, and the two are separated by a colon. 133 // A JSON array doesn't allow numbers and identifiers as keys, like a 134 // JavaScript array. 135 Handle<Object> ParseJsonObject(); 136 137 // Parses a JSON array literal (grammar production JSONArray). An array 138 // literal is a square-bracketed and comma separated sequence (possibly empty) 139 // of JSON values. 140 // A JSON array doesn't allow leaving out values from the sequence, nor does 141 // it allow a terminal comma, like a JavaScript array does. 142 Handle<Object> ParseJsonArray(); 143 144 145 // Mark that a parsing error has happened at the current token, and 146 // return a null handle. Primarily for readability. 147 inline Handle<Object> ReportUnexpectedCharacter() { 148 return Handle<Object>::null(); 149 } 150 151 inline Isolate* isolate() { return isolate_; } 152 153 static const int kInitialSpecialStringLength = 1024; 154 155 156 private: 157 Handle<String> source_; 158 int source_length_; 159 Handle<SeqAsciiString> seq_source_; 160 161 Isolate* isolate_; 162 uc32 c0_; 163 int position_; 164 }; 165 166 template <bool seq_ascii> 167 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) { 168 isolate_ = source->map()->GetHeap()->isolate(); 169 FlattenString(source); 170 source_ = source; 171 source_length_ = source_->length(); 172 173 // Optimized fast case where we only have ASCII characters. 174 if (seq_ascii) { 175 seq_source_ = Handle<SeqAsciiString>::cast(source_); 176 } 177 178 // Set initial position right before the string. 179 position_ = -1; 180 // Advance to the first character (possibly EOS) 181 AdvanceSkipWhitespace(); 182 Handle<Object> result = ParseJsonValue(); 183 if (result.is_null() || c0_ != kEndOfString) { 184 // Parse failed. Current character is the unexpected token. 185 186 const char* message; 187 Factory* factory = isolate()->factory(); 188 Handle<JSArray> array; 189 190 switch (c0_) { 191 case kEndOfString: 192 message = "unexpected_eos"; 193 array = factory->NewJSArray(0); 194 break; 195 case '-': 196 case '0': 197 case '1': 198 case '2': 199 case '3': 200 case '4': 201 case '5': 202 case '6': 203 case '7': 204 case '8': 205 case '9': 206 message = "unexpected_token_number"; 207 array = factory->NewJSArray(0); 208 break; 209 case '"': 210 message = "unexpected_token_string"; 211 array = factory->NewJSArray(0); 212 break; 213 default: 214 message = "unexpected_token"; 215 Handle<Object> name = LookupSingleCharacterStringFromCode(c0_); 216 Handle<FixedArray> element = factory->NewFixedArray(1); 217 element->set(0, *name); 218 array = factory->NewJSArrayWithElements(element); 219 break; 220 } 221 222 MessageLocation location(factory->NewScript(source), 223 position_, 224 position_ + 1); 225 Handle<Object> result = factory->NewSyntaxError(message, array); 226 isolate()->Throw(*result, &location); 227 return Handle<Object>::null(); 228 } 229 return result; 230 } 231 232 233 // Parse any JSON value. 234 template <bool seq_ascii> 235 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() { 236 switch (c0_) { 237 case '"': 238 return ParseJsonString(); 239 case '-': 240 case '0': 241 case '1': 242 case '2': 243 case '3': 244 case '4': 245 case '5': 246 case '6': 247 case '7': 248 case '8': 249 case '9': 250 return ParseJsonNumber(); 251 case 'f': 252 if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && 253 AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { 254 AdvanceSkipWhitespace(); 255 return isolate()->factory()->false_value(); 256 } else { 257 return ReportUnexpectedCharacter(); 258 } 259 case 't': 260 if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && 261 AdvanceGetChar() == 'e') { 262 AdvanceSkipWhitespace(); 263 return isolate()->factory()->true_value(); 264 } else { 265 return ReportUnexpectedCharacter(); 266 } 267 case 'n': 268 if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && 269 AdvanceGetChar() == 'l') { 270 AdvanceSkipWhitespace(); 271 return isolate()->factory()->null_value(); 272 } else { 273 return ReportUnexpectedCharacter(); 274 } 275 case '{': 276 return ParseJsonObject(); 277 case '[': 278 return ParseJsonArray(); 279 default: 280 return ReportUnexpectedCharacter(); 281 } 282 } 283 284 285 // Parse a JSON object. Position must be right at '{'. 286 template <bool seq_ascii> 287 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() { 288 Handle<JSFunction> object_constructor( 289 isolate()->global_context()->object_function()); 290 Handle<JSObject> json_object = 291 isolate()->factory()->NewJSObject(object_constructor); 292 ASSERT_EQ(c0_, '{'); 293 294 AdvanceSkipWhitespace(); 295 if (c0_ != '}') { 296 do { 297 if (c0_ != '"') return ReportUnexpectedCharacter(); 298 Handle<String> key = ParseJsonSymbol(); 299 if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); 300 AdvanceSkipWhitespace(); 301 Handle<Object> value = ParseJsonValue(); 302 if (value.is_null()) return ReportUnexpectedCharacter(); 303 304 uint32_t index; 305 if (key->AsArrayIndex(&index)) { 306 JSObject::SetOwnElement(json_object, index, value, kNonStrictMode); 307 } else if (key->Equals(isolate()->heap()->Proto_symbol())) { 308 SetPrototype(json_object, value); 309 } else { 310 JSObject::SetLocalPropertyIgnoreAttributes( 311 json_object, key, value, NONE); 312 } 313 } while (MatchSkipWhiteSpace(',')); 314 if (c0_ != '}') { 315 return ReportUnexpectedCharacter(); 316 } 317 } 318 AdvanceSkipWhitespace(); 319 return json_object; 320 } 321 322 // Parse a JSON array. Position must be right at '['. 323 template <bool seq_ascii> 324 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() { 325 ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); 326 ZoneList<Handle<Object> > elements(4); 327 ASSERT_EQ(c0_, '['); 328 329 AdvanceSkipWhitespace(); 330 if (c0_ != ']') { 331 do { 332 Handle<Object> element = ParseJsonValue(); 333 if (element.is_null()) return ReportUnexpectedCharacter(); 334 elements.Add(element); 335 } while (MatchSkipWhiteSpace(',')); 336 if (c0_ != ']') { 337 return ReportUnexpectedCharacter(); 338 } 339 } 340 AdvanceSkipWhitespace(); 341 // Allocate a fixed array with all the elements. 342 Handle<FixedArray> fast_elements = 343 isolate()->factory()->NewFixedArray(elements.length()); 344 for (int i = 0, n = elements.length(); i < n; i++) { 345 fast_elements->set(i, *elements[i]); 346 } 347 return isolate()->factory()->NewJSArrayWithElements(fast_elements); 348 } 349 350 351 template <bool seq_ascii> 352 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() { 353 bool negative = false; 354 int beg_pos = position_; 355 if (c0_ == '-') { 356 Advance(); 357 negative = true; 358 } 359 if (c0_ == '0') { 360 Advance(); 361 // Prefix zero is only allowed if it's the only digit before 362 // a decimal point or exponent. 363 if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter(); 364 } else { 365 int i = 0; 366 int digits = 0; 367 if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); 368 do { 369 i = i * 10 + c0_ - '0'; 370 digits++; 371 Advance(); 372 } while (c0_ >= '0' && c0_ <= '9'); 373 if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { 374 SkipWhitespace(); 375 return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate()); 376 } 377 } 378 if (c0_ == '.') { 379 Advance(); 380 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); 381 do { 382 Advance(); 383 } while (c0_ >= '0' && c0_ <= '9'); 384 } 385 if (AsciiAlphaToLower(c0_) == 'e') { 386 Advance(); 387 if (c0_ == '-' || c0_ == '+') Advance(); 388 if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); 389 do { 390 Advance(); 391 } while (c0_ >= '0' && c0_ <= '9'); 392 } 393 int length = position_ - beg_pos; 394 double number; 395 if (seq_ascii) { 396 Vector<const char> chars(seq_source_->GetChars() + beg_pos, length); 397 number = StringToDouble(isolate()->unicode_cache(), 398 chars, 399 NO_FLAGS, // Hex, octal or trailing junk. 400 OS::nan_value()); 401 } else { 402 Vector<char> buffer = Vector<char>::New(length); 403 String::WriteToFlat(*source_, buffer.start(), beg_pos, position_); 404 Vector<const char> result = 405 Vector<const char>(reinterpret_cast<const char*>(buffer.start()), 406 length); 407 number = StringToDouble(isolate()->unicode_cache(), 408 result, 409 NO_FLAGS, // Hex, octal or trailing junk. 410 0.0); 411 buffer.Dispose(); 412 } 413 SkipWhitespace(); 414 return isolate()->factory()->NewNumber(number); 415 } 416 417 418 template <typename StringType> 419 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c); 420 421 template <> 422 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) { 423 seq_str->SeqTwoByteStringSet(i, c); 424 } 425 426 template <> 427 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) { 428 seq_str->SeqAsciiStringSet(i, c); 429 } 430 431 template <typename StringType> 432 inline Handle<StringType> NewRawString(Factory* factory, int length); 433 434 template <> 435 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) { 436 return factory->NewRawTwoByteString(length, NOT_TENURED); 437 } 438 439 template <> 440 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) { 441 return factory->NewRawAsciiString(length, NOT_TENURED); 442 } 443 444 445 // Scans the rest of a JSON string starting from position_ and writes 446 // prefix[start..end] along with the scanned characters into a 447 // sequential string of type StringType. 448 template <bool seq_ascii> 449 template <typename StringType, typename SinkChar> 450 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString( 451 Handle<String> prefix, int start, int end) { 452 int count = end - start; 453 int max_length = count + source_length_ - position_; 454 int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count)); 455 Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(), 456 length); 457 // Copy prefix into seq_str. 458 SinkChar* dest = seq_str->GetChars(); 459 String::WriteToFlat(*prefix, dest, start, end); 460 461 while (c0_ != '"') { 462 // Check for control character (0x00-0x1f) or unterminated string (<0). 463 if (c0_ < 0x20) return Handle<String>::null(); 464 if (count >= length) { 465 // We need to create a longer sequential string for the result. 466 return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count); 467 } 468 if (c0_ != '\\') { 469 // If the sink can contain UC16 characters, or source_ contains only 470 // ASCII characters, there's no need to test whether we can store the 471 // character. Otherwise check whether the UC16 source character can fit 472 // in the ASCII sink. 473 if (sizeof(SinkChar) == kUC16Size || 474 seq_ascii || 475 c0_ <= kMaxAsciiCharCode) { 476 SeqStringSet(seq_str, count++, c0_); 477 Advance(); 478 } else { 479 // StringType is SeqAsciiString and we just read a non-ASCII char. 480 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count); 481 } 482 } else { 483 Advance(); // Advance past the \. 484 switch (c0_) { 485 case '"': 486 case '\\': 487 case '/': 488 SeqStringSet(seq_str, count++, c0_); 489 break; 490 case 'b': 491 SeqStringSet(seq_str, count++, '\x08'); 492 break; 493 case 'f': 494 SeqStringSet(seq_str, count++, '\x0c'); 495 break; 496 case 'n': 497 SeqStringSet(seq_str, count++, '\x0a'); 498 break; 499 case 'r': 500 SeqStringSet(seq_str, count++, '\x0d'); 501 break; 502 case 't': 503 SeqStringSet(seq_str, count++, '\x09'); 504 break; 505 case 'u': { 506 uc32 value = 0; 507 for (int i = 0; i < 4; i++) { 508 Advance(); 509 int digit = HexValue(c0_); 510 if (digit < 0) { 511 return Handle<String>::null(); 512 } 513 value = value * 16 + digit; 514 } 515 if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) { 516 SeqStringSet(seq_str, count++, value); 517 break; 518 } else { 519 // StringType is SeqAsciiString and we just read a non-ASCII char. 520 position_ -= 6; // Rewind position_ to \ in \uxxxx. 521 Advance(); 522 return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 523 0, 524 count); 525 } 526 } 527 default: 528 return Handle<String>::null(); 529 } 530 Advance(); 531 } 532 } 533 // Shrink seq_string length to count. 534 if (isolate()->heap()->InNewSpace(*seq_str)) { 535 isolate()->heap()->new_space()-> 536 template ShrinkStringAtAllocationBoundary<StringType>( 537 *seq_str, count); 538 } else { 539 int string_size = StringType::SizeFor(count); 540 int allocated_string_size = StringType::SizeFor(length); 541 int delta = allocated_string_size - string_size; 542 Address start_filler_object = seq_str->address() + string_size; 543 seq_str->set_length(count); 544 isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); 545 } 546 ASSERT_EQ('"', c0_); 547 // Advance past the last '"'. 548 AdvanceSkipWhitespace(); 549 return seq_str; 550 } 551 552 553 template <bool seq_ascii> 554 template <bool is_symbol> 555 Handle<String> JsonParser<seq_ascii>::ScanJsonString() { 556 ASSERT_EQ('"', c0_); 557 Advance(); 558 if (c0_ == '"') { 559 AdvanceSkipWhitespace(); 560 return Handle<String>(isolate()->heap()->empty_string()); 561 } 562 int beg_pos = position_; 563 // Fast case for ASCII only without escape characters. 564 do { 565 // Check for control character (0x00-0x1f) or unterminated string (<0). 566 if (c0_ < 0x20) return Handle<String>::null(); 567 if (c0_ != '\\') { 568 if (seq_ascii || c0_ <= kMaxAsciiCharCode) { 569 Advance(); 570 } else { 571 return SlowScanJsonString<SeqTwoByteString, uc16>(source_, 572 beg_pos, 573 position_); 574 } 575 } else { 576 return SlowScanJsonString<SeqAsciiString, char>(source_, 577 beg_pos, 578 position_); 579 } 580 } while (c0_ != '"'); 581 int length = position_ - beg_pos; 582 Handle<String> result; 583 if (seq_ascii && is_symbol) { 584 result = isolate()->factory()->LookupAsciiSymbol(seq_source_, 585 beg_pos, 586 length); 587 } else { 588 result = isolate()->factory()->NewRawAsciiString(length); 589 char* dest = SeqAsciiString::cast(*result)->GetChars(); 590 String::WriteToFlat(*source_, dest, beg_pos, position_); 591 } 592 ASSERT_EQ('"', c0_); 593 // Advance past the last '"'. 594 AdvanceSkipWhitespace(); 595 return result; 596 } 597 598 } } // namespace v8::internal 599 600 #endif // V8_JSON_PARSER_H_ 601