Home | History | Annotate | Download | only in src
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #ifndef V8_JSON_PARSER_H_
     29 #define V8_JSON_PARSER_H_
     30 
     31 #include "v8.h"
     32 
     33 #include "char-predicates-inl.h"
     34 #include "v8conversions.h"
     35 #include "messages.h"
     36 #include "spaces-inl.h"
     37 #include "token.h"
     38 
     39 namespace v8 {
     40 namespace internal {
     41 
     42 // A simple json parser.
     43 template <bool seq_ascii>
     44 class JsonParser BASE_EMBEDDED {
     45  public:
     46   static Handle<Object> Parse(Handle<String> source) {
     47     return JsonParser().ParseJson(source);
     48   }
     49 
     50   static const int kEndOfString = -1;
     51 
     52  private:
     53   // Parse a string containing a single JSON value.
     54   Handle<Object> ParseJson(Handle<String> source);
     55 
     56   inline void Advance() {
     57     position_++;
     58     if (position_ >= source_length_) {
     59       c0_ = kEndOfString;
     60     } else if (seq_ascii) {
     61       c0_ = seq_source_->SeqAsciiStringGet(position_);
     62     } else {
     63       c0_ = source_->Get(position_);
     64     }
     65   }
     66 
     67   // The JSON lexical grammar is specified in the ECMAScript 5 standard,
     68   // section 15.12.1.1. The only allowed whitespace characters between tokens
     69   // are tab, carriage-return, newline and space.
     70 
     71   inline void AdvanceSkipWhitespace() {
     72     do {
     73       Advance();
     74     } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ');
     75   }
     76 
     77   inline void SkipWhitespace() {
     78     while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') {
     79       Advance();
     80     }
     81   }
     82 
     83   inline uc32 AdvanceGetChar() {
     84     Advance();
     85     return c0_;
     86   }
     87 
     88   // Checks that current charater is c.
     89   // If so, then consume c and skip whitespace.
     90   inline bool MatchSkipWhiteSpace(uc32 c) {
     91     if (c0_ == c) {
     92       AdvanceSkipWhitespace();
     93       return true;
     94     }
     95     return false;
     96   }
     97 
     98   // A JSON string (production JSONString) is subset of valid JavaScript string
     99   // literals. The string must only be double-quoted (not single-quoted), and
    100   // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
    101   // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
    102   Handle<String> ParseJsonString() {
    103     return ScanJsonString<false>();
    104   }
    105   Handle<String> ParseJsonSymbol() {
    106     return ScanJsonString<true>();
    107   }
    108   template <bool is_symbol>
    109   Handle<String> ScanJsonString();
    110   // Creates a new string and copies prefix[start..end] into the beginning
    111   // of it. Then scans the rest of the string, adding characters after the
    112   // prefix. Called by ScanJsonString when reaching a '\' or non-ASCII char.
    113   template <typename StringType, typename SinkChar>
    114   Handle<String> SlowScanJsonString(Handle<String> prefix, int start, int end);
    115 
    116   // A JSON number (production JSONNumber) is a subset of the valid JavaScript
    117   // decimal number literals.
    118   // It includes an optional minus sign, must have at least one
    119   // digit before and after a decimal point, may not have prefixed zeros (unless
    120   // the integer part is zero), and may include an exponent part (e.g., "e-10").
    121   // Hexadecimal and octal numbers are not allowed.
    122   Handle<Object> ParseJsonNumber();
    123 
    124   // Parse a single JSON value from input (grammar production JSONValue).
    125   // A JSON value is either a (double-quoted) string literal, a number literal,
    126   // one of "true", "false", or "null", or an object or array literal.
    127   Handle<Object> ParseJsonValue();
    128 
    129   // Parse a JSON object literal (grammar production JSONObject).
    130   // An object literal is a squiggly-braced and comma separated sequence
    131   // (possibly empty) of key/value pairs, where the key is a JSON string
    132   // literal, the value is a JSON value, and the two are separated by a colon.
    133   // A JSON array doesn't allow numbers and identifiers as keys, like a
    134   // JavaScript array.
    135   Handle<Object> ParseJsonObject();
    136 
    137   // Parses a JSON array literal (grammar production JSONArray). An array
    138   // literal is a square-bracketed and comma separated sequence (possibly empty)
    139   // of JSON values.
    140   // A JSON array doesn't allow leaving out values from the sequence, nor does
    141   // it allow a terminal comma, like a JavaScript array does.
    142   Handle<Object> ParseJsonArray();
    143 
    144 
    145   // Mark that a parsing error has happened at the current token, and
    146   // return a null handle. Primarily for readability.
    147   inline Handle<Object> ReportUnexpectedCharacter() {
    148     return Handle<Object>::null();
    149   }
    150 
    151   inline Isolate* isolate() { return isolate_; }
    152 
    153   static const int kInitialSpecialStringLength = 1024;
    154 
    155 
    156  private:
    157   Handle<String> source_;
    158   int source_length_;
    159   Handle<SeqAsciiString> seq_source_;
    160 
    161   Isolate* isolate_;
    162   uc32 c0_;
    163   int position_;
    164 };
    165 
    166 template <bool seq_ascii>
    167 Handle<Object> JsonParser<seq_ascii>::ParseJson(Handle<String> source) {
    168   isolate_ = source->map()->GetHeap()->isolate();
    169   FlattenString(source);
    170   source_ = source;
    171   source_length_ = source_->length();
    172 
    173   // Optimized fast case where we only have ASCII characters.
    174   if (seq_ascii) {
    175     seq_source_ = Handle<SeqAsciiString>::cast(source_);
    176   }
    177 
    178   // Set initial position right before the string.
    179   position_ = -1;
    180   // Advance to the first character (possibly EOS)
    181   AdvanceSkipWhitespace();
    182   Handle<Object> result = ParseJsonValue();
    183   if (result.is_null() || c0_ != kEndOfString) {
    184     // Parse failed. Current character is the unexpected token.
    185 
    186     const char* message;
    187     Factory* factory = isolate()->factory();
    188     Handle<JSArray> array;
    189 
    190     switch (c0_) {
    191       case kEndOfString:
    192         message = "unexpected_eos";
    193         array = factory->NewJSArray(0);
    194         break;
    195       case '-':
    196       case '0':
    197       case '1':
    198       case '2':
    199       case '3':
    200       case '4':
    201       case '5':
    202       case '6':
    203       case '7':
    204       case '8':
    205       case '9':
    206         message = "unexpected_token_number";
    207         array = factory->NewJSArray(0);
    208         break;
    209       case '"':
    210         message = "unexpected_token_string";
    211         array = factory->NewJSArray(0);
    212         break;
    213       default:
    214         message = "unexpected_token";
    215         Handle<Object> name = LookupSingleCharacterStringFromCode(c0_);
    216         Handle<FixedArray> element = factory->NewFixedArray(1);
    217         element->set(0, *name);
    218         array = factory->NewJSArrayWithElements(element);
    219         break;
    220     }
    221 
    222     MessageLocation location(factory->NewScript(source),
    223                              position_,
    224                              position_ + 1);
    225     Handle<Object> result = factory->NewSyntaxError(message, array);
    226     isolate()->Throw(*result, &location);
    227     return Handle<Object>::null();
    228   }
    229   return result;
    230 }
    231 
    232 
    233 // Parse any JSON value.
    234 template <bool seq_ascii>
    235 Handle<Object> JsonParser<seq_ascii>::ParseJsonValue() {
    236   switch (c0_) {
    237     case '"':
    238       return ParseJsonString();
    239     case '-':
    240     case '0':
    241     case '1':
    242     case '2':
    243     case '3':
    244     case '4':
    245     case '5':
    246     case '6':
    247     case '7':
    248     case '8':
    249     case '9':
    250       return ParseJsonNumber();
    251     case 'f':
    252       if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' &&
    253           AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') {
    254         AdvanceSkipWhitespace();
    255         return isolate()->factory()->false_value();
    256       } else {
    257         return ReportUnexpectedCharacter();
    258       }
    259     case 't':
    260       if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' &&
    261           AdvanceGetChar() == 'e') {
    262         AdvanceSkipWhitespace();
    263         return isolate()->factory()->true_value();
    264       } else {
    265         return ReportUnexpectedCharacter();
    266       }
    267     case 'n':
    268       if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' &&
    269           AdvanceGetChar() == 'l') {
    270         AdvanceSkipWhitespace();
    271         return isolate()->factory()->null_value();
    272       } else {
    273         return ReportUnexpectedCharacter();
    274       }
    275     case '{':
    276       return ParseJsonObject();
    277     case '[':
    278       return ParseJsonArray();
    279     default:
    280       return ReportUnexpectedCharacter();
    281   }
    282 }
    283 
    284 
    285 // Parse a JSON object. Position must be right at '{'.
    286 template <bool seq_ascii>
    287 Handle<Object> JsonParser<seq_ascii>::ParseJsonObject() {
    288   Handle<JSFunction> object_constructor(
    289       isolate()->global_context()->object_function());
    290   Handle<JSObject> json_object =
    291       isolate()->factory()->NewJSObject(object_constructor);
    292   ASSERT_EQ(c0_, '{');
    293 
    294   AdvanceSkipWhitespace();
    295   if (c0_ != '}') {
    296     do {
    297       if (c0_ != '"') return ReportUnexpectedCharacter();
    298       Handle<String> key = ParseJsonSymbol();
    299       if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter();
    300       AdvanceSkipWhitespace();
    301       Handle<Object> value = ParseJsonValue();
    302       if (value.is_null()) return ReportUnexpectedCharacter();
    303 
    304       uint32_t index;
    305       if (key->AsArrayIndex(&index)) {
    306         JSObject::SetOwnElement(json_object, index, value, kNonStrictMode);
    307       } else if (key->Equals(isolate()->heap()->Proto_symbol())) {
    308         SetPrototype(json_object, value);
    309       } else {
    310         JSObject::SetLocalPropertyIgnoreAttributes(
    311             json_object, key, value, NONE);
    312       }
    313     } while (MatchSkipWhiteSpace(','));
    314     if (c0_ != '}') {
    315       return ReportUnexpectedCharacter();
    316     }
    317   }
    318   AdvanceSkipWhitespace();
    319   return json_object;
    320 }
    321 
    322 // Parse a JSON array. Position must be right at '['.
    323 template <bool seq_ascii>
    324 Handle<Object> JsonParser<seq_ascii>::ParseJsonArray() {
    325   ZoneScope zone_scope(isolate(), DELETE_ON_EXIT);
    326   ZoneList<Handle<Object> > elements(4);
    327   ASSERT_EQ(c0_, '[');
    328 
    329   AdvanceSkipWhitespace();
    330   if (c0_ != ']') {
    331     do {
    332       Handle<Object> element = ParseJsonValue();
    333       if (element.is_null()) return ReportUnexpectedCharacter();
    334       elements.Add(element);
    335     } while (MatchSkipWhiteSpace(','));
    336     if (c0_ != ']') {
    337       return ReportUnexpectedCharacter();
    338     }
    339   }
    340   AdvanceSkipWhitespace();
    341   // Allocate a fixed array with all the elements.
    342   Handle<FixedArray> fast_elements =
    343       isolate()->factory()->NewFixedArray(elements.length());
    344   for (int i = 0, n = elements.length(); i < n; i++) {
    345     fast_elements->set(i, *elements[i]);
    346   }
    347   return isolate()->factory()->NewJSArrayWithElements(fast_elements);
    348 }
    349 
    350 
    351 template <bool seq_ascii>
    352 Handle<Object> JsonParser<seq_ascii>::ParseJsonNumber() {
    353   bool negative = false;
    354   int beg_pos = position_;
    355   if (c0_ == '-') {
    356     Advance();
    357     negative = true;
    358   }
    359   if (c0_ == '0') {
    360     Advance();
    361     // Prefix zero is only allowed if it's the only digit before
    362     // a decimal point or exponent.
    363     if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter();
    364   } else {
    365     int i = 0;
    366     int digits = 0;
    367     if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter();
    368     do {
    369       i = i * 10 + c0_ - '0';
    370       digits++;
    371       Advance();
    372     } while (c0_ >= '0' && c0_ <= '9');
    373     if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) {
    374       SkipWhitespace();
    375       return Handle<Smi>(Smi::FromInt((negative ? -i : i)), isolate());
    376     }
    377   }
    378   if (c0_ == '.') {
    379     Advance();
    380     if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
    381     do {
    382       Advance();
    383     } while (c0_ >= '0' && c0_ <= '9');
    384   }
    385   if (AsciiAlphaToLower(c0_) == 'e') {
    386     Advance();
    387     if (c0_ == '-' || c0_ == '+') Advance();
    388     if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter();
    389     do {
    390       Advance();
    391     } while (c0_ >= '0' && c0_ <= '9');
    392   }
    393   int length = position_ - beg_pos;
    394   double number;
    395   if (seq_ascii) {
    396     Vector<const char> chars(seq_source_->GetChars() +  beg_pos, length);
    397     number = StringToDouble(isolate()->unicode_cache(),
    398                              chars,
    399                              NO_FLAGS,  // Hex, octal or trailing junk.
    400                              OS::nan_value());
    401   } else {
    402     Vector<char> buffer = Vector<char>::New(length);
    403     String::WriteToFlat(*source_, buffer.start(), beg_pos, position_);
    404     Vector<const char> result =
    405         Vector<const char>(reinterpret_cast<const char*>(buffer.start()),
    406         length);
    407     number = StringToDouble(isolate()->unicode_cache(),
    408                              result,
    409                              NO_FLAGS,  // Hex, octal or trailing junk.
    410                              0.0);
    411     buffer.Dispose();
    412   }
    413   SkipWhitespace();
    414   return isolate()->factory()->NewNumber(number);
    415 }
    416 
    417 
    418 template <typename StringType>
    419 inline void SeqStringSet(Handle<StringType> seq_str, int i, uc32 c);
    420 
    421 template <>
    422 inline void SeqStringSet(Handle<SeqTwoByteString> seq_str, int i, uc32 c) {
    423   seq_str->SeqTwoByteStringSet(i, c);
    424 }
    425 
    426 template <>
    427 inline void SeqStringSet(Handle<SeqAsciiString> seq_str, int i, uc32 c) {
    428   seq_str->SeqAsciiStringSet(i, c);
    429 }
    430 
    431 template <typename StringType>
    432 inline Handle<StringType> NewRawString(Factory* factory, int length);
    433 
    434 template <>
    435 inline Handle<SeqTwoByteString> NewRawString(Factory* factory, int length) {
    436   return factory->NewRawTwoByteString(length, NOT_TENURED);
    437 }
    438 
    439 template <>
    440 inline Handle<SeqAsciiString> NewRawString(Factory* factory, int length) {
    441   return factory->NewRawAsciiString(length, NOT_TENURED);
    442 }
    443 
    444 
    445 // Scans the rest of a JSON string starting from position_ and writes
    446 // prefix[start..end] along with the scanned characters into a
    447 // sequential string of type StringType.
    448 template <bool seq_ascii>
    449 template <typename StringType, typename SinkChar>
    450 Handle<String> JsonParser<seq_ascii>::SlowScanJsonString(
    451     Handle<String> prefix, int start, int end) {
    452   int count = end - start;
    453   int max_length = count + source_length_ - position_;
    454   int length = Min(max_length, Max(kInitialSpecialStringLength, 2 * count));
    455   Handle<StringType> seq_str = NewRawString<StringType>(isolate()->factory(),
    456                                                         length);
    457   // Copy prefix into seq_str.
    458   SinkChar* dest = seq_str->GetChars();
    459   String::WriteToFlat(*prefix, dest, start, end);
    460 
    461   while (c0_ != '"') {
    462     // Check for control character (0x00-0x1f) or unterminated string (<0).
    463     if (c0_ < 0x20) return Handle<String>::null();
    464     if (count >= length) {
    465       // We need to create a longer sequential string for the result.
    466       return SlowScanJsonString<StringType, SinkChar>(seq_str, 0, count);
    467     }
    468     if (c0_ != '\\') {
    469       // If the sink can contain UC16 characters, or source_ contains only
    470       // ASCII characters, there's no need to test whether we can store the
    471       // character. Otherwise check whether the UC16 source character can fit
    472       // in the ASCII sink.
    473       if (sizeof(SinkChar) == kUC16Size ||
    474           seq_ascii ||
    475           c0_ <= kMaxAsciiCharCode) {
    476         SeqStringSet(seq_str, count++, c0_);
    477         Advance();
    478       } else {
    479         // StringType is SeqAsciiString and we just read a non-ASCII char.
    480         return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str, 0, count);
    481       }
    482     } else {
    483       Advance();  // Advance past the \.
    484       switch (c0_) {
    485         case '"':
    486         case '\\':
    487         case '/':
    488           SeqStringSet(seq_str, count++, c0_);
    489           break;
    490         case 'b':
    491           SeqStringSet(seq_str, count++, '\x08');
    492           break;
    493         case 'f':
    494           SeqStringSet(seq_str, count++, '\x0c');
    495           break;
    496         case 'n':
    497           SeqStringSet(seq_str, count++, '\x0a');
    498           break;
    499         case 'r':
    500           SeqStringSet(seq_str, count++, '\x0d');
    501           break;
    502         case 't':
    503           SeqStringSet(seq_str, count++, '\x09');
    504           break;
    505         case 'u': {
    506           uc32 value = 0;
    507           for (int i = 0; i < 4; i++) {
    508             Advance();
    509             int digit = HexValue(c0_);
    510             if (digit < 0) {
    511               return Handle<String>::null();
    512             }
    513             value = value * 16 + digit;
    514           }
    515           if (sizeof(SinkChar) == kUC16Size || value <= kMaxAsciiCharCode) {
    516             SeqStringSet(seq_str, count++, value);
    517             break;
    518           } else {
    519             // StringType is SeqAsciiString and we just read a non-ASCII char.
    520             position_ -= 6;  // Rewind position_ to \ in \uxxxx.
    521             Advance();
    522             return SlowScanJsonString<SeqTwoByteString, uc16>(seq_str,
    523                                                               0,
    524                                                               count);
    525           }
    526         }
    527         default:
    528           return Handle<String>::null();
    529       }
    530       Advance();
    531     }
    532   }
    533   // Shrink seq_string length to count.
    534   if (isolate()->heap()->InNewSpace(*seq_str)) {
    535     isolate()->heap()->new_space()->
    536         template ShrinkStringAtAllocationBoundary<StringType>(
    537             *seq_str, count);
    538   } else {
    539     int string_size = StringType::SizeFor(count);
    540     int allocated_string_size = StringType::SizeFor(length);
    541     int delta = allocated_string_size - string_size;
    542     Address start_filler_object = seq_str->address() + string_size;
    543     seq_str->set_length(count);
    544     isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta);
    545   }
    546   ASSERT_EQ('"', c0_);
    547   // Advance past the last '"'.
    548   AdvanceSkipWhitespace();
    549   return seq_str;
    550 }
    551 
    552 
    553 template <bool seq_ascii>
    554 template <bool is_symbol>
    555 Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
    556   ASSERT_EQ('"', c0_);
    557   Advance();
    558   if (c0_ == '"') {
    559     AdvanceSkipWhitespace();
    560     return Handle<String>(isolate()->heap()->empty_string());
    561   }
    562   int beg_pos = position_;
    563   // Fast case for ASCII only without escape characters.
    564   do {
    565     // Check for control character (0x00-0x1f) or unterminated string (<0).
    566     if (c0_ < 0x20) return Handle<String>::null();
    567     if (c0_ != '\\') {
    568       if (seq_ascii || c0_ <= kMaxAsciiCharCode) {
    569         Advance();
    570       } else {
    571         return SlowScanJsonString<SeqTwoByteString, uc16>(source_,
    572                                                           beg_pos,
    573                                                           position_);
    574       }
    575     } else {
    576       return SlowScanJsonString<SeqAsciiString, char>(source_,
    577                                                       beg_pos,
    578                                                       position_);
    579     }
    580   } while (c0_ != '"');
    581   int length = position_ - beg_pos;
    582   Handle<String> result;
    583   if (seq_ascii && is_symbol) {
    584     result = isolate()->factory()->LookupAsciiSymbol(seq_source_,
    585                                                      beg_pos,
    586                                                      length);
    587   } else {
    588     result = isolate()->factory()->NewRawAsciiString(length);
    589     char* dest = SeqAsciiString::cast(*result)->GetChars();
    590     String::WriteToFlat(*source_, dest, beg_pos, position_);
    591   }
    592   ASSERT_EQ('"', c0_);
    593   // Advance past the last '"'.
    594   AdvanceSkipWhitespace();
    595   return result;
    596 }
    597 
    598 } }  // namespace v8::internal
    599 
    600 #endif  // V8_JSON_PARSER_H_
    601