Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/json/json_parser.h"
      6 
      7 #include "base/float_util.h"
      8 #include "base/logging.h"
      9 #include "base/memory/scoped_ptr.h"
     10 #include "base/strings/string_number_conversions.h"
     11 #include "base/strings/string_piece.h"
     12 #include "base/strings/string_util.h"
     13 #include "base/strings/stringprintf.h"
     14 #include "base/strings/utf_string_conversion_utils.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "base/third_party/icu/icu_utf.h"
     17 #include "base/values.h"
     18 
     19 namespace base {
     20 namespace internal {
     21 
     22 namespace {
     23 
// Nesting limit for dictionaries and lists. StackMarker compares the parser's
// current depth against this value.
const int kStackMaxDepth = 100;

// First code point outside 7-bit ASCII. String parsing appends code points
// below this value as a single char and UTF-8 encodes everything else.
const int32 kExtendedASCIIStart = 0x80;
     27 
     28 // This and the class below are used to own the JSON input string for when
     29 // string tokens are stored as StringPiece instead of std::string. This
     30 // optimization avoids about 2/3rds of string memory copies. The constructor
     31 // takes ownership of the input string. The real root value is Swap()ed into
     32 // the new instance.
     33 class DictionaryHiddenRootValue : public base::DictionaryValue {
     34  public:
     35   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
     36     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
     37     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
     38   }
     39 
     40   virtual void Swap(DictionaryValue* other) OVERRIDE {
     41     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
     42 
     43     // First deep copy to convert JSONStringValue to std::string and swap that
     44     // copy with |other|, which contains the new contents of |this|.
     45     scoped_ptr<base::DictionaryValue> copy(DeepCopy());
     46     copy->Swap(other);
     47 
     48     // Then erase the contents of the current dictionary and swap in the
     49     // new contents, originally from |other|.
     50     Clear();
     51     json_.reset();
     52     DictionaryValue::Swap(copy.get());
     53   }
     54 
     55   // Not overriding DictionaryValue::Remove because it just calls through to
     56   // the method below.
     57 
     58   virtual bool RemoveWithoutPathExpansion(const std::string& key,
     59                                           scoped_ptr<Value>* out) OVERRIDE {
     60     // If the caller won't take ownership of the removed value, just call up.
     61     if (!out)
     62       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
     63 
     64     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
     65 
     66     // Otherwise, remove the value while its still "owned" by this and copy it
     67     // to convert any JSONStringValues to std::string.
     68     scoped_ptr<Value> out_owned;
     69     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
     70       return false;
     71 
     72     out->reset(out_owned->DeepCopy());
     73 
     74     return true;
     75   }
     76 
     77  private:
     78   scoped_ptr<std::string> json_;
     79 
     80   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
     81 };
     82 
     83 class ListHiddenRootValue : public base::ListValue {
     84  public:
     85   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
     86     DCHECK(root->IsType(Value::TYPE_LIST));
     87     ListValue::Swap(static_cast<ListValue*>(root));
     88   }
     89 
     90   virtual void Swap(ListValue* other) OVERRIDE {
     91     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
     92 
     93     // First deep copy to convert JSONStringValue to std::string and swap that
     94     // copy with |other|, which contains the new contents of |this|.
     95     scoped_ptr<base::ListValue> copy(DeepCopy());
     96     copy->Swap(other);
     97 
     98     // Then erase the contents of the current list and swap in the new contents,
     99     // originally from |other|.
    100     Clear();
    101     json_.reset();
    102     ListValue::Swap(copy.get());
    103   }
    104 
    105   virtual bool Remove(size_t index, scoped_ptr<Value>* out) OVERRIDE {
    106     // If the caller won't take ownership of the removed value, just call up.
    107     if (!out)
    108       return ListValue::Remove(index, out);
    109 
    110     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
    111 
    112     // Otherwise, remove the value while its still "owned" by this and copy it
    113     // to convert any JSONStringValues to std::string.
    114     scoped_ptr<Value> out_owned;
    115     if (!ListValue::Remove(index, &out_owned))
    116       return false;
    117 
    118     out->reset(out_owned->DeepCopy());
    119 
    120     return true;
    121   }
    122 
    123  private:
    124   scoped_ptr<std::string> json_;
    125 
    126   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
    127 };
    128 
    129 // A variant on StringValue that uses StringPiece instead of copying the string
    130 // into the Value. This can only be stored in a child of hidden root (above),
    131 // otherwise the referenced string will not be guaranteed to outlive it.
    132 class JSONStringValue : public base::Value {
    133  public:
    134   explicit JSONStringValue(const base::StringPiece& piece)
    135       : Value(TYPE_STRING),
    136         string_piece_(piece) {
    137   }
    138 
    139   // Overridden from base::Value:
    140   virtual bool GetAsString(std::string* out_value) const OVERRIDE {
    141     string_piece_.CopyToString(out_value);
    142     return true;
    143   }
    144   virtual bool GetAsString(string16* out_value) const OVERRIDE {
    145     *out_value = UTF8ToUTF16(string_piece_);
    146     return true;
    147   }
    148   virtual Value* DeepCopy() const OVERRIDE {
    149     return new StringValue(string_piece_.as_string());
    150   }
    151   virtual bool Equals(const Value* other) const OVERRIDE {
    152     std::string other_string;
    153     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
    154         StringPiece(other_string) == string_piece_;
    155   }
    156 
    157  private:
    158   // The location in the original input stream.
    159   base::StringPiece string_piece_;
    160 
    161   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
    162 };
    163 
    164 // Simple class that checks for maximum recursion/"stack overflow."
    165 class StackMarker {
    166  public:
    167   explicit StackMarker(int* depth) : depth_(depth) {
    168     ++(*depth_);
    169     DCHECK_LE(*depth_, kStackMaxDepth);
    170   }
    171   ~StackMarker() {
    172     --(*depth_);
    173   }
    174 
    175   bool IsTooDeep() const {
    176     return *depth_ >= kStackMaxDepth;
    177   }
    178 
    179  private:
    180   int* const depth_;
    181 
    182   DISALLOW_COPY_AND_ASSIGN(StackMarker);
    183 };
    184 
    185 }  // namespace
    186 
// Creates a parser configured by the |options| bit mask. All cursor,
// line-tracking and error fields start out zeroed/NULL; Parse() sets them up
// again for each input it is given.
JSONParser::JSONParser(int options)
    : options_(options),
      start_pos_(NULL),
      pos_(NULL),
      end_pos_(NULL),
      index_(0),
      stack_depth_(0),
      line_number_(0),
      index_last_line_(0),
      error_code_(JSONReader::JSON_NO_ERROR),
      error_line_(0),
      error_column_(0) {
}

// Nothing to release explicitly here.
JSONParser::~JSONParser() {
}
    203 
    204 Value* JSONParser::Parse(const StringPiece& input) {
    205   scoped_ptr<std::string> input_copy;
    206   // If the children of a JSON root can be detached, then hidden roots cannot
    207   // be used, so do not bother copying the input because StringPiece will not
    208   // be used anywhere.
    209   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    210     input_copy.reset(new std::string(input.as_string()));
    211     start_pos_ = input_copy->data();
    212   } else {
    213     start_pos_ = input.data();
    214   }
    215   pos_ = start_pos_;
    216   end_pos_ = start_pos_ + input.length();
    217   index_ = 0;
    218   line_number_ = 1;
    219   index_last_line_ = 0;
    220 
    221   error_code_ = JSONReader::JSON_NO_ERROR;
    222   error_line_ = 0;
    223   error_column_ = 0;
    224 
    225   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
    226   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
    227   // ParseNextToken function mis-treating a Unicode BOM as an invalid
    228   // character and returning NULL.
    229   if (CanConsume(3) && static_cast<uint8>(*pos_) == 0xEF &&
    230       static_cast<uint8>(*(pos_ + 1)) == 0xBB &&
    231       static_cast<uint8>(*(pos_ + 2)) == 0xBF) {
    232     NextNChars(3);
    233   }
    234 
    235   // Parse the first and any nested tokens.
    236   scoped_ptr<Value> root(ParseNextToken());
    237   if (!root.get())
    238     return NULL;
    239 
    240   // Make sure the input stream is at an end.
    241   if (GetNextToken() != T_END_OF_INPUT) {
    242     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
    243       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
    244       return NULL;
    245     }
    246   }
    247 
    248   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
    249   // hidden root.
    250   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    251     if (root->IsType(Value::TYPE_DICTIONARY)) {
    252       return new DictionaryHiddenRootValue(input_copy.release(), root.get());
    253     } else if (root->IsType(Value::TYPE_LIST)) {
    254       return new ListHiddenRootValue(input_copy.release(), root.get());
    255     } else if (root->IsType(Value::TYPE_STRING)) {
    256       // A string type could be a JSONStringValue, but because there's no
    257       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
    258       // preserve it.
    259       return root->DeepCopy();
    260     }
    261   }
    262 
    263   // All other values can be returned directly.
    264   return root.release();
    265 }
    266 
// Returns the error code recorded by the most recent ReportError() call, or
// JSON_NO_ERROR if none has been recorded since the state was last reset.
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
    270 
    271 std::string JSONParser::GetErrorMessage() const {
    272   return FormatErrorMessage(error_line_, error_column_,
    273       JSONReader::ErrorCodeToString(error_code_));
    274 }
    275 
    276 // StringBuilder ///////////////////////////////////////////////////////////////
    277 
// Creates an empty builder with no source position and no owned string.
JSONParser::StringBuilder::StringBuilder()
    : pos_(NULL),
      length_(0),
      string_(NULL) {
}

// Creates a builder whose (initially empty) piece starts at |pos|. While no
// owned string exists, Append() only extends the piece's length.
JSONParser::StringBuilder::StringBuilder(const char* pos)
    : pos_(pos),
      length_(0),
      string_(NULL) {
}
    289 
    290 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
    291   std::swap(other->string_, string_);
    292   std::swap(other->pos_, pos_);
    293   std::swap(other->length_, length_);
    294 }
    295 
// Frees the owned string, if Convert() ever created one (deleting NULL is a
// no-op).
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
    299 
    300 void JSONParser::StringBuilder::Append(const char& c) {
    301   DCHECK_GE(c, 0);
    302   DCHECK_LT(c, 128);
    303 
    304   if (string_)
    305     string_->push_back(c);
    306   else
    307     ++length_;
    308 }
    309 
// Appends |str| to the owned string. Convert() must have been called first;
// this is only DCHECKed, so |string_| is dereferenced unconditionally.
void JSONParser::StringBuilder::AppendString(const std::string& str) {
  DCHECK(string_);
  string_->append(str);
}
    314 
    315 void JSONParser::StringBuilder::Convert() {
    316   if (string_)
    317     return;
    318   string_  = new std::string(pos_, length_);
    319 }
    320 
    321 bool JSONParser::StringBuilder::CanBeStringPiece() const {
    322   return !string_;
    323 }
    324 
    325 StringPiece JSONParser::StringBuilder::AsStringPiece() {
    326   if (string_)
    327     return StringPiece();
    328   return StringPiece(pos_, length_);
    329 }
    330 
    331 const std::string& JSONParser::StringBuilder::AsString() {
    332   if (!string_)
    333     Convert();
    334   return *string_;
    335 }
    336 
    337 // JSONParser private //////////////////////////////////////////////////////////
    338 
    339 inline bool JSONParser::CanConsume(int length) {
    340   return pos_ + length <= end_pos_;
    341 }
    342 
    343 const char* JSONParser::NextChar() {
    344   DCHECK(CanConsume(1));
    345   ++index_;
    346   ++pos_;
    347   return pos_;
    348 }
    349 
    350 void JSONParser::NextNChars(int n) {
    351   DCHECK(CanConsume(n));
    352   index_ += n;
    353   pos_ += n;
    354 }
    355 
    356 JSONParser::Token JSONParser::GetNextToken() {
    357   EatWhitespaceAndComments();
    358   if (!CanConsume(1))
    359     return T_END_OF_INPUT;
    360 
    361   switch (*pos_) {
    362     case '{':
    363       return T_OBJECT_BEGIN;
    364     case '}':
    365       return T_OBJECT_END;
    366     case '[':
    367       return T_ARRAY_BEGIN;
    368     case ']':
    369       return T_ARRAY_END;
    370     case '"':
    371       return T_STRING;
    372     case '0':
    373     case '1':
    374     case '2':
    375     case '3':
    376     case '4':
    377     case '5':
    378     case '6':
    379     case '7':
    380     case '8':
    381     case '9':
    382     case '-':
    383       return T_NUMBER;
    384     case 't':
    385       return T_BOOL_TRUE;
    386     case 'f':
    387       return T_BOOL_FALSE;
    388     case 'n':
    389       return T_NULL;
    390     case ',':
    391       return T_LIST_SEPARATOR;
    392     case ':':
    393       return T_OBJECT_PAIR_SEPARATOR;
    394     default:
    395       return T_INVALID_TOKEN;
    396   }
    397 }
    398 
    399 void JSONParser::EatWhitespaceAndComments() {
    400   while (pos_ < end_pos_) {
    401     switch (*pos_) {
    402       case '\r':
    403       case '\n':
    404         index_last_line_ = index_;
    405         // Don't increment line_number_ twice for "\r\n".
    406         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
    407           ++line_number_;
    408         // Fall through.
    409       case ' ':
    410       case '\t':
    411         NextChar();
    412         break;
    413       case '/':
    414         if (!EatComment())
    415           return;
    416         break;
    417       default:
    418         return;
    419     }
    420   }
    421 }
    422 
    423 bool JSONParser::EatComment() {
    424   if (*pos_ != '/' || !CanConsume(1))
    425     return false;
    426 
    427   char next_char = *NextChar();
    428   if (next_char == '/') {
    429     // Single line comment, read to newline.
    430     while (CanConsume(1)) {
    431       char next_char = *NextChar();
    432       if (next_char == '\n' || next_char == '\r')
    433         return true;
    434     }
    435   } else if (next_char == '*') {
    436     char previous_char = '\0';
    437     // Block comment, read until end marker.
    438     while (CanConsume(1)) {
    439       next_char = *NextChar();
    440       if (previous_char == '*' && next_char == '/') {
    441         // EatWhitespaceAndComments will inspect pos_, which will still be on
    442         // the last / of the comment, so advance once more (which may also be
    443         // end of input).
    444         NextChar();
    445         return true;
    446       }
    447       previous_char = next_char;
    448     }
    449 
    450     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
    451   }
    452 
    453   return false;
    454 }
    455 
    456 Value* JSONParser::ParseNextToken() {
    457   return ParseToken(GetNextToken());
    458 }
    459 
    460 Value* JSONParser::ParseToken(Token token) {
    461   switch (token) {
    462     case T_OBJECT_BEGIN:
    463       return ConsumeDictionary();
    464     case T_ARRAY_BEGIN:
    465       return ConsumeList();
    466     case T_STRING:
    467       return ConsumeString();
    468     case T_NUMBER:
    469       return ConsumeNumber();
    470     case T_BOOL_TRUE:
    471     case T_BOOL_FALSE:
    472     case T_NULL:
    473       return ConsumeLiteral();
    474     default:
    475       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    476       return NULL;
    477   }
    478 }
    479 
    480 Value* JSONParser::ConsumeDictionary() {
    481   if (*pos_ != '{') {
    482     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    483     return NULL;
    484   }
    485 
    486   StackMarker depth_check(&stack_depth_);
    487   if (depth_check.IsTooDeep()) {
    488     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    489     return NULL;
    490   }
    491 
    492   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
    493 
    494   NextChar();
    495   Token token = GetNextToken();
    496   while (token != T_OBJECT_END) {
    497     if (token != T_STRING) {
    498       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
    499       return NULL;
    500     }
    501 
    502     // First consume the key.
    503     StringBuilder key;
    504     if (!ConsumeStringRaw(&key)) {
    505       return NULL;
    506     }
    507 
    508     // Read the separator.
    509     NextChar();
    510     token = GetNextToken();
    511     if (token != T_OBJECT_PAIR_SEPARATOR) {
    512       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    513       return NULL;
    514     }
    515 
    516     // The next token is the value. Ownership transfers to |dict|.
    517     NextChar();
    518     Value* value = ParseNextToken();
    519     if (!value) {
    520       // ReportError from deeper level.
    521       return NULL;
    522     }
    523 
    524     dict->SetWithoutPathExpansion(key.AsString(), value);
    525 
    526     NextChar();
    527     token = GetNextToken();
    528     if (token == T_LIST_SEPARATOR) {
    529       NextChar();
    530       token = GetNextToken();
    531       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    532         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    533         return NULL;
    534       }
    535     } else if (token != T_OBJECT_END) {
    536       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    537       return NULL;
    538     }
    539   }
    540 
    541   return dict.release();
    542 }
    543 
    544 Value* JSONParser::ConsumeList() {
    545   if (*pos_ != '[') {
    546     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    547     return NULL;
    548   }
    549 
    550   StackMarker depth_check(&stack_depth_);
    551   if (depth_check.IsTooDeep()) {
    552     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    553     return NULL;
    554   }
    555 
    556   scoped_ptr<ListValue> list(new ListValue);
    557 
    558   NextChar();
    559   Token token = GetNextToken();
    560   while (token != T_ARRAY_END) {
    561     Value* item = ParseToken(token);
    562     if (!item) {
    563       // ReportError from deeper level.
    564       return NULL;
    565     }
    566 
    567     list->Append(item);
    568 
    569     NextChar();
    570     token = GetNextToken();
    571     if (token == T_LIST_SEPARATOR) {
    572       NextChar();
    573       token = GetNextToken();
    574       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    575         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    576         return NULL;
    577       }
    578     } else if (token != T_ARRAY_END) {
    579       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    580       return NULL;
    581     }
    582   }
    583 
    584   return list.release();
    585 }
    586 
    587 Value* JSONParser::ConsumeString() {
    588   StringBuilder string;
    589   if (!ConsumeStringRaw(&string))
    590     return NULL;
    591 
    592   // Create the Value representation, using a hidden root, if configured
    593   // to do so, and if the string can be represented by StringPiece.
    594   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
    595     return new JSONStringValue(string.AsStringPiece());
    596   } else {
    597     if (string.CanBeStringPiece())
    598       string.Convert();
    599     return new StringValue(string.AsString());
    600   }
    601 }
    602 
    603 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
    604   if (*pos_ != '"') {
    605     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    606     return false;
    607   }
    608 
    609   // StringBuilder will internally build a StringPiece unless a UTF-16
    610   // conversion occurs, at which point it will perform a copy into a
    611   // std::string.
    612   StringBuilder string(NextChar());
    613 
    614   int length = end_pos_ - start_pos_;
    615   int32 next_char = 0;
    616 
    617   while (CanConsume(1)) {
    618     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
    619     CBU8_NEXT(start_pos_, index_, length, next_char);
    620     if (next_char < 0 || !IsValidCharacter(next_char)) {
    621       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
    622       return false;
    623     }
    624 
    625     // If this character is an escape sequence...
    626     if (next_char == '\\') {
    627       // The input string will be adjusted (either by combining the two
    628       // characters of an encoded escape sequence, or with a UTF conversion),
    629       // so using StringPiece isn't possible -- force a conversion.
    630       string.Convert();
    631 
    632       if (!CanConsume(1)) {
    633         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    634         return false;
    635       }
    636 
    637       switch (*NextChar()) {
    638         // Allowed esape sequences:
    639         case 'x': {  // UTF-8 sequence.
    640           // UTF-8 \x escape sequences are not allowed in the spec, but they
    641           // are supported here for backwards-compatiblity with the old parser.
    642           if (!CanConsume(2)) {
    643             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
    644             return false;
    645           }
    646 
    647           int hex_digit = 0;
    648           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
    649             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    650             return false;
    651           }
    652           NextChar();
    653 
    654           if (hex_digit < kExtendedASCIIStart)
    655             string.Append(hex_digit);
    656           else
    657             DecodeUTF8(hex_digit, &string);
    658           break;
    659         }
    660         case 'u': {  // UTF-16 sequence.
    661           // UTF units are of the form \uXXXX.
    662           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
    663             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    664             return false;
    665           }
    666 
    667           // Skip the 'u'.
    668           NextChar();
    669 
    670           std::string utf8_units;
    671           if (!DecodeUTF16(&utf8_units)) {
    672             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    673             return false;
    674           }
    675 
    676           string.AppendString(utf8_units);
    677           break;
    678         }
    679         case '"':
    680           string.Append('"');
    681           break;
    682         case '\\':
    683           string.Append('\\');
    684           break;
    685         case '/':
    686           string.Append('/');
    687           break;
    688         case 'b':
    689           string.Append('\b');
    690           break;
    691         case 'f':
    692           string.Append('\f');
    693           break;
    694         case 'n':
    695           string.Append('\n');
    696           break;
    697         case 'r':
    698           string.Append('\r');
    699           break;
    700         case 't':
    701           string.Append('\t');
    702           break;
    703         case 'v':  // Not listed as valid escape sequence in the RFC.
    704           string.Append('\v');
    705           break;
    706         // All other escape squences are illegal.
    707         default:
    708           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    709           return false;
    710       }
    711     } else if (next_char == '"') {
    712       --index_;  // Rewind by one because of CBU8_NEXT.
    713       out->Swap(&string);
    714       return true;
    715     } else {
    716       if (next_char < kExtendedASCIIStart)
    717         string.Append(next_char);
    718       else
    719         DecodeUTF8(next_char, &string);
    720     }
    721   }
    722 
    723   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    724   return false;
    725 }
    726 
    727 // Entry is at the first X in \uXXXX.
    728 bool JSONParser::DecodeUTF16(std::string* dest_string) {
    729   if (!CanConsume(4))
    730     return false;
    731 
    732   // This is a 32-bit field because the shift operations in the
    733   // conversion process below cause MSVC to error about "data loss."
    734   // This only stores UTF-16 code units, though.
    735   // Consume the UTF-16 code unit, which may be a high surrogate.
    736   int code_unit16_high = 0;
    737   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
    738     return false;
    739 
    740   // Only add 3, not 4, because at the end of this iteration, the parser has
    741   // finished working with the last digit of the UTF sequence, meaning that
    742   // the next iteration will advance to the next byte.
    743   NextNChars(3);
    744 
    745   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
    746   // code unit sequence.
    747   char code_unit8[8] = { 0 };
    748   size_t offset = 0;
    749 
    750   // If this is a high surrogate, consume the next code unit to get the
    751   // low surrogate.
    752   if (CBU16_IS_SURROGATE(code_unit16_high)) {
    753     // Make sure this is the high surrogate. If not, it's an encoding
    754     // error.
    755     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
    756       return false;
    757 
    758     // Make sure that the token has more characters to consume the
    759     // lower surrogate.
    760     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
    761       return false;
    762     if (*NextChar() != '\\' || *NextChar() != 'u')
    763       return false;
    764 
    765     NextChar();  // Read past 'u'.
    766     int code_unit16_low = 0;
    767     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
    768       return false;
    769 
    770     NextNChars(3);
    771 
    772     if (!CBU16_IS_TRAIL(code_unit16_low)) {
    773       return false;
    774     }
    775 
    776     uint32 code_point = CBU16_GET_SUPPLEMENTARY(code_unit16_high,
    777                                                 code_unit16_low);
    778     offset = 0;
    779     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
    780   } else {
    781     // Not a surrogate.
    782     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
    783     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
    784   }
    785 
    786   dest_string->append(code_unit8);
    787   return true;
    788 }
    789 
    790 void JSONParser::DecodeUTF8(const int32& point, StringBuilder* dest) {
    791   // Anything outside of the basic ASCII plane will need to be decoded from
    792   // int32 to a multi-byte sequence.
    793   if (point < kExtendedASCIIStart) {
    794     dest->Append(point);
    795   } else {
    796     char utf8_units[4] = { 0 };
    797     int offset = 0;
    798     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
    799     dest->Convert();
    800     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
    801     // zero terminated at this point.  |offset| contains the correct length.
    802     dest->AppendString(std::string(utf8_units, offset));
    803   }
    804 }
    805 
    806 Value* JSONParser::ConsumeNumber() {
    807   const char* num_start = pos_;
    808   const int start_index = index_;
    809   int end_index = start_index;
    810 
    811   if (*pos_ == '-')
    812     NextChar();
    813 
    814   if (!ReadInt(false)) {
    815     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    816     return NULL;
    817   }
    818   end_index = index_;
    819 
    820   // The optional fraction part.
    821   if (*pos_ == '.') {
    822     if (!CanConsume(1)) {
    823       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    824       return NULL;
    825     }
    826     NextChar();
    827     if (!ReadInt(true)) {
    828       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    829       return NULL;
    830     }
    831     end_index = index_;
    832   }
    833 
    834   // Optional exponent part.
    835   if (*pos_ == 'e' || *pos_ == 'E') {
    836     NextChar();
    837     if (*pos_ == '-' || *pos_ == '+')
    838       NextChar();
    839     if (!ReadInt(true)) {
    840       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    841       return NULL;
    842     }
    843     end_index = index_;
    844   }
    845 
    846   // ReadInt is greedy because numbers have no easily detectable sentinel,
    847   // so save off where the parser should be on exit (see Consume invariant at
    848   // the top of the header), then make sure the next token is one which is
    849   // valid.
    850   const char* exit_pos = pos_ - 1;
    851   int exit_index = index_ - 1;
    852 
    853   switch (GetNextToken()) {
    854     case T_OBJECT_END:
    855     case T_ARRAY_END:
    856     case T_LIST_SEPARATOR:
    857     case T_END_OF_INPUT:
    858       break;
    859     default:
    860       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    861       return NULL;
    862   }
    863 
    864   pos_ = exit_pos;
    865   index_ = exit_index;
    866 
    867   StringPiece num_string(num_start, end_index - start_index);
    868 
    869   int num_int;
    870   if (StringToInt(num_string, &num_int))
    871     return new FundamentalValue(num_int);
    872 
    873   double num_double;
    874   if (base::StringToDouble(num_string.as_string(), &num_double) &&
    875       IsFinite(num_double)) {
    876     return new FundamentalValue(num_double);
    877   }
    878 
    879   return NULL;
    880 }
    881 
    882 bool JSONParser::ReadInt(bool allow_leading_zeros) {
    883   char first = *pos_;
    884   int len = 0;
    885 
    886   char c = first;
    887   while (CanConsume(1) && IsAsciiDigit(c)) {
    888     c = *NextChar();
    889     ++len;
    890   }
    891 
    892   if (len == 0)
    893     return false;
    894 
    895   if (!allow_leading_zeros && len > 1 && first == '0')
    896     return false;
    897 
    898   return true;
    899 }
    900 
    901 Value* JSONParser::ConsumeLiteral() {
    902   switch (*pos_) {
    903     case 't': {
    904       const char* kTrueLiteral = "true";
    905       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
    906       if (!CanConsume(kTrueLen - 1) ||
    907           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
    908         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    909         return NULL;
    910       }
    911       NextNChars(kTrueLen - 1);
    912       return new FundamentalValue(true);
    913     }
    914     case 'f': {
    915       const char* kFalseLiteral = "false";
    916       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
    917       if (!CanConsume(kFalseLen - 1) ||
    918           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
    919         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    920         return NULL;
    921       }
    922       NextNChars(kFalseLen - 1);
    923       return new FundamentalValue(false);
    924     }
    925     case 'n': {
    926       const char* kNullLiteral = "null";
    927       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
    928       if (!CanConsume(kNullLen - 1) ||
    929           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
    930         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    931         return NULL;
    932       }
    933       NextNChars(kNullLen - 1);
    934       return Value::CreateNullValue();
    935     }
    936     default:
    937       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    938       return NULL;
    939   }
    940 }
    941 
    942 // static
    943 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
    944   return strncmp(one, two, len) == 0;
    945 }
    946 
    947 void JSONParser::ReportError(JSONReader::JsonParseError code,
    948                              int column_adjust) {
    949   error_code_ = code;
    950   error_line_ = line_number_;
    951   error_column_ = index_ - index_last_line_ + column_adjust;
    952 }
    953 
    954 // static
    955 std::string JSONParser::FormatErrorMessage(int line, int column,
    956                                            const std::string& description) {
    957   if (line || column) {
    958     return StringPrintf("Line: %i, column: %i, %s",
    959         line, column, description.c_str());
    960   }
    961   return description;
    962 }
    963 
    964 }  // namespace internal
    965 }  // namespace base
    966