Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/json/json_parser.h"
      6 
      7 #include <cmath>
      8 
      9 #include "base/logging.h"
     10 #include "base/macros.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_piece.h"
     14 #include "base/strings/string_util.h"
     15 #include "base/strings/stringprintf.h"
     16 #include "base/strings/utf_string_conversion_utils.h"
     17 #include "base/third_party/icu/icu_utf.h"
     18 #include "base/values.h"
     19 
     20 namespace base {
     21 namespace internal {
     22 
     23 namespace {
     24 
     25 const int kStackMaxDepth = 100;
     26 
     27 const int32_t kExtendedASCIIStart = 0x80;
     28 
     29 // This and the class below are used to own the JSON input string for when
     30 // string tokens are stored as StringPiece instead of std::string. This
     31 // optimization avoids about 2/3rds of string memory copies. The constructor
     32 // takes ownership of the input string. The real root value is Swap()ed into
     33 // the new instance.
     34 class DictionaryHiddenRootValue : public DictionaryValue {
     35  public:
     36   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
     37     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
     38     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
     39   }
     40 
     41   void Swap(DictionaryValue* other) override {
     42     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
     43 
     44     // First deep copy to convert JSONStringValue to std::string and swap that
     45     // copy with |other|, which contains the new contents of |this|.
     46     scoped_ptr<DictionaryValue> copy(DeepCopy());
     47     copy->Swap(other);
     48 
     49     // Then erase the contents of the current dictionary and swap in the
     50     // new contents, originally from |other|.
     51     Clear();
     52     json_.reset();
     53     DictionaryValue::Swap(copy.get());
     54   }
     55 
     56   // Not overriding DictionaryValue::Remove because it just calls through to
     57   // the method below.
     58 
     59   bool RemoveWithoutPathExpansion(const std::string& key,
     60                                   scoped_ptr<Value>* out) override {
     61     // If the caller won't take ownership of the removed value, just call up.
     62     if (!out)
     63       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
     64 
     65     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
     66 
     67     // Otherwise, remove the value while its still "owned" by this and copy it
     68     // to convert any JSONStringValues to std::string.
     69     scoped_ptr<Value> out_owned;
     70     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
     71       return false;
     72 
     73     out->reset(out_owned->DeepCopy());
     74 
     75     return true;
     76   }
     77 
     78  private:
     79   scoped_ptr<std::string> json_;
     80 
     81   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
     82 };
     83 
     84 class ListHiddenRootValue : public ListValue {
     85  public:
     86   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
     87     DCHECK(root->IsType(Value::TYPE_LIST));
     88     ListValue::Swap(static_cast<ListValue*>(root));
     89   }
     90 
     91   void Swap(ListValue* other) override {
     92     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
     93 
     94     // First deep copy to convert JSONStringValue to std::string and swap that
     95     // copy with |other|, which contains the new contents of |this|.
     96     scoped_ptr<ListValue> copy(DeepCopy());
     97     copy->Swap(other);
     98 
     99     // Then erase the contents of the current list and swap in the new contents,
    100     // originally from |other|.
    101     Clear();
    102     json_.reset();
    103     ListValue::Swap(copy.get());
    104   }
    105 
    106   bool Remove(size_t index, scoped_ptr<Value>* out) override {
    107     // If the caller won't take ownership of the removed value, just call up.
    108     if (!out)
    109       return ListValue::Remove(index, out);
    110 
    111     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
    112 
    113     // Otherwise, remove the value while its still "owned" by this and copy it
    114     // to convert any JSONStringValues to std::string.
    115     scoped_ptr<Value> out_owned;
    116     if (!ListValue::Remove(index, &out_owned))
    117       return false;
    118 
    119     out->reset(out_owned->DeepCopy());
    120 
    121     return true;
    122   }
    123 
    124  private:
    125   scoped_ptr<std::string> json_;
    126 
    127   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
    128 };
    129 
    130 // A variant on StringValue that uses StringPiece instead of copying the string
    131 // into the Value. This can only be stored in a child of hidden root (above),
    132 // otherwise the referenced string will not be guaranteed to outlive it.
    133 class JSONStringValue : public Value {
    134  public:
    135   explicit JSONStringValue(const StringPiece& piece)
    136       : Value(TYPE_STRING),
    137         string_piece_(piece) {
    138   }
    139 
    140   // Overridden from Value:
    141   bool GetAsString(std::string* out_value) const override {
    142     string_piece_.CopyToString(out_value);
    143     return true;
    144   }
    145   Value* DeepCopy() const override {
    146     return new StringValue(string_piece_.as_string());
    147   }
    148   bool Equals(const Value* other) const override {
    149     std::string other_string;
    150     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
    151         StringPiece(other_string) == string_piece_;
    152   }
    153 
    154  private:
    155   // The location in the original input stream.
    156   StringPiece string_piece_;
    157 
    158   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
    159 };
    160 
    161 // Simple class that checks for maximum recursion/"stack overflow."
    162 class StackMarker {
    163  public:
    164   explicit StackMarker(int* depth) : depth_(depth) {
    165     ++(*depth_);
    166     DCHECK_LE(*depth_, kStackMaxDepth);
    167   }
    168   ~StackMarker() {
    169     --(*depth_);
    170   }
    171 
    172   bool IsTooDeep() const {
    173     return *depth_ >= kStackMaxDepth;
    174   }
    175 
    176  private:
    177   int* const depth_;
    178 
    179   DISALLOW_COPY_AND_ASSIGN(StackMarker);
    180 };
    181 
    182 }  // namespace
    183 
    184 JSONParser::JSONParser(int options)
    185     : options_(options),
    186       start_pos_(NULL),
    187       pos_(NULL),
    188       end_pos_(NULL),
    189       index_(0),
    190       stack_depth_(0),
    191       line_number_(0),
    192       index_last_line_(0),
    193       error_code_(JSONReader::JSON_NO_ERROR),
    194       error_line_(0),
    195       error_column_(0) {
    196 }
    197 
    198 JSONParser::~JSONParser() {
    199 }
    200 
    201 Value* JSONParser::Parse(const StringPiece& input) {
    202   scoped_ptr<std::string> input_copy;
    203   // If the children of a JSON root can be detached, then hidden roots cannot
    204   // be used, so do not bother copying the input because StringPiece will not
    205   // be used anywhere.
    206   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    207     input_copy.reset(new std::string(input.as_string()));
    208     start_pos_ = input_copy->data();
    209   } else {
    210     start_pos_ = input.data();
    211   }
    212   pos_ = start_pos_;
    213   end_pos_ = start_pos_ + input.length();
    214   index_ = 0;
    215   line_number_ = 1;
    216   index_last_line_ = 0;
    217 
    218   error_code_ = JSONReader::JSON_NO_ERROR;
    219   error_line_ = 0;
    220   error_column_ = 0;
    221 
    222   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
    223   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
    224   // ParseNextToken function mis-treating a Unicode BOM as an invalid
    225   // character and returning NULL.
    226   if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
    227       static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
    228       static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
    229     NextNChars(3);
    230   }
    231 
    232   // Parse the first and any nested tokens.
    233   scoped_ptr<Value> root(ParseNextToken());
    234   if (!root.get())
    235     return NULL;
    236 
    237   // Make sure the input stream is at an end.
    238   if (GetNextToken() != T_END_OF_INPUT) {
    239     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
    240       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
    241       return NULL;
    242     }
    243   }
    244 
    245   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
    246   // hidden root.
    247   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    248     if (root->IsType(Value::TYPE_DICTIONARY)) {
    249       return new DictionaryHiddenRootValue(input_copy.release(), root.get());
    250     } else if (root->IsType(Value::TYPE_LIST)) {
    251       return new ListHiddenRootValue(input_copy.release(), root.get());
    252     } else if (root->IsType(Value::TYPE_STRING)) {
    253       // A string type could be a JSONStringValue, but because there's no
    254       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
    255       // preserve it.
    256       return root->DeepCopy();
    257     }
    258   }
    259 
    260   // All other values can be returned directly.
    261   return root.release();
    262 }
    263 
    264 JSONReader::JsonParseError JSONParser::error_code() const {
    265   return error_code_;
    266 }
    267 
    268 std::string JSONParser::GetErrorMessage() const {
    269   return FormatErrorMessage(error_line_, error_column_,
    270       JSONReader::ErrorCodeToString(error_code_));
    271 }
    272 
    273 int JSONParser::error_line() const {
    274   return error_line_;
    275 }
    276 
    277 int JSONParser::error_column() const {
    278   return error_column_;
    279 }
    280 
    281 // StringBuilder ///////////////////////////////////////////////////////////////
    282 
    283 JSONParser::StringBuilder::StringBuilder()
    284     : pos_(NULL),
    285       length_(0),
    286       string_(NULL) {
    287 }
    288 
    289 JSONParser::StringBuilder::StringBuilder(const char* pos)
    290     : pos_(pos),
    291       length_(0),
    292       string_(NULL) {
    293 }
    294 
    295 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
    296   std::swap(other->string_, string_);
    297   std::swap(other->pos_, pos_);
    298   std::swap(other->length_, length_);
    299 }
    300 
    301 JSONParser::StringBuilder::~StringBuilder() {
    302   delete string_;
    303 }
    304 
    305 void JSONParser::StringBuilder::Append(const char& c) {
    306   DCHECK_GE(c, 0);
    307   DCHECK_LT(c, 128);
    308 
    309   if (string_)
    310     string_->push_back(c);
    311   else
    312     ++length_;
    313 }
    314 
    315 void JSONParser::StringBuilder::AppendString(const std::string& str) {
    316   DCHECK(string_);
    317   string_->append(str);
    318 }
    319 
    320 void JSONParser::StringBuilder::Convert() {
    321   if (string_)
    322     return;
    323   string_  = new std::string(pos_, length_);
    324 }
    325 
    326 bool JSONParser::StringBuilder::CanBeStringPiece() const {
    327   return !string_;
    328 }
    329 
    330 StringPiece JSONParser::StringBuilder::AsStringPiece() {
    331   if (string_)
    332     return StringPiece();
    333   return StringPiece(pos_, length_);
    334 }
    335 
    336 const std::string& JSONParser::StringBuilder::AsString() {
    337   if (!string_)
    338     Convert();
    339   return *string_;
    340 }
    341 
    342 // JSONParser private //////////////////////////////////////////////////////////
    343 
    344 inline bool JSONParser::CanConsume(int length) {
    345   return pos_ + length <= end_pos_;
    346 }
    347 
    348 const char* JSONParser::NextChar() {
    349   DCHECK(CanConsume(1));
    350   ++index_;
    351   ++pos_;
    352   return pos_;
    353 }
    354 
    355 void JSONParser::NextNChars(int n) {
    356   DCHECK(CanConsume(n));
    357   index_ += n;
    358   pos_ += n;
    359 }
    360 
    361 JSONParser::Token JSONParser::GetNextToken() {
    362   EatWhitespaceAndComments();
    363   if (!CanConsume(1))
    364     return T_END_OF_INPUT;
    365 
    366   switch (*pos_) {
    367     case '{':
    368       return T_OBJECT_BEGIN;
    369     case '}':
    370       return T_OBJECT_END;
    371     case '[':
    372       return T_ARRAY_BEGIN;
    373     case ']':
    374       return T_ARRAY_END;
    375     case '"':
    376       return T_STRING;
    377     case '0':
    378     case '1':
    379     case '2':
    380     case '3':
    381     case '4':
    382     case '5':
    383     case '6':
    384     case '7':
    385     case '8':
    386     case '9':
    387     case '-':
    388       return T_NUMBER;
    389     case 't':
    390       return T_BOOL_TRUE;
    391     case 'f':
    392       return T_BOOL_FALSE;
    393     case 'n':
    394       return T_NULL;
    395     case ',':
    396       return T_LIST_SEPARATOR;
    397     case ':':
    398       return T_OBJECT_PAIR_SEPARATOR;
    399     default:
    400       return T_INVALID_TOKEN;
    401   }
    402 }
    403 
    404 void JSONParser::EatWhitespaceAndComments() {
    405   while (pos_ < end_pos_) {
    406     switch (*pos_) {
    407       case '\r':
    408       case '\n':
    409         index_last_line_ = index_;
    410         // Don't increment line_number_ twice for "\r\n".
    411         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
    412           ++line_number_;
    413         // Fall through.
    414       case ' ':
    415       case '\t':
    416         NextChar();
    417         break;
    418       case '/':
    419         if (!EatComment())
    420           return;
    421         break;
    422       default:
    423         return;
    424     }
    425   }
    426 }
    427 
    428 bool JSONParser::EatComment() {
    429   if (*pos_ != '/' || !CanConsume(1))
    430     return false;
    431 
    432   char next_char = *NextChar();
    433   if (next_char == '/') {
    434     // Single line comment, read to newline.
    435     while (CanConsume(1)) {
    436       next_char = *NextChar();
    437       if (next_char == '\n' || next_char == '\r')
    438         return true;
    439     }
    440   } else if (next_char == '*') {
    441     char previous_char = '\0';
    442     // Block comment, read until end marker.
    443     while (CanConsume(1)) {
    444       next_char = *NextChar();
    445       if (previous_char == '*' && next_char == '/') {
    446         // EatWhitespaceAndComments will inspect pos_, which will still be on
    447         // the last / of the comment, so advance once more (which may also be
    448         // end of input).
    449         NextChar();
    450         return true;
    451       }
    452       previous_char = next_char;
    453     }
    454 
    455     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
    456   }
    457 
    458   return false;
    459 }
    460 
    461 Value* JSONParser::ParseNextToken() {
    462   return ParseToken(GetNextToken());
    463 }
    464 
    465 Value* JSONParser::ParseToken(Token token) {
    466   switch (token) {
    467     case T_OBJECT_BEGIN:
    468       return ConsumeDictionary();
    469     case T_ARRAY_BEGIN:
    470       return ConsumeList();
    471     case T_STRING:
    472       return ConsumeString();
    473     case T_NUMBER:
    474       return ConsumeNumber();
    475     case T_BOOL_TRUE:
    476     case T_BOOL_FALSE:
    477     case T_NULL:
    478       return ConsumeLiteral();
    479     default:
    480       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    481       return NULL;
    482   }
    483 }
    484 
    485 Value* JSONParser::ConsumeDictionary() {
    486   if (*pos_ != '{') {
    487     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    488     return NULL;
    489   }
    490 
    491   StackMarker depth_check(&stack_depth_);
    492   if (depth_check.IsTooDeep()) {
    493     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    494     return NULL;
    495   }
    496 
    497   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
    498 
    499   NextChar();
    500   Token token = GetNextToken();
    501   while (token != T_OBJECT_END) {
    502     if (token != T_STRING) {
    503       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
    504       return NULL;
    505     }
    506 
    507     // First consume the key.
    508     StringBuilder key;
    509     if (!ConsumeStringRaw(&key)) {
    510       return NULL;
    511     }
    512 
    513     // Read the separator.
    514     NextChar();
    515     token = GetNextToken();
    516     if (token != T_OBJECT_PAIR_SEPARATOR) {
    517       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    518       return NULL;
    519     }
    520 
    521     // The next token is the value. Ownership transfers to |dict|.
    522     NextChar();
    523     Value* value = ParseNextToken();
    524     if (!value) {
    525       // ReportError from deeper level.
    526       return NULL;
    527     }
    528 
    529     dict->SetWithoutPathExpansion(key.AsString(), value);
    530 
    531     NextChar();
    532     token = GetNextToken();
    533     if (token == T_LIST_SEPARATOR) {
    534       NextChar();
    535       token = GetNextToken();
    536       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    537         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    538         return NULL;
    539       }
    540     } else if (token != T_OBJECT_END) {
    541       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    542       return NULL;
    543     }
    544   }
    545 
    546   return dict.release();
    547 }
    548 
    549 Value* JSONParser::ConsumeList() {
    550   if (*pos_ != '[') {
    551     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    552     return NULL;
    553   }
    554 
    555   StackMarker depth_check(&stack_depth_);
    556   if (depth_check.IsTooDeep()) {
    557     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    558     return NULL;
    559   }
    560 
    561   scoped_ptr<ListValue> list(new ListValue);
    562 
    563   NextChar();
    564   Token token = GetNextToken();
    565   while (token != T_ARRAY_END) {
    566     Value* item = ParseToken(token);
    567     if (!item) {
    568       // ReportError from deeper level.
    569       return NULL;
    570     }
    571 
    572     list->Append(item);
    573 
    574     NextChar();
    575     token = GetNextToken();
    576     if (token == T_LIST_SEPARATOR) {
    577       NextChar();
    578       token = GetNextToken();
    579       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    580         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    581         return NULL;
    582       }
    583     } else if (token != T_ARRAY_END) {
    584       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    585       return NULL;
    586     }
    587   }
    588 
    589   return list.release();
    590 }
    591 
    592 Value* JSONParser::ConsumeString() {
    593   StringBuilder string;
    594   if (!ConsumeStringRaw(&string))
    595     return NULL;
    596 
    597   // Create the Value representation, using a hidden root, if configured
    598   // to do so, and if the string can be represented by StringPiece.
    599   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
    600     return new JSONStringValue(string.AsStringPiece());
    601   } else {
    602     if (string.CanBeStringPiece())
    603       string.Convert();
    604     return new StringValue(string.AsString());
    605   }
    606 }
    607 
    608 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
    609   if (*pos_ != '"') {
    610     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    611     return false;
    612   }
    613 
    614   // StringBuilder will internally build a StringPiece unless a UTF-16
    615   // conversion occurs, at which point it will perform a copy into a
    616   // std::string.
    617   StringBuilder string(NextChar());
    618 
    619   int length = end_pos_ - start_pos_;
    620   int32_t next_char = 0;
    621 
    622   while (CanConsume(1)) {
    623     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
    624     CBU8_NEXT(start_pos_, index_, length, next_char);
    625     if (next_char < 0 || !IsValidCharacter(next_char)) {
    626       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
    627       return false;
    628     }
    629 
    630     // If this character is an escape sequence...
    631     if (next_char == '\\') {
    632       // The input string will be adjusted (either by combining the two
    633       // characters of an encoded escape sequence, or with a UTF conversion),
    634       // so using StringPiece isn't possible -- force a conversion.
    635       string.Convert();
    636 
    637       if (!CanConsume(1)) {
    638         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    639         return false;
    640       }
    641 
    642       switch (*NextChar()) {
    643         // Allowed esape sequences:
    644         case 'x': {  // UTF-8 sequence.
    645           // UTF-8 \x escape sequences are not allowed in the spec, but they
    646           // are supported here for backwards-compatiblity with the old parser.
    647           if (!CanConsume(2)) {
    648             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
    649             return false;
    650           }
    651 
    652           int hex_digit = 0;
    653           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
    654             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    655             return false;
    656           }
    657           NextChar();
    658 
    659           if (hex_digit < kExtendedASCIIStart)
    660             string.Append(static_cast<char>(hex_digit));
    661           else
    662             DecodeUTF8(hex_digit, &string);
    663           break;
    664         }
    665         case 'u': {  // UTF-16 sequence.
    666           // UTF units are of the form \uXXXX.
    667           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
    668             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    669             return false;
    670           }
    671 
    672           // Skip the 'u'.
    673           NextChar();
    674 
    675           std::string utf8_units;
    676           if (!DecodeUTF16(&utf8_units)) {
    677             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    678             return false;
    679           }
    680 
    681           string.AppendString(utf8_units);
    682           break;
    683         }
    684         case '"':
    685           string.Append('"');
    686           break;
    687         case '\\':
    688           string.Append('\\');
    689           break;
    690         case '/':
    691           string.Append('/');
    692           break;
    693         case 'b':
    694           string.Append('\b');
    695           break;
    696         case 'f':
    697           string.Append('\f');
    698           break;
    699         case 'n':
    700           string.Append('\n');
    701           break;
    702         case 'r':
    703           string.Append('\r');
    704           break;
    705         case 't':
    706           string.Append('\t');
    707           break;
    708         case 'v':  // Not listed as valid escape sequence in the RFC.
    709           string.Append('\v');
    710           break;
    711         // All other escape squences are illegal.
    712         default:
    713           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    714           return false;
    715       }
    716     } else if (next_char == '"') {
    717       --index_;  // Rewind by one because of CBU8_NEXT.
    718       out->Swap(&string);
    719       return true;
    720     } else {
    721       if (next_char < kExtendedASCIIStart)
    722         string.Append(static_cast<char>(next_char));
    723       else
    724         DecodeUTF8(next_char, &string);
    725     }
    726   }
    727 
    728   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    729   return false;
    730 }
    731 
    732 // Entry is at the first X in \uXXXX.
    733 bool JSONParser::DecodeUTF16(std::string* dest_string) {
    734   if (!CanConsume(4))
    735     return false;
    736 
    737   // This is a 32-bit field because the shift operations in the
    738   // conversion process below cause MSVC to error about "data loss."
    739   // This only stores UTF-16 code units, though.
    740   // Consume the UTF-16 code unit, which may be a high surrogate.
    741   int code_unit16_high = 0;
    742   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
    743     return false;
    744 
    745   // Only add 3, not 4, because at the end of this iteration, the parser has
    746   // finished working with the last digit of the UTF sequence, meaning that
    747   // the next iteration will advance to the next byte.
    748   NextNChars(3);
    749 
    750   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
    751   // code unit sequence.
    752   char code_unit8[8] = { 0 };
    753   size_t offset = 0;
    754 
    755   // If this is a high surrogate, consume the next code unit to get the
    756   // low surrogate.
    757   if (CBU16_IS_SURROGATE(code_unit16_high)) {
    758     // Make sure this is the high surrogate. If not, it's an encoding
    759     // error.
    760     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
    761       return false;
    762 
    763     // Make sure that the token has more characters to consume the
    764     // lower surrogate.
    765     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
    766       return false;
    767     if (*NextChar() != '\\' || *NextChar() != 'u')
    768       return false;
    769 
    770     NextChar();  // Read past 'u'.
    771     int code_unit16_low = 0;
    772     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
    773       return false;
    774 
    775     NextNChars(3);
    776 
    777     if (!CBU16_IS_TRAIL(code_unit16_low)) {
    778       return false;
    779     }
    780 
    781     uint32_t code_point =
    782         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
    783     if (!IsValidCharacter(code_point))
    784       return false;
    785 
    786     offset = 0;
    787     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
    788   } else {
    789     // Not a surrogate.
    790     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
    791     if (!IsValidCharacter(code_unit16_high))
    792       return false;
    793 
    794     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
    795   }
    796 
    797   dest_string->append(code_unit8);
    798   return true;
    799 }
    800 
    801 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
    802   DCHECK(IsValidCharacter(point));
    803 
    804   // Anything outside of the basic ASCII plane will need to be decoded from
    805   // int32_t to a multi-byte sequence.
    806   if (point < kExtendedASCIIStart) {
    807     dest->Append(static_cast<char>(point));
    808   } else {
    809     char utf8_units[4] = { 0 };
    810     int offset = 0;
    811     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
    812     dest->Convert();
    813     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
    814     // zero terminated at this point.  |offset| contains the correct length.
    815     dest->AppendString(std::string(utf8_units, offset));
    816   }
    817 }
    818 
    819 Value* JSONParser::ConsumeNumber() {
    820   const char* num_start = pos_;
    821   const int start_index = index_;
    822   int end_index = start_index;
    823 
    824   if (*pos_ == '-')
    825     NextChar();
    826 
    827   if (!ReadInt(false)) {
    828     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    829     return NULL;
    830   }
    831   end_index = index_;
    832 
    833   // The optional fraction part.
    834   if (*pos_ == '.') {
    835     if (!CanConsume(1)) {
    836       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    837       return NULL;
    838     }
    839     NextChar();
    840     if (!ReadInt(true)) {
    841       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    842       return NULL;
    843     }
    844     end_index = index_;
    845   }
    846 
    847   // Optional exponent part.
    848   if (*pos_ == 'e' || *pos_ == 'E') {
    849     NextChar();
    850     if (*pos_ == '-' || *pos_ == '+')
    851       NextChar();
    852     if (!ReadInt(true)) {
    853       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    854       return NULL;
    855     }
    856     end_index = index_;
    857   }
    858 
    859   // ReadInt is greedy because numbers have no easily detectable sentinel,
    860   // so save off where the parser should be on exit (see Consume invariant at
    861   // the top of the header), then make sure the next token is one which is
    862   // valid.
    863   const char* exit_pos = pos_ - 1;
    864   int exit_index = index_ - 1;
    865 
    866   switch (GetNextToken()) {
    867     case T_OBJECT_END:
    868     case T_ARRAY_END:
    869     case T_LIST_SEPARATOR:
    870     case T_END_OF_INPUT:
    871       break;
    872     default:
    873       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    874       return NULL;
    875   }
    876 
    877   pos_ = exit_pos;
    878   index_ = exit_index;
    879 
    880   StringPiece num_string(num_start, end_index - start_index);
    881 
    882   int num_int;
    883   if (StringToInt(num_string, &num_int))
    884     return new FundamentalValue(num_int);
    885 
    886   double num_double;
    887   if (StringToDouble(num_string.as_string(), &num_double) &&
    888       std::isfinite(num_double)) {
    889     return new FundamentalValue(num_double);
    890   }
    891 
    892   return NULL;
    893 }
    894 
    895 bool JSONParser::ReadInt(bool allow_leading_zeros) {
    896   char first = *pos_;
    897   int len = 0;
    898 
    899   char c = first;
    900   while (CanConsume(1) && std::isdigit(c)) {
    901     c = *NextChar();
    902     ++len;
    903   }
    904 
    905   if (len == 0)
    906     return false;
    907 
    908   if (!allow_leading_zeros && len > 1 && first == '0')
    909     return false;
    910 
    911   return true;
    912 }
    913 
    914 Value* JSONParser::ConsumeLiteral() {
    915   switch (*pos_) {
    916     case 't': {
    917       const char kTrueLiteral[] = "true";
    918       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
    919       if (!CanConsume(kTrueLen - 1) ||
    920           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
    921         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    922         return NULL;
    923       }
    924       NextNChars(kTrueLen - 1);
    925       return new FundamentalValue(true);
    926     }
    927     case 'f': {
    928       const char kFalseLiteral[] = "false";
    929       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
    930       if (!CanConsume(kFalseLen - 1) ||
    931           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
    932         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    933         return NULL;
    934       }
    935       NextNChars(kFalseLen - 1);
    936       return new FundamentalValue(false);
    937     }
    938     case 'n': {
    939       const char kNullLiteral[] = "null";
    940       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
    941       if (!CanConsume(kNullLen - 1) ||
    942           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
    943         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    944         return NULL;
    945       }
    946       NextNChars(kNullLen - 1);
    947       return Value::CreateNullValue().release();
    948     }
    949     default:
    950       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    951       return NULL;
    952   }
    953 }
    954 
    955 // static
    956 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
    957   return strncmp(one, two, len) == 0;
    958 }
    959 
    960 void JSONParser::ReportError(JSONReader::JsonParseError code,
    961                              int column_adjust) {
    962   error_code_ = code;
    963   error_line_ = line_number_;
    964   error_column_ = index_ - index_last_line_ + column_adjust;
    965 }
    966 
    967 // static
    968 std::string JSONParser::FormatErrorMessage(int line, int column,
    969                                            const std::string& description) {
    970   if (line || column) {
    971     return StringPrintf("Line: %i, column: %i, %s",
    972         line, column, description.c_str());
    973   }
    974   return description;
    975 }
    976 
    977 }  // namespace internal
    978 }  // namespace base
    979