Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/json/json_parser.h"
      6 
      7 #include <cmath>
      8 
      9 #include "base/logging.h"
     10 #include "base/macros.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/strings/string_number_conversions.h"
     13 #include "base/strings/string_piece.h"
     14 #include "base/strings/string_util.h"
     15 #include "base/strings/stringprintf.h"
     16 #include "base/strings/utf_string_conversion_utils.h"
     17 #include "base/strings/utf_string_conversions.h"
     18 #include "base/third_party/icu/icu_utf.h"
     19 #include "base/values.h"
     20 
     21 namespace base {
     22 namespace internal {
     23 
     24 namespace {
     25 
// Nesting limit enforced through StackMarker: a StackMarker reports
// IsTooDeep() once the shared depth counter reaches this value.
const int kStackMaxDepth = 100;

// First code point that is not 7-bit ASCII. Code points below this value are
// appended to a StringBuilder as a single char; others go through DecodeUTF8.
const int32_t kExtendedASCIIStart = 0x80;
     29 
     30 // This and the class below are used to own the JSON input string for when
     31 // string tokens are stored as StringPiece instead of std::string. This
     32 // optimization avoids about 2/3rds of string memory copies. The constructor
     33 // takes ownership of the input string. The real root value is Swap()ed into
     34 // the new instance.
     35 class DictionaryHiddenRootValue : public DictionaryValue {
     36  public:
     37   DictionaryHiddenRootValue(std::string* json, Value* root) : json_(json) {
     38     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
     39     DictionaryValue::Swap(static_cast<DictionaryValue*>(root));
     40   }
     41 
     42   void Swap(DictionaryValue* other) override {
     43     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
     44 
     45     // First deep copy to convert JSONStringValue to std::string and swap that
     46     // copy with |other|, which contains the new contents of |this|.
     47     scoped_ptr<DictionaryValue> copy(DeepCopy());
     48     copy->Swap(other);
     49 
     50     // Then erase the contents of the current dictionary and swap in the
     51     // new contents, originally from |other|.
     52     Clear();
     53     json_.reset();
     54     DictionaryValue::Swap(copy.get());
     55   }
     56 
     57   // Not overriding DictionaryValue::Remove because it just calls through to
     58   // the method below.
     59 
     60   bool RemoveWithoutPathExpansion(const std::string& key,
     61                                   scoped_ptr<Value>* out) override {
     62     // If the caller won't take ownership of the removed value, just call up.
     63     if (!out)
     64       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
     65 
     66     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
     67 
     68     // Otherwise, remove the value while its still "owned" by this and copy it
     69     // to convert any JSONStringValues to std::string.
     70     scoped_ptr<Value> out_owned;
     71     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
     72       return false;
     73 
     74     out->reset(out_owned->DeepCopy());
     75 
     76     return true;
     77   }
     78 
     79  private:
     80   scoped_ptr<std::string> json_;
     81 
     82   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
     83 };
     84 
     85 class ListHiddenRootValue : public ListValue {
     86  public:
     87   ListHiddenRootValue(std::string* json, Value* root) : json_(json) {
     88     DCHECK(root->IsType(Value::TYPE_LIST));
     89     ListValue::Swap(static_cast<ListValue*>(root));
     90   }
     91 
     92   void Swap(ListValue* other) override {
     93     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
     94 
     95     // First deep copy to convert JSONStringValue to std::string and swap that
     96     // copy with |other|, which contains the new contents of |this|.
     97     scoped_ptr<ListValue> copy(DeepCopy());
     98     copy->Swap(other);
     99 
    100     // Then erase the contents of the current list and swap in the new contents,
    101     // originally from |other|.
    102     Clear();
    103     json_.reset();
    104     ListValue::Swap(copy.get());
    105   }
    106 
    107   bool Remove(size_t index, scoped_ptr<Value>* out) override {
    108     // If the caller won't take ownership of the removed value, just call up.
    109     if (!out)
    110       return ListValue::Remove(index, out);
    111 
    112     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
    113 
    114     // Otherwise, remove the value while its still "owned" by this and copy it
    115     // to convert any JSONStringValues to std::string.
    116     scoped_ptr<Value> out_owned;
    117     if (!ListValue::Remove(index, &out_owned))
    118       return false;
    119 
    120     out->reset(out_owned->DeepCopy());
    121 
    122     return true;
    123   }
    124 
    125  private:
    126   scoped_ptr<std::string> json_;
    127 
    128   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
    129 };
    130 
    131 // A variant on StringValue that uses StringPiece instead of copying the string
    132 // into the Value. This can only be stored in a child of hidden root (above),
    133 // otherwise the referenced string will not be guaranteed to outlive it.
    134 class JSONStringValue : public Value {
    135  public:
    136   explicit JSONStringValue(const StringPiece& piece)
    137       : Value(TYPE_STRING),
    138         string_piece_(piece) {
    139   }
    140 
    141   // Overridden from Value:
    142   bool GetAsString(std::string* out_value) const override {
    143     string_piece_.CopyToString(out_value);
    144     return true;
    145   }
    146   bool GetAsString(string16* out_value) const override {
    147     *out_value = UTF8ToUTF16(string_piece_);
    148     return true;
    149   }
    150   Value* DeepCopy() const override {
    151     return new StringValue(string_piece_.as_string());
    152   }
    153   bool Equals(const Value* other) const override {
    154     std::string other_string;
    155     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
    156         StringPiece(other_string) == string_piece_;
    157   }
    158 
    159  private:
    160   // The location in the original input stream.
    161   StringPiece string_piece_;
    162 
    163   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
    164 };
    165 
    166 // Simple class that checks for maximum recursion/"stack overflow."
    167 class StackMarker {
    168  public:
    169   explicit StackMarker(int* depth) : depth_(depth) {
    170     ++(*depth_);
    171     DCHECK_LE(*depth_, kStackMaxDepth);
    172   }
    173   ~StackMarker() {
    174     --(*depth_);
    175   }
    176 
    177   bool IsTooDeep() const {
    178     return *depth_ >= kStackMaxDepth;
    179   }
    180 
    181  private:
    182   int* const depth_;
    183 
    184   DISALLOW_COPY_AND_ASSIGN(StackMarker);
    185 };
    186 
    187 }  // namespace
    188 
    189 JSONParser::JSONParser(int options)
    190     : options_(options),
    191       start_pos_(NULL),
    192       pos_(NULL),
    193       end_pos_(NULL),
    194       index_(0),
    195       stack_depth_(0),
    196       line_number_(0),
    197       index_last_line_(0),
    198       error_code_(JSONReader::JSON_NO_ERROR),
    199       error_line_(0),
    200       error_column_(0) {
    201 }
    202 
// Nothing to release explicitly here.
JSONParser::~JSONParser() {
}
    205 
// Parses |input| and returns the root Value, or NULL on failure. The returned
// pointer is a released/new'ed object, so the caller is expected to take
// ownership. Resets the cursor, line tracking and error state on every call.
Value* JSONParser::Parse(const StringPiece& input) {
  scoped_ptr<std::string> input_copy;
  // If the children of a JSON root can be detached, then hidden roots cannot
  // be used, so do not bother copying the input because StringPiece will not
  // be used anywhere.
  if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    // Parse out of a private copy; a hidden root (below) takes ownership of
    // it so that StringPiece-backed string values stay valid.
    input_copy.reset(new std::string(input.as_string()));
    start_pos_ = input_copy->data();
  } else {
    start_pos_ = input.data();
  }
  pos_ = start_pos_;
  end_pos_ = start_pos_ + input.length();
  index_ = 0;
  line_number_ = 1;
  index_last_line_ = 0;

  error_code_ = JSONReader::JSON_NO_ERROR;
  error_line_ = 0;
  error_column_ = 0;

  // When the input JSON string starts with a UTF-8 Byte-Order-Mark
  // <0xEF 0xBB 0xBF>, advance the start position to avoid the
  // ParseNextToken function mis-treating a Unicode BOM as an invalid
  // character and returning NULL.
  if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
      static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
      static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
    NextNChars(3);
  }

  // Parse the first and any nested tokens.
  scoped_ptr<Value> root(ParseNextToken());
  if (!root.get())
    return NULL;

  // Make sure the input stream is at an end.
  // The first GetNextToken() looks at the current position; if that is not
  // end-of-input, step forward one character and look again. Anything other
  // than end-of-input at that point is trailing data.
  if (GetNextToken() != T_END_OF_INPUT) {
    if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
      ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
      return NULL;
    }
  }

  // Dictionaries and lists can contain JSONStringValues, so wrap them in a
  // hidden root.
  if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    if (root->IsType(Value::TYPE_DICTIONARY)) {
      // The hidden root swaps the contents out of |root|; the emptied |root|
      // object is destroyed by its scoped_ptr when this function returns.
      return new DictionaryHiddenRootValue(input_copy.release(), root.get());
    } else if (root->IsType(Value::TYPE_LIST)) {
      return new ListHiddenRootValue(input_copy.release(), root.get());
    } else if (root->IsType(Value::TYPE_STRING)) {
      // A string type could be a JSONStringValue, but because there's no
      // corresponding HiddenRootValue, the memory will be lost. Deep copy to
      // preserve it.
      return root->DeepCopy();
    }
  }

  // All other values can be returned directly.
  return root.release();
}
    268 
// Returns the error code stored by the last ReportError() call, or the value
// it was reset to at the start of Parse().
JSONReader::JsonParseError JSONParser::error_code() const {
  return error_code_;
}
    272 
    273 std::string JSONParser::GetErrorMessage() const {
    274   return FormatErrorMessage(error_line_, error_column_,
    275       JSONReader::ErrorCodeToString(error_code_));
    276 }
    277 
// Returns the line number stored by the last ReportError() call (0 after
// Parse() resets the error state).
int JSONParser::error_line() const {
  return error_line_;
}
    281 
// Returns the column stored by the last ReportError() call (0 after Parse()
// resets the error state).
int JSONParser::error_column() const {
  return error_column_;
}
    285 
    286 // StringBuilder ///////////////////////////////////////////////////////////////
    287 
    288 JSONParser::StringBuilder::StringBuilder()
    289     : pos_(NULL),
    290       length_(0),
    291       string_(NULL) {
    292 }
    293 
    294 JSONParser::StringBuilder::StringBuilder(const char* pos)
    295     : pos_(pos),
    296       length_(0),
    297       string_(NULL) {
    298 }
    299 
    300 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
    301   std::swap(other->string_, string_);
    302   std::swap(other->pos_, pos_);
    303   std::swap(other->length_, length_);
    304 }
    305 
// Frees the heap string created by Convert(), if any (deleting NULL is a
// no-op).
JSONParser::StringBuilder::~StringBuilder() {
  delete string_;
}
    309 
    310 void JSONParser::StringBuilder::Append(const char& c) {
    311   DCHECK_GE(c, 0);
    312   DCHECK_LT(c, 128);
    313 
    314   if (string_)
    315     string_->push_back(c);
    316   else
    317     ++length_;
    318 }
    319 
    320 void JSONParser::StringBuilder::AppendString(const std::string& str) {
    321   DCHECK(string_);
    322   string_->append(str);
    323 }
    324 
    325 void JSONParser::StringBuilder::Convert() {
    326   if (string_)
    327     return;
    328   string_  = new std::string(pos_, length_);
    329 }
    330 
    331 bool JSONParser::StringBuilder::CanBeStringPiece() const {
    332   return !string_;
    333 }
    334 
    335 StringPiece JSONParser::StringBuilder::AsStringPiece() {
    336   if (string_)
    337     return StringPiece();
    338   return StringPiece(pos_, length_);
    339 }
    340 
    341 const std::string& JSONParser::StringBuilder::AsString() {
    342   if (!string_)
    343     Convert();
    344   return *string_;
    345 }
    346 
    347 // JSONParser private //////////////////////////////////////////////////////////
    348 
    349 inline bool JSONParser::CanConsume(int length) {
    350   return pos_ + length <= end_pos_;
    351 }
    352 
    353 const char* JSONParser::NextChar() {
    354   DCHECK(CanConsume(1));
    355   ++index_;
    356   ++pos_;
    357   return pos_;
    358 }
    359 
    360 void JSONParser::NextNChars(int n) {
    361   DCHECK(CanConsume(n));
    362   index_ += n;
    363   pos_ += n;
    364 }
    365 
    366 JSONParser::Token JSONParser::GetNextToken() {
    367   EatWhitespaceAndComments();
    368   if (!CanConsume(1))
    369     return T_END_OF_INPUT;
    370 
    371   switch (*pos_) {
    372     case '{':
    373       return T_OBJECT_BEGIN;
    374     case '}':
    375       return T_OBJECT_END;
    376     case '[':
    377       return T_ARRAY_BEGIN;
    378     case ']':
    379       return T_ARRAY_END;
    380     case '"':
    381       return T_STRING;
    382     case '0':
    383     case '1':
    384     case '2':
    385     case '3':
    386     case '4':
    387     case '5':
    388     case '6':
    389     case '7':
    390     case '8':
    391     case '9':
    392     case '-':
    393       return T_NUMBER;
    394     case 't':
    395       return T_BOOL_TRUE;
    396     case 'f':
    397       return T_BOOL_FALSE;
    398     case 'n':
    399       return T_NULL;
    400     case ',':
    401       return T_LIST_SEPARATOR;
    402     case ':':
    403       return T_OBJECT_PAIR_SEPARATOR;
    404     default:
    405       return T_INVALID_TOKEN;
    406   }
    407 }
    408 
    409 void JSONParser::EatWhitespaceAndComments() {
    410   while (pos_ < end_pos_) {
    411     switch (*pos_) {
    412       case '\r':
    413       case '\n':
    414         index_last_line_ = index_;
    415         // Don't increment line_number_ twice for "\r\n".
    416         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
    417           ++line_number_;
    418         // Fall through.
    419       case ' ':
    420       case '\t':
    421         NextChar();
    422         break;
    423       case '/':
    424         if (!EatComment())
    425           return;
    426         break;
    427       default:
    428         return;
    429     }
    430   }
    431 }
    432 
    433 bool JSONParser::EatComment() {
    434   if (*pos_ != '/' || !CanConsume(1))
    435     return false;
    436 
    437   char next_char = *NextChar();
    438   if (next_char == '/') {
    439     // Single line comment, read to newline.
    440     while (CanConsume(1)) {
    441       next_char = *NextChar();
    442       if (next_char == '\n' || next_char == '\r')
    443         return true;
    444     }
    445   } else if (next_char == '*') {
    446     char previous_char = '\0';
    447     // Block comment, read until end marker.
    448     while (CanConsume(1)) {
    449       next_char = *NextChar();
    450       if (previous_char == '*' && next_char == '/') {
    451         // EatWhitespaceAndComments will inspect pos_, which will still be on
    452         // the last / of the comment, so advance once more (which may also be
    453         // end of input).
    454         NextChar();
    455         return true;
    456       }
    457       previous_char = next_char;
    458     }
    459 
    460     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
    461   }
    462 
    463   return false;
    464 }
    465 
    466 Value* JSONParser::ParseNextToken() {
    467   return ParseToken(GetNextToken());
    468 }
    469 
    470 Value* JSONParser::ParseToken(Token token) {
    471   switch (token) {
    472     case T_OBJECT_BEGIN:
    473       return ConsumeDictionary();
    474     case T_ARRAY_BEGIN:
    475       return ConsumeList();
    476     case T_STRING:
    477       return ConsumeString();
    478     case T_NUMBER:
    479       return ConsumeNumber();
    480     case T_BOOL_TRUE:
    481     case T_BOOL_FALSE:
    482     case T_NULL:
    483       return ConsumeLiteral();
    484     default:
    485       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    486       return NULL;
    487   }
    488 }
    489 
    490 Value* JSONParser::ConsumeDictionary() {
    491   if (*pos_ != '{') {
    492     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    493     return NULL;
    494   }
    495 
    496   StackMarker depth_check(&stack_depth_);
    497   if (depth_check.IsTooDeep()) {
    498     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    499     return NULL;
    500   }
    501 
    502   scoped_ptr<DictionaryValue> dict(new DictionaryValue);
    503 
    504   NextChar();
    505   Token token = GetNextToken();
    506   while (token != T_OBJECT_END) {
    507     if (token != T_STRING) {
    508       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
    509       return NULL;
    510     }
    511 
    512     // First consume the key.
    513     StringBuilder key;
    514     if (!ConsumeStringRaw(&key)) {
    515       return NULL;
    516     }
    517 
    518     // Read the separator.
    519     NextChar();
    520     token = GetNextToken();
    521     if (token != T_OBJECT_PAIR_SEPARATOR) {
    522       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    523       return NULL;
    524     }
    525 
    526     // The next token is the value. Ownership transfers to |dict|.
    527     NextChar();
    528     Value* value = ParseNextToken();
    529     if (!value) {
    530       // ReportError from deeper level.
    531       return NULL;
    532     }
    533 
    534     dict->SetWithoutPathExpansion(key.AsString(), value);
    535 
    536     NextChar();
    537     token = GetNextToken();
    538     if (token == T_LIST_SEPARATOR) {
    539       NextChar();
    540       token = GetNextToken();
    541       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    542         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    543         return NULL;
    544       }
    545     } else if (token != T_OBJECT_END) {
    546       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    547       return NULL;
    548     }
    549   }
    550 
    551   return dict.release();
    552 }
    553 
    554 Value* JSONParser::ConsumeList() {
    555   if (*pos_ != '[') {
    556     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    557     return NULL;
    558   }
    559 
    560   StackMarker depth_check(&stack_depth_);
    561   if (depth_check.IsTooDeep()) {
    562     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    563     return NULL;
    564   }
    565 
    566   scoped_ptr<ListValue> list(new ListValue);
    567 
    568   NextChar();
    569   Token token = GetNextToken();
    570   while (token != T_ARRAY_END) {
    571     Value* item = ParseToken(token);
    572     if (!item) {
    573       // ReportError from deeper level.
    574       return NULL;
    575     }
    576 
    577     list->Append(item);
    578 
    579     NextChar();
    580     token = GetNextToken();
    581     if (token == T_LIST_SEPARATOR) {
    582       NextChar();
    583       token = GetNextToken();
    584       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    585         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    586         return NULL;
    587       }
    588     } else if (token != T_ARRAY_END) {
    589       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    590       return NULL;
    591     }
    592   }
    593 
    594   return list.release();
    595 }
    596 
    597 Value* JSONParser::ConsumeString() {
    598   StringBuilder string;
    599   if (!ConsumeStringRaw(&string))
    600     return NULL;
    601 
    602   // Create the Value representation, using a hidden root, if configured
    603   // to do so, and if the string can be represented by StringPiece.
    604   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN)) {
    605     return new JSONStringValue(string.AsStringPiece());
    606   } else {
    607     if (string.CanBeStringPiece())
    608       string.Convert();
    609     return new StringValue(string.AsString());
    610   }
    611 }
    612 
// Consumes the string token whose opening '"' is at the current position and
// stores the decoded contents in |out|. Returns true on success; on failure an
// error is reported and |out| is left untouched.
//
// |out| keeps referencing the input (no copy) as long as the string contains
// no escape sequences and no non-ASCII characters; the first escape sequence
// or multi-byte character forces a conversion to an owned std::string.
//
// NOTE(review): inside the loop |index_| runs ahead of |pos_| because
// CBU8_NEXT advances only |index_|; |pos_| is re-derived from |index_| at the
// top of each iteration. Keep that in mind before reordering anything here.
bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
  if (*pos_ != '"') {
    ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    return false;
  }

  // StringBuilder will internally build a StringPiece unless a UTF-16
  // conversion occurs, at which point it will perform a copy into a
  // std::string.
  StringBuilder string(NextChar());

  // Total input length, used as the bound for CBU8_NEXT.
  int length = end_pos_ - start_pos_;
  int32_t next_char = 0;

  // NOTE(review): CanConsume(1) is evaluated against |pos_|, which at this
  // point may lag |index_|; whether CBU8_NEXT can therefore be entered with
  // |index_| == |length| should be confirmed.
  while (CanConsume(1)) {
    pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
    CBU8_NEXT(start_pos_, index_, length, next_char);
    if (next_char < 0 || !IsValidCharacter(next_char)) {
      ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
      return false;
    }

    // If this character is an escape sequence...
    if (next_char == '\\') {
      // The input string will be adjusted (either by combining the two
      // characters of an encoded escape sequence, or with a UTF conversion),
      // so using StringPiece isn't possible -- force a conversion.
      string.Convert();

      if (!CanConsume(1)) {
        ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
        return false;
      }

      // NextChar() moves onto the character after the backslash.
      switch (*NextChar()) {
        // Allowed escape sequences:
        case 'x': {  // UTF-8 sequence.
          // UTF-8 \x escape sequences are not allowed in the spec, but they
          // are supported here for backwards-compatibility with the old
          // parser.
          if (!CanConsume(2)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
            return false;
          }

          // NOTE(review): |pos_| is on the 'x' when CanConsume(2) is checked,
          // but the two hex digits are read starting one byte later; confirm
          // whether this needs CanConsume(3).
          int hex_digit = 0;
          if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
            return false;
          }
          // Move onto the second hex digit.
          NextChar();

          if (hex_digit < kExtendedASCIIStart)
            string.Append(static_cast<char>(hex_digit));
          else
            DecodeUTF8(hex_digit, &string);
          break;
        }
        case 'u': {  // UTF-16 sequence.
          // UTF units are of the form \uXXXX.
          if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
            ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
            return false;
          }

          // Skip the 'u'.
          NextChar();

          std::string utf8_units;
          if (!DecodeUTF16(&utf8_units)) {
            ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
            return false;
          }

          string.AppendString(utf8_units);
          break;
        }
        case '"':
          string.Append('"');
          break;
        case '\\':
          string.Append('\\');
          break;
        case '/':
          string.Append('/');
          break;
        case 'b':
          string.Append('\b');
          break;
        case 'f':
          string.Append('\f');
          break;
        case 'n':
          string.Append('\n');
          break;
        case 'r':
          string.Append('\r');
          break;
        case 't':
          string.Append('\t');
          break;
        case 'v':  // Not listed as valid escape sequence in the RFC.
          string.Append('\v');
          break;
        // All other escape sequences are illegal.
        default:
          ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
          return false;
      }
    } else if (next_char == '"') {
      // Closing quote: hand the finished string to the caller.
      --index_;  // Rewind by one because of CBU8_NEXT.
      out->Swap(&string);
      return true;
    } else {
      // Ordinary character: ASCII is appended as-is, anything else is
      // re-encoded through DecodeUTF8.
      if (next_char < kExtendedASCIIStart)
        string.Append(static_cast<char>(next_char));
      else
        DecodeUTF8(next_char, &string);
    }
  }

  // Ran out of input before the closing quote.
  ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
  return false;
}
    736 
    737 // Entry is at the first X in \uXXXX.
    738 bool JSONParser::DecodeUTF16(std::string* dest_string) {
    739   if (!CanConsume(4))
    740     return false;
    741 
    742   // This is a 32-bit field because the shift operations in the
    743   // conversion process below cause MSVC to error about "data loss."
    744   // This only stores UTF-16 code units, though.
    745   // Consume the UTF-16 code unit, which may be a high surrogate.
    746   int code_unit16_high = 0;
    747   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
    748     return false;
    749 
    750   // Only add 3, not 4, because at the end of this iteration, the parser has
    751   // finished working with the last digit of the UTF sequence, meaning that
    752   // the next iteration will advance to the next byte.
    753   NextNChars(3);
    754 
    755   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
    756   // code unit sequence.
    757   char code_unit8[8] = { 0 };
    758   size_t offset = 0;
    759 
    760   // If this is a high surrogate, consume the next code unit to get the
    761   // low surrogate.
    762   if (CBU16_IS_SURROGATE(code_unit16_high)) {
    763     // Make sure this is the high surrogate. If not, it's an encoding
    764     // error.
    765     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
    766       return false;
    767 
    768     // Make sure that the token has more characters to consume the
    769     // lower surrogate.
    770     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
    771       return false;
    772     if (*NextChar() != '\\' || *NextChar() != 'u')
    773       return false;
    774 
    775     NextChar();  // Read past 'u'.
    776     int code_unit16_low = 0;
    777     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
    778       return false;
    779 
    780     NextNChars(3);
    781 
    782     if (!CBU16_IS_TRAIL(code_unit16_low)) {
    783       return false;
    784     }
    785 
    786     uint32_t code_point =
    787         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
    788     if (!IsValidCharacter(code_point))
    789       return false;
    790 
    791     offset = 0;
    792     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
    793   } else {
    794     // Not a surrogate.
    795     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
    796     if (!IsValidCharacter(code_unit16_high))
    797       return false;
    798 
    799     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
    800   }
    801 
    802   dest_string->append(code_unit8);
    803   return true;
    804 }
    805 
    806 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
    807   DCHECK(IsValidCharacter(point));
    808 
    809   // Anything outside of the basic ASCII plane will need to be decoded from
    810   // int32_t to a multi-byte sequence.
    811   if (point < kExtendedASCIIStart) {
    812     dest->Append(static_cast<char>(point));
    813   } else {
    814     char utf8_units[4] = { 0 };
    815     int offset = 0;
    816     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
    817     dest->Convert();
    818     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
    819     // zero terminated at this point.  |offset| contains the correct length.
    820     dest->AppendString(std::string(utf8_units, offset));
    821   }
    822 }
    823 
    824 Value* JSONParser::ConsumeNumber() {
    825   const char* num_start = pos_;
    826   const int start_index = index_;
    827   int end_index = start_index;
    828 
    829   if (*pos_ == '-')
    830     NextChar();
    831 
    832   if (!ReadInt(false)) {
    833     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    834     return NULL;
    835   }
    836   end_index = index_;
    837 
    838   // The optional fraction part.
    839   if (*pos_ == '.') {
    840     if (!CanConsume(1)) {
    841       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    842       return NULL;
    843     }
    844     NextChar();
    845     if (!ReadInt(true)) {
    846       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    847       return NULL;
    848     }
    849     end_index = index_;
    850   }
    851 
    852   // Optional exponent part.
    853   if (*pos_ == 'e' || *pos_ == 'E') {
    854     NextChar();
    855     if (*pos_ == '-' || *pos_ == '+')
    856       NextChar();
    857     if (!ReadInt(true)) {
    858       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    859       return NULL;
    860     }
    861     end_index = index_;
    862   }
    863 
    864   // ReadInt is greedy because numbers have no easily detectable sentinel,
    865   // so save off where the parser should be on exit (see Consume invariant at
    866   // the top of the header), then make sure the next token is one which is
    867   // valid.
    868   const char* exit_pos = pos_ - 1;
    869   int exit_index = index_ - 1;
    870 
    871   switch (GetNextToken()) {
    872     case T_OBJECT_END:
    873     case T_ARRAY_END:
    874     case T_LIST_SEPARATOR:
    875     case T_END_OF_INPUT:
    876       break;
    877     default:
    878       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    879       return NULL;
    880   }
    881 
    882   pos_ = exit_pos;
    883   index_ = exit_index;
    884 
    885   StringPiece num_string(num_start, end_index - start_index);
    886 
    887   int num_int;
    888   if (StringToInt(num_string, &num_int))
    889     return new FundamentalValue(num_int);
    890 
    891   double num_double;
    892   if (StringToDouble(num_string.as_string(), &num_double) &&
    893       std::isfinite(num_double)) {
    894     return new FundamentalValue(num_double);
    895   }
    896 
    897   return NULL;
    898 }
    899 
    900 bool JSONParser::ReadInt(bool allow_leading_zeros) {
    901   char first = *pos_;
    902   int len = 0;
    903 
    904   char c = first;
    905   while (CanConsume(1) && IsAsciiDigit(c)) {
    906     c = *NextChar();
    907     ++len;
    908   }
    909 
    910   if (len == 0)
    911     return false;
    912 
    913   if (!allow_leading_zeros && len > 1 && first == '0')
    914     return false;
    915 
    916   return true;
    917 }
    918 
    919 Value* JSONParser::ConsumeLiteral() {
    920   switch (*pos_) {
    921     case 't': {
    922       const char kTrueLiteral[] = "true";
    923       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
    924       if (!CanConsume(kTrueLen - 1) ||
    925           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
    926         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    927         return NULL;
    928       }
    929       NextNChars(kTrueLen - 1);
    930       return new FundamentalValue(true);
    931     }
    932     case 'f': {
    933       const char kFalseLiteral[] = "false";
    934       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
    935       if (!CanConsume(kFalseLen - 1) ||
    936           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
    937         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    938         return NULL;
    939       }
    940       NextNChars(kFalseLen - 1);
    941       return new FundamentalValue(false);
    942     }
    943     case 'n': {
    944       const char kNullLiteral[] = "null";
    945       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
    946       if (!CanConsume(kNullLen - 1) ||
    947           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
    948         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    949         return NULL;
    950       }
    951       NextNChars(kNullLen - 1);
    952       return Value::CreateNullValue().release();
    953     }
    954     default:
    955       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    956       return NULL;
    957   }
    958 }
    959 
    960 // static
    961 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
    962   return strncmp(one, two, len) == 0;
    963 }
    964 
    965 void JSONParser::ReportError(JSONReader::JsonParseError code,
    966                              int column_adjust) {
    967   error_code_ = code;
    968   error_line_ = line_number_;
    969   error_column_ = index_ - index_last_line_ + column_adjust;
    970 }
    971 
    972 // static
    973 std::string JSONParser::FormatErrorMessage(int line, int column,
    974                                            const std::string& description) {
    975   if (line || column) {
    976     return StringPrintf("Line: %i, column: %i, %s",
    977         line, column, description.c_str());
    978   }
    979   return description;
    980 }
    981 
    982 }  // namespace internal
    983 }  // namespace base
    984