Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/json/json_parser.h"
      6 
      7 #include <cmath>
      8 #include <utility>
      9 
     10 #include "base/logging.h"
     11 #include "base/macros.h"
     12 #include "base/memory/ptr_util.h"
     13 #include "base/strings/string_number_conversions.h"
     14 #include "base/strings/string_piece.h"
     15 #include "base/strings/string_util.h"
     16 #include "base/strings/stringprintf.h"
     17 #include "base/strings/utf_string_conversion_utils.h"
     18 #include "base/strings/utf_string_conversions.h"
     19 #include "base/third_party/icu/icu_utf.h"
     20 #include "base/values.h"
     21 
     22 namespace base {
     23 namespace internal {
     24 
     25 namespace {
     26 
     27 const int kStackMaxDepth = 100;
     28 
     29 const int32_t kExtendedASCIIStart = 0x80;
     30 
     31 // DictionaryHiddenRootValue and ListHiddenRootValue are used in conjunction
     32 // with JSONStringValue as an optimization for reducing the number of string
     33 // copies. When this optimization is active, the parser uses a hidden root to
     34 // keep the original JSON input string live and creates JSONStringValue children
     35 // holding StringPiece references to the input string, avoiding about 2/3rds of
     36 // string memory copies. The real root value is Swap()ed into the new instance.
     37 class DictionaryHiddenRootValue : public DictionaryValue {
     38  public:
     39   DictionaryHiddenRootValue(std::unique_ptr<std::string> json,
     40                             std::unique_ptr<Value> root)
     41       : json_(std::move(json)) {
     42     DCHECK(root->IsType(Value::TYPE_DICTIONARY));
     43     DictionaryValue::Swap(static_cast<DictionaryValue*>(root.get()));
     44   }
     45 
     46   void Swap(DictionaryValue* other) override {
     47     DVLOG(1) << "Swap()ing a DictionaryValue inefficiently.";
     48 
     49     // First deep copy to convert JSONStringValue to std::string and swap that
     50     // copy with |other|, which contains the new contents of |this|.
     51     std::unique_ptr<DictionaryValue> copy(CreateDeepCopy());
     52     copy->Swap(other);
     53 
     54     // Then erase the contents of the current dictionary and swap in the
     55     // new contents, originally from |other|.
     56     Clear();
     57     json_.reset();
     58     DictionaryValue::Swap(copy.get());
     59   }
     60 
     61   // Not overriding DictionaryValue::Remove because it just calls through to
     62   // the method below.
     63 
     64   bool RemoveWithoutPathExpansion(const std::string& key,
     65                                   std::unique_ptr<Value>* out) override {
     66     // If the caller won't take ownership of the removed value, just call up.
     67     if (!out)
     68       return DictionaryValue::RemoveWithoutPathExpansion(key, out);
     69 
     70     DVLOG(1) << "Remove()ing from a DictionaryValue inefficiently.";
     71 
     72     // Otherwise, remove the value while its still "owned" by this and copy it
     73     // to convert any JSONStringValues to std::string.
     74     std::unique_ptr<Value> out_owned;
     75     if (!DictionaryValue::RemoveWithoutPathExpansion(key, &out_owned))
     76       return false;
     77 
     78     *out = out_owned->CreateDeepCopy();
     79 
     80     return true;
     81   }
     82 
     83  private:
     84   std::unique_ptr<std::string> json_;
     85 
     86   DISALLOW_COPY_AND_ASSIGN(DictionaryHiddenRootValue);
     87 };
     88 
     89 class ListHiddenRootValue : public ListValue {
     90  public:
     91   ListHiddenRootValue(std::unique_ptr<std::string> json,
     92                       std::unique_ptr<Value> root)
     93       : json_(std::move(json)) {
     94     DCHECK(root->IsType(Value::TYPE_LIST));
     95     ListValue::Swap(static_cast<ListValue*>(root.get()));
     96   }
     97 
     98   void Swap(ListValue* other) override {
     99     DVLOG(1) << "Swap()ing a ListValue inefficiently.";
    100 
    101     // First deep copy to convert JSONStringValue to std::string and swap that
    102     // copy with |other|, which contains the new contents of |this|.
    103     std::unique_ptr<ListValue> copy(CreateDeepCopy());
    104     copy->Swap(other);
    105 
    106     // Then erase the contents of the current list and swap in the new contents,
    107     // originally from |other|.
    108     Clear();
    109     json_.reset();
    110     ListValue::Swap(copy.get());
    111   }
    112 
    113   bool Remove(size_t index, std::unique_ptr<Value>* out) override {
    114     // If the caller won't take ownership of the removed value, just call up.
    115     if (!out)
    116       return ListValue::Remove(index, out);
    117 
    118     DVLOG(1) << "Remove()ing from a ListValue inefficiently.";
    119 
    120     // Otherwise, remove the value while its still "owned" by this and copy it
    121     // to convert any JSONStringValues to std::string.
    122     std::unique_ptr<Value> out_owned;
    123     if (!ListValue::Remove(index, &out_owned))
    124       return false;
    125 
    126     *out = out_owned->CreateDeepCopy();
    127 
    128     return true;
    129   }
    130 
    131  private:
    132   std::unique_ptr<std::string> json_;
    133 
    134   DISALLOW_COPY_AND_ASSIGN(ListHiddenRootValue);
    135 };
    136 
    137 // A variant on StringValue that uses StringPiece instead of copying the string
    138 // into the Value. This can only be stored in a child of hidden root (above),
    139 // otherwise the referenced string will not be guaranteed to outlive it.
    140 class JSONStringValue : public Value {
    141  public:
    142   explicit JSONStringValue(StringPiece piece)
    143       : Value(TYPE_STRING), string_piece_(piece) {}
    144 
    145   // Overridden from Value:
    146   bool GetAsString(std::string* out_value) const override {
    147     string_piece_.CopyToString(out_value);
    148     return true;
    149   }
    150   bool GetAsString(string16* out_value) const override {
    151     *out_value = UTF8ToUTF16(string_piece_);
    152     return true;
    153   }
    154   Value* DeepCopy() const override {
    155     return new StringValue(string_piece_.as_string());
    156   }
    157   bool Equals(const Value* other) const override {
    158     std::string other_string;
    159     return other->IsType(TYPE_STRING) && other->GetAsString(&other_string) &&
    160         StringPiece(other_string) == string_piece_;
    161   }
    162 
    163  private:
    164   // The location in the original input stream.
    165   StringPiece string_piece_;
    166 
    167   DISALLOW_COPY_AND_ASSIGN(JSONStringValue);
    168 };
    169 
    170 // Simple class that checks for maximum recursion/"stack overflow."
    171 class StackMarker {
    172  public:
    173   explicit StackMarker(int* depth) : depth_(depth) {
    174     ++(*depth_);
    175     DCHECK_LE(*depth_, kStackMaxDepth);
    176   }
    177   ~StackMarker() {
    178     --(*depth_);
    179   }
    180 
    181   bool IsTooDeep() const {
    182     return *depth_ >= kStackMaxDepth;
    183   }
    184 
    185  private:
    186   int* const depth_;
    187 
    188   DISALLOW_COPY_AND_ASSIGN(StackMarker);
    189 };
    190 
    191 }  // namespace
    192 
    193 JSONParser::JSONParser(int options)
    194     : options_(options),
    195       start_pos_(nullptr),
    196       pos_(nullptr),
    197       end_pos_(nullptr),
    198       index_(0),
    199       stack_depth_(0),
    200       line_number_(0),
    201       index_last_line_(0),
    202       error_code_(JSONReader::JSON_NO_ERROR),
    203       error_line_(0),
    204       error_column_(0) {
    205 }
    206 
    207 JSONParser::~JSONParser() {
    208 }
    209 
    210 std::unique_ptr<Value> JSONParser::Parse(StringPiece input) {
    211   std::unique_ptr<std::string> input_copy;
    212   // If the children of a JSON root can be detached, then hidden roots cannot
    213   // be used, so do not bother copying the input because StringPiece will not
    214   // be used anywhere.
    215   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    216     input_copy = MakeUnique<std::string>(input.as_string());
    217     start_pos_ = input_copy->data();
    218   } else {
    219     start_pos_ = input.data();
    220   }
    221   pos_ = start_pos_;
    222   end_pos_ = start_pos_ + input.length();
    223   index_ = 0;
    224   line_number_ = 1;
    225   index_last_line_ = 0;
    226 
    227   error_code_ = JSONReader::JSON_NO_ERROR;
    228   error_line_ = 0;
    229   error_column_ = 0;
    230 
    231   // When the input JSON string starts with a UTF-8 Byte-Order-Mark
    232   // <0xEF 0xBB 0xBF>, advance the start position to avoid the
    233   // ParseNextToken function mis-treating a Unicode BOM as an invalid
    234   // character and returning NULL.
    235   if (CanConsume(3) && static_cast<uint8_t>(*pos_) == 0xEF &&
    236       static_cast<uint8_t>(*(pos_ + 1)) == 0xBB &&
    237       static_cast<uint8_t>(*(pos_ + 2)) == 0xBF) {
    238     NextNChars(3);
    239   }
    240 
    241   // Parse the first and any nested tokens.
    242   std::unique_ptr<Value> root(ParseNextToken());
    243   if (!root)
    244     return nullptr;
    245 
    246   // Make sure the input stream is at an end.
    247   if (GetNextToken() != T_END_OF_INPUT) {
    248     if (!CanConsume(1) || (NextChar() && GetNextToken() != T_END_OF_INPUT)) {
    249       ReportError(JSONReader::JSON_UNEXPECTED_DATA_AFTER_ROOT, 1);
    250       return nullptr;
    251     }
    252   }
    253 
    254   // Dictionaries and lists can contain JSONStringValues, so wrap them in a
    255   // hidden root.
    256   if (!(options_ & JSON_DETACHABLE_CHILDREN)) {
    257     if (root->IsType(Value::TYPE_DICTIONARY)) {
    258       return MakeUnique<DictionaryHiddenRootValue>(std::move(input_copy),
    259                                                    std::move(root));
    260     }
    261     if (root->IsType(Value::TYPE_LIST)) {
    262       return MakeUnique<ListHiddenRootValue>(std::move(input_copy),
    263                                              std::move(root));
    264     }
    265     if (root->IsType(Value::TYPE_STRING)) {
    266       // A string type could be a JSONStringValue, but because there's no
    267       // corresponding HiddenRootValue, the memory will be lost. Deep copy to
    268       // preserve it.
    269       return root->CreateDeepCopy();
    270     }
    271   }
    272 
    273   // All other values can be returned directly.
    274   return root;
    275 }
    276 
    277 JSONReader::JsonParseError JSONParser::error_code() const {
    278   return error_code_;
    279 }
    280 
    281 std::string JSONParser::GetErrorMessage() const {
    282   return FormatErrorMessage(error_line_, error_column_,
    283       JSONReader::ErrorCodeToString(error_code_));
    284 }
    285 
    286 int JSONParser::error_line() const {
    287   return error_line_;
    288 }
    289 
    290 int JSONParser::error_column() const {
    291   return error_column_;
    292 }
    293 
    294 // StringBuilder ///////////////////////////////////////////////////////////////
    295 
    296 JSONParser::StringBuilder::StringBuilder() : StringBuilder(nullptr) {}
    297 
    298 JSONParser::StringBuilder::StringBuilder(const char* pos)
    299     : pos_(pos),
    300       length_(0),
    301       string_(nullptr) {
    302 }
    303 
    304 void JSONParser::StringBuilder::Swap(StringBuilder* other) {
    305   std::swap(other->string_, string_);
    306   std::swap(other->pos_, pos_);
    307   std::swap(other->length_, length_);
    308 }
    309 
    310 JSONParser::StringBuilder::~StringBuilder() {
    311   delete string_;
    312 }
    313 
    314 void JSONParser::StringBuilder::Append(const char& c) {
    315   DCHECK_GE(c, 0);
    316   DCHECK_LT(static_cast<unsigned char>(c), 128);
    317 
    318   if (string_)
    319     string_->push_back(c);
    320   else
    321     ++length_;
    322 }
    323 
    324 void JSONParser::StringBuilder::AppendString(const std::string& str) {
    325   DCHECK(string_);
    326   string_->append(str);
    327 }
    328 
    329 void JSONParser::StringBuilder::Convert() {
    330   if (string_)
    331     return;
    332   string_  = new std::string(pos_, length_);
    333 }
    334 
    335 bool JSONParser::StringBuilder::CanBeStringPiece() const {
    336   return !string_;
    337 }
    338 
    339 StringPiece JSONParser::StringBuilder::AsStringPiece() {
    340   if (string_)
    341     return StringPiece();
    342   return StringPiece(pos_, length_);
    343 }
    344 
    345 const std::string& JSONParser::StringBuilder::AsString() {
    346   if (!string_)
    347     Convert();
    348   return *string_;
    349 }
    350 
    351 // JSONParser private //////////////////////////////////////////////////////////
    352 
    353 inline bool JSONParser::CanConsume(int length) {
    354   return pos_ + length <= end_pos_;
    355 }
    356 
    357 const char* JSONParser::NextChar() {
    358   DCHECK(CanConsume(1));
    359   ++index_;
    360   ++pos_;
    361   return pos_;
    362 }
    363 
    364 void JSONParser::NextNChars(int n) {
    365   DCHECK(CanConsume(n));
    366   index_ += n;
    367   pos_ += n;
    368 }
    369 
    370 JSONParser::Token JSONParser::GetNextToken() {
    371   EatWhitespaceAndComments();
    372   if (!CanConsume(1))
    373     return T_END_OF_INPUT;
    374 
    375   switch (*pos_) {
    376     case '{':
    377       return T_OBJECT_BEGIN;
    378     case '}':
    379       return T_OBJECT_END;
    380     case '[':
    381       return T_ARRAY_BEGIN;
    382     case ']':
    383       return T_ARRAY_END;
    384     case '"':
    385       return T_STRING;
    386     case '0':
    387     case '1':
    388     case '2':
    389     case '3':
    390     case '4':
    391     case '5':
    392     case '6':
    393     case '7':
    394     case '8':
    395     case '9':
    396     case '-':
    397       return T_NUMBER;
    398     case 't':
    399       return T_BOOL_TRUE;
    400     case 'f':
    401       return T_BOOL_FALSE;
    402     case 'n':
    403       return T_NULL;
    404     case ',':
    405       return T_LIST_SEPARATOR;
    406     case ':':
    407       return T_OBJECT_PAIR_SEPARATOR;
    408     default:
    409       return T_INVALID_TOKEN;
    410   }
    411 }
    412 
    413 void JSONParser::EatWhitespaceAndComments() {
    414   while (pos_ < end_pos_) {
    415     switch (*pos_) {
    416       case '\r':
    417       case '\n':
    418         index_last_line_ = index_;
    419         // Don't increment line_number_ twice for "\r\n".
    420         if (!(*pos_ == '\n' && pos_ > start_pos_ && *(pos_ - 1) == '\r'))
    421           ++line_number_;
    422         // Fall through.
    423       case ' ':
    424       case '\t':
    425         NextChar();
    426         break;
    427       case '/':
    428         if (!EatComment())
    429           return;
    430         break;
    431       default:
    432         return;
    433     }
    434   }
    435 }
    436 
    437 bool JSONParser::EatComment() {
    438   if (*pos_ != '/' || !CanConsume(1))
    439     return false;
    440 
    441   char next_char = *NextChar();
    442   if (next_char == '/') {
    443     // Single line comment, read to newline.
    444     while (CanConsume(1)) {
    445       next_char = *NextChar();
    446       if (next_char == '\n' || next_char == '\r')
    447         return true;
    448     }
    449   } else if (next_char == '*') {
    450     char previous_char = '\0';
    451     // Block comment, read until end marker.
    452     while (CanConsume(1)) {
    453       next_char = *NextChar();
    454       if (previous_char == '*' && next_char == '/') {
    455         // EatWhitespaceAndComments will inspect pos_, which will still be on
    456         // the last / of the comment, so advance once more (which may also be
    457         // end of input).
    458         NextChar();
    459         return true;
    460       }
    461       previous_char = next_char;
    462     }
    463 
    464     // If the comment is unterminated, GetNextToken will report T_END_OF_INPUT.
    465   }
    466 
    467   return false;
    468 }
    469 
    470 Value* JSONParser::ParseNextToken() {
    471   return ParseToken(GetNextToken());
    472 }
    473 
    474 Value* JSONParser::ParseToken(Token token) {
    475   switch (token) {
    476     case T_OBJECT_BEGIN:
    477       return ConsumeDictionary();
    478     case T_ARRAY_BEGIN:
    479       return ConsumeList();
    480     case T_STRING:
    481       return ConsumeString();
    482     case T_NUMBER:
    483       return ConsumeNumber();
    484     case T_BOOL_TRUE:
    485     case T_BOOL_FALSE:
    486     case T_NULL:
    487       return ConsumeLiteral();
    488     default:
    489       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    490       return nullptr;
    491   }
    492 }
    493 
    494 Value* JSONParser::ConsumeDictionary() {
    495   if (*pos_ != '{') {
    496     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    497     return nullptr;
    498   }
    499 
    500   StackMarker depth_check(&stack_depth_);
    501   if (depth_check.IsTooDeep()) {
    502     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    503     return nullptr;
    504   }
    505 
    506   std::unique_ptr<DictionaryValue> dict(new DictionaryValue);
    507 
    508   NextChar();
    509   Token token = GetNextToken();
    510   while (token != T_OBJECT_END) {
    511     if (token != T_STRING) {
    512       ReportError(JSONReader::JSON_UNQUOTED_DICTIONARY_KEY, 1);
    513       return nullptr;
    514     }
    515 
    516     // First consume the key.
    517     StringBuilder key;
    518     if (!ConsumeStringRaw(&key)) {
    519       return nullptr;
    520     }
    521 
    522     // Read the separator.
    523     NextChar();
    524     token = GetNextToken();
    525     if (token != T_OBJECT_PAIR_SEPARATOR) {
    526       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    527       return nullptr;
    528     }
    529 
    530     // The next token is the value. Ownership transfers to |dict|.
    531     NextChar();
    532     Value* value = ParseNextToken();
    533     if (!value) {
    534       // ReportError from deeper level.
    535       return nullptr;
    536     }
    537 
    538     dict->SetWithoutPathExpansion(key.AsString(), value);
    539 
    540     NextChar();
    541     token = GetNextToken();
    542     if (token == T_LIST_SEPARATOR) {
    543       NextChar();
    544       token = GetNextToken();
    545       if (token == T_OBJECT_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    546         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    547         return nullptr;
    548       }
    549     } else if (token != T_OBJECT_END) {
    550       ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    551       return nullptr;
    552     }
    553   }
    554 
    555   return dict.release();
    556 }
    557 
    558 Value* JSONParser::ConsumeList() {
    559   if (*pos_ != '[') {
    560     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    561     return nullptr;
    562   }
    563 
    564   StackMarker depth_check(&stack_depth_);
    565   if (depth_check.IsTooDeep()) {
    566     ReportError(JSONReader::JSON_TOO_MUCH_NESTING, 1);
    567     return nullptr;
    568   }
    569 
    570   std::unique_ptr<ListValue> list(new ListValue);
    571 
    572   NextChar();
    573   Token token = GetNextToken();
    574   while (token != T_ARRAY_END) {
    575     Value* item = ParseToken(token);
    576     if (!item) {
    577       // ReportError from deeper level.
    578       return nullptr;
    579     }
    580 
    581     list->Append(item);
    582 
    583     NextChar();
    584     token = GetNextToken();
    585     if (token == T_LIST_SEPARATOR) {
    586       NextChar();
    587       token = GetNextToken();
    588       if (token == T_ARRAY_END && !(options_ & JSON_ALLOW_TRAILING_COMMAS)) {
    589         ReportError(JSONReader::JSON_TRAILING_COMMA, 1);
    590         return nullptr;
    591       }
    592     } else if (token != T_ARRAY_END) {
    593       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    594       return nullptr;
    595     }
    596   }
    597 
    598   return list.release();
    599 }
    600 
    601 Value* JSONParser::ConsumeString() {
    602   StringBuilder string;
    603   if (!ConsumeStringRaw(&string))
    604     return nullptr;
    605 
    606   // Create the Value representation, using a hidden root, if configured
    607   // to do so, and if the string can be represented by StringPiece.
    608   if (string.CanBeStringPiece() && !(options_ & JSON_DETACHABLE_CHILDREN))
    609     return new JSONStringValue(string.AsStringPiece());
    610 
    611   if (string.CanBeStringPiece())
    612     string.Convert();
    613   return new StringValue(string.AsString());
    614 }
    615 
    616 bool JSONParser::ConsumeStringRaw(StringBuilder* out) {
    617   if (*pos_ != '"') {
    618     ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    619     return false;
    620   }
    621 
    622   // StringBuilder will internally build a StringPiece unless a UTF-16
    623   // conversion occurs, at which point it will perform a copy into a
    624   // std::string.
    625   StringBuilder string(NextChar());
    626 
    627   int length = end_pos_ - start_pos_;
    628   int32_t next_char = 0;
    629 
    630   while (CanConsume(1)) {
    631     pos_ = start_pos_ + index_;  // CBU8_NEXT is postcrement.
    632     CBU8_NEXT(start_pos_, index_, length, next_char);
    633     if (next_char < 0 || !IsValidCharacter(next_char)) {
    634       ReportError(JSONReader::JSON_UNSUPPORTED_ENCODING, 1);
    635       return false;
    636     }
    637 
    638     if (next_char == '"') {
    639       --index_;  // Rewind by one because of CBU8_NEXT.
    640       out->Swap(&string);
    641       return true;
    642     }
    643 
    644     // If this character is not an escape sequence...
    645     if (next_char != '\\') {
    646       if (next_char < kExtendedASCIIStart)
    647         string.Append(static_cast<char>(next_char));
    648       else
    649         DecodeUTF8(next_char, &string);
    650     } else {
    651       // And if it is an escape sequence, the input string will be adjusted
    652       // (either by combining the two characters of an encoded escape sequence,
    653       // or with a UTF conversion), so using StringPiece isn't possible -- force
    654       // a conversion.
    655       string.Convert();
    656 
    657       if (!CanConsume(1)) {
    658         ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    659         return false;
    660       }
    661 
    662       switch (*NextChar()) {
    663         // Allowed esape sequences:
    664         case 'x': {  // UTF-8 sequence.
    665           // UTF-8 \x escape sequences are not allowed in the spec, but they
    666           // are supported here for backwards-compatiblity with the old parser.
    667           if (!CanConsume(2)) {
    668             ReportError(JSONReader::JSON_INVALID_ESCAPE, 1);
    669             return false;
    670           }
    671 
    672           int hex_digit = 0;
    673           if (!HexStringToInt(StringPiece(NextChar(), 2), &hex_digit)) {
    674             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    675             return false;
    676           }
    677           NextChar();
    678 
    679           if (hex_digit < kExtendedASCIIStart)
    680             string.Append(static_cast<char>(hex_digit));
    681           else
    682             DecodeUTF8(hex_digit, &string);
    683           break;
    684         }
    685         case 'u': {  // UTF-16 sequence.
    686           // UTF units are of the form \uXXXX.
    687           if (!CanConsume(5)) {  // 5 being 'u' and four HEX digits.
    688             ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    689             return false;
    690           }
    691 
    692           // Skip the 'u'.
    693           NextChar();
    694 
    695           std::string utf8_units;
    696           if (!DecodeUTF16(&utf8_units)) {
    697             ReportError(JSONReader::JSON_INVALID_ESCAPE, -1);
    698             return false;
    699           }
    700 
    701           string.AppendString(utf8_units);
    702           break;
    703         }
    704         case '"':
    705           string.Append('"');
    706           break;
    707         case '\\':
    708           string.Append('\\');
    709           break;
    710         case '/':
    711           string.Append('/');
    712           break;
    713         case 'b':
    714           string.Append('\b');
    715           break;
    716         case 'f':
    717           string.Append('\f');
    718           break;
    719         case 'n':
    720           string.Append('\n');
    721           break;
    722         case 'r':
    723           string.Append('\r');
    724           break;
    725         case 't':
    726           string.Append('\t');
    727           break;
    728         case 'v':  // Not listed as valid escape sequence in the RFC.
    729           string.Append('\v');
    730           break;
    731         // All other escape squences are illegal.
    732         default:
    733           ReportError(JSONReader::JSON_INVALID_ESCAPE, 0);
    734           return false;
    735       }
    736     }
    737   }
    738 
    739   ReportError(JSONReader::JSON_SYNTAX_ERROR, 0);
    740   return false;
    741 }
    742 
    743 // Entry is at the first X in \uXXXX.
    744 bool JSONParser::DecodeUTF16(std::string* dest_string) {
    745   if (!CanConsume(4))
    746     return false;
    747 
    748   // This is a 32-bit field because the shift operations in the
    749   // conversion process below cause MSVC to error about "data loss."
    750   // This only stores UTF-16 code units, though.
    751   // Consume the UTF-16 code unit, which may be a high surrogate.
    752   int code_unit16_high = 0;
    753   if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_high))
    754     return false;
    755 
    756   // Only add 3, not 4, because at the end of this iteration, the parser has
    757   // finished working with the last digit of the UTF sequence, meaning that
    758   // the next iteration will advance to the next byte.
    759   NextNChars(3);
    760 
    761   // Used to convert the UTF-16 code units to a code point and then to a UTF-8
    762   // code unit sequence.
    763   char code_unit8[8] = { 0 };
    764   size_t offset = 0;
    765 
    766   // If this is a high surrogate, consume the next code unit to get the
    767   // low surrogate.
    768   if (CBU16_IS_SURROGATE(code_unit16_high)) {
    769     // Make sure this is the high surrogate. If not, it's an encoding
    770     // error.
    771     if (!CBU16_IS_SURROGATE_LEAD(code_unit16_high))
    772       return false;
    773 
    774     // Make sure that the token has more characters to consume the
    775     // lower surrogate.
    776     if (!CanConsume(6))  // 6 being '\' 'u' and four HEX digits.
    777       return false;
    778     if (*NextChar() != '\\' || *NextChar() != 'u')
    779       return false;
    780 
    781     NextChar();  // Read past 'u'.
    782     int code_unit16_low = 0;
    783     if (!HexStringToInt(StringPiece(pos_, 4), &code_unit16_low))
    784       return false;
    785 
    786     NextNChars(3);
    787 
    788     if (!CBU16_IS_TRAIL(code_unit16_low)) {
    789       return false;
    790     }
    791 
    792     uint32_t code_point =
    793         CBU16_GET_SUPPLEMENTARY(code_unit16_high, code_unit16_low);
    794     if (!IsValidCharacter(code_point))
    795       return false;
    796 
    797     offset = 0;
    798     CBU8_APPEND_UNSAFE(code_unit8, offset, code_point);
    799   } else {
    800     // Not a surrogate.
    801     DCHECK(CBU16_IS_SINGLE(code_unit16_high));
    802     if (!IsValidCharacter(code_unit16_high))
    803       return false;
    804 
    805     CBU8_APPEND_UNSAFE(code_unit8, offset, code_unit16_high);
    806   }
    807 
    808   dest_string->append(code_unit8);
    809   return true;
    810 }
    811 
    812 void JSONParser::DecodeUTF8(const int32_t& point, StringBuilder* dest) {
    813   DCHECK(IsValidCharacter(point));
    814 
    815   // Anything outside of the basic ASCII plane will need to be decoded from
    816   // int32_t to a multi-byte sequence.
    817   if (point < kExtendedASCIIStart) {
    818     dest->Append(static_cast<char>(point));
    819   } else {
    820     char utf8_units[4] = { 0 };
    821     int offset = 0;
    822     CBU8_APPEND_UNSAFE(utf8_units, offset, point);
    823     dest->Convert();
    824     // CBU8_APPEND_UNSAFE can overwrite up to 4 bytes, so utf8_units may not be
    825     // zero terminated at this point.  |offset| contains the correct length.
    826     dest->AppendString(std::string(utf8_units, offset));
    827   }
    828 }
    829 
    830 Value* JSONParser::ConsumeNumber() {
    831   const char* num_start = pos_;
    832   const int start_index = index_;
    833   int end_index = start_index;
    834 
    835   if (*pos_ == '-')
    836     NextChar();
    837 
    838   if (!ReadInt(false)) {
    839     ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    840     return nullptr;
    841   }
    842   end_index = index_;
    843 
    844   // The optional fraction part.
    845   if (*pos_ == '.') {
    846     if (!CanConsume(1)) {
    847       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    848       return nullptr;
    849     }
    850     NextChar();
    851     if (!ReadInt(true)) {
    852       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    853       return nullptr;
    854     }
    855     end_index = index_;
    856   }
    857 
    858   // Optional exponent part.
    859   if (*pos_ == 'e' || *pos_ == 'E') {
    860     NextChar();
    861     if (*pos_ == '-' || *pos_ == '+')
    862       NextChar();
    863     if (!ReadInt(true)) {
    864       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    865       return nullptr;
    866     }
    867     end_index = index_;
    868   }
    869 
    870   // ReadInt is greedy because numbers have no easily detectable sentinel,
    871   // so save off where the parser should be on exit (see Consume invariant at
    872   // the top of the header), then make sure the next token is one which is
    873   // valid.
    874   const char* exit_pos = pos_ - 1;
    875   int exit_index = index_ - 1;
    876 
    877   switch (GetNextToken()) {
    878     case T_OBJECT_END:
    879     case T_ARRAY_END:
    880     case T_LIST_SEPARATOR:
    881     case T_END_OF_INPUT:
    882       break;
    883     default:
    884       ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    885       return nullptr;
    886   }
    887 
    888   pos_ = exit_pos;
    889   index_ = exit_index;
    890 
    891   StringPiece num_string(num_start, end_index - start_index);
    892 
    893   int num_int;
    894   if (StringToInt(num_string, &num_int))
    895     return new FundamentalValue(num_int);
    896 
    897   double num_double;
    898   if (StringToDouble(num_string.as_string(), &num_double) &&
    899       std::isfinite(num_double)) {
    900     return new FundamentalValue(num_double);
    901   }
    902 
    903   return nullptr;
    904 }
    905 
    906 bool JSONParser::ReadInt(bool allow_leading_zeros) {
    907   char first = *pos_;
    908   int len = 0;
    909 
    910   char c = first;
    911   while (CanConsume(1) && IsAsciiDigit(c)) {
    912     c = *NextChar();
    913     ++len;
    914   }
    915 
    916   if (len == 0)
    917     return false;
    918 
    919   if (!allow_leading_zeros && len > 1 && first == '0')
    920     return false;
    921 
    922   return true;
    923 }
    924 
    925 Value* JSONParser::ConsumeLiteral() {
    926   switch (*pos_) {
    927     case 't': {
    928       const char kTrueLiteral[] = "true";
    929       const int kTrueLen = static_cast<int>(strlen(kTrueLiteral));
    930       if (!CanConsume(kTrueLen - 1) ||
    931           !StringsAreEqual(pos_, kTrueLiteral, kTrueLen)) {
    932         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    933         return nullptr;
    934       }
    935       NextNChars(kTrueLen - 1);
    936       return new FundamentalValue(true);
    937     }
    938     case 'f': {
    939       const char kFalseLiteral[] = "false";
    940       const int kFalseLen = static_cast<int>(strlen(kFalseLiteral));
    941       if (!CanConsume(kFalseLen - 1) ||
    942           !StringsAreEqual(pos_, kFalseLiteral, kFalseLen)) {
    943         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    944         return nullptr;
    945       }
    946       NextNChars(kFalseLen - 1);
    947       return new FundamentalValue(false);
    948     }
    949     case 'n': {
    950       const char kNullLiteral[] = "null";
    951       const int kNullLen = static_cast<int>(strlen(kNullLiteral));
    952       if (!CanConsume(kNullLen - 1) ||
    953           !StringsAreEqual(pos_, kNullLiteral, kNullLen)) {
    954         ReportError(JSONReader::JSON_SYNTAX_ERROR, 1);
    955         return nullptr;
    956       }
    957       NextNChars(kNullLen - 1);
    958       return Value::CreateNullValue().release();
    959     }
    960     default:
    961       ReportError(JSONReader::JSON_UNEXPECTED_TOKEN, 1);
    962       return nullptr;
    963   }
    964 }
    965 
    966 // static
    967 bool JSONParser::StringsAreEqual(const char* one, const char* two, size_t len) {
    968   return strncmp(one, two, len) == 0;
    969 }
    970 
    971 void JSONParser::ReportError(JSONReader::JsonParseError code,
    972                              int column_adjust) {
    973   error_code_ = code;
    974   error_line_ = line_number_;
    975   error_column_ = index_ - index_last_line_ + column_adjust;
    976 }
    977 
    978 // static
    979 std::string JSONParser::FormatErrorMessage(int line, int column,
    980                                            const std::string& description) {
    981   if (line || column) {
    982     return StringPrintf("Line: %i, column: %i, %s",
    983         line, column, description.c_str());
    984   }
    985   return description;
    986 }
    987 
    988 }  // namespace internal
    989 }  // namespace base
    990