Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef BASE_JSON_JSON_PARSER_H_
      6 #define BASE_JSON_JSON_PARSER_H_
      7 
      8 #include <string>
      9 
     10 #include "base/base_export.h"
     11 #include "base/basictypes.h"
     12 #include "base/compiler_specific.h"
     13 #include "base/json/json_reader.h"
     14 #include "base/strings/string_piece.h"
     15 
     16 #if !defined(OS_CHROMEOS)
     17 #include "base/gtest_prod_util.h"
     18 #endif
     19 
     20 namespace base {
     21 class Value;
     22 }
     23 
     24 #if defined(OS_CHROMEOS)
     25 // Chromium and Chromium OS check out gtest to different places, so this
     26 // header cannot compile on both if gtest_prod.h is included here. Instead,
     27 // its only contents are replicated below -- update if the macro ever changes.
     28 #define FRIEND_TEST(test_case_name, test_name)\
     29 friend class test_case_name##_##test_name##_Test
     30 // Befriends the plain, DISABLED_-prefixed and FLAKY_-prefixed test variants.
     31 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \
     32   FRIEND_TEST(test_case_name, test_name); \
     33   FRIEND_TEST(test_case_name, DISABLED_##test_name); \
     34   FRIEND_TEST(test_case_name, FLAKY_##test_name)
     35 #endif  // OS_CHROMEOS
     36 
     37 namespace base {
     38 namespace internal {
     39 
     40 class JSONParserTest;
     41 
     42 // The implementation behind the JSONReader interface. This class is not meant
     43 // to be used directly; it encapsulates logic that need not be exposed publicly.
     44 //
     45 // This parser guarantees O(n) time through the input string. It also optimizes
     46 // base::StringValue by backing it with a StringPiece where possible when
     47 // returning Value objects, via "hidden roots" discussed in the implementation.
     48 //
     49 // Iteration happens on the byte level, with the functions CanConsume and
     50 // NextChar. The conversion from byte to JSON token happens without advancing
     51 // the parser in GetNextToken/ParseToken; that is, tokenization operates on
     52 // the current parser position without advancing it.
     53 //
     54 // Built on top of these are a family of Consume functions that iterate
     55 // internally. Invariant: on entry of a Consume function, the parser is wound
     56 // to the first byte of a valid JSON token. On exit, it is on the last byte
     57 // of a token, such that the next iteration of the parser will be at the byte
     58 // immediately following the token, which would likely be the first byte of the
     59 // next token.
     60 class BASE_EXPORT_PRIVATE JSONParser {
     61  public:
     62   explicit JSONParser(int options);  // |options|: see |options_| below.
     63   ~JSONParser();
     64 
     65   // Parses |input| according to the options given at construction and
     66   // returns the result as a Value owned by the caller.
     67   Value* Parse(const StringPiece& input);
     68 
     69   // Returns the error code as a JSONReader::JsonParseError value.
     70   JSONReader::JsonParseError error_code() const;
     71 
     72   // Returns the human-friendly error message.
     73   std::string GetErrorMessage() const;
     74 
     75  private:
     76   enum Token {  // The kinds of token the parser distinguishes.
     77     T_OBJECT_BEGIN,           // {
     78     T_OBJECT_END,             // }
     79     T_ARRAY_BEGIN,            // [
     80     T_ARRAY_END,              // ]
     81     T_STRING,
     82     T_NUMBER,
     83     T_BOOL_TRUE,              // true
     84     T_BOOL_FALSE,             // false
     85     T_NULL,                   // null
     86     T_LIST_SEPARATOR,         // ,
     87     T_OBJECT_PAIR_SEPARATOR,  // :
     88     T_END_OF_INPUT,
     89     T_INVALID_TOKEN,
     90   };
     91 
     92   // A helper class used for parsing strings. One optimization performed is to
     93   // create base::Value with a StringPiece to avoid unnecessary std::string
     94   // copies. This is not possible if the input string needs to be decoded from
     95   // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped.
     96   // This class centralizes that logic.
     97   class StringBuilder {
     98    public:
     99     // Empty constructor. Used for creating a builder with which to Swap().
    100     StringBuilder();
    101 
    102     // |pos| is the beginning of an input string, excluding the |"|.
    103     explicit StringBuilder(const char* pos);
    104 
    105     ~StringBuilder();
    106 
    107     // Swaps the contents of |other| with this.
    108     void Swap(StringBuilder* other);
    109 
    110     // Either increases the |length_| of the string or copies the character if
    111     // the StringBuilder has been converted. |c| must be in the basic ASCII
    112     // plane; all other characters need to be in UTF-8 units, appended with
    113     // AppendString below.
    114     void Append(const char& c);
    115 
    116     // Appends |str| to the std::string. The builder must have been
    117     // Convert()ed before this is used.
    118     void AppendString(const std::string& str);
    119 
    120     // Converts the builder from its default StringPiece to a full std::string,
    121     // performing a copy. Once a builder is converted, it cannot be made a
    122     // StringPiece again.
    123     void Convert();
    124 
    125     // Returns whether the builder can be converted to a StringPiece.
    126     bool CanBeStringPiece() const;
    127 
    128     // Returns the StringPiece representation. Returns an empty piece if it
    129     // cannot be converted.
    130     StringPiece AsStringPiece();
    131 
    132     // Returns the builder as a std::string.
    133     const std::string& AsString();
    134 
    135    private:
    136     // The beginning of the input string.
    137     const char* pos_;
    138 
    139     // Number of bytes in |pos_| that make up the string being built.
    140     size_t length_;
    141 
    142     // The copied string representation. NULL until Convert() is called.
    143     // Strong (presumably owned here); scoped_ptr<T> has too much overhead here.
    144     std::string* string_;
    145   };
    146 
    147   // Quick check that the stream has capacity to consume |length| more bytes.
    148   bool CanConsume(int length);
    149 
    150   // The basic way to consume a single character in the stream. Consumes one
    151   // byte of the input stream and returns a pointer to the rest of it.
    152   const char* NextChar();
    153 
    154   // Performs the equivalent of NextChar |n| times.
    155   void NextNChars(int n);
    156 
    157   // Skips over whitespace and comments to find the next token in the stream.
    158   // The parser is advanced only over whitespace and comments, nothing else.
    159   Token GetNextToken();
    160 
    161   // Consumes whitespace characters and comments, stopping at the first byte
    162   // that belongs to neither.
    163   void EatWhitespaceAndComments();
    164   // Helper function that consumes a comment, assuming that the parser is
    165   // currently wound to a '/'.
    166   bool EatComment();
    167 
    168   // Calls GetNextToken() and then ParseToken(). Caller owns the result.
    169   Value* ParseNextToken();
    170 
    171   // Takes a token that represents the start of a Value ("a structural token"
    172   // in RFC terms) and consumes it, returning the result as an object the
    173   // caller owns.
    174   Value* ParseToken(Token token);
    175 
    176   // Assuming that the parser is currently wound to '{', this parses a JSON
    177   // object into a DictionaryValue.
    178   Value* ConsumeDictionary();
    179 
    180   // Assuming that the parser is wound to '[', this parses a JSON list into a
    181   // ListValue.
    182   Value* ConsumeList();
    183 
    184   // Calls through to ConsumeStringRaw() and wraps the result in a Value.
    185   Value* ConsumeString();
    186 
    187   // Assuming that the parser is wound to a double quote, this parses a string,
    188   // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on
    189   // success and Swap()s the result into |out|. Returns false on failure with
    190   // error information set.
    191   bool ConsumeStringRaw(StringBuilder* out);
    192   // Helper function for ConsumeStringRaw() that consumes the next 4 or 10
    193   // bytes (parser is wound to the first character of a HEX sequence, with the
    194   // potential for consuming another \uXXXX for a surrogate). Returns true on
    195   // success and places the UTF-8 code units in |dest_string|, and false on
    196   // failure.
    197   bool DecodeUTF16(std::string* dest_string);
    198   // Helper function for ConsumeStringRaw() that takes a single code point,
    199   // converts it into UTF-8 code units, and appends them to |dest|. The
    200   // point must be valid.
    201   void DecodeUTF8(const int32& point, StringBuilder* dest);
    202 
    203   // Assuming that the parser is wound to the start of a valid JSON number,
    204   // this parses and converts it to either an int or double value.
    205   Value* ConsumeNumber();
    206   // Helper that reads characters that are ints. Returns true if a number was
    207   // read and false on error.
    208   bool ReadInt(bool allow_leading_zeros);
    209 
    210   // Consumes the literal values of |true|, |false|, and |null|, assuming the
    211   // parser is wound to the first character of any of those.
    212   Value* ConsumeLiteral();
    213 
    214   // Compares two string buffers, |left| and |right|, over |len| bytes.
    215   static bool StringsAreEqual(const char* left, const char* right, size_t len);
    216 
    217   // Sets the error information to |code| at the current column, based on
    218   // |index_| and |index_last_line_|, with an optional positive/negative
    219   // adjustment by |column_adjust|.
    220   void ReportError(JSONReader::JsonParseError code, int column_adjust);
    221 
    222   // Given the line and column number of an error, formats one of the error
    223   // message constants from json_reader.h for human display.
    224   static std::string FormatErrorMessage(int line, int column,
    225                                         const std::string& description);
    226 
    227   // base::JSONParserOptions that control parsing.
    228   int options_;
    229 
    230   // Pointer to the start of the input data.
    231   const char* start_pos_;
    232 
    233   // Pointer to the current position in the input data. Equivalent to
    234   // |start_pos_ + index_|.
    235   const char* pos_;
    236 
    237   // Pointer to the last character of the input data.
    238   const char* end_pos_;
    239 
    240   // The index in the input stream to which the parser is wound.
    241   int index_;
    242 
    243   // The number of times the parser has recursed (current stack depth).
    244   int stack_depth_;
    245 
    246   // The line number that the parser is at currently.
    247   int line_number_;
    248 
    249   // The last value of |index_| on the previous line.
    250   int index_last_line_;
    251 
    252   // Error information.
    253   JSONReader::JsonParseError error_code_;
    254   int error_line_;
    255   int error_column_;
    256   // The declarations below grant the unit tests access to private members.
    257   friend class JSONParserTest;
    258   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar);
    259   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary);
    260   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList);
    261   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString);
    262   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals);
    263   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers);
    264   FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages);
    265 
    266   DISALLOW_COPY_AND_ASSIGN(JSONParser);
    267 };
    267 
    268 }  // namespace internal
    269 }  // namespace base
    270 
    271 #endif  // BASE_JSON_JSON_PARSER_H_
    272