Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // A JSON parser.  Converts strings of JSON into a Value object (see
      6 // base/values.h).
      7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627
      8 //
      9 // Known limitations/deviations from the RFC:
     10 // - Only knows how to parse ints within the range of a signed 32 bit int and
     11 //   decimal numbers within a double.
     12 // - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
     13 //   (BE or LE) and UTF-32 (BE or LE) as well.
     14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed
     15 //   by the RFC).
// - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") says that a data
//   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
//   UTF-8 string for the JSONReader::JsonToValue() function may start with a
//   UTF-8 BOM (0xEF, 0xBB, 0xBF).
//   To prevent the function from mistaking a UTF-8 BOM for an invalid
//   character, the function skips a Unicode BOM at the beginning of the
//   Unicode string (converted from the input UTF-8 string) before parsing it.
     23 //
// TODO(tc): Add a parsing option to relax object keys being wrapped in
//   double quotes
     26 // TODO(tc): Add an option to disable comment stripping
     27 // TODO(aa): Consider making the constructor public and the static Read() method
     28 // only a convenience for the common uses with more complex configuration going
     29 // on the instance.
     30 
     31 #ifndef BASE_JSON_JSON_READER_H_
     32 #define BASE_JSON_JSON_READER_H_
     33 #pragma once
     34 
     35 #include <string>
     36 
     37 #include "base/base_api.h"
     38 #include "base/basictypes.h"
     39 
// Chromium and Chromium OS check out gtest to different places, so we're
// unable to compile on both if we include gtest_prod.h here.  Instead, include
// its only contents -- this will need to be updated if the macro ever changes.
//
// FRIEND_TEST(test_case_name, test_name) expands to a friend declaration for
// the class named <test_case_name>_<test_name>_Test.  JSONReader uses it in
// its private section so that the named test classes can reach private
// members.
#define FRIEND_TEST(test_case_name, test_name)\
friend class test_case_name##_##test_name##_Test
     45 
     46 class Value;
     47 
     48 namespace base {
     49 
     50 class BASE_API JSONReader {
     51  public:
     52   // A struct to hold a JS token.
     53   class Token {
     54    public:
     55     enum Type {
     56      OBJECT_BEGIN,           // {
     57      OBJECT_END,             // }
     58      ARRAY_BEGIN,            // [
     59      ARRAY_END,              // ]
     60      STRING,
     61      NUMBER,
     62      BOOL_TRUE,              // true
     63      BOOL_FALSE,             // false
     64      NULL_TOKEN,             // null
     65      LIST_SEPARATOR,         // ,
     66      OBJECT_PAIR_SEPARATOR,  // :
     67      END_OF_INPUT,
     68      INVALID_TOKEN,
     69     };
     70     Token(Type t, const wchar_t* b, int len)
     71       : type(t), begin(b), length(len) {}
     72 
     73     // Get the character that's one past the end of this token.
     74     wchar_t NextChar() {
     75       return *(begin + length);
     76     }
     77 
     78     Type type;
     79 
     80     // A pointer into JSONReader::json_pos_ that's the beginning of this token.
     81     const wchar_t* begin;
     82 
     83     // End should be one char past the end of the token.
     84     int length;
     85   };
     86 
     87   // Error codes during parsing.
     88   enum JsonParseError {
     89     JSON_NO_ERROR = 0,
     90     JSON_BAD_ROOT_ELEMENT_TYPE,
     91     JSON_INVALID_ESCAPE,
     92     JSON_SYNTAX_ERROR,
     93     JSON_TRAILING_COMMA,
     94     JSON_TOO_MUCH_NESTING,
     95     JSON_UNEXPECTED_DATA_AFTER_ROOT,
     96     JSON_UNSUPPORTED_ENCODING,
     97     JSON_UNQUOTED_DICTIONARY_KEY,
     98   };
     99 
    100   // String versions of parse error codes.
    101   static const char* kBadRootElementType;
    102   static const char* kInvalidEscape;
    103   static const char* kSyntaxError;
    104   static const char* kTrailingComma;
    105   static const char* kTooMuchNesting;
    106   static const char* kUnexpectedDataAfterRoot;
    107   static const char* kUnsupportedEncoding;
    108   static const char* kUnquotedDictionaryKey;
    109 
    110   JSONReader();
    111 
    112   // Reads and parses |json|, returning a Value. The caller owns the returned
    113   // instance. If |json| is not a properly formed JSON string, returns NULL.
    114   // If |allow_trailing_comma| is true, we will ignore trailing commas in
    115   // objects and arrays even though this goes against the RFC.
    116   static Value* Read(const std::string& json, bool allow_trailing_comma);
    117 
    118   // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out|
    119   // are optional. If specified and NULL is returned, they will be populated
    120   // an error code and a formatted error message (including error location if
    121   // appropriate). Otherwise, they will be unmodified.
    122   static Value* ReadAndReturnError(const std::string& json,
    123                                    bool allow_trailing_comma,
    124                                    int* error_code_out,
    125                                    std::string* error_msg_out);
    126 
    127   // Converts a JSON parse error code into a human readable message.
    128   // Returns an empty string if error_code is JSON_NO_ERROR.
    129   static std::string ErrorCodeToString(JsonParseError error_code);
    130 
    131   // Returns the error code if the last call to JsonToValue() failed.
    132   // Returns JSON_NO_ERROR otherwise.
    133   JsonParseError error_code() const { return error_code_; }
    134 
    135   // Converts error_code_ to a human-readable string, including line and column
    136   // numbers if appropriate.
    137   std::string GetErrorMessage() const;
    138 
    139   // Reads and parses |json|, returning a Value. The caller owns the returned
    140   // instance. If |json| is not a properly formed JSON string, returns NULL and
    141   // a detailed error can be retrieved from |error_message()|.
    142   // If |check_root| is true, we require that the root object be an object or
    143   // array. Otherwise, it can be any valid JSON type.
    144   // If |allow_trailing_comma| is true, we will ignore trailing commas in
    145   // objects and arrays even though this goes against the RFC.
    146   Value* JsonToValue(const std::string& json, bool check_root,
    147                      bool allow_trailing_comma);
    148 
    149  private:
    150   FRIEND_TEST(JSONReaderTest, Reading);
    151   FRIEND_TEST(JSONReaderTest, ErrorMessages);
    152 
    153   static std::string FormatErrorMessage(int line, int column,
    154                                         const std::string& description);
    155 
    156   // Recursively build Value.  Returns NULL if we don't have a valid JSON
    157   // string.  If |is_root| is true, we verify that the root element is either
    158   // an object or an array.
    159   Value* BuildValue(bool is_root);
    160 
    161   // Parses a sequence of characters into a Token::NUMBER. If the sequence of
    162   // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
    163   // that DecodeNumber is used to actually convert from a string to an
    164   // int/double.
    165   Token ParseNumberToken();
    166 
    167   // Try and convert the substring that token holds into an int or a double. If
    168   // we can (ie., no overflow), return the value, else return NULL.
    169   Value* DecodeNumber(const Token& token);
    170 
    171   // Parses a sequence of characters into a Token::STRING. If the sequence of
    172   // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
    173   // that DecodeString is used to actually decode the escaped string into an
    174   // actual wstring.
    175   Token ParseStringToken();
    176 
    177   // Convert the substring into a value string.  This should always succeed
    178   // (otherwise ParseStringToken would have failed).
    179   Value* DecodeString(const Token& token);
    180 
    181   // Grabs the next token in the JSON stream.  This does not increment the
    182   // stream so it can be used to look ahead at the next token.
    183   Token ParseToken();
    184 
    185   // Increments |json_pos_| past leading whitespace and comments.
    186   void EatWhitespaceAndComments();
    187 
    188   // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
    189   // false.
    190   bool EatComment();
    191 
    192   // Checks if |json_pos_| matches str.
    193   bool NextStringMatch(const std::wstring& str);
    194 
    195   // Sets the error code that will be returned to the caller. The current
    196   // line and column are determined and added into the final message.
    197   void SetErrorCode(const JsonParseError error, const wchar_t* error_pos);
    198 
    199   // Pointer to the starting position in the input string.
    200   const wchar_t* start_pos_;
    201 
    202   // Pointer to the current position in the input string.
    203   const wchar_t* json_pos_;
    204 
    205   // Used to keep track of how many nested lists/dicts there are.
    206   int stack_depth_;
    207 
    208   // A parser flag that allows trailing commas in objects and arrays.
    209   bool allow_trailing_comma_;
    210 
    211   // Contains the error code for the last call to JsonToValue(), if any.
    212   JsonParseError error_code_;
    213   int error_line_;
    214   int error_col_;
    215 
    216   DISALLOW_COPY_AND_ASSIGN(JSONReader);
    217 };
    218 
    219 }  // namespace base
    220 
    221 #endif  // BASE_JSON_JSON_READER_H_
    222