Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // A JSON parser.  Converts strings of JSON into a Value object (see
      6 // base/values.h).
      7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627
      8 //
      9 // Known limitations/deviations from the RFC:
     10 // - Only knows how to parse ints within the range of a signed 32 bit int and
     11 //   decimal numbers within a double.
     12 // - Assumes input is encoded as UTF8.  The spec says we should allow UTF-16
     13 //   (BE or LE) and UTF-32 (BE or LE) as well.
     14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed
     15 //   by the RFC).
     16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") states that a data
     17 //   stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input
     18 //   UTF-8 string for the JSONReader::JsonToValue() function may start with a
     19 //   UTF-8 BOM (0xEF, 0xBB, 0xBF).
     20 //   To keep the function from mistaking a UTF-8 BOM for an invalid
     21 //   character, the function skips a Unicode BOM at the beginning of the
     22 //   Unicode string (converted from the input UTF-8 string) before parsing it.
     23 //
     24 // TODO(tc): Add a parsing option to relax object keys being wrapped in
     25 //   double quotes
     26 // TODO(tc): Add an option to disable comment stripping
     27 // TODO(aa): Consider making the constructor public and the static Read() method
     28 // only a convenience for the common uses with more complex configuration going
     29 // on the instance.
     30 
     31 #ifndef BASE_JSON_JSON_READER_H_
     32 #define BASE_JSON_JSON_READER_H_
     33 
     34 #include <string>
     35 
     36 #include "base/basictypes.h"
     37 
     38 // Chromium and Chromium OS check out gtest to different places, so we're
     39 // unable to compile on both if we include gtest_prod.h here.  Instead, include
     40 // its only contents -- this will need to be updated if the macro ever changes.
     41 #define FRIEND_TEST(test_case_name, test_name)\
     42 friend class test_case_name##_##test_name##_Test
     43 
     44 class Value;
     45 
     46 namespace base {
     47 
     48 class JSONReader {
     49  public:
     50   // A struct to hold a JS token.
     51   class Token {
     52    public:
     53     enum Type {
     54      OBJECT_BEGIN,           // {
     55      OBJECT_END,             // }
     56      ARRAY_BEGIN,            // [
     57      ARRAY_END,              // ]
     58      STRING,
     59      NUMBER,
     60      BOOL_TRUE,              // true
     61      BOOL_FALSE,             // false
     62      NULL_TOKEN,             // null
     63      LIST_SEPARATOR,         // ,
     64      OBJECT_PAIR_SEPARATOR,  // :
     65      END_OF_INPUT,
     66      INVALID_TOKEN,
     67     };
     68     Token(Type t, const wchar_t* b, int len)
     69       : type(t), begin(b), length(len) {}
     70 
     71     Type type;
     72 
     73     // A pointer into JSONReader::json_pos_ that's the beginning of this token.
     74     const wchar_t* begin;
     75 
     76     // End should be one char past the end of the token.
     77     int length;
     78 
     79     // Get the character that's one past the end of this token.
     80     wchar_t NextChar() {
     81       return *(begin + length);
     82     }
     83   };
     84 
     85   // Error messages that can be returned.
     86   static const char* kBadRootElementType;
     87   static const char* kInvalidEscape;
     88   static const char* kSyntaxError;
     89   static const char* kTrailingComma;
     90   static const char* kTooMuchNesting;
     91   static const char* kUnexpectedDataAfterRoot;
     92   static const char* kUnsupportedEncoding;
     93   static const char* kUnquotedDictionaryKey;
     94 
     95   JSONReader();
     96 
     97   // Reads and parses |json|, returning a Value. The caller owns the returned
     98   // instance. If |json| is not a properly formed JSON string, returns NULL.
     99   // If |allow_trailing_comma| is true, we will ignore trailing commas in
    100   // objects and arrays even though this goes against the RFC.
    101   static Value* Read(const std::string& json, bool allow_trailing_comma);
    102 
    103   // Reads and parses |json| like Read(). |error_message_out| is optional. If
    104   // specified and NULL is returned, |error_message_out| will be populated with
    105   // a string describing the error. Otherwise, |error_message_out| is
    106   // unmodified.
    107   static Value* ReadAndReturnError(const std::string& json,
    108                                    bool allow_trailing_comma,
    109                                    std::string* error_message_out);
    110 
    111   // Returns the error message if the last call to JsonToValue() failed. If the
    112   // last call did not fail, returns a valid empty string.
    113   std::string error_message() { return error_message_; }
    114 
    115   // Reads and parses |json|, returning a Value. The caller owns the returned
    116   // instance. If |json| is not a properly formed JSON string, returns NULL and
    117   // a detailed error can be retrieved from |error_message()|.
    118   // If |check_root| is true, we require that the root object be an object or
    119   // array. Otherwise, it can be any valid JSON type.
    120   // If |allow_trailing_comma| is true, we will ignore trailing commas in
    121   // objects and arrays even though this goes against the RFC.
    122   Value* JsonToValue(const std::string& json, bool check_root,
    123                      bool allow_trailing_comma);
    124 
    125  private:
    126   static std::string FormatErrorMessage(int line, int column,
    127                                         const char* description);
    128 
    129   DISALLOW_COPY_AND_ASSIGN(JSONReader);
    130 
    131   FRIEND_TEST(JSONReaderTest, Reading);
    132   FRIEND_TEST(JSONReaderTest, ErrorMessages);
    133 
    134   // Recursively build Value.  Returns NULL if we don't have a valid JSON
    135   // string.  If |is_root| is true, we verify that the root element is either
    136   // an object or an array.
    137   Value* BuildValue(bool is_root);
    138 
    139   // Parses a sequence of characters into a Token::NUMBER. If the sequence of
    140   // characters is not a valid number, returns a Token::INVALID_TOKEN. Note
    141   // that DecodeNumber is used to actually convert from a string to an
    142   // int/double.
    143   Token ParseNumberToken();
    144 
    145   // Try and convert the substring that token holds into an int or a double. If
    146   // we can (ie., no overflow), return the value, else return NULL.
    147   Value* DecodeNumber(const Token& token);
    148 
    149   // Parses a sequence of characters into a Token::STRING. If the sequence of
    150   // characters is not a valid string, returns a Token::INVALID_TOKEN. Note
    151   // that DecodeString is used to actually decode the escaped string into an
    152   // actual wstring.
    153   Token ParseStringToken();
    154 
    155   // Convert the substring into a value string.  This should always succeed
    156   // (otherwise ParseStringToken would have failed).
    157   Value* DecodeString(const Token& token);
    158 
    159   // Grabs the next token in the JSON stream.  This does not increment the
    160   // stream so it can be used to look ahead at the next token.
    161   Token ParseToken();
    162 
    163   // Increments |json_pos_| past leading whitespace and comments.
    164   void EatWhitespaceAndComments();
    165 
    166   // If |json_pos_| is at the start of a comment, eat it, otherwise, returns
    167   // false.
    168   bool EatComment();
    169 
    170   // Checks if |json_pos_| matches str.
    171   bool NextStringMatch(const std::wstring& str);
    172 
    173   // Creates the error message that will be returned to the caller. The current
    174   // line and column are determined and added into the final message.
    175   void SetErrorMessage(const char* description, const wchar_t* error_pos);
    176 
    177   // Pointer to the starting position in the input string.
    178   const wchar_t* start_pos_;
    179 
    180   // Pointer to the current position in the input string.
    181   const wchar_t* json_pos_;
    182 
    183   // Used to keep track of how many nested lists/dicts there are.
    184   int stack_depth_;
    185 
    186   // A parser flag that allows trailing commas in objects and arrays.
    187   bool allow_trailing_comma_;
    188 
    189   // Contains the error message for the last call to JsonToValue(), if any.
    190   std::string error_message_;
    191 };
    192 
    193 }  // namespace base
    194 
    195 #endif  // BASE_JSON_JSON_READER_H_
    196