1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // A JSON parser. Converts strings of JSON into a Value object (see 6 // base/values.h). 7 // http://www.ietf.org/rfc/rfc4627.txt?number=4627 8 // 9 // Known limitations/deviations from the RFC: 10 // - Only knows how to parse ints within the range of a signed 32 bit int and 11 // decimal numbers within a double. 12 // - Assumes input is encoded as UTF8. The spec says we should allow UTF-16 13 // (BE or LE) and UTF-32 (BE or LE) as well. 14 // - We limit nesting to 100 levels to prevent stack overflow (this is allowed 15 // by the RFC). 16 // - A Unicode FAQ ("http://unicode.org/faq/utf_bom.html") writes a data 17 // stream may start with a Unicode Byte-Order-Mark (U+FEFF), i.e. the input 18 // UTF-8 string for the JSONReader::JsonToValue() function may start with a 19 // UTF-8 BOM (0xEF, 0xBB, 0xBF). 20 // To avoid the function from mis-treating a UTF-8 BOM as an invalid 21 // character, the function skips a Unicode BOM at the beginning of the 22 // Unicode string (converted from the input UTF-8 string) before parsing it. 23 // 24 // TODO(tc): Add a parsing option to to relax object keys being wrapped in 25 // double quotes 26 // TODO(tc): Add an option to disable comment stripping 27 // TODO(aa): Consider making the constructor public and the static Read() method 28 // only a convenience for the common uses with more complex configuration going 29 // on the instance. 30 31 #ifndef BASE_JSON_JSON_READER_H_ 32 #define BASE_JSON_JSON_READER_H_ 33 #pragma once 34 35 #include <string> 36 37 #include "base/base_api.h" 38 #include "base/basictypes.h" 39 40 // Chromium and Chromium OS check out gtest to different places, so we're 41 // unable to compile on both if we include gtest_prod.h here. Instead, include 42 // its only contents -- this will need to be updated if the macro ever changes. 43 #define FRIEND_TEST(test_case_name, test_name)\ 44 friend class test_case_name##_##test_name##_Test 45 46 class Value; 47 48 namespace base { 49 50 class BASE_API JSONReader { 51 public: 52 // A struct to hold a JS token. 53 class Token { 54 public: 55 enum Type { 56 OBJECT_BEGIN, // { 57 OBJECT_END, // } 58 ARRAY_BEGIN, // [ 59 ARRAY_END, // ] 60 STRING, 61 NUMBER, 62 BOOL_TRUE, // true 63 BOOL_FALSE, // false 64 NULL_TOKEN, // null 65 LIST_SEPARATOR, // , 66 OBJECT_PAIR_SEPARATOR, // : 67 END_OF_INPUT, 68 INVALID_TOKEN, 69 }; 70 Token(Type t, const wchar_t* b, int len) 71 : type(t), begin(b), length(len) {} 72 73 // Get the character that's one past the end of this token. 74 wchar_t NextChar() { 75 return *(begin + length); 76 } 77 78 Type type; 79 80 // A pointer into JSONReader::json_pos_ that's the beginning of this token. 81 const wchar_t* begin; 82 83 // End should be one char past the end of the token. 84 int length; 85 }; 86 87 // Error codes during parsing. 88 enum JsonParseError { 89 JSON_NO_ERROR = 0, 90 JSON_BAD_ROOT_ELEMENT_TYPE, 91 JSON_INVALID_ESCAPE, 92 JSON_SYNTAX_ERROR, 93 JSON_TRAILING_COMMA, 94 JSON_TOO_MUCH_NESTING, 95 JSON_UNEXPECTED_DATA_AFTER_ROOT, 96 JSON_UNSUPPORTED_ENCODING, 97 JSON_UNQUOTED_DICTIONARY_KEY, 98 }; 99 100 // String versions of parse error codes. 101 static const char* kBadRootElementType; 102 static const char* kInvalidEscape; 103 static const char* kSyntaxError; 104 static const char* kTrailingComma; 105 static const char* kTooMuchNesting; 106 static const char* kUnexpectedDataAfterRoot; 107 static const char* kUnsupportedEncoding; 108 static const char* kUnquotedDictionaryKey; 109 110 JSONReader(); 111 112 // Reads and parses |json|, returning a Value. The caller owns the returned 113 // instance. If |json| is not a properly formed JSON string, returns NULL. 114 // If |allow_trailing_comma| is true, we will ignore trailing commas in 115 // objects and arrays even though this goes against the RFC. 116 static Value* Read(const std::string& json, bool allow_trailing_comma); 117 118 // Reads and parses |json| like Read(). |error_code_out| and |error_msg_out| 119 // are optional. If specified and NULL is returned, they will be populated 120 // an error code and a formatted error message (including error location if 121 // appropriate). Otherwise, they will be unmodified. 122 static Value* ReadAndReturnError(const std::string& json, 123 bool allow_trailing_comma, 124 int* error_code_out, 125 std::string* error_msg_out); 126 127 // Converts a JSON parse error code into a human readable message. 128 // Returns an empty string if error_code is JSON_NO_ERROR. 129 static std::string ErrorCodeToString(JsonParseError error_code); 130 131 // Returns the error code if the last call to JsonToValue() failed. 132 // Returns JSON_NO_ERROR otherwise. 133 JsonParseError error_code() const { return error_code_; } 134 135 // Converts error_code_ to a human-readable string, including line and column 136 // numbers if appropriate. 137 std::string GetErrorMessage() const; 138 139 // Reads and parses |json|, returning a Value. The caller owns the returned 140 // instance. If |json| is not a properly formed JSON string, returns NULL and 141 // a detailed error can be retrieved from |error_message()|. 142 // If |check_root| is true, we require that the root object be an object or 143 // array. Otherwise, it can be any valid JSON type. 144 // If |allow_trailing_comma| is true, we will ignore trailing commas in 145 // objects and arrays even though this goes against the RFC. 146 Value* JsonToValue(const std::string& json, bool check_root, 147 bool allow_trailing_comma); 148 149 private: 150 FRIEND_TEST(JSONReaderTest, Reading); 151 FRIEND_TEST(JSONReaderTest, ErrorMessages); 152 153 static std::string FormatErrorMessage(int line, int column, 154 const std::string& description); 155 156 // Recursively build Value. Returns NULL if we don't have a valid JSON 157 // string. If |is_root| is true, we verify that the root element is either 158 // an object or an array. 159 Value* BuildValue(bool is_root); 160 161 // Parses a sequence of characters into a Token::NUMBER. If the sequence of 162 // characters is not a valid number, returns a Token::INVALID_TOKEN. Note 163 // that DecodeNumber is used to actually convert from a string to an 164 // int/double. 165 Token ParseNumberToken(); 166 167 // Try and convert the substring that token holds into an int or a double. If 168 // we can (ie., no overflow), return the value, else return NULL. 169 Value* DecodeNumber(const Token& token); 170 171 // Parses a sequence of characters into a Token::STRING. If the sequence of 172 // characters is not a valid string, returns a Token::INVALID_TOKEN. Note 173 // that DecodeString is used to actually decode the escaped string into an 174 // actual wstring. 175 Token ParseStringToken(); 176 177 // Convert the substring into a value string. This should always succeed 178 // (otherwise ParseStringToken would have failed). 179 Value* DecodeString(const Token& token); 180 181 // Grabs the next token in the JSON stream. This does not increment the 182 // stream so it can be used to look ahead at the next token. 183 Token ParseToken(); 184 185 // Increments |json_pos_| past leading whitespace and comments. 186 void EatWhitespaceAndComments(); 187 188 // If |json_pos_| is at the start of a comment, eat it, otherwise, returns 189 // false. 190 bool EatComment(); 191 192 // Checks if |json_pos_| matches str. 193 bool NextStringMatch(const std::wstring& str); 194 195 // Sets the error code that will be returned to the caller. The current 196 // line and column are determined and added into the final message. 197 void SetErrorCode(const JsonParseError error, const wchar_t* error_pos); 198 199 // Pointer to the starting position in the input string. 200 const wchar_t* start_pos_; 201 202 // Pointer to the current position in the input string. 203 const wchar_t* json_pos_; 204 205 // Used to keep track of how many nested lists/dicts there are. 206 int stack_depth_; 207 208 // A parser flag that allows trailing commas in objects and arrays. 209 bool allow_trailing_comma_; 210 211 // Contains the error code for the last call to JsonToValue(), if any. 212 JsonParseError error_code_; 213 int error_line_; 214 int error_col_; 215 216 DISALLOW_COPY_AND_ASSIGN(JSONReader); 217 }; 218 219 } // namespace base 220 221 #endif // BASE_JSON_JSON_READER_H_ 222