1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef BASE_JSON_JSON_PARSER_H_ 6 #define BASE_JSON_JSON_PARSER_H_ 7 8 #include <string> 9 10 #include "base/base_export.h" 11 #include "base/basictypes.h" 12 #include "base/compiler_specific.h" 13 #include "base/json/json_reader.h" 14 #include "base/strings/string_piece.h" 15 16 #if !defined(OS_CHROMEOS) 17 #include "base/gtest_prod_util.h" 18 #endif 19 20 namespace base { 21 class Value; 22 } 23 24 #if defined(OS_CHROMEOS) 25 // Chromium and Chromium OS check out gtest to different places, so this is 26 // unable to compile on both if gtest_prod.h is included here. Instead, include 27 // its only contents -- this will need to be updated if the macro ever changes. 28 #define FRIEND_TEST(test_case_name, test_name)\ 29 friend class test_case_name##_##test_name##_Test 30 31 #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ 32 FRIEND_TEST(test_case_name, test_name); \ 33 FRIEND_TEST(test_case_name, DISABLED_##test_name); \ 34 FRIEND_TEST(test_case_name, FLAKY_##test_name) 35 #endif // OS_CHROMEOS 36 37 namespace base { 38 namespace internal { 39 40 class JSONParserTest; 41 42 // The implementation behind the JSONReader interface. This class is not meant 43 // to be used directly; it encapsulates logic that need not be exposed publicly. 44 // 45 // This parser guarantees O(n) time through the input string. It also optimizes 46 // base::StringValue by using StringPiece where possible when returning Value 47 // objects by using "hidden roots," discussed in the implementation. 48 // 49 // Iteration happens on the byte level, with the functions CanConsume and 50 // NextChar. The conversion from byte to JSON token happens without advancing 51 // the parser in GetNextToken/ParseToken, that is tokenization operates on 52 // the current parser position without advancing. 53 // 54 // Built on top of these are a family of Consume functions that iterate 55 // internally. Invariant: on entry of a Consume function, the parser is wound 56 // to the first byte of a valid JSON token. On exit, it is on the last byte 57 // of a token, such that the next iteration of the parser will be at the byte 58 // immediately following the token, which would likely be the first byte of the 59 // next token. 60 class BASE_EXPORT_PRIVATE JSONParser { 61 public: 62 explicit JSONParser(int options); 63 ~JSONParser(); 64 65 // Parses the input string according to the set options and returns the 66 // result as a Value owned by the caller. 67 Value* Parse(const StringPiece& input); 68 69 // Returns the error code. 70 JSONReader::JsonParseError error_code() const; 71 72 // Returns the human-friendly error message. 73 std::string GetErrorMessage() const; 74 75 private: 76 enum Token { 77 T_OBJECT_BEGIN, // { 78 T_OBJECT_END, // } 79 T_ARRAY_BEGIN, // [ 80 T_ARRAY_END, // ] 81 T_STRING, 82 T_NUMBER, 83 T_BOOL_TRUE, // true 84 T_BOOL_FALSE, // false 85 T_NULL, // null 86 T_LIST_SEPARATOR, // , 87 T_OBJECT_PAIR_SEPARATOR, // : 88 T_END_OF_INPUT, 89 T_INVALID_TOKEN, 90 }; 91 92 // A helper class used for parsing strings. One optimization performed is to 93 // create base::Value with a StringPiece to avoid unnecessary std::string 94 // copies. This is not possible if the input string needs to be decoded from 95 // UTF-16 to UTF-8, or if an escape sequence causes characters to be skipped. 96 // This class centralizes that logic. 97 class StringBuilder { 98 public: 99 // Empty constructor. Used for creating a builder with which to Swap(). 100 StringBuilder(); 101 102 // |pos| is the beginning of an input string, excluding the |"|. 103 explicit StringBuilder(const char* pos); 104 105 ~StringBuilder(); 106 107 // Swaps the contents of |other| with this. 108 void Swap(StringBuilder* other); 109 110 // Either increases the |length_| of the string or copies the character if 111 // the StringBuilder has been converted. |c| must be in the basic ASCII 112 // plane; all other characters need to be in UTF-8 units, appended with 113 // AppendString below. 114 void Append(const char& c); 115 116 // Appends a string to the std::string. Must be Convert()ed to use. 117 void AppendString(const std::string& str); 118 119 // Converts the builder from its default StringPiece to a full std::string, 120 // performing a copy. Once a builder is converted, it cannot be made a 121 // StringPiece again. 122 void Convert(); 123 124 // Returns whether the builder can be converted to a StringPiece. 125 bool CanBeStringPiece() const; 126 127 // Returns the StringPiece representation. Returns an empty piece if it 128 // cannot be converted. 129 StringPiece AsStringPiece(); 130 131 // Returns the builder as a std::string. 132 const std::string& AsString(); 133 134 private: 135 // The beginning of the input string. 136 const char* pos_; 137 138 // Number of bytes in |pos_| that make up the string being built. 139 size_t length_; 140 141 // The copied string representation. NULL until Convert() is called. 142 // Strong. scoped_ptr<T> has too much of an overhead here. 143 std::string* string_; 144 }; 145 146 // Quick check that the stream has capacity to consume |length| more bytes. 147 bool CanConsume(int length); 148 149 // The basic way to consume a single character in the stream. Consumes one 150 // byte of the input stream and returns a pointer to the rest of it. 151 const char* NextChar(); 152 153 // Performs the equivalent of NextChar N times. 154 void NextNChars(int n); 155 156 // Skips over whitespace and comments to find the next token in the stream. 157 // This does not advance the parser for non-whitespace or comment chars. 158 Token GetNextToken(); 159 160 // Consumes whitespace characters and comments until the next non-that is 161 // encountered. 162 void EatWhitespaceAndComments(); 163 // Helper function that consumes a comment, assuming that the parser is 164 // currently wound to a '/'. 165 bool EatComment(); 166 167 // Calls GetNextToken() and then ParseToken(). Caller owns the result. 168 Value* ParseNextToken(); 169 170 // Takes a token that represents the start of a Value ("a structural token" 171 // in RFC terms) and consumes it, returning the result as an object the 172 // caller owns. 173 Value* ParseToken(Token token); 174 175 // Assuming that the parser is currently wound to '{', this parses a JSON 176 // object into a DictionaryValue. 177 Value* ConsumeDictionary(); 178 179 // Assuming that the parser is wound to '[', this parses a JSON list into a 180 // ListValue. 181 Value* ConsumeList(); 182 183 // Calls through ConsumeStringRaw and wraps it in a value. 184 Value* ConsumeString(); 185 186 // Assuming that the parser is wound to a double quote, this parses a string, 187 // decoding any escape sequences and converts UTF-16 to UTF-8. Returns true on 188 // success and Swap()s the result into |out|. Returns false on failure with 189 // error information set. 190 bool ConsumeStringRaw(StringBuilder* out); 191 // Helper function for ConsumeStringRaw() that consumes the next four or 10 192 // bytes (parser is wound to the first character of a HEX sequence, with the 193 // potential for consuming another \uXXXX for a surrogate). Returns true on 194 // success and places the UTF8 code units in |dest_string|, and false on 195 // failure. 196 bool DecodeUTF16(std::string* dest_string); 197 // Helper function for ConsumeStringRaw() that takes a single code point, 198 // decodes it into UTF-8 units, and appends it to the given builder. The 199 // point must be valid. 200 void DecodeUTF8(const int32& point, StringBuilder* dest); 201 202 // Assuming that the parser is wound to the start of a valid JSON number, 203 // this parses and converts it to either an int or double value. 204 Value* ConsumeNumber(); 205 // Helper that reads characters that are ints. Returns true if a number was 206 // read and false on error. 207 bool ReadInt(bool allow_leading_zeros); 208 209 // Consumes the literal values of |true|, |false|, and |null|, assuming the 210 // parser is wound to the first character of any of those. 211 Value* ConsumeLiteral(); 212 213 // Compares two string buffers of a given length. 214 static bool StringsAreEqual(const char* left, const char* right, size_t len); 215 216 // Sets the error information to |code| at the current column, based on 217 // |index_| and |index_last_line_|, with an optional positive/negative 218 // adjustment by |column_adjust|. 219 void ReportError(JSONReader::JsonParseError code, int column_adjust); 220 221 // Given the line and column number of an error, formats one of the error 222 // message contants from json_reader.h for human display. 223 static std::string FormatErrorMessage(int line, int column, 224 const std::string& description); 225 226 // base::JSONParserOptions that control parsing. 227 int options_; 228 229 // Pointer to the start of the input data. 230 const char* start_pos_; 231 232 // Pointer to the current position in the input data. Equivalent to 233 // |start_pos_ + index_|. 234 const char* pos_; 235 236 // Pointer to the last character of the input data. 237 const char* end_pos_; 238 239 // The index in the input stream to which the parser is wound. 240 int index_; 241 242 // The number of times the parser has recursed (current stack depth). 243 int stack_depth_; 244 245 // The line number that the parser is at currently. 246 int line_number_; 247 248 // The last value of |index_| on the previous line. 249 int index_last_line_; 250 251 // Error information. 252 JSONReader::JsonParseError error_code_; 253 int error_line_; 254 int error_column_; 255 256 friend class JSONParserTest; 257 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, NextChar); 258 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeDictionary); 259 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeList); 260 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeString); 261 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeLiterals); 262 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ConsumeNumbers); 263 FRIEND_TEST_ALL_PREFIXES(JSONParserTest, ErrorMessages); 264 265 DISALLOW_COPY_AND_ASSIGN(JSONParser); 266 }; 267 268 } // namespace internal 269 } // namespace base 270 271 #endif // BASE_JSON_JSON_PARSER_H_ 272