1 /* 2 * Copyright (C) 2010 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include "config.h" 32 #include "core/inspector/JSONParser.h" 33 34 #include "platform/JSONValues.h" 35 #include "wtf/text/StringBuilder.h" 36 37 namespace WebCore { 38 39 namespace { 40 41 const int stackLimit = 1000; 42 43 enum Token { 44 ObjectBegin, 45 ObjectEnd, 46 ArrayBegin, 47 ArrayEnd, 48 StringLiteral, 49 Number, 50 BoolTrue, 51 BoolFalse, 52 NullToken, 53 ListSeparator, 54 ObjectPairSeparator, 55 InvalidToken, 56 }; 57 58 const char* const nullString = "null"; 59 const char* const trueString = "true"; 60 const char* const falseString = "false"; 61 62 template<typename CharType> 63 bool parseConstToken(const CharType* start, const CharType* end, const CharType** tokenEnd, const char* token) 64 { 65 while (start < end && *token != '\0' && *start++ == *token++) { } 66 if (*token != '\0') 67 return false; 68 *tokenEnd = start; 69 return true; 70 } 71 72 template<typename CharType> 73 bool readInt(const CharType* start, const CharType* end, const CharType** tokenEnd, bool canHaveLeadingZeros) 74 { 75 if (start == end) 76 return false; 77 bool haveLeadingZero = '0' == *start; 78 int length = 0; 79 while (start < end && '0' <= *start && *start <= '9') { 80 ++start; 81 ++length; 82 } 83 if (!length) 84 return false; 85 if (!canHaveLeadingZeros && length > 1 && haveLeadingZero) 86 return false; 87 *tokenEnd = start; 88 return true; 89 } 90 91 template<typename CharType> 92 bool parseNumberToken(const CharType* start, const CharType* end, const CharType** tokenEnd) 93 { 94 // We just grab the number here. We validate the size in DecodeNumber. 95 // According to RFC4627, a valid number is: [minus] int [frac] [exp] 96 if (start == end) 97 return false; 98 CharType c = *start; 99 if ('-' == c) 100 ++start; 101 102 if (!readInt(start, end, &start, false)) 103 return false; 104 if (start == end) { 105 *tokenEnd = start; 106 return true; 107 } 108 109 // Optional fraction part 110 c = *start; 111 if ('.' == c) { 112 ++start; 113 if (!readInt(start, end, &start, true)) 114 return false; 115 if (start == end) { 116 *tokenEnd = start; 117 return true; 118 } 119 c = *start; 120 } 121 122 // Optional exponent part 123 if ('e' == c || 'E' == c) { 124 ++start; 125 if (start == end) 126 return false; 127 c = *start; 128 if ('-' == c || '+' == c) { 129 ++start; 130 if (start == end) 131 return false; 132 } 133 if (!readInt(start, end, &start, true)) 134 return false; 135 } 136 137 *tokenEnd = start; 138 return true; 139 } 140 141 template<typename CharType> 142 bool readHexDigits(const CharType* start, const CharType* end, const CharType** tokenEnd, int digits) 143 { 144 if (end - start < digits) 145 return false; 146 for (int i = 0; i < digits; ++i) { 147 CharType c = *start++; 148 if (!(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))) 149 return false; 150 } 151 *tokenEnd = start; 152 return true; 153 } 154 155 template<typename CharType> 156 bool parseStringToken(const CharType* start, const CharType* end, const CharType** tokenEnd) 157 { 158 while (start < end) { 159 CharType c = *start++; 160 if ('\\' == c) { 161 c = *start++; 162 // Make sure the escaped char is valid. 163 switch (c) { 164 case 'x': 165 if (!readHexDigits(start, end, &start, 2)) 166 return false; 167 break; 168 case 'u': 169 if (!readHexDigits(start, end, &start, 4)) 170 return false; 171 break; 172 case '\\': 173 case '/': 174 case 'b': 175 case 'f': 176 case 'n': 177 case 'r': 178 case 't': 179 case 'v': 180 case '"': 181 break; 182 default: 183 return false; 184 } 185 } else if ('"' == c) { 186 *tokenEnd = start; 187 return true; 188 } 189 } 190 return false; 191 } 192 193 template<typename CharType> 194 Token parseToken(const CharType* start, const CharType* end, const CharType** tokenStart, const CharType** tokenEnd) 195 { 196 while (start < end && isSpaceOrNewline(*start)) 197 ++start; 198 199 if (start == end) 200 return InvalidToken; 201 202 *tokenStart = start; 203 204 switch (*start) { 205 case 'n': 206 if (parseConstToken(start, end, tokenEnd, nullString)) 207 return NullToken; 208 break; 209 case 't': 210 if (parseConstToken(start, end, tokenEnd, trueString)) 211 return BoolTrue; 212 break; 213 case 'f': 214 if (parseConstToken(start, end, tokenEnd, falseString)) 215 return BoolFalse; 216 break; 217 case '[': 218 *tokenEnd = start + 1; 219 return ArrayBegin; 220 case ']': 221 *tokenEnd = start + 1; 222 return ArrayEnd; 223 case ',': 224 *tokenEnd = start + 1; 225 return ListSeparator; 226 case '{': 227 *tokenEnd = start + 1; 228 return ObjectBegin; 229 case '}': 230 *tokenEnd = start + 1; 231 return ObjectEnd; 232 case ':': 233 *tokenEnd = start + 1; 234 return ObjectPairSeparator; 235 case '0': 236 case '1': 237 case '2': 238 case '3': 239 case '4': 240 case '5': 241 case '6': 242 case '7': 243 case '8': 244 case '9': 245 case '-': 246 if (parseNumberToken(start, end, tokenEnd)) 247 return Number; 248 break; 249 case '"': 250 if (parseStringToken(start + 1, end, tokenEnd)) 251 return StringLiteral; 252 break; 253 } 254 return InvalidToken; 255 } 256 257 template<typename CharType> 258 inline int hexToInt(CharType c) 259 { 260 if ('0' <= c && c <= '9') 261 return c - '0'; 262 if ('A' <= c && c <= 'F') 263 return c - 'A' + 10; 264 if ('a' <= c && c <= 'f') 265 return c - 'a' + 10; 266 ASSERT_NOT_REACHED(); 267 return 0; 268 } 269 270 template<typename CharType> 271 bool decodeString(const CharType* start, const CharType* end, StringBuilder* output) 272 { 273 while (start < end) { 274 UChar c = *start++; 275 if ('\\' != c) { 276 output->append(c); 277 continue; 278 } 279 c = *start++; 280 switch (c) { 281 case '"': 282 case '/': 283 case '\\': 284 break; 285 case 'b': 286 c = '\b'; 287 break; 288 case 'f': 289 c = '\f'; 290 break; 291 case 'n': 292 c = '\n'; 293 break; 294 case 'r': 295 c = '\r'; 296 break; 297 case 't': 298 c = '\t'; 299 break; 300 case 'v': 301 c = '\v'; 302 break; 303 case 'x': 304 c = (hexToInt(*start) << 4) + 305 hexToInt(*(start + 1)); 306 start += 2; 307 break; 308 case 'u': 309 c = (hexToInt(*start) << 12) + 310 (hexToInt(*(start + 1)) << 8) + 311 (hexToInt(*(start + 2)) << 4) + 312 hexToInt(*(start + 3)); 313 start += 4; 314 break; 315 default: 316 return false; 317 } 318 output->append(c); 319 } 320 return true; 321 } 322 323 template<typename CharType> 324 bool decodeString(const CharType* start, const CharType* end, String* output) 325 { 326 if (start == end) { 327 *output = ""; 328 return true; 329 } 330 if (start > end) 331 return false; 332 StringBuilder buffer; 333 buffer.reserveCapacity(end - start); 334 if (!decodeString(start, end, &buffer)) 335 return false; 336 *output = buffer.toString(); 337 return true; 338 } 339 340 template<typename CharType> 341 PassRefPtr<JSONValue> buildValue(const CharType* start, const CharType* end, const CharType** valueTokenEnd, int depth) 342 { 343 if (depth > stackLimit) 344 return nullptr; 345 346 RefPtr<JSONValue> result; 347 const CharType* tokenStart; 348 const CharType* tokenEnd; 349 Token token = parseToken(start, end, &tokenStart, &tokenEnd); 350 switch (token) { 351 case InvalidToken: 352 return nullptr; 353 case NullToken: 354 result = JSONValue::null(); 355 break; 356 case BoolTrue: 357 result = JSONBasicValue::create(true); 358 break; 359 case BoolFalse: 360 result = JSONBasicValue::create(false); 361 break; 362 case Number: { 363 bool ok; 364 double value = charactersToDouble(tokenStart, tokenEnd - tokenStart, &ok); 365 if (!ok) 366 return nullptr; 367 result = JSONBasicValue::create(value); 368 break; 369 } 370 case StringLiteral: { 371 String value; 372 bool ok = decodeString(tokenStart + 1, tokenEnd - 1, &value); 373 if (!ok) 374 return nullptr; 375 result = JSONString::create(value); 376 break; 377 } 378 case ArrayBegin: { 379 RefPtr<JSONArray> array = JSONArray::create(); 380 start = tokenEnd; 381 token = parseToken(start, end, &tokenStart, &tokenEnd); 382 while (token != ArrayEnd) { 383 RefPtr<JSONValue> arrayNode = buildValue(start, end, &tokenEnd, depth + 1); 384 if (!arrayNode) 385 return nullptr; 386 array->pushValue(arrayNode); 387 388 // After a list value, we expect a comma or the end of the list. 389 start = tokenEnd; 390 token = parseToken(start, end, &tokenStart, &tokenEnd); 391 if (token == ListSeparator) { 392 start = tokenEnd; 393 token = parseToken(start, end, &tokenStart, &tokenEnd); 394 if (token == ArrayEnd) 395 return nullptr; 396 } else if (token != ArrayEnd) { 397 // Unexpected value after list value. Bail out. 398 return nullptr; 399 } 400 } 401 if (token != ArrayEnd) 402 return nullptr; 403 result = array.release(); 404 break; 405 } 406 case ObjectBegin: { 407 RefPtr<JSONObject> object = JSONObject::create(); 408 start = tokenEnd; 409 token = parseToken(start, end, &tokenStart, &tokenEnd); 410 while (token != ObjectEnd) { 411 if (token != StringLiteral) 412 return nullptr; 413 String key; 414 if (!decodeString(tokenStart + 1, tokenEnd - 1, &key)) 415 return nullptr; 416 start = tokenEnd; 417 418 token = parseToken(start, end, &tokenStart, &tokenEnd); 419 if (token != ObjectPairSeparator) 420 return nullptr; 421 start = tokenEnd; 422 423 RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, depth + 1); 424 if (!value) 425 return nullptr; 426 object->setValue(key, value); 427 start = tokenEnd; 428 429 // After a key/value pair, we expect a comma or the end of the 430 // object. 431 token = parseToken(start, end, &tokenStart, &tokenEnd); 432 if (token == ListSeparator) { 433 start = tokenEnd; 434 token = parseToken(start, end, &tokenStart, &tokenEnd); 435 if (token == ObjectEnd) 436 return nullptr; 437 } else if (token != ObjectEnd) { 438 // Unexpected value after last object value. Bail out. 439 return nullptr; 440 } 441 } 442 if (token != ObjectEnd) 443 return nullptr; 444 result = object.release(); 445 break; 446 } 447 448 default: 449 // We got a token that's not a value. 450 return nullptr; 451 } 452 *valueTokenEnd = tokenEnd; 453 return result.release(); 454 } 455 456 template<typename CharType> 457 PassRefPtr<JSONValue> parseJSONInternal(const CharType* start, unsigned length) 458 { 459 const CharType* end = start + length; 460 const CharType *tokenEnd; 461 RefPtr<JSONValue> value = buildValue(start, end, &tokenEnd, 0); 462 if (!value || tokenEnd != end) 463 return nullptr; 464 return value.release(); 465 } 466 467 } // anonymous namespace 468 469 PassRefPtr<JSONValue> parseJSON(const String& json) 470 { 471 if (json.isEmpty()) 472 return nullptr; 473 if (json.is8Bit()) 474 return parseJSONInternal(json.characters8(), json.length()); 475 return parseJSONInternal(json.characters16(), json.length()); 476 } 477 478 } // namespace WebCore 479