1 /* 2 * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $ 3 * 4 * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd. 5 * Michael Clark <michael (at) metaparadigm.com> 6 * 7 * This library is free software; you can redistribute it and/or modify 8 * it under the terms of the MIT license. See COPYING for details. 9 * 10 * 11 * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved. 12 * The copyrights to the contents of this file are licensed under the MIT License 13 * (http://www.opensource.org/licenses/mit-license.php) 14 */ 15 16 #include "config.h" 17 18 #include <math.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <stddef.h> 22 #include <ctype.h> 23 #include <string.h> 24 #include <limits.h> 25 26 #include "debug.h" 27 #include "printbuf.h" 28 #include "arraylist.h" 29 #include "json_inttypes.h" 30 #include "json_object.h" 31 #include "json_tokener.h" 32 #include "json_util.h" 33 34 #ifdef HAVE_LOCALE_H 35 #include <locale.h> 36 #endif /* HAVE_LOCALE_H */ 37 38 #define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x) & 7) + 9) 39 40 #if !HAVE_STRDUP && defined(_MSC_VER) 41 /* MSC has the version as _strdup */ 42 # define strdup _strdup 43 #elif !HAVE_STRDUP 44 # error You do not have strdup on your system. 45 #endif /* HAVE_STRDUP */ 46 47 #if !HAVE_STRNCASECMP && defined(_MSC_VER) 48 /* MSC has the version as _strnicmp */ 49 # define strncasecmp _strnicmp 50 #elif !HAVE_STRNCASECMP 51 # error You do not have strncasecmp on your system. 52 #endif /* HAVE_STRNCASECMP */ 53 54 /* Use C99 NAN by default; if not available, nan("") should work too. */ 55 #ifndef NAN 56 #define NAN nan("") 57 #endif /* !NAN */ 58 59 static const char json_null_str[] = "null"; 60 static const int json_null_str_len = sizeof(json_null_str) - 1; 61 static const char json_inf_str[] = "Infinity"; 62 static const int json_inf_str_len = sizeof(json_inf_str) - 1; 63 static const char json_nan_str[] = "NaN"; 64 static const int json_nan_str_len = sizeof(json_nan_str) - 1; 65 static const char json_true_str[] = "true"; 66 static const int json_true_str_len = sizeof(json_true_str) - 1; 67 static const char json_false_str[] = "false"; 68 static const int json_false_str_len = sizeof(json_false_str) - 1; 69 70 static const char* json_tokener_errors[] = { 71 "success", 72 "continue", 73 "nesting too deep", 74 "unexpected end of data", 75 "unexpected character", 76 "null expected", 77 "boolean expected", 78 "number expected", 79 "array value separator ',' expected", 80 "quoted object property name expected", 81 "object property name separator ':' expected", 82 "object value separator ',' expected", 83 "invalid string sequence", 84 "expected comment", 85 "buffer size overflow" 86 }; 87 88 const char *json_tokener_error_desc(enum json_tokener_error jerr) 89 { 90 int jerr_int = (int)jerr; 91 if (jerr_int < 0 || jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]))) 92 return "Unknown error, invalid json_tokener_error value passed to json_tokener_error_desc()"; 93 return json_tokener_errors[jerr]; 94 } 95 96 enum json_tokener_error json_tokener_get_error(json_tokener *tok) 97 { 98 return tok->err; 99 } 100 101 /* Stuff for decoding unicode sequences */ 102 #define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800) 103 #define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00) 104 #define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000) 105 static unsigned char utf8_replacement_char[3] = { 0xEF, 0xBF, 0xBD }; 106 107 struct json_tokener* json_tokener_new_ex(int depth) 108 { 109 struct json_tokener *tok; 110 111 tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener)); 112 if (!tok) return NULL; 113 tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec)); 114 if (!tok->stack) { 115 free(tok); 116 return NULL; 117 } 118 tok->pb = printbuf_new(); 119 tok->max_depth = depth; 120 json_tokener_reset(tok); 121 return tok; 122 } 123 124 struct json_tokener* json_tokener_new(void) 125 { 126 return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH); 127 } 128 129 void json_tokener_free(struct json_tokener *tok) 130 { 131 json_tokener_reset(tok); 132 if (tok->pb) printbuf_free(tok->pb); 133 if (tok->stack) free(tok->stack); 134 free(tok); 135 } 136 137 static void json_tokener_reset_level(struct json_tokener *tok, int depth) 138 { 139 tok->stack[depth].state = json_tokener_state_eatws; 140 tok->stack[depth].saved_state = json_tokener_state_start; 141 json_object_put(tok->stack[depth].current); 142 tok->stack[depth].current = NULL; 143 free(tok->stack[depth].obj_field_name); 144 tok->stack[depth].obj_field_name = NULL; 145 } 146 147 void json_tokener_reset(struct json_tokener *tok) 148 { 149 int i; 150 if (!tok) 151 return; 152 153 for(i = tok->depth; i >= 0; i--) 154 json_tokener_reset_level(tok, i); 155 tok->depth = 0; 156 tok->err = json_tokener_success; 157 } 158 159 struct json_object* json_tokener_parse(const char *str) 160 { 161 enum json_tokener_error jerr_ignored; 162 struct json_object* obj; 163 obj = json_tokener_parse_verbose(str, &jerr_ignored); 164 return obj; 165 } 166 167 struct json_object* json_tokener_parse_verbose(const char *str, enum json_tokener_error *error) 168 { 169 struct json_tokener* tok; 170 struct json_object* obj; 171 172 tok = json_tokener_new(); 173 if (!tok) 174 return NULL; 175 obj = json_tokener_parse_ex(tok, str, -1); 176 *error = tok->err; 177 if(tok->err != json_tokener_success) { 178 if (obj != NULL) 179 json_object_put(obj); 180 obj = NULL; 181 } 182 183 json_tokener_free(tok); 184 return obj; 185 } 186 187 #define state tok->stack[tok->depth].state 188 #define saved_state tok->stack[tok->depth].saved_state 189 #define current tok->stack[tok->depth].current 190 #define obj_field_name tok->stack[tok->depth].obj_field_name 191 192 /* Optimization: 193 * json_tokener_parse_ex() consumed a lot of CPU in its main loop, 194 * iterating character-by character. A large performance boost is 195 * achieved by using tighter loops to locally handle units such as 196 * comments and strings. Loops that handle an entire token within 197 * their scope also gather entire strings and pass them to 198 * printbuf_memappend() in a single call, rather than calling 199 * printbuf_memappend() one char at a time. 200 * 201 * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is 202 * common to both the main loop and the tighter loops. 203 */ 204 205 /* PEEK_CHAR(dest, tok) macro: 206 * Peeks at the current char and stores it in dest. 207 * Returns 1 on success, sets tok->err and returns 0 if no more chars. 208 * Implicit inputs: str, len vars 209 */ 210 #define PEEK_CHAR(dest, tok) \ 211 (((tok)->char_offset == len) ? \ 212 (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \ 213 (((tok)->err = json_tokener_success), 0) \ 214 : \ 215 (((tok)->err = json_tokener_continue), 0) \ 216 ) : \ 217 (((dest) = *str), 1) \ 218 ) 219 220 /* ADVANCE_CHAR() macro: 221 * Incrementes str & tok->char_offset. 222 * For convenience of existing conditionals, returns the old value of c (0 on eof) 223 * Implicit inputs: c var 224 */ 225 #define ADVANCE_CHAR(str, tok) \ 226 ( ++(str), ((tok)->char_offset)++, c) 227 228 229 /* End optimization macro defs */ 230 231 232 struct json_object* json_tokener_parse_ex(struct json_tokener *tok, 233 const char *str, int len) 234 { 235 struct json_object *obj = NULL; 236 char c = '\1'; 237 #ifdef HAVE_SETLOCALE 238 char *oldlocale=NULL, *tmplocale; 239 240 tmplocale = setlocale(LC_NUMERIC, NULL); 241 if (tmplocale) oldlocale = strdup(tmplocale); 242 setlocale(LC_NUMERIC, "C"); 243 #endif 244 245 tok->char_offset = 0; 246 tok->err = json_tokener_success; 247 248 /* this interface is presently not 64-bit clean due to the int len argument 249 and the internal printbuf interface that takes 32-bit int len arguments 250 so the function limits the maximum string size to INT32_MAX (2GB). 251 If the function is called with len == -1 then strlen is called to check 252 the string length is less than INT32_MAX (2GB) */ 253 if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX)) { 254 tok->err = json_tokener_error_size; 255 return NULL; 256 } 257 258 while (PEEK_CHAR(c, tok)) { 259 260 redo_char: 261 switch(state) { 262 263 case json_tokener_state_eatws: 264 /* Advance until we change state */ 265 while (isspace((int)c)) { 266 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok))) 267 goto out; 268 } 269 if(c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) { 270 printbuf_reset(tok->pb); 271 printbuf_memappend_fast(tok->pb, &c, 1); 272 state = json_tokener_state_comment_start; 273 } else { 274 state = saved_state; 275 goto redo_char; 276 } 277 break; 278 279 case json_tokener_state_start: 280 switch(c) { 281 case '{': 282 state = json_tokener_state_eatws; 283 saved_state = json_tokener_state_object_field_start; 284 current = json_object_new_object(); 285 break; 286 case '[': 287 state = json_tokener_state_eatws; 288 saved_state = json_tokener_state_array; 289 current = json_object_new_array(); 290 break; 291 case 'I': 292 case 'i': 293 state = json_tokener_state_inf; 294 printbuf_reset(tok->pb); 295 tok->st_pos = 0; 296 goto redo_char; 297 case 'N': 298 case 'n': 299 state = json_tokener_state_null; // or NaN 300 printbuf_reset(tok->pb); 301 tok->st_pos = 0; 302 goto redo_char; 303 case '\'': 304 if (tok->flags & JSON_TOKENER_STRICT) { 305 /* in STRICT mode only double-quote are allowed */ 306 tok->err = json_tokener_error_parse_unexpected; 307 goto out; 308 } 309 case '"': 310 state = json_tokener_state_string; 311 printbuf_reset(tok->pb); 312 tok->quote_char = c; 313 break; 314 case 'T': 315 case 't': 316 case 'F': 317 case 'f': 318 state = json_tokener_state_boolean; 319 printbuf_reset(tok->pb); 320 tok->st_pos = 0; 321 goto redo_char; 322 #if defined(__GNUC__) 323 case '0' ... '9': 324 #else 325 case '0': 326 case '1': 327 case '2': 328 case '3': 329 case '4': 330 case '5': 331 case '6': 332 case '7': 333 case '8': 334 case '9': 335 #endif 336 case '-': 337 state = json_tokener_state_number; 338 printbuf_reset(tok->pb); 339 tok->is_double = 0; 340 goto redo_char; 341 default: 342 tok->err = json_tokener_error_parse_unexpected; 343 goto out; 344 } 345 break; 346 347 case json_tokener_state_finish: 348 if(tok->depth == 0) goto out; 349 obj = json_object_get(current); 350 json_tokener_reset_level(tok, tok->depth); 351 tok->depth--; 352 goto redo_char; 353 354 case json_tokener_state_inf: /* aka starts with 'i' */ 355 { 356 int size_inf; 357 int is_negative = 0; 358 359 printbuf_memappend_fast(tok->pb, &c, 1); 360 size_inf = json_min(tok->st_pos+1, json_inf_str_len); 361 char *infbuf = tok->pb->buf; 362 if (*infbuf == '-') 363 { 364 infbuf++; 365 is_negative = 1; 366 } 367 if ((!(tok->flags & JSON_TOKENER_STRICT) && 368 strncasecmp(json_inf_str, infbuf, size_inf) == 0) || 369 (strncmp(json_inf_str, infbuf, size_inf) == 0) 370 ) 371 { 372 if (tok->st_pos == json_inf_str_len) 373 { 374 current = json_object_new_double(is_negative ? -INFINITY : INFINITY); 375 saved_state = json_tokener_state_finish; 376 state = json_tokener_state_eatws; 377 goto redo_char; 378 } 379 } else { 380 tok->err = json_tokener_error_parse_unexpected; 381 goto out; 382 } 383 tok->st_pos++; 384 } 385 break; 386 case json_tokener_state_null: /* aka starts with 'n' */ 387 { 388 int size; 389 int size_nan; 390 printbuf_memappend_fast(tok->pb, &c, 1); 391 size = json_min(tok->st_pos+1, json_null_str_len); 392 size_nan = json_min(tok->st_pos+1, json_nan_str_len); 393 if((!(tok->flags & JSON_TOKENER_STRICT) && 394 strncasecmp(json_null_str, tok->pb->buf, size) == 0) 395 || (strncmp(json_null_str, tok->pb->buf, size) == 0) 396 ) { 397 if (tok->st_pos == json_null_str_len) { 398 current = NULL; 399 saved_state = json_tokener_state_finish; 400 state = json_tokener_state_eatws; 401 goto redo_char; 402 } 403 } 404 else if ((!(tok->flags & JSON_TOKENER_STRICT) && 405 strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) || 406 (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0) 407 ) 408 { 409 if (tok->st_pos == json_nan_str_len) 410 { 411 current = json_object_new_double(NAN); 412 saved_state = json_tokener_state_finish; 413 state = json_tokener_state_eatws; 414 goto redo_char; 415 } 416 } else { 417 tok->err = json_tokener_error_parse_null; 418 goto out; 419 } 420 tok->st_pos++; 421 } 422 break; 423 424 case json_tokener_state_comment_start: 425 if(c == '*') { 426 state = json_tokener_state_comment; 427 } else if(c == '/') { 428 state = json_tokener_state_comment_eol; 429 } else { 430 tok->err = json_tokener_error_parse_comment; 431 goto out; 432 } 433 printbuf_memappend_fast(tok->pb, &c, 1); 434 break; 435 436 case json_tokener_state_comment: 437 { 438 /* Advance until we change state */ 439 const char *case_start = str; 440 while(c != '*') { 441 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 442 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 443 goto out; 444 } 445 } 446 printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start); 447 state = json_tokener_state_comment_end; 448 } 449 break; 450 451 case json_tokener_state_comment_eol: 452 { 453 /* Advance until we change state */ 454 const char *case_start = str; 455 while(c != '\n') { 456 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 457 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 458 goto out; 459 } 460 } 461 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 462 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); 463 state = json_tokener_state_eatws; 464 } 465 break; 466 467 case json_tokener_state_comment_end: 468 printbuf_memappend_fast(tok->pb, &c, 1); 469 if(c == '/') { 470 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); 471 state = json_tokener_state_eatws; 472 } else { 473 state = json_tokener_state_comment; 474 } 475 break; 476 477 case json_tokener_state_string: 478 { 479 /* Advance until we change state */ 480 const char *case_start = str; 481 while(1) { 482 if(c == tok->quote_char) { 483 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 484 current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos); 485 saved_state = json_tokener_state_finish; 486 state = json_tokener_state_eatws; 487 break; 488 } else if(c == '\\') { 489 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 490 saved_state = json_tokener_state_string; 491 state = json_tokener_state_string_escape; 492 break; 493 } 494 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 495 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 496 goto out; 497 } 498 } 499 } 500 break; 501 502 case json_tokener_state_string_escape: 503 switch(c) { 504 case '"': 505 case '\\': 506 case '/': 507 printbuf_memappend_fast(tok->pb, &c, 1); 508 state = saved_state; 509 break; 510 case 'b': 511 case 'n': 512 case 'r': 513 case 't': 514 case 'f': 515 if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1); 516 else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1); 517 else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1); 518 else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1); 519 else if(c == 'f') printbuf_memappend_fast(tok->pb, "\f", 1); 520 state = saved_state; 521 break; 522 case 'u': 523 tok->ucs_char = 0; 524 tok->st_pos = 0; 525 state = json_tokener_state_escape_unicode; 526 break; 527 default: 528 tok->err = json_tokener_error_parse_string; 529 goto out; 530 } 531 break; 532 533 case json_tokener_state_escape_unicode: 534 { 535 unsigned int got_hi_surrogate = 0; 536 537 /* Handle a 4-byte sequence, or two sequences if a surrogate pair */ 538 while(1) { 539 if(strchr(json_hex_chars, c)) { 540 tok->ucs_char += ((unsigned int)jt_hexdigit(c) << ((3-tok->st_pos++)*4)); 541 if(tok->st_pos == 4) { 542 unsigned char unescaped_utf[4]; 543 544 if (got_hi_surrogate) { 545 if (IS_LOW_SURROGATE(tok->ucs_char)) { 546 /* Recalculate the ucs_char, then fall thru to process normally */ 547 tok->ucs_char = DECODE_SURROGATE_PAIR(got_hi_surrogate, tok->ucs_char); 548 } else { 549 /* Hi surrogate was not followed by a low surrogate */ 550 /* Replace the hi and process the rest normally */ 551 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 552 } 553 got_hi_surrogate = 0; 554 } 555 556 if (tok->ucs_char < 0x80) { 557 unescaped_utf[0] = tok->ucs_char; 558 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 1); 559 } else if (tok->ucs_char < 0x800) { 560 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); 561 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); 562 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 2); 563 } else if (IS_HIGH_SURROGATE(tok->ucs_char)) { 564 /* Got a high surrogate. Remember it and look for the 565 * the beginning of another sequence, which should be the 566 * low surrogate. 567 */ 568 got_hi_surrogate = tok->ucs_char; 569 /* Not at end, and the next two chars should be "\u" */ 570 if ((tok->char_offset+1 != len) && 571 (tok->char_offset+2 != len) && 572 (str[1] == '\\') && 573 (str[2] == 'u')) 574 { 575 /* Advance through the 16 bit surrogate, and move on to the 576 * next sequence. The next step is to process the following 577 * characters. 578 */ 579 if( !ADVANCE_CHAR(str, tok) || !ADVANCE_CHAR(str, tok) ) { 580 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 581 } 582 /* Advance to the first char of the next sequence and 583 * continue processing with the next sequence. 584 */ 585 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 586 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 587 goto out; 588 } 589 tok->ucs_char = 0; 590 tok->st_pos = 0; 591 continue; /* other json_tokener_state_escape_unicode */ 592 } else { 593 /* Got a high surrogate without another sequence following 594 * it. Put a replacement char in for the hi surrogate 595 * and pretend we finished. 596 */ 597 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 598 } 599 } else if (IS_LOW_SURROGATE(tok->ucs_char)) { 600 /* Got a low surrogate not preceded by a high */ 601 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 602 } else if (tok->ucs_char < 0x10000) { 603 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); 604 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); 605 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f); 606 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 3); 607 } else if (tok->ucs_char < 0x110000) { 608 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07); 609 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); 610 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); 611 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); 612 printbuf_memappend_fast(tok->pb, (char*)unescaped_utf, 4); 613 } else { 614 /* Don't know what we got--insert the replacement char */ 615 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 616 } 617 state = saved_state; 618 break; 619 } 620 } else { 621 tok->err = json_tokener_error_parse_string; 622 goto out; 623 } 624 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 625 if (got_hi_surrogate) /* Clean up any pending chars */ 626 printbuf_memappend_fast(tok->pb, (char*)utf8_replacement_char, 3); 627 goto out; 628 } 629 } 630 } 631 break; 632 633 case json_tokener_state_boolean: 634 { 635 int size1, size2; 636 printbuf_memappend_fast(tok->pb, &c, 1); 637 size1 = json_min(tok->st_pos+1, json_true_str_len); 638 size2 = json_min(tok->st_pos+1, json_false_str_len); 639 if((!(tok->flags & JSON_TOKENER_STRICT) && 640 strncasecmp(json_true_str, tok->pb->buf, size1) == 0) 641 || (strncmp(json_true_str, tok->pb->buf, size1) == 0) 642 ) { 643 if(tok->st_pos == json_true_str_len) { 644 current = json_object_new_boolean(1); 645 saved_state = json_tokener_state_finish; 646 state = json_tokener_state_eatws; 647 goto redo_char; 648 } 649 } else if((!(tok->flags & JSON_TOKENER_STRICT) && 650 strncasecmp(json_false_str, tok->pb->buf, size2) == 0) 651 || (strncmp(json_false_str, tok->pb->buf, size2) == 0)) { 652 if(tok->st_pos == json_false_str_len) { 653 current = json_object_new_boolean(0); 654 saved_state = json_tokener_state_finish; 655 state = json_tokener_state_eatws; 656 goto redo_char; 657 } 658 } else { 659 tok->err = json_tokener_error_parse_boolean; 660 goto out; 661 } 662 tok->st_pos++; 663 } 664 break; 665 666 case json_tokener_state_number: 667 { 668 /* Advance until we change state */ 669 const char *case_start = str; 670 int case_len=0; 671 while(c && strchr(json_number_chars, c)) { 672 ++case_len; 673 if(c == '.' || c == 'e' || c == 'E') 674 tok->is_double = 1; 675 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 676 printbuf_memappend_fast(tok->pb, case_start, case_len); 677 goto out; 678 } 679 } 680 if (case_len>0) 681 printbuf_memappend_fast(tok->pb, case_start, case_len); 682 683 // Check for -Infinity 684 if (tok->pb->buf[0] == '-' && case_len == 1 && 685 (c == 'i' || c == 'I')) 686 { 687 state = json_tokener_state_inf; 688 goto redo_char; 689 } 690 } 691 { 692 int64_t num64; 693 double numd; 694 if (!tok->is_double && json_parse_int64(tok->pb->buf, &num64) == 0) { 695 if (num64 && tok->pb->buf[0]=='0' && (tok->flags & JSON_TOKENER_STRICT)) { 696 /* in strict mode, number must not start with 0 */ 697 tok->err = json_tokener_error_parse_number; 698 goto out; 699 } 700 current = json_object_new_int64(num64); 701 } 702 else if(tok->is_double && json_parse_double(tok->pb->buf, &numd) == 0) 703 { 704 current = json_object_new_double_s(numd, tok->pb->buf); 705 } else { 706 tok->err = json_tokener_error_parse_number; 707 goto out; 708 } 709 saved_state = json_tokener_state_finish; 710 state = json_tokener_state_eatws; 711 goto redo_char; 712 } 713 break; 714 715 case json_tokener_state_array_after_sep: 716 case json_tokener_state_array: 717 if(c == ']') { 718 if (state == json_tokener_state_array_after_sep && 719 (tok->flags & JSON_TOKENER_STRICT)) 720 { 721 tok->err = json_tokener_error_parse_unexpected; 722 goto out; 723 } 724 saved_state = json_tokener_state_finish; 725 state = json_tokener_state_eatws; 726 } else { 727 if(tok->depth >= tok->max_depth-1) { 728 tok->err = json_tokener_error_depth; 729 goto out; 730 } 731 state = json_tokener_state_array_add; 732 tok->depth++; 733 json_tokener_reset_level(tok, tok->depth); 734 goto redo_char; 735 } 736 break; 737 738 case json_tokener_state_array_add: 739 json_object_array_add(current, obj); 740 saved_state = json_tokener_state_array_sep; 741 state = json_tokener_state_eatws; 742 goto redo_char; 743 744 case json_tokener_state_array_sep: 745 if(c == ']') { 746 saved_state = json_tokener_state_finish; 747 state = json_tokener_state_eatws; 748 } else if(c == ',') { 749 saved_state = json_tokener_state_array_after_sep; 750 state = json_tokener_state_eatws; 751 } else { 752 tok->err = json_tokener_error_parse_array; 753 goto out; 754 } 755 break; 756 757 case json_tokener_state_object_field_start: 758 case json_tokener_state_object_field_start_after_sep: 759 if(c == '}') { 760 if (state == json_tokener_state_object_field_start_after_sep && 761 (tok->flags & JSON_TOKENER_STRICT)) 762 { 763 tok->err = json_tokener_error_parse_unexpected; 764 goto out; 765 } 766 saved_state = json_tokener_state_finish; 767 state = json_tokener_state_eatws; 768 } else if (c == '"' || c == '\'') { 769 tok->quote_char = c; 770 printbuf_reset(tok->pb); 771 state = json_tokener_state_object_field; 772 } else { 773 tok->err = json_tokener_error_parse_object_key_name; 774 goto out; 775 } 776 break; 777 778 case json_tokener_state_object_field: 779 { 780 /* Advance until we change state */ 781 const char *case_start = str; 782 while(1) { 783 if(c == tok->quote_char) { 784 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 785 obj_field_name = strdup(tok->pb->buf); 786 saved_state = json_tokener_state_object_field_end; 787 state = json_tokener_state_eatws; 788 break; 789 } else if(c == '\\') { 790 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 791 saved_state = json_tokener_state_object_field; 792 state = json_tokener_state_string_escape; 793 break; 794 } 795 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { 796 printbuf_memappend_fast(tok->pb, case_start, str-case_start); 797 goto out; 798 } 799 } 800 } 801 break; 802 803 case json_tokener_state_object_field_end: 804 if(c == ':') { 805 saved_state = json_tokener_state_object_value; 806 state = json_tokener_state_eatws; 807 } else { 808 tok->err = json_tokener_error_parse_object_key_sep; 809 goto out; 810 } 811 break; 812 813 case json_tokener_state_object_value: 814 if(tok->depth >= tok->max_depth-1) { 815 tok->err = json_tokener_error_depth; 816 goto out; 817 } 818 state = json_tokener_state_object_value_add; 819 tok->depth++; 820 json_tokener_reset_level(tok, tok->depth); 821 goto redo_char; 822 823 case json_tokener_state_object_value_add: 824 json_object_object_add(current, obj_field_name, obj); 825 free(obj_field_name); 826 obj_field_name = NULL; 827 saved_state = json_tokener_state_object_sep; 828 state = json_tokener_state_eatws; 829 goto redo_char; 830 831 case json_tokener_state_object_sep: 832 if(c == '}') { 833 saved_state = json_tokener_state_finish; 834 state = json_tokener_state_eatws; 835 } else if(c == ',') { 836 saved_state = json_tokener_state_object_field_start_after_sep; 837 state = json_tokener_state_eatws; 838 } else { 839 tok->err = json_tokener_error_parse_object_value_sep; 840 goto out; 841 } 842 break; 843 844 } 845 if (!ADVANCE_CHAR(str, tok)) 846 goto out; 847 } /* while(POP_CHAR) */ 848 849 out: 850 if (c && 851 (state == json_tokener_state_finish) && 852 (tok->depth == 0) && 853 (tok->flags & JSON_TOKENER_STRICT)) { 854 /* unexpected char after JSON data */ 855 tok->err = json_tokener_error_parse_unexpected; 856 } 857 if (!c) { /* We hit an eof char (0) */ 858 if(state != json_tokener_state_finish && 859 saved_state != json_tokener_state_finish) 860 tok->err = json_tokener_error_parse_eof; 861 } 862 863 #ifdef HAVE_SETLOCALE 864 setlocale(LC_NUMERIC, oldlocale); 865 if (oldlocale) free(oldlocale); 866 #endif 867 868 if (tok->err == json_tokener_success) 869 { 870 json_object *ret = json_object_get(current); 871 int ii; 872 873 /* Partially reset, so we parse additional objects on subsequent calls. */ 874 for(ii = tok->depth; ii >= 0; ii--) 875 json_tokener_reset_level(tok, ii); 876 return ret; 877 } 878 879 MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", 880 json_tokener_errors[tok->err], tok->char_offset); 881 return NULL; 882 } 883 884 void json_tokener_set_flags(struct json_tokener *tok, int flags) 885 { 886 tok->flags = flags; 887 } 888