1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori (at) us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qapi/qmp/qstring.h" 18 #include "qapi/qmp/qint.h" 19 #include "qapi/qmp/qdict.h" 20 #include "qapi/qmp/qlist.h" 21 #include "qapi/qmp/qfloat.h" 22 #include "qapi/qmp/qbool.h" 23 #include "qapi/qmp/json-parser.h" 24 #include "qapi/qmp/json-lexer.h" 25 #include "qapi/qmp/qerror.h" 26 27 typedef struct JSONParserContext 28 { 29 Error *err; 30 struct { 31 QObject **buf; 32 size_t pos; 33 size_t count; 34 } tokens; 35 } JSONParserContext; 36 37 #define BUG_ON(cond) assert(!(cond)) 38 39 /** 40 * TODO 41 * 42 * 0) make errors meaningful again 43 * 1) add geometry information to tokens 44 * 3) should we return a parsed size? 45 * 4) deal with premature EOI 46 */ 47 48 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap); 49 50 /** 51 * Token manipulators 52 * 53 * tokens are dictionaries that contain a type, a string value, and geometry information 54 * about a token identified by the lexer. These are routines that make working with 55 * these objects a bit easier. 56 */ 57 static const char *token_get_value(QObject *obj) 58 { 59 return qdict_get_str(qobject_to_qdict(obj), "token"); 60 } 61 62 static JSONTokenType token_get_type(QObject *obj) 63 { 64 return qdict_get_int(qobject_to_qdict(obj), "type"); 65 } 66 67 static int token_is_operator(QObject *obj, char op) 68 { 69 const char *val; 70 71 if (token_get_type(obj) != JSON_OPERATOR) { 72 return 0; 73 } 74 75 val = token_get_value(obj); 76 77 return (val[0] == op) && (val[1] == 0); 78 } 79 80 static int token_is_keyword(QObject *obj, const char *value) 81 { 82 if (token_get_type(obj) != JSON_KEYWORD) { 83 return 0; 84 } 85 86 return strcmp(token_get_value(obj), value) == 0; 87 } 88 89 static int token_is_escape(QObject *obj, const char *value) 90 { 91 if (token_get_type(obj) != JSON_ESCAPE) { 92 return 0; 93 } 94 95 return (strcmp(token_get_value(obj), value) == 0); 96 } 97 98 /** 99 * Error handler 100 */ 101 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 102 QObject *token, const char *msg, ...) 103 { 104 va_list ap; 105 char message[1024]; 106 va_start(ap, msg); 107 vsnprintf(message, sizeof(message), msg, ap); 108 va_end(ap); 109 if (ctxt->err) { 110 error_free(ctxt->err); 111 ctxt->err = NULL; 112 } 113 error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, message); 114 } 115 116 /** 117 * String helpers 118 * 119 * These helpers are used to unescape strings. 120 */ 121 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 122 { 123 if (wchar <= 0x007F) { 124 BUG_ON(buffer_length < 2); 125 126 buffer[0] = wchar & 0x7F; 127 buffer[1] = 0; 128 } else if (wchar <= 0x07FF) { 129 BUG_ON(buffer_length < 3); 130 131 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 132 buffer[1] = 0x80 | (wchar & 0x3F); 133 buffer[2] = 0; 134 } else { 135 BUG_ON(buffer_length < 4); 136 137 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 138 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 139 buffer[2] = 0x80 | (wchar & 0x3F); 140 buffer[3] = 0; 141 } 142 } 143 144 static int hex2decimal(char ch) 145 { 146 if (ch >= '0' && ch <= '9') { 147 return (ch - '0'); 148 } else if (ch >= 'a' && ch <= 'f') { 149 return 10 + (ch - 'a'); 150 } else if (ch >= 'A' && ch <= 'F') { 151 return 10 + (ch - 'A'); 152 } 153 154 return -1; 155 } 156 157 /** 158 * parse_string(): Parse a json string and return a QObject 159 * 160 * string 161 * "" 162 * " chars " 163 * chars 164 * char 165 * char chars 166 * char 167 * any-Unicode-character- 168 * except-"-or-\-or- 169 * control-character 170 * \" 171 * \\ 172 * \/ 173 * \b 174 * \f 175 * \n 176 * \r 177 * \t 178 * \u four-hex-digits 179 */ 180 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 181 { 182 const char *ptr = token_get_value(token); 183 QString *str; 184 int double_quote = 1; 185 186 if (*ptr == '"') { 187 double_quote = 1; 188 } else { 189 double_quote = 0; 190 } 191 ptr++; 192 193 str = qstring_new(); 194 while (*ptr && 195 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 196 if (*ptr == '\\') { 197 ptr++; 198 199 switch (*ptr) { 200 case '"': 201 qstring_append(str, "\""); 202 ptr++; 203 break; 204 case '\'': 205 qstring_append(str, "'"); 206 ptr++; 207 break; 208 case '\\': 209 qstring_append(str, "\\"); 210 ptr++; 211 break; 212 case '/': 213 qstring_append(str, "/"); 214 ptr++; 215 break; 216 case 'b': 217 qstring_append(str, "\b"); 218 ptr++; 219 break; 220 case 'f': 221 qstring_append(str, "\f"); 222 ptr++; 223 break; 224 case 'n': 225 qstring_append(str, "\n"); 226 ptr++; 227 break; 228 case 'r': 229 qstring_append(str, "\r"); 230 ptr++; 231 break; 232 case 't': 233 qstring_append(str, "\t"); 234 ptr++; 235 break; 236 case 'u': { 237 uint16_t unicode_char = 0; 238 char utf8_char[4]; 239 int i = 0; 240 241 ptr++; 242 243 for (i = 0; i < 4; i++) { 244 if (qemu_isxdigit(*ptr)) { 245 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 246 } else { 247 parse_error(ctxt, token, 248 "invalid hex escape sequence in string"); 249 goto out; 250 } 251 ptr++; 252 } 253 254 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 255 qstring_append(str, utf8_char); 256 } break; 257 default: 258 parse_error(ctxt, token, "invalid escape sequence in string"); 259 goto out; 260 } 261 } else { 262 char dummy[2]; 263 264 dummy[0] = *ptr++; 265 dummy[1] = 0; 266 267 qstring_append(str, dummy); 268 } 269 } 270 271 return str; 272 273 out: 274 QDECREF(str); 275 return NULL; 276 } 277 278 static QObject *parser_context_pop_token(JSONParserContext *ctxt) 279 { 280 QObject *token; 281 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 282 token = ctxt->tokens.buf[ctxt->tokens.pos]; 283 ctxt->tokens.pos++; 284 return token; 285 } 286 287 /* Note: parser_context_{peek|pop}_token do not increment the 288 * token object's refcount. In both cases the references will continue 289 * to be tracked and cleaned up in parser_context_free(), so do not 290 * attempt to free the token object. 291 */ 292 static QObject *parser_context_peek_token(JSONParserContext *ctxt) 293 { 294 QObject *token; 295 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 296 token = ctxt->tokens.buf[ctxt->tokens.pos]; 297 return token; 298 } 299 300 static JSONParserContext parser_context_save(JSONParserContext *ctxt) 301 { 302 JSONParserContext saved_ctxt = {0}; 303 saved_ctxt.tokens.pos = ctxt->tokens.pos; 304 saved_ctxt.tokens.count = ctxt->tokens.count; 305 saved_ctxt.tokens.buf = ctxt->tokens.buf; 306 return saved_ctxt; 307 } 308 309 static void parser_context_restore(JSONParserContext *ctxt, 310 JSONParserContext saved_ctxt) 311 { 312 ctxt->tokens.pos = saved_ctxt.tokens.pos; 313 ctxt->tokens.count = saved_ctxt.tokens.count; 314 ctxt->tokens.buf = saved_ctxt.tokens.buf; 315 } 316 317 static void tokens_append_from_iter(QObject *obj, void *opaque) 318 { 319 JSONParserContext *ctxt = opaque; 320 g_assert(ctxt->tokens.pos < ctxt->tokens.count); 321 ctxt->tokens.buf[ctxt->tokens.pos++] = obj; 322 qobject_incref(obj); 323 } 324 325 static JSONParserContext *parser_context_new(QList *tokens) 326 { 327 JSONParserContext *ctxt; 328 size_t count; 329 330 if (!tokens) { 331 return NULL; 332 } 333 334 count = qlist_size(tokens); 335 if (count == 0) { 336 return NULL; 337 } 338 339 ctxt = g_malloc0(sizeof(JSONParserContext)); 340 ctxt->tokens.pos = 0; 341 ctxt->tokens.count = count; 342 ctxt->tokens.buf = g_malloc(count * sizeof(QObject *)); 343 qlist_iter(tokens, tokens_append_from_iter, ctxt); 344 ctxt->tokens.pos = 0; 345 346 return ctxt; 347 } 348 349 /* to support error propagation, ctxt->err must be freed separately */ 350 static void parser_context_free(JSONParserContext *ctxt) 351 { 352 int i; 353 if (ctxt) { 354 for (i = 0; i < ctxt->tokens.count; i++) { 355 qobject_decref(ctxt->tokens.buf[i]); 356 } 357 g_free(ctxt->tokens.buf); 358 g_free(ctxt); 359 } 360 } 361 362 /** 363 * Parsing rules 364 */ 365 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap) 366 { 367 QObject *key = NULL, *token = NULL, *value, *peek; 368 JSONParserContext saved_ctxt = parser_context_save(ctxt); 369 370 peek = parser_context_peek_token(ctxt); 371 if (peek == NULL) { 372 parse_error(ctxt, NULL, "premature EOI"); 373 goto out; 374 } 375 376 key = parse_value(ctxt, ap); 377 if (!key || qobject_type(key) != QTYPE_QSTRING) { 378 parse_error(ctxt, peek, "key is not a string in object"); 379 goto out; 380 } 381 382 token = parser_context_pop_token(ctxt); 383 if (token == NULL) { 384 parse_error(ctxt, NULL, "premature EOI"); 385 goto out; 386 } 387 388 if (!token_is_operator(token, ':')) { 389 parse_error(ctxt, token, "missing : in object pair"); 390 goto out; 391 } 392 393 value = parse_value(ctxt, ap); 394 if (value == NULL) { 395 parse_error(ctxt, token, "Missing value in dict"); 396 goto out; 397 } 398 399 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 400 401 qobject_decref(key); 402 403 return 0; 404 405 out: 406 parser_context_restore(ctxt, saved_ctxt); 407 qobject_decref(key); 408 409 return -1; 410 } 411 412 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap) 413 { 414 QDict *dict = NULL; 415 QObject *token, *peek; 416 JSONParserContext saved_ctxt = parser_context_save(ctxt); 417 418 token = parser_context_pop_token(ctxt); 419 if (token == NULL) { 420 goto out; 421 } 422 423 if (!token_is_operator(token, '{')) { 424 goto out; 425 } 426 token = NULL; 427 428 dict = qdict_new(); 429 430 peek = parser_context_peek_token(ctxt); 431 if (peek == NULL) { 432 parse_error(ctxt, NULL, "premature EOI"); 433 goto out; 434 } 435 436 if (!token_is_operator(peek, '}')) { 437 if (parse_pair(ctxt, dict, ap) == -1) { 438 goto out; 439 } 440 441 token = parser_context_pop_token(ctxt); 442 if (token == NULL) { 443 parse_error(ctxt, NULL, "premature EOI"); 444 goto out; 445 } 446 447 while (!token_is_operator(token, '}')) { 448 if (!token_is_operator(token, ',')) { 449 parse_error(ctxt, token, "expected separator in dict"); 450 goto out; 451 } 452 token = NULL; 453 454 if (parse_pair(ctxt, dict, ap) == -1) { 455 goto out; 456 } 457 458 token = parser_context_pop_token(ctxt); 459 if (token == NULL) { 460 parse_error(ctxt, NULL, "premature EOI"); 461 goto out; 462 } 463 } 464 token = NULL; 465 } else { 466 token = parser_context_pop_token(ctxt); 467 token = NULL; 468 } 469 470 return QOBJECT(dict); 471 472 out: 473 parser_context_restore(ctxt, saved_ctxt); 474 QDECREF(dict); 475 return NULL; 476 } 477 478 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap) 479 { 480 QList *list = NULL; 481 QObject *token, *peek; 482 JSONParserContext saved_ctxt = parser_context_save(ctxt); 483 484 token = parser_context_pop_token(ctxt); 485 if (token == NULL) { 486 goto out; 487 } 488 489 if (!token_is_operator(token, '[')) { 490 token = NULL; 491 goto out; 492 } 493 token = NULL; 494 495 list = qlist_new(); 496 497 peek = parser_context_peek_token(ctxt); 498 if (peek == NULL) { 499 parse_error(ctxt, NULL, "premature EOI"); 500 goto out; 501 } 502 503 if (!token_is_operator(peek, ']')) { 504 QObject *obj; 505 506 obj = parse_value(ctxt, ap); 507 if (obj == NULL) { 508 parse_error(ctxt, token, "expecting value"); 509 goto out; 510 } 511 512 qlist_append_obj(list, obj); 513 514 token = parser_context_pop_token(ctxt); 515 if (token == NULL) { 516 parse_error(ctxt, NULL, "premature EOI"); 517 goto out; 518 } 519 520 while (!token_is_operator(token, ']')) { 521 if (!token_is_operator(token, ',')) { 522 parse_error(ctxt, token, "expected separator in list"); 523 goto out; 524 } 525 526 token = NULL; 527 528 obj = parse_value(ctxt, ap); 529 if (obj == NULL) { 530 parse_error(ctxt, token, "expecting value"); 531 goto out; 532 } 533 534 qlist_append_obj(list, obj); 535 536 token = parser_context_pop_token(ctxt); 537 if (token == NULL) { 538 parse_error(ctxt, NULL, "premature EOI"); 539 goto out; 540 } 541 } 542 543 token = NULL; 544 } else { 545 token = parser_context_pop_token(ctxt); 546 token = NULL; 547 } 548 549 return QOBJECT(list); 550 551 out: 552 parser_context_restore(ctxt, saved_ctxt); 553 QDECREF(list); 554 return NULL; 555 } 556 557 static QObject *parse_keyword(JSONParserContext *ctxt) 558 { 559 QObject *token, *ret; 560 JSONParserContext saved_ctxt = parser_context_save(ctxt); 561 562 token = parser_context_pop_token(ctxt); 563 if (token == NULL) { 564 goto out; 565 } 566 567 if (token_get_type(token) != JSON_KEYWORD) { 568 goto out; 569 } 570 571 if (token_is_keyword(token, "true")) { 572 ret = QOBJECT(qbool_from_int(true)); 573 } else if (token_is_keyword(token, "false")) { 574 ret = QOBJECT(qbool_from_int(false)); 575 } else { 576 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 577 goto out; 578 } 579 580 return ret; 581 582 out: 583 parser_context_restore(ctxt, saved_ctxt); 584 585 return NULL; 586 } 587 588 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap) 589 { 590 QObject *token = NULL, *obj; 591 JSONParserContext saved_ctxt = parser_context_save(ctxt); 592 593 if (ap == NULL) { 594 goto out; 595 } 596 597 token = parser_context_pop_token(ctxt); 598 if (token == NULL) { 599 goto out; 600 } 601 602 if (token_is_escape(token, "%p")) { 603 obj = va_arg(*ap, QObject *); 604 } else if (token_is_escape(token, "%i")) { 605 obj = QOBJECT(qbool_from_int(va_arg(*ap, int))); 606 } else if (token_is_escape(token, "%d")) { 607 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 608 } else if (token_is_escape(token, "%ld")) { 609 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 610 } else if (token_is_escape(token, "%lld") || 611 token_is_escape(token, "%I64d")) { 612 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 613 } else if (token_is_escape(token, "%s")) { 614 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 615 } else if (token_is_escape(token, "%f")) { 616 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 617 } else { 618 goto out; 619 } 620 621 return obj; 622 623 out: 624 parser_context_restore(ctxt, saved_ctxt); 625 626 return NULL; 627 } 628 629 static QObject *parse_literal(JSONParserContext *ctxt) 630 { 631 QObject *token, *obj; 632 JSONParserContext saved_ctxt = parser_context_save(ctxt); 633 634 token = parser_context_pop_token(ctxt); 635 if (token == NULL) { 636 goto out; 637 } 638 639 switch (token_get_type(token)) { 640 case JSON_STRING: 641 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 642 break; 643 case JSON_INTEGER: { 644 /* A possibility exists that this is a whole-valued float where the 645 * fractional part was left out due to being 0 (.0). It's not a big 646 * deal to treat these as ints in the parser, so long as users of the 647 * resulting QObject know to expect a QInt in place of a QFloat in 648 * cases like these. 649 * 650 * However, in some cases these values will overflow/underflow a 651 * QInt/int64 container, thus we should assume these are to be handled 652 * as QFloats/doubles rather than silently changing their values. 653 * 654 * strtoll() indicates these instances by setting errno to ERANGE 655 */ 656 int64_t value; 657 658 errno = 0; /* strtoll doesn't set errno on success */ 659 value = strtoll(token_get_value(token), NULL, 10); 660 if (errno != ERANGE) { 661 obj = QOBJECT(qint_from_int(value)); 662 break; 663 } 664 /* fall through to JSON_FLOAT */ 665 } 666 case JSON_FLOAT: 667 /* FIXME dependent on locale */ 668 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 669 break; 670 default: 671 goto out; 672 } 673 674 return obj; 675 676 out: 677 parser_context_restore(ctxt, saved_ctxt); 678 679 return NULL; 680 } 681 682 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap) 683 { 684 QObject *obj; 685 686 obj = parse_object(ctxt, ap); 687 if (obj == NULL) { 688 obj = parse_array(ctxt, ap); 689 } 690 if (obj == NULL) { 691 obj = parse_escape(ctxt, ap); 692 } 693 if (obj == NULL) { 694 obj = parse_keyword(ctxt); 695 } 696 if (obj == NULL) { 697 obj = parse_literal(ctxt); 698 } 699 700 return obj; 701 } 702 703 QObject *json_parser_parse(QList *tokens, va_list *ap) 704 { 705 return json_parser_parse_err(tokens, ap, NULL); 706 } 707 708 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp) 709 { 710 JSONParserContext *ctxt = parser_context_new(tokens); 711 QObject *result; 712 713 if (!ctxt) { 714 return NULL; 715 } 716 717 result = parse_value(ctxt, ap); 718 719 error_propagate(errp, ctxt->err); 720 721 parser_context_free(ctxt); 722 723 return result; 724 } 725