1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori (at) us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdarg.h> 15 16 #include "qemu-common.h" 17 #include "qstring.h" 18 #include "qint.h" 19 #include "qdict.h" 20 #include "qlist.h" 21 #include "qfloat.h" 22 #include "qbool.h" 23 #include "json-parser.h" 24 #include "json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 } JSONParserContext; 29 30 #define BUG_ON(cond) assert(!(cond)) 31 32 /** 33 * TODO 34 * 35 * 0) make errors meaningful again 36 * 1) add geometry information to tokens 37 * 3) should we return a parsed size? 38 * 4) deal with premature EOI 39 */ 40 41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap); 42 43 /** 44 * Token manipulators 45 * 46 * tokens are dictionaries that contain a type, a string value, and geometry information 47 * about a token identified by the lexer. These are routines that make working with 48 * these objects a bit easier. 49 */ 50 static const char *token_get_value(QObject *obj) 51 { 52 return qdict_get_str(qobject_to_qdict(obj), "token"); 53 } 54 55 static JSONTokenType token_get_type(QObject *obj) 56 { 57 return qdict_get_int(qobject_to_qdict(obj), "type"); 58 } 59 60 static int token_is_operator(QObject *obj, char op) 61 { 62 const char *val; 63 64 if (token_get_type(obj) != JSON_OPERATOR) { 65 return 0; 66 } 67 68 val = token_get_value(obj); 69 70 return (val[0] == op) && (val[1] == 0); 71 } 72 73 static int token_is_keyword(QObject *obj, const char *value) 74 { 75 if (token_get_type(obj) != JSON_KEYWORD) { 76 return 0; 77 } 78 79 return strcmp(token_get_value(obj), value) == 0; 80 } 81 82 static int token_is_escape(QObject *obj, const char *value) 83 { 84 if (token_get_type(obj) != JSON_ESCAPE) { 85 return 0; 86 } 87 88 return (strcmp(token_get_value(obj), value) == 0); 89 } 90 91 /** 92 * Error handler 93 */ 94 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt, 95 QObject *token, const char *msg, ...) 96 { 97 va_list ap; 98 va_start(ap, msg); 99 fprintf(stderr, "parse error: "); 100 vfprintf(stderr, msg, ap); 101 fprintf(stderr, "\n"); 102 va_end(ap); 103 } 104 105 /** 106 * String helpers 107 * 108 * These helpers are used to unescape strings. 109 */ 110 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 111 { 112 if (wchar <= 0x007F) { 113 BUG_ON(buffer_length < 2); 114 115 buffer[0] = wchar & 0x7F; 116 buffer[1] = 0; 117 } else if (wchar <= 0x07FF) { 118 BUG_ON(buffer_length < 3); 119 120 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 121 buffer[1] = 0x80 | (wchar & 0x3F); 122 buffer[2] = 0; 123 } else { 124 BUG_ON(buffer_length < 4); 125 126 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 127 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 128 buffer[2] = 0x80 | (wchar & 0x3F); 129 buffer[3] = 0; 130 } 131 } 132 133 static int hex2decimal(char ch) 134 { 135 if (ch >= '0' && ch <= '9') { 136 return (ch - '0'); 137 } else if (ch >= 'a' && ch <= 'f') { 138 return 10 + (ch - 'a'); 139 } else if (ch >= 'A' && ch <= 'F') { 140 return 10 + (ch - 'A'); 141 } 142 143 return -1; 144 } 145 146 /** 147 * parse_string(): Parse a json string and return a QObject 148 * 149 * string 150 * "" 151 * " chars " 152 * chars 153 * char 154 * char chars 155 * char 156 * any-Unicode-character- 157 * except-"-or-\-or- 158 * control-character 159 * \" 160 * \\ 161 * \/ 162 * \b 163 * \f 164 * \n 165 * \r 166 * \t 167 * \u four-hex-digits 168 */ 169 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 170 { 171 const char *ptr = token_get_value(token); 172 QString *str; 173 int double_quote = 1; 174 175 if (*ptr == '"') { 176 double_quote = 1; 177 } else { 178 double_quote = 0; 179 } 180 ptr++; 181 182 str = qstring_new(); 183 while (*ptr && 184 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 185 if (*ptr == '\\') { 186 ptr++; 187 188 switch (*ptr) { 189 case '"': 190 qstring_append(str, "\""); 191 ptr++; 192 break; 193 case '\'': 194 qstring_append(str, "'"); 195 ptr++; 196 break; 197 case '\\': 198 qstring_append(str, "\\"); 199 ptr++; 200 break; 201 case '/': 202 qstring_append(str, "/"); 203 ptr++; 204 break; 205 case 'b': 206 qstring_append(str, "\b"); 207 ptr++; 208 break; 209 case 'f': 210 qstring_append(str, "\f"); 211 ptr++; 212 break; 213 case 'n': 214 qstring_append(str, "\n"); 215 ptr++; 216 break; 217 case 'r': 218 qstring_append(str, "\r"); 219 ptr++; 220 break; 221 case 't': 222 qstring_append(str, "\t"); 223 ptr++; 224 break; 225 case 'u': { 226 uint16_t unicode_char = 0; 227 char utf8_char[4]; 228 int i = 0; 229 230 ptr++; 231 232 for (i = 0; i < 4; i++) { 233 if (qemu_isxdigit(*ptr)) { 234 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 235 } else { 236 parse_error(ctxt, token, 237 "invalid hex escape sequence in string"); 238 goto out; 239 } 240 ptr++; 241 } 242 243 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 244 qstring_append(str, utf8_char); 245 } break; 246 default: 247 parse_error(ctxt, token, "invalid escape sequence in string"); 248 goto out; 249 } 250 } else { 251 char dummy[2]; 252 253 dummy[0] = *ptr++; 254 dummy[1] = 0; 255 256 qstring_append(str, dummy); 257 } 258 } 259 260 return str; 261 262 out: 263 QDECREF(str); 264 return NULL; 265 } 266 267 /** 268 * Parsing rules 269 */ 270 static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap) 271 { 272 QObject *key, *token = NULL, *value, *peek; 273 QList *working = qlist_copy(*tokens); 274 275 peek = qlist_peek(working); 276 key = parse_value(ctxt, &working, ap); 277 if (!key || qobject_type(key) != QTYPE_QSTRING) { 278 parse_error(ctxt, peek, "key is not a string in object"); 279 goto out; 280 } 281 282 token = qlist_pop(working); 283 if (!token_is_operator(token, ':')) { 284 parse_error(ctxt, token, "missing : in object pair"); 285 goto out; 286 } 287 288 value = parse_value(ctxt, &working, ap); 289 if (value == NULL) { 290 parse_error(ctxt, token, "Missing value in dict"); 291 goto out; 292 } 293 294 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 295 296 qobject_decref(token); 297 qobject_decref(key); 298 QDECREF(*tokens); 299 *tokens = working; 300 301 return 0; 302 303 out: 304 qobject_decref(token); 305 qobject_decref(key); 306 QDECREF(working); 307 308 return -1; 309 } 310 311 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap) 312 { 313 QDict *dict = NULL; 314 QObject *token, *peek; 315 QList *working = qlist_copy(*tokens); 316 317 token = qlist_pop(working); 318 if (!token_is_operator(token, '{')) { 319 goto out; 320 } 321 qobject_decref(token); 322 token = NULL; 323 324 dict = qdict_new(); 325 326 peek = qlist_peek(working); 327 if (!token_is_operator(peek, '}')) { 328 if (parse_pair(ctxt, dict, &working, ap) == -1) { 329 goto out; 330 } 331 332 token = qlist_pop(working); 333 while (!token_is_operator(token, '}')) { 334 if (!token_is_operator(token, ',')) { 335 parse_error(ctxt, token, "expected separator in dict"); 336 goto out; 337 } 338 qobject_decref(token); 339 token = NULL; 340 341 if (parse_pair(ctxt, dict, &working, ap) == -1) { 342 goto out; 343 } 344 345 token = qlist_pop(working); 346 } 347 qobject_decref(token); 348 token = NULL; 349 } else { 350 token = qlist_pop(working); 351 qobject_decref(token); 352 token = NULL; 353 } 354 355 QDECREF(*tokens); 356 *tokens = working; 357 358 return QOBJECT(dict); 359 360 out: 361 qobject_decref(token); 362 QDECREF(working); 363 QDECREF(dict); 364 return NULL; 365 } 366 367 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap) 368 { 369 QList *list = NULL; 370 QObject *token, *peek; 371 QList *working = qlist_copy(*tokens); 372 373 token = qlist_pop(working); 374 if (!token_is_operator(token, '[')) { 375 goto out; 376 } 377 qobject_decref(token); 378 token = NULL; 379 380 list = qlist_new(); 381 382 peek = qlist_peek(working); 383 if (!token_is_operator(peek, ']')) { 384 QObject *obj; 385 386 obj = parse_value(ctxt, &working, ap); 387 if (obj == NULL) { 388 parse_error(ctxt, token, "expecting value"); 389 goto out; 390 } 391 392 qlist_append_obj(list, obj); 393 394 token = qlist_pop(working); 395 while (!token_is_operator(token, ']')) { 396 if (!token_is_operator(token, ',')) { 397 parse_error(ctxt, token, "expected separator in list"); 398 goto out; 399 } 400 401 qobject_decref(token); 402 token = NULL; 403 404 obj = parse_value(ctxt, &working, ap); 405 if (obj == NULL) { 406 parse_error(ctxt, token, "expecting value"); 407 goto out; 408 } 409 410 qlist_append_obj(list, obj); 411 412 token = qlist_pop(working); 413 } 414 415 qobject_decref(token); 416 token = NULL; 417 } else { 418 token = qlist_pop(working); 419 qobject_decref(token); 420 token = NULL; 421 } 422 423 QDECREF(*tokens); 424 *tokens = working; 425 426 return QOBJECT(list); 427 428 out: 429 qobject_decref(token); 430 QDECREF(working); 431 QDECREF(list); 432 return NULL; 433 } 434 435 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) 436 { 437 QObject *token, *ret; 438 QList *working = qlist_copy(*tokens); 439 440 token = qlist_pop(working); 441 442 if (token_get_type(token) != JSON_KEYWORD) { 443 goto out; 444 } 445 446 if (token_is_keyword(token, "true")) { 447 ret = QOBJECT(qbool_from_int(true)); 448 } else if (token_is_keyword(token, "false")) { 449 ret = QOBJECT(qbool_from_int(false)); 450 } else { 451 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 452 goto out; 453 } 454 455 qobject_decref(token); 456 QDECREF(*tokens); 457 *tokens = working; 458 459 return ret; 460 461 out: 462 qobject_decref(token); 463 QDECREF(working); 464 465 return NULL; 466 } 467 468 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap) 469 { 470 QObject *token = NULL, *obj; 471 QList *working = qlist_copy(*tokens); 472 473 if (ap == NULL) { 474 goto out; 475 } 476 477 token = qlist_pop(working); 478 479 if (token_is_escape(token, "%p")) { 480 obj = va_arg(*ap, QObject *); 481 } else if (token_is_escape(token, "%i")) { 482 obj = QOBJECT(qbool_from_int(va_arg(*ap, int))); 483 } else if (token_is_escape(token, "%d")) { 484 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 485 } else if (token_is_escape(token, "%ld")) { 486 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 487 } else if (token_is_escape(token, "%lld") || 488 token_is_escape(token, "%I64d")) { 489 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 490 } else if (token_is_escape(token, "%s")) { 491 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 492 } else if (token_is_escape(token, "%f")) { 493 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 494 } else { 495 goto out; 496 } 497 498 qobject_decref(token); 499 QDECREF(*tokens); 500 *tokens = working; 501 502 return obj; 503 504 out: 505 qobject_decref(token); 506 QDECREF(working); 507 508 return NULL; 509 } 510 511 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) 512 { 513 QObject *token, *obj; 514 QList *working = qlist_copy(*tokens); 515 516 token = qlist_pop(working); 517 switch (token_get_type(token)) { 518 case JSON_STRING: 519 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 520 break; 521 case JSON_INTEGER: 522 obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10))); 523 break; 524 case JSON_FLOAT: 525 /* FIXME dependent on locale */ 526 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 527 break; 528 default: 529 goto out; 530 } 531 532 qobject_decref(token); 533 QDECREF(*tokens); 534 *tokens = working; 535 536 return obj; 537 538 out: 539 qobject_decref(token); 540 QDECREF(working); 541 542 return NULL; 543 } 544 545 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap) 546 { 547 QObject *obj; 548 549 obj = parse_object(ctxt, tokens, ap); 550 if (obj == NULL) { 551 obj = parse_array(ctxt, tokens, ap); 552 } 553 if (obj == NULL) { 554 obj = parse_escape(ctxt, tokens, ap); 555 } 556 if (obj == NULL) { 557 obj = parse_keyword(ctxt, tokens); 558 } 559 if (obj == NULL) { 560 obj = parse_literal(ctxt, tokens); 561 } 562 563 return obj; 564 } 565 566 QObject *json_parser_parse(QList *tokens, va_list *ap) 567 { 568 JSONParserContext ctxt = {}; 569 QList *working = qlist_copy(tokens); 570 QObject *result; 571 572 result = parse_value(&ctxt, &working, ap); 573 574 QDECREF(working); 575 576 return result; 577 } 578