1 /* 2 * JSON Parser 3 * 4 * Copyright IBM, Corp. 2009 5 * 6 * Authors: 7 * Anthony Liguori <aliguori (at) us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. 10 * See the COPYING.LIB file in the top-level directory. 11 * 12 */ 13 14 #include <stdbool.h> 15 16 #include "qemu-common.h" 17 #include "qstring.h" 18 #include "qint.h" 19 #include "qdict.h" 20 #include "qlist.h" 21 #include "qfloat.h" 22 #include "qbool.h" 23 #include "json-parser.h" 24 #include "json-lexer.h" 25 26 typedef struct JSONParserContext 27 { 28 } JSONParserContext; 29 30 #define BUG_ON(cond) assert(!(cond)) 31 32 /** 33 * TODO 34 * 35 * 0) make errors meaningful again 36 * 1) add geometry information to tokens 37 * 3) should we return a parsed size? 38 * 4) deal with premature EOI 39 */ 40 41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap); 42 43 /** 44 * Token manipulators 45 * 46 * tokens are dictionaries that contain a type, a string value, and geometry information 47 * about a token identified by the lexer. These are routines that make working with 48 * these objects a bit easier. 49 */ 50 static const char *token_get_value(QObject *obj) 51 { 52 return qdict_get_str(qobject_to_qdict(obj), "token"); 53 } 54 55 static JSONTokenType token_get_type(QObject *obj) 56 { 57 return qdict_get_int(qobject_to_qdict(obj), "type"); 58 } 59 60 static int token_is_operator(QObject *obj, char op) 61 { 62 const char *val; 63 64 if (token_get_type(obj) != JSON_OPERATOR) { 65 return 0; 66 } 67 68 val = token_get_value(obj); 69 70 return (val[0] == op) && (val[1] == 0); 71 } 72 73 static int token_is_keyword(QObject *obj, const char *value) 74 { 75 if (token_get_type(obj) != JSON_KEYWORD) { 76 return 0; 77 } 78 79 return strcmp(token_get_value(obj), value) == 0; 80 } 81 82 static int token_is_escape(QObject *obj, const char *value) 83 { 84 if (token_get_type(obj) != JSON_ESCAPE) { 85 return 0; 86 } 87 88 return (strcmp(token_get_value(obj), value) == 0); 89 } 90 91 /** 92 * Error handler 93 */ 94 static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...) 95 { 96 fprintf(stderr, "parse error: %s\n", msg); 97 } 98 99 /** 100 * String helpers 101 * 102 * These helpers are used to unescape strings. 103 */ 104 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length) 105 { 106 if (wchar <= 0x007F) { 107 BUG_ON(buffer_length < 2); 108 109 buffer[0] = wchar & 0x7F; 110 buffer[1] = 0; 111 } else if (wchar <= 0x07FF) { 112 BUG_ON(buffer_length < 3); 113 114 buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F); 115 buffer[1] = 0x80 | (wchar & 0x3F); 116 buffer[2] = 0; 117 } else { 118 BUG_ON(buffer_length < 4); 119 120 buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F); 121 buffer[1] = 0x80 | ((wchar >> 6) & 0x3F); 122 buffer[2] = 0x80 | (wchar & 0x3F); 123 buffer[3] = 0; 124 } 125 } 126 127 static int hex2decimal(char ch) 128 { 129 if (ch >= '0' && ch <= '9') { 130 return (ch - '0'); 131 } else if (ch >= 'a' && ch <= 'f') { 132 return 10 + (ch - 'a'); 133 } else if (ch >= 'A' && ch <= 'F') { 134 return 10 + (ch - 'A'); 135 } 136 137 return -1; 138 } 139 140 /** 141 * parse_string(): Parse a json string and return a QObject 142 * 143 * string 144 * "" 145 * " chars " 146 * chars 147 * char 148 * char chars 149 * char 150 * any-Unicode-character- 151 * except-"-or-\-or- 152 * control-character 153 * \" 154 * \\ 155 * \/ 156 * \b 157 * \f 158 * \n 159 * \r 160 * \t 161 * \u four-hex-digits 162 */ 163 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token) 164 { 165 const char *ptr = token_get_value(token); 166 QString *str; 167 int double_quote = 1; 168 169 if (*ptr == '"') { 170 double_quote = 1; 171 } else { 172 double_quote = 0; 173 } 174 ptr++; 175 176 str = qstring_new(); 177 while (*ptr && 178 ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) { 179 if (*ptr == '\\') { 180 ptr++; 181 182 switch (*ptr) { 183 case '"': 184 qstring_append(str, "\""); 185 ptr++; 186 break; 187 case '\'': 188 qstring_append(str, "'"); 189 ptr++; 190 break; 191 case '\\': 192 qstring_append(str, "\\"); 193 ptr++; 194 break; 195 case '/': 196 qstring_append(str, "/"); 197 ptr++; 198 break; 199 case 'b': 200 qstring_append(str, "\b"); 201 ptr++; 202 break; 203 case 'n': 204 qstring_append(str, "\n"); 205 ptr++; 206 break; 207 case 'r': 208 qstring_append(str, "\r"); 209 ptr++; 210 break; 211 case 't': 212 qstring_append(str, "\t"); 213 ptr++; 214 break; 215 case 'u': { 216 uint16_t unicode_char = 0; 217 char utf8_char[4]; 218 int i = 0; 219 220 ptr++; 221 222 for (i = 0; i < 4; i++) { 223 if (qemu_isxdigit(*ptr)) { 224 unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4); 225 } else { 226 parse_error(ctxt, token, 227 "invalid hex escape sequence in string"); 228 goto out; 229 } 230 ptr++; 231 } 232 233 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char)); 234 qstring_append(str, utf8_char); 235 } break; 236 default: 237 parse_error(ctxt, token, "invalid escape sequence in string"); 238 goto out; 239 } 240 } else { 241 char dummy[2]; 242 243 dummy[0] = *ptr++; 244 dummy[1] = 0; 245 246 qstring_append(str, dummy); 247 } 248 } 249 250 return str; 251 252 out: 253 QDECREF(str); 254 return NULL; 255 } 256 257 /** 258 * Parsing rules 259 */ 260 static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap) 261 { 262 QObject *key, *token = NULL, *value, *peek; 263 QList *working = qlist_copy(*tokens); 264 265 peek = qlist_peek(working); 266 key = parse_value(ctxt, &working, ap); 267 if (qobject_type(key) != QTYPE_QSTRING) { 268 parse_error(ctxt, peek, "key is not a string in object"); 269 goto out; 270 } 271 272 token = qlist_pop(working); 273 if (!token_is_operator(token, ':')) { 274 parse_error(ctxt, token, "missing : in object pair"); 275 goto out; 276 } 277 278 value = parse_value(ctxt, &working, ap); 279 if (value == NULL) { 280 parse_error(ctxt, token, "Missing value in dict"); 281 goto out; 282 } 283 284 qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value); 285 286 qobject_decref(token); 287 qobject_decref(key); 288 QDECREF(*tokens); 289 *tokens = working; 290 291 return 0; 292 293 out: 294 qobject_decref(token); 295 qobject_decref(key); 296 QDECREF(working); 297 298 return -1; 299 } 300 301 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap) 302 { 303 QDict *dict = NULL; 304 QObject *token, *peek; 305 QList *working = qlist_copy(*tokens); 306 307 token = qlist_pop(working); 308 if (!token_is_operator(token, '{')) { 309 goto out; 310 } 311 qobject_decref(token); 312 token = NULL; 313 314 dict = qdict_new(); 315 316 peek = qlist_peek(working); 317 if (!token_is_operator(peek, '}')) { 318 if (parse_pair(ctxt, dict, &working, ap) == -1) { 319 goto out; 320 } 321 322 token = qlist_pop(working); 323 while (!token_is_operator(token, '}')) { 324 if (!token_is_operator(token, ',')) { 325 parse_error(ctxt, token, "expected separator in dict"); 326 goto out; 327 } 328 qobject_decref(token); 329 token = NULL; 330 331 if (parse_pair(ctxt, dict, &working, ap) == -1) { 332 goto out; 333 } 334 335 token = qlist_pop(working); 336 } 337 qobject_decref(token); 338 token = NULL; 339 } else { 340 token = qlist_pop(working); 341 qobject_decref(token); 342 token = NULL; 343 } 344 345 QDECREF(*tokens); 346 *tokens = working; 347 348 return QOBJECT(dict); 349 350 out: 351 qobject_decref(token); 352 QDECREF(working); 353 QDECREF(dict); 354 return NULL; 355 } 356 357 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap) 358 { 359 QList *list = NULL; 360 QObject *token, *peek; 361 QList *working = qlist_copy(*tokens); 362 363 token = qlist_pop(working); 364 if (!token_is_operator(token, '[')) { 365 goto out; 366 } 367 qobject_decref(token); 368 token = NULL; 369 370 list = qlist_new(); 371 372 peek = qlist_peek(working); 373 if (!token_is_operator(peek, ']')) { 374 QObject *obj; 375 376 obj = parse_value(ctxt, &working, ap); 377 if (obj == NULL) { 378 parse_error(ctxt, token, "expecting value"); 379 goto out; 380 } 381 382 qlist_append_obj(list, obj); 383 384 token = qlist_pop(working); 385 while (!token_is_operator(token, ']')) { 386 if (!token_is_operator(token, ',')) { 387 parse_error(ctxt, token, "expected separator in list"); 388 goto out; 389 } 390 391 qobject_decref(token); 392 token = NULL; 393 394 obj = parse_value(ctxt, &working, ap); 395 if (obj == NULL) { 396 parse_error(ctxt, token, "expecting value"); 397 goto out; 398 } 399 400 qlist_append_obj(list, obj); 401 402 token = qlist_pop(working); 403 } 404 405 qobject_decref(token); 406 token = NULL; 407 } else { 408 token = qlist_pop(working); 409 qobject_decref(token); 410 token = NULL; 411 } 412 413 QDECREF(*tokens); 414 *tokens = working; 415 416 return QOBJECT(list); 417 418 out: 419 qobject_decref(token); 420 QDECREF(working); 421 QDECREF(list); 422 return NULL; 423 } 424 425 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens) 426 { 427 QObject *token, *ret; 428 QList *working = qlist_copy(*tokens); 429 430 token = qlist_pop(working); 431 432 if (token_get_type(token) != JSON_KEYWORD) { 433 goto out; 434 } 435 436 if (token_is_keyword(token, "true")) { 437 ret = QOBJECT(qbool_from_int(true)); 438 } else if (token_is_keyword(token, "false")) { 439 ret = QOBJECT(qbool_from_int(false)); 440 } else { 441 parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token)); 442 goto out; 443 } 444 445 qobject_decref(token); 446 QDECREF(*tokens); 447 *tokens = working; 448 449 return ret; 450 451 out: 452 qobject_decref(token); 453 QDECREF(working); 454 455 return NULL; 456 } 457 458 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap) 459 { 460 QObject *token = NULL, *obj; 461 QList *working = qlist_copy(*tokens); 462 463 if (ap == NULL) { 464 goto out; 465 } 466 467 token = qlist_pop(working); 468 469 if (token_is_escape(token, "%p")) { 470 obj = va_arg(*ap, QObject *); 471 } else if (token_is_escape(token, "%i")) { 472 obj = QOBJECT(qbool_from_int(va_arg(*ap, int))); 473 } else if (token_is_escape(token, "%d")) { 474 obj = QOBJECT(qint_from_int(va_arg(*ap, int))); 475 } else if (token_is_escape(token, "%ld")) { 476 obj = QOBJECT(qint_from_int(va_arg(*ap, long))); 477 } else if (token_is_escape(token, "%lld") || 478 token_is_escape(token, "%I64d")) { 479 obj = QOBJECT(qint_from_int(va_arg(*ap, long long))); 480 } else if (token_is_escape(token, "%s")) { 481 obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *))); 482 } else if (token_is_escape(token, "%f")) { 483 obj = QOBJECT(qfloat_from_double(va_arg(*ap, double))); 484 } else { 485 goto out; 486 } 487 488 qobject_decref(token); 489 QDECREF(*tokens); 490 *tokens = working; 491 492 return obj; 493 494 out: 495 qobject_decref(token); 496 QDECREF(working); 497 498 return NULL; 499 } 500 501 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens) 502 { 503 QObject *token, *obj; 504 QList *working = qlist_copy(*tokens); 505 506 token = qlist_pop(working); 507 switch (token_get_type(token)) { 508 case JSON_STRING: 509 obj = QOBJECT(qstring_from_escaped_str(ctxt, token)); 510 break; 511 case JSON_INTEGER: 512 obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10))); 513 break; 514 case JSON_FLOAT: 515 /* FIXME dependent on locale */ 516 obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL))); 517 break; 518 default: 519 goto out; 520 } 521 522 qobject_decref(token); 523 QDECREF(*tokens); 524 *tokens = working; 525 526 return obj; 527 528 out: 529 qobject_decref(token); 530 QDECREF(working); 531 532 return NULL; 533 } 534 535 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap) 536 { 537 QObject *obj; 538 539 obj = parse_object(ctxt, tokens, ap); 540 if (obj == NULL) { 541 obj = parse_array(ctxt, tokens, ap); 542 } 543 if (obj == NULL) { 544 obj = parse_escape(ctxt, tokens, ap); 545 } 546 if (obj == NULL) { 547 obj = parse_keyword(ctxt, tokens); 548 } 549 if (obj == NULL) { 550 obj = parse_literal(ctxt, tokens); 551 } 552 553 return obj; 554 } 555 556 QObject *json_parser_parse(QList *tokens, va_list *ap) 557 { 558 JSONParserContext ctxt = {}; 559 QList *working = qlist_copy(tokens); 560 QObject *result; 561 562 result = parse_value(&ctxt, &working, ap); 563 564 QDECREF(working); 565 566 return result; 567 } 568