Home | History | Annotate | Download | only in qemu
      1 /*
      2  * JSON Parser
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori (at) us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include <stdarg.h>
     15 
     16 #include "qemu-common.h"
     17 #include "qstring.h"
     18 #include "qint.h"
     19 #include "qdict.h"
     20 #include "qlist.h"
     21 #include "qfloat.h"
     22 #include "qbool.h"
     23 #include "json-parser.h"
     24 #include "json-lexer.h"
     25 
     26 typedef struct JSONParserContext
     27 {
     28 } JSONParserContext;
     29 
     30 #define BUG_ON(cond) assert(!(cond))
     31 
     32 /**
     33  * TODO
     34  *
     35  * 0) make errors meaningful again
     36  * 1) add geometry information to tokens
     37  * 3) should we return a parsed size?
     38  * 4) deal with premature EOI
     39  */
     40 
     41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
     42 
     43 /**
     44  * Token manipulators
     45  *
     46  * tokens are dictionaries that contain a type, a string value, and geometry information
     47  * about a token identified by the lexer.  These are routines that make working with
     48  * these objects a bit easier.
     49  */
     50 static const char *token_get_value(QObject *obj)
     51 {
     52     return qdict_get_str(qobject_to_qdict(obj), "token");
     53 }
     54 
     55 static JSONTokenType token_get_type(QObject *obj)
     56 {
     57     return qdict_get_int(qobject_to_qdict(obj), "type");
     58 }
     59 
     60 static int token_is_operator(QObject *obj, char op)
     61 {
     62     const char *val;
     63 
     64     if (token_get_type(obj) != JSON_OPERATOR) {
     65         return 0;
     66     }
     67 
     68     val = token_get_value(obj);
     69 
     70     return (val[0] == op) && (val[1] == 0);
     71 }
     72 
     73 static int token_is_keyword(QObject *obj, const char *value)
     74 {
     75     if (token_get_type(obj) != JSON_KEYWORD) {
     76         return 0;
     77     }
     78 
     79     return strcmp(token_get_value(obj), value) == 0;
     80 }
     81 
     82 static int token_is_escape(QObject *obj, const char *value)
     83 {
     84     if (token_get_type(obj) != JSON_ESCAPE) {
     85         return 0;
     86     }
     87 
     88     return (strcmp(token_get_value(obj), value) == 0);
     89 }
     90 
     91 /**
     92  * Error handler
     93  */
     94 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
     95                                            QObject *token, const char *msg, ...)
     96 {
     97     va_list ap;
     98     va_start(ap, msg);
     99     fprintf(stderr, "parse error: ");
    100     vfprintf(stderr, msg, ap);
    101     fprintf(stderr, "\n");
    102     va_end(ap);
    103 }
    104 
    105 /**
    106  * String helpers
    107  *
    108  * These helpers are used to unescape strings.
    109  */
    110 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
    111 {
    112     if (wchar <= 0x007F) {
    113         BUG_ON(buffer_length < 2);
    114 
    115         buffer[0] = wchar & 0x7F;
    116         buffer[1] = 0;
    117     } else if (wchar <= 0x07FF) {
    118         BUG_ON(buffer_length < 3);
    119 
    120         buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
    121         buffer[1] = 0x80 | (wchar & 0x3F);
    122         buffer[2] = 0;
    123     } else {
    124         BUG_ON(buffer_length < 4);
    125 
    126         buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
    127         buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
    128         buffer[2] = 0x80 | (wchar & 0x3F);
    129         buffer[3] = 0;
    130     }
    131 }
    132 
    133 static int hex2decimal(char ch)
    134 {
    135     if (ch >= '0' && ch <= '9') {
    136         return (ch - '0');
    137     } else if (ch >= 'a' && ch <= 'f') {
    138         return 10 + (ch - 'a');
    139     } else if (ch >= 'A' && ch <= 'F') {
    140         return 10 + (ch - 'A');
    141     }
    142 
    143     return -1;
    144 }
    145 
    146 /**
    147  * parse_string(): Parse a json string and return a QObject
    148  *
    149  *  string
    150  *      ""
    151  *      " chars "
    152  *  chars
    153  *      char
    154  *      char chars
    155  *  char
    156  *      any-Unicode-character-
    157  *          except-"-or-\-or-
    158  *          control-character
    159  *      \"
    160  *      \\
    161  *      \/
    162  *      \b
    163  *      \f
    164  *      \n
    165  *      \r
    166  *      \t
    167  *      \u four-hex-digits
    168  */
    169 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
    170 {
    171     const char *ptr = token_get_value(token);
    172     QString *str;
    173     int double_quote = 1;
    174 
    175     if (*ptr == '"') {
    176         double_quote = 1;
    177     } else {
    178         double_quote = 0;
    179     }
    180     ptr++;
    181 
    182     str = qstring_new();
    183     while (*ptr &&
    184            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
    185         if (*ptr == '\\') {
    186             ptr++;
    187 
    188             switch (*ptr) {
    189             case '"':
    190                 qstring_append(str, "\"");
    191                 ptr++;
    192                 break;
    193             case '\'':
    194                 qstring_append(str, "'");
    195                 ptr++;
    196                 break;
    197             case '\\':
    198                 qstring_append(str, "\\");
    199                 ptr++;
    200                 break;
    201             case '/':
    202                 qstring_append(str, "/");
    203                 ptr++;
    204                 break;
    205             case 'b':
    206                 qstring_append(str, "\b");
    207                 ptr++;
    208                 break;
    209             case 'f':
    210                 qstring_append(str, "\f");
    211                 ptr++;
    212                 break;
    213             case 'n':
    214                 qstring_append(str, "\n");
    215                 ptr++;
    216                 break;
    217             case 'r':
    218                 qstring_append(str, "\r");
    219                 ptr++;
    220                 break;
    221             case 't':
    222                 qstring_append(str, "\t");
    223                 ptr++;
    224                 break;
    225             case 'u': {
    226                 uint16_t unicode_char = 0;
    227                 char utf8_char[4];
    228                 int i = 0;
    229 
    230                 ptr++;
    231 
    232                 for (i = 0; i < 4; i++) {
    233                     if (qemu_isxdigit(*ptr)) {
    234                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
    235                     } else {
    236                         parse_error(ctxt, token,
    237                                     "invalid hex escape sequence in string");
    238                         goto out;
    239                     }
    240                     ptr++;
    241                 }
    242 
    243                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
    244                 qstring_append(str, utf8_char);
    245             }   break;
    246             default:
    247                 parse_error(ctxt, token, "invalid escape sequence in string");
    248                 goto out;
    249             }
    250         } else {
    251             char dummy[2];
    252 
    253             dummy[0] = *ptr++;
    254             dummy[1] = 0;
    255 
    256             qstring_append(str, dummy);
    257         }
    258     }
    259 
    260     return str;
    261 
    262 out:
    263     QDECREF(str);
    264     return NULL;
    265 }
    266 
    267 /**
    268  * Parsing rules
    269  */
    270 static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
    271 {
    272     QObject *key, *token = NULL, *value, *peek;
    273     QList *working = qlist_copy(*tokens);
    274 
    275     peek = qlist_peek(working);
    276     key = parse_value(ctxt, &working, ap);
    277     if (!key || qobject_type(key) != QTYPE_QSTRING) {
    278         parse_error(ctxt, peek, "key is not a string in object");
    279         goto out;
    280     }
    281 
    282     token = qlist_pop(working);
    283     if (!token_is_operator(token, ':')) {
    284         parse_error(ctxt, token, "missing : in object pair");
    285         goto out;
    286     }
    287 
    288     value = parse_value(ctxt, &working, ap);
    289     if (value == NULL) {
    290         parse_error(ctxt, token, "Missing value in dict");
    291         goto out;
    292     }
    293 
    294     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
    295 
    296     qobject_decref(token);
    297     qobject_decref(key);
    298     QDECREF(*tokens);
    299     *tokens = working;
    300 
    301     return 0;
    302 
    303 out:
    304     qobject_decref(token);
    305     qobject_decref(key);
    306     QDECREF(working);
    307 
    308     return -1;
    309 }
    310 
    311 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    312 {
    313     QDict *dict = NULL;
    314     QObject *token, *peek;
    315     QList *working = qlist_copy(*tokens);
    316 
    317     token = qlist_pop(working);
    318     if (!token_is_operator(token, '{')) {
    319         goto out;
    320     }
    321     qobject_decref(token);
    322     token = NULL;
    323 
    324     dict = qdict_new();
    325 
    326     peek = qlist_peek(working);
    327     if (!token_is_operator(peek, '}')) {
    328         if (parse_pair(ctxt, dict, &working, ap) == -1) {
    329             goto out;
    330         }
    331 
    332         token = qlist_pop(working);
    333         while (!token_is_operator(token, '}')) {
    334             if (!token_is_operator(token, ',')) {
    335                 parse_error(ctxt, token, "expected separator in dict");
    336                 goto out;
    337             }
    338             qobject_decref(token);
    339             token = NULL;
    340 
    341             if (parse_pair(ctxt, dict, &working, ap) == -1) {
    342                 goto out;
    343             }
    344 
    345             token = qlist_pop(working);
    346         }
    347         qobject_decref(token);
    348         token = NULL;
    349     } else {
    350         token = qlist_pop(working);
    351         qobject_decref(token);
    352         token = NULL;
    353     }
    354 
    355     QDECREF(*tokens);
    356     *tokens = working;
    357 
    358     return QOBJECT(dict);
    359 
    360 out:
    361     qobject_decref(token);
    362     QDECREF(working);
    363     QDECREF(dict);
    364     return NULL;
    365 }
    366 
    367 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    368 {
    369     QList *list = NULL;
    370     QObject *token, *peek;
    371     QList *working = qlist_copy(*tokens);
    372 
    373     token = qlist_pop(working);
    374     if (!token_is_operator(token, '[')) {
    375         goto out;
    376     }
    377     qobject_decref(token);
    378     token = NULL;
    379 
    380     list = qlist_new();
    381 
    382     peek = qlist_peek(working);
    383     if (!token_is_operator(peek, ']')) {
    384         QObject *obj;
    385 
    386         obj = parse_value(ctxt, &working, ap);
    387         if (obj == NULL) {
    388             parse_error(ctxt, token, "expecting value");
    389             goto out;
    390         }
    391 
    392         qlist_append_obj(list, obj);
    393 
    394         token = qlist_pop(working);
    395         while (!token_is_operator(token, ']')) {
    396             if (!token_is_operator(token, ',')) {
    397                 parse_error(ctxt, token, "expected separator in list");
    398                 goto out;
    399             }
    400 
    401             qobject_decref(token);
    402             token = NULL;
    403 
    404             obj = parse_value(ctxt, &working, ap);
    405             if (obj == NULL) {
    406                 parse_error(ctxt, token, "expecting value");
    407                 goto out;
    408             }
    409 
    410             qlist_append_obj(list, obj);
    411 
    412             token = qlist_pop(working);
    413         }
    414 
    415         qobject_decref(token);
    416         token = NULL;
    417     } else {
    418         token = qlist_pop(working);
    419         qobject_decref(token);
    420         token = NULL;
    421     }
    422 
    423     QDECREF(*tokens);
    424     *tokens = working;
    425 
    426     return QOBJECT(list);
    427 
    428 out:
    429     qobject_decref(token);
    430     QDECREF(working);
    431     QDECREF(list);
    432     return NULL;
    433 }
    434 
    435 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
    436 {
    437     QObject *token, *ret;
    438     QList *working = qlist_copy(*tokens);
    439 
    440     token = qlist_pop(working);
    441 
    442     if (token_get_type(token) != JSON_KEYWORD) {
    443         goto out;
    444     }
    445 
    446     if (token_is_keyword(token, "true")) {
    447         ret = QOBJECT(qbool_from_int(true));
    448     } else if (token_is_keyword(token, "false")) {
    449         ret = QOBJECT(qbool_from_int(false));
    450     } else {
    451         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
    452         goto out;
    453     }
    454 
    455     qobject_decref(token);
    456     QDECREF(*tokens);
    457     *tokens = working;
    458 
    459     return ret;
    460 
    461 out:
    462     qobject_decref(token);
    463     QDECREF(working);
    464 
    465     return NULL;
    466 }
    467 
    468 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    469 {
    470     QObject *token = NULL, *obj;
    471     QList *working = qlist_copy(*tokens);
    472 
    473     if (ap == NULL) {
    474         goto out;
    475     }
    476 
    477     token = qlist_pop(working);
    478 
    479     if (token_is_escape(token, "%p")) {
    480         obj = va_arg(*ap, QObject *);
    481     } else if (token_is_escape(token, "%i")) {
    482         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    483     } else if (token_is_escape(token, "%d")) {
    484         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    485     } else if (token_is_escape(token, "%ld")) {
    486         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    487     } else if (token_is_escape(token, "%lld") ||
    488                token_is_escape(token, "%I64d")) {
    489         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    490     } else if (token_is_escape(token, "%s")) {
    491         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    492     } else if (token_is_escape(token, "%f")) {
    493         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    494     } else {
    495         goto out;
    496     }
    497 
    498     qobject_decref(token);
    499     QDECREF(*tokens);
    500     *tokens = working;
    501 
    502     return obj;
    503 
    504 out:
    505     qobject_decref(token);
    506     QDECREF(working);
    507 
    508     return NULL;
    509 }
    510 
    511 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
    512 {
    513     QObject *token, *obj;
    514     QList *working = qlist_copy(*tokens);
    515 
    516     token = qlist_pop(working);
    517     switch (token_get_type(token)) {
    518     case JSON_STRING:
    519         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
    520         break;
    521     case JSON_INTEGER:
    522         obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
    523         break;
    524     case JSON_FLOAT:
    525         /* FIXME dependent on locale */
    526         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
    527         break;
    528     default:
    529         goto out;
    530     }
    531 
    532     qobject_decref(token);
    533     QDECREF(*tokens);
    534     *tokens = working;
    535 
    536     return obj;
    537 
    538 out:
    539     qobject_decref(token);
    540     QDECREF(working);
    541 
    542     return NULL;
    543 }
    544 
    545 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    546 {
    547     QObject *obj;
    548 
    549     obj = parse_object(ctxt, tokens, ap);
    550     if (obj == NULL) {
    551         obj = parse_array(ctxt, tokens, ap);
    552     }
    553     if (obj == NULL) {
    554         obj = parse_escape(ctxt, tokens, ap);
    555     }
    556     if (obj == NULL) {
    557         obj = parse_keyword(ctxt, tokens);
    558     }
    559     if (obj == NULL) {
    560         obj = parse_literal(ctxt, tokens);
    561     }
    562 
    563     return obj;
    564 }
    565 
    566 QObject *json_parser_parse(QList *tokens, va_list *ap)
    567 {
    568     JSONParserContext ctxt = {};
    569     QList *working = qlist_copy(tokens);
    570     QObject *result;
    571 
    572     result = parse_value(&ctxt, &working, ap);
    573 
    574     QDECREF(working);
    575 
    576     return result;
    577 }
    578