Home | History | Annotate | Download | only in qemu
      1 /*
      2  * JSON Parser
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori (at) us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include <stdbool.h>
     15 
     16 #include "qemu-common.h"
     17 #include "qstring.h"
     18 #include "qint.h"
     19 #include "qdict.h"
     20 #include "qlist.h"
     21 #include "qfloat.h"
     22 #include "qbool.h"
     23 #include "json-parser.h"
     24 #include "json-lexer.h"
     25 
     26 typedef struct JSONParserContext
     27 {
     28 } JSONParserContext;
     29 
     30 #define BUG_ON(cond) assert(!(cond))
     31 
     32 /**
     33  * TODO
     34  *
     35  * 0) make errors meaningful again
     36  * 1) add geometry information to tokens
     37  * 3) should we return a parsed size?
     38  * 4) deal with premature EOI
     39  */
     40 
     41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
     42 
     43 /**
     44  * Token manipulators
     45  *
     46  * tokens are dictionaries that contain a type, a string value, and geometry information
     47  * about a token identified by the lexer.  These are routines that make working with
     48  * these objects a bit easier.
     49  */
     50 static const char *token_get_value(QObject *obj)
     51 {
     52     return qdict_get_str(qobject_to_qdict(obj), "token");
     53 }
     54 
     55 static JSONTokenType token_get_type(QObject *obj)
     56 {
     57     return qdict_get_int(qobject_to_qdict(obj), "type");
     58 }
     59 
     60 static int token_is_operator(QObject *obj, char op)
     61 {
     62     const char *val;
     63 
     64     if (token_get_type(obj) != JSON_OPERATOR) {
     65         return 0;
     66     }
     67 
     68     val = token_get_value(obj);
     69 
     70     return (val[0] == op) && (val[1] == 0);
     71 }
     72 
     73 static int token_is_keyword(QObject *obj, const char *value)
     74 {
     75     if (token_get_type(obj) != JSON_KEYWORD) {
     76         return 0;
     77     }
     78 
     79     return strcmp(token_get_value(obj), value) == 0;
     80 }
     81 
     82 static int token_is_escape(QObject *obj, const char *value)
     83 {
     84     if (token_get_type(obj) != JSON_ESCAPE) {
     85         return 0;
     86     }
     87 
     88     return (strcmp(token_get_value(obj), value) == 0);
     89 }
     90 
     91 /**
     92  * Error handler
     93  */
     94 static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
     95 {
     96     fprintf(stderr, "parse error: %s\n", msg);
     97 }
     98 
     99 /**
    100  * String helpers
    101  *
    102  * These helpers are used to unescape strings.
    103  */
    104 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
    105 {
    106     if (wchar <= 0x007F) {
    107         BUG_ON(buffer_length < 2);
    108 
    109         buffer[0] = wchar & 0x7F;
    110         buffer[1] = 0;
    111     } else if (wchar <= 0x07FF) {
    112         BUG_ON(buffer_length < 3);
    113 
    114         buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
    115         buffer[1] = 0x80 | (wchar & 0x3F);
    116         buffer[2] = 0;
    117     } else {
    118         BUG_ON(buffer_length < 4);
    119 
    120         buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
    121         buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
    122         buffer[2] = 0x80 | (wchar & 0x3F);
    123         buffer[3] = 0;
    124     }
    125 }
    126 
    127 static int hex2decimal(char ch)
    128 {
    129     if (ch >= '0' && ch <= '9') {
    130         return (ch - '0');
    131     } else if (ch >= 'a' && ch <= 'f') {
    132         return 10 + (ch - 'a');
    133     } else if (ch >= 'A' && ch <= 'F') {
    134         return 10 + (ch - 'A');
    135     }
    136 
    137     return -1;
    138 }
    139 
    140 /**
    141  * parse_string(): Parse a json string and return a QObject
    142  *
    143  *  string
    144  *      ""
    145  *      " chars "
    146  *  chars
    147  *      char
    148  *      char chars
    149  *  char
    150  *      any-Unicode-character-
    151  *          except-"-or-\-or-
    152  *          control-character
    153  *      \"
    154  *      \\
    155  *      \/
    156  *      \b
    157  *      \f
    158  *      \n
    159  *      \r
    160  *      \t
    161  *      \u four-hex-digits
    162  */
    163 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
    164 {
    165     const char *ptr = token_get_value(token);
    166     QString *str;
    167     int double_quote = 1;
    168 
    169     if (*ptr == '"') {
    170         double_quote = 1;
    171     } else {
    172         double_quote = 0;
    173     }
    174     ptr++;
    175 
    176     str = qstring_new();
    177     while (*ptr &&
    178            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
    179         if (*ptr == '\\') {
    180             ptr++;
    181 
    182             switch (*ptr) {
    183             case '"':
    184                 qstring_append(str, "\"");
    185                 ptr++;
    186                 break;
    187             case '\'':
    188                 qstring_append(str, "'");
    189                 ptr++;
    190                 break;
    191             case '\\':
    192                 qstring_append(str, "\\");
    193                 ptr++;
    194                 break;
    195             case '/':
    196                 qstring_append(str, "/");
    197                 ptr++;
    198                 break;
    199             case 'b':
    200                 qstring_append(str, "\b");
    201                 ptr++;
    202                 break;
    203             case 'n':
    204                 qstring_append(str, "\n");
    205                 ptr++;
    206                 break;
    207             case 'r':
    208                 qstring_append(str, "\r");
    209                 ptr++;
    210                 break;
    211             case 't':
    212                 qstring_append(str, "\t");
    213                 ptr++;
    214                 break;
    215             case 'u': {
    216                 uint16_t unicode_char = 0;
    217                 char utf8_char[4];
    218                 int i = 0;
    219 
    220                 ptr++;
    221 
    222                 for (i = 0; i < 4; i++) {
    223                     if (qemu_isxdigit(*ptr)) {
    224                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
    225                     } else {
    226                         parse_error(ctxt, token,
    227                                     "invalid hex escape sequence in string");
    228                         goto out;
    229                     }
    230                     ptr++;
    231                 }
    232 
    233                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
    234                 qstring_append(str, utf8_char);
    235             }   break;
    236             default:
    237                 parse_error(ctxt, token, "invalid escape sequence in string");
    238                 goto out;
    239             }
    240         } else {
    241             char dummy[2];
    242 
    243             dummy[0] = *ptr++;
    244             dummy[1] = 0;
    245 
    246             qstring_append(str, dummy);
    247         }
    248     }
    249 
    250     return str;
    251 
    252 out:
    253     QDECREF(str);
    254     return NULL;
    255 }
    256 
    257 /**
    258  * Parsing rules
    259  */
    260 static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
    261 {
    262     QObject *key, *token = NULL, *value, *peek;
    263     QList *working = qlist_copy(*tokens);
    264 
    265     peek = qlist_peek(working);
    266     key = parse_value(ctxt, &working, ap);
    267     if (qobject_type(key) != QTYPE_QSTRING) {
    268         parse_error(ctxt, peek, "key is not a string in object");
    269         goto out;
    270     }
    271 
    272     token = qlist_pop(working);
    273     if (!token_is_operator(token, ':')) {
    274         parse_error(ctxt, token, "missing : in object pair");
    275         goto out;
    276     }
    277 
    278     value = parse_value(ctxt, &working, ap);
    279     if (value == NULL) {
    280         parse_error(ctxt, token, "Missing value in dict");
    281         goto out;
    282     }
    283 
    284     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
    285 
    286     qobject_decref(token);
    287     qobject_decref(key);
    288     QDECREF(*tokens);
    289     *tokens = working;
    290 
    291     return 0;
    292 
    293 out:
    294     qobject_decref(token);
    295     qobject_decref(key);
    296     QDECREF(working);
    297 
    298     return -1;
    299 }
    300 
    301 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    302 {
    303     QDict *dict = NULL;
    304     QObject *token, *peek;
    305     QList *working = qlist_copy(*tokens);
    306 
    307     token = qlist_pop(working);
    308     if (!token_is_operator(token, '{')) {
    309         goto out;
    310     }
    311     qobject_decref(token);
    312     token = NULL;
    313 
    314     dict = qdict_new();
    315 
    316     peek = qlist_peek(working);
    317     if (!token_is_operator(peek, '}')) {
    318         if (parse_pair(ctxt, dict, &working, ap) == -1) {
    319             goto out;
    320         }
    321 
    322         token = qlist_pop(working);
    323         while (!token_is_operator(token, '}')) {
    324             if (!token_is_operator(token, ',')) {
    325                 parse_error(ctxt, token, "expected separator in dict");
    326                 goto out;
    327             }
    328             qobject_decref(token);
    329             token = NULL;
    330 
    331             if (parse_pair(ctxt, dict, &working, ap) == -1) {
    332                 goto out;
    333             }
    334 
    335             token = qlist_pop(working);
    336         }
    337         qobject_decref(token);
    338         token = NULL;
    339     } else {
    340         token = qlist_pop(working);
    341         qobject_decref(token);
    342         token = NULL;
    343     }
    344 
    345     QDECREF(*tokens);
    346     *tokens = working;
    347 
    348     return QOBJECT(dict);
    349 
    350 out:
    351     qobject_decref(token);
    352     QDECREF(working);
    353     QDECREF(dict);
    354     return NULL;
    355 }
    356 
    357 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    358 {
    359     QList *list = NULL;
    360     QObject *token, *peek;
    361     QList *working = qlist_copy(*tokens);
    362 
    363     token = qlist_pop(working);
    364     if (!token_is_operator(token, '[')) {
    365         goto out;
    366     }
    367     qobject_decref(token);
    368     token = NULL;
    369 
    370     list = qlist_new();
    371 
    372     peek = qlist_peek(working);
    373     if (!token_is_operator(peek, ']')) {
    374         QObject *obj;
    375 
    376         obj = parse_value(ctxt, &working, ap);
    377         if (obj == NULL) {
    378             parse_error(ctxt, token, "expecting value");
    379             goto out;
    380         }
    381 
    382         qlist_append_obj(list, obj);
    383 
    384         token = qlist_pop(working);
    385         while (!token_is_operator(token, ']')) {
    386             if (!token_is_operator(token, ',')) {
    387                 parse_error(ctxt, token, "expected separator in list");
    388                 goto out;
    389             }
    390 
    391             qobject_decref(token);
    392             token = NULL;
    393 
    394             obj = parse_value(ctxt, &working, ap);
    395             if (obj == NULL) {
    396                 parse_error(ctxt, token, "expecting value");
    397                 goto out;
    398             }
    399 
    400             qlist_append_obj(list, obj);
    401 
    402             token = qlist_pop(working);
    403         }
    404 
    405         qobject_decref(token);
    406         token = NULL;
    407     } else {
    408         token = qlist_pop(working);
    409         qobject_decref(token);
    410         token = NULL;
    411     }
    412 
    413     QDECREF(*tokens);
    414     *tokens = working;
    415 
    416     return QOBJECT(list);
    417 
    418 out:
    419     qobject_decref(token);
    420     QDECREF(working);
    421     QDECREF(list);
    422     return NULL;
    423 }
    424 
    425 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
    426 {
    427     QObject *token, *ret;
    428     QList *working = qlist_copy(*tokens);
    429 
    430     token = qlist_pop(working);
    431 
    432     if (token_get_type(token) != JSON_KEYWORD) {
    433         goto out;
    434     }
    435 
    436     if (token_is_keyword(token, "true")) {
    437         ret = QOBJECT(qbool_from_int(true));
    438     } else if (token_is_keyword(token, "false")) {
    439         ret = QOBJECT(qbool_from_int(false));
    440     } else {
    441         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
    442         goto out;
    443     }
    444 
    445     qobject_decref(token);
    446     QDECREF(*tokens);
    447     *tokens = working;
    448 
    449     return ret;
    450 
    451 out:
    452     qobject_decref(token);
    453     QDECREF(working);
    454 
    455     return NULL;
    456 }
    457 
    458 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    459 {
    460     QObject *token = NULL, *obj;
    461     QList *working = qlist_copy(*tokens);
    462 
    463     if (ap == NULL) {
    464         goto out;
    465     }
    466 
    467     token = qlist_pop(working);
    468 
    469     if (token_is_escape(token, "%p")) {
    470         obj = va_arg(*ap, QObject *);
    471     } else if (token_is_escape(token, "%i")) {
    472         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    473     } else if (token_is_escape(token, "%d")) {
    474         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    475     } else if (token_is_escape(token, "%ld")) {
    476         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    477     } else if (token_is_escape(token, "%lld") ||
    478                token_is_escape(token, "%I64d")) {
    479         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    480     } else if (token_is_escape(token, "%s")) {
    481         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    482     } else if (token_is_escape(token, "%f")) {
    483         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    484     } else {
    485         goto out;
    486     }
    487 
    488     qobject_decref(token);
    489     QDECREF(*tokens);
    490     *tokens = working;
    491 
    492     return obj;
    493 
    494 out:
    495     qobject_decref(token);
    496     QDECREF(working);
    497 
    498     return NULL;
    499 }
    500 
    501 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
    502 {
    503     QObject *token, *obj;
    504     QList *working = qlist_copy(*tokens);
    505 
    506     token = qlist_pop(working);
    507     switch (token_get_type(token)) {
    508     case JSON_STRING:
    509         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
    510         break;
    511     case JSON_INTEGER:
    512         obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
    513         break;
    514     case JSON_FLOAT:
    515         /* FIXME dependent on locale */
    516         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
    517         break;
    518     default:
    519         goto out;
    520     }
    521 
    522     qobject_decref(token);
    523     QDECREF(*tokens);
    524     *tokens = working;
    525 
    526     return obj;
    527 
    528 out:
    529     qobject_decref(token);
    530     QDECREF(working);
    531 
    532     return NULL;
    533 }
    534 
    535 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
    536 {
    537     QObject *obj;
    538 
    539     obj = parse_object(ctxt, tokens, ap);
    540     if (obj == NULL) {
    541         obj = parse_array(ctxt, tokens, ap);
    542     }
    543     if (obj == NULL) {
    544         obj = parse_escape(ctxt, tokens, ap);
    545     }
    546     if (obj == NULL) {
    547         obj = parse_keyword(ctxt, tokens);
    548     }
    549     if (obj == NULL) {
    550         obj = parse_literal(ctxt, tokens);
    551     }
    552 
    553     return obj;
    554 }
    555 
    556 QObject *json_parser_parse(QList *tokens, va_list *ap)
    557 {
    558     JSONParserContext ctxt = {};
    559     QList *working = qlist_copy(tokens);
    560     QObject *result;
    561 
    562     result = parse_value(&ctxt, &working, ap);
    563 
    564     QDECREF(working);
    565 
    566     return result;
    567 }
    568