Home | History | Annotate | Download | only in qobject
      1 /*
      2  * JSON Parser
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori (at) us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include <stdarg.h>
     15 
     16 #include "qemu-common.h"
     17 #include "qapi/qmp/qstring.h"
     18 #include "qapi/qmp/qint.h"
     19 #include "qapi/qmp/qdict.h"
     20 #include "qapi/qmp/qlist.h"
     21 #include "qapi/qmp/qfloat.h"
     22 #include "qapi/qmp/qbool.h"
     23 #include "qapi/qmp/json-parser.h"
     24 #include "qapi/qmp/json-lexer.h"
     25 #include "qapi/qmp/qerror.h"
     26 
     27 typedef struct JSONParserContext
     28 {
     29     Error *err;
     30     struct {
     31         QObject **buf;
     32         size_t pos;
     33         size_t count;
     34     } tokens;
     35 } JSONParserContext;
     36 
     37 #define BUG_ON(cond) assert(!(cond))
     38 
     39 /**
     40  * TODO
     41  *
     42  * 0) make errors meaningful again
     43  * 1) add geometry information to tokens
     44  * 3) should we return a parsed size?
     45  * 4) deal with premature EOI
     46  */
     47 
     48 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
     49 
     50 /**
     51  * Token manipulators
     52  *
     53  * tokens are dictionaries that contain a type, a string value, and geometry information
     54  * about a token identified by the lexer.  These are routines that make working with
     55  * these objects a bit easier.
     56  */
     57 static const char *token_get_value(QObject *obj)
     58 {
     59     return qdict_get_str(qobject_to_qdict(obj), "token");
     60 }
     61 
     62 static JSONTokenType token_get_type(QObject *obj)
     63 {
     64     return qdict_get_int(qobject_to_qdict(obj), "type");
     65 }
     66 
     67 static int token_is_operator(QObject *obj, char op)
     68 {
     69     const char *val;
     70 
     71     if (token_get_type(obj) != JSON_OPERATOR) {
     72         return 0;
     73     }
     74 
     75     val = token_get_value(obj);
     76 
     77     return (val[0] == op) && (val[1] == 0);
     78 }
     79 
     80 static int token_is_keyword(QObject *obj, const char *value)
     81 {
     82     if (token_get_type(obj) != JSON_KEYWORD) {
     83         return 0;
     84     }
     85 
     86     return strcmp(token_get_value(obj), value) == 0;
     87 }
     88 
     89 static int token_is_escape(QObject *obj, const char *value)
     90 {
     91     if (token_get_type(obj) != JSON_ESCAPE) {
     92         return 0;
     93     }
     94 
     95     return (strcmp(token_get_value(obj), value) == 0);
     96 }
     97 
     98 /**
     99  * Error handler
    100  */
    101 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
    102                                            QObject *token, const char *msg, ...)
    103 {
    104     va_list ap;
    105     char message[1024];
    106     va_start(ap, msg);
    107     vsnprintf(message, sizeof(message), msg, ap);
    108     va_end(ap);
    109     if (ctxt->err) {
    110         error_free(ctxt->err);
    111         ctxt->err = NULL;
    112     }
    113     error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, message);
    114 }
    115 
    116 /**
    117  * String helpers
    118  *
    119  * These helpers are used to unescape strings.
    120  */
    121 static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
    122 {
    123     if (wchar <= 0x007F) {
    124         BUG_ON(buffer_length < 2);
    125 
    126         buffer[0] = wchar & 0x7F;
    127         buffer[1] = 0;
    128     } else if (wchar <= 0x07FF) {
    129         BUG_ON(buffer_length < 3);
    130 
    131         buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
    132         buffer[1] = 0x80 | (wchar & 0x3F);
    133         buffer[2] = 0;
    134     } else {
    135         BUG_ON(buffer_length < 4);
    136 
    137         buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
    138         buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
    139         buffer[2] = 0x80 | (wchar & 0x3F);
    140         buffer[3] = 0;
    141     }
    142 }
    143 
    144 static int hex2decimal(char ch)
    145 {
    146     if (ch >= '0' && ch <= '9') {
    147         return (ch - '0');
    148     } else if (ch >= 'a' && ch <= 'f') {
    149         return 10 + (ch - 'a');
    150     } else if (ch >= 'A' && ch <= 'F') {
    151         return 10 + (ch - 'A');
    152     }
    153 
    154     return -1;
    155 }
    156 
    157 /**
    158  * qstring_from_escaped_str(): Unescape a JSON string token and return a QString
    159  *
    160  *  string
    161  *      ""
    162  *      " chars "
    163  *  chars
    164  *      char
    165  *      char chars
    166  *  char
    167  *      any-Unicode-character-
    168  *          except-"-or-\-or-
    169  *          control-character
    170  *      \"
    171  *      \\
    172  *      \/
    173  *      \b
    174  *      \f
    175  *      \n
    176  *      \r
    177  *      \t
    178  *      \u four-hex-digits
    179  */
    180 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
    181 {
    182     const char *ptr = token_get_value(token);
    183     QString *str;
    184     int double_quote = 1;
    185 
    186     if (*ptr == '"') {
    187         double_quote = 1;
    188     } else {
    189         double_quote = 0;
    190     }
    191     ptr++;
    192 
    193     str = qstring_new();
    194     while (*ptr &&
    195            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
    196         if (*ptr == '\\') {
    197             ptr++;
    198 
    199             switch (*ptr) {
    200             case '"':
    201                 qstring_append(str, "\"");
    202                 ptr++;
    203                 break;
    204             case '\'':
    205                 qstring_append(str, "'");
    206                 ptr++;
    207                 break;
    208             case '\\':
    209                 qstring_append(str, "\\");
    210                 ptr++;
    211                 break;
    212             case '/':
    213                 qstring_append(str, "/");
    214                 ptr++;
    215                 break;
    216             case 'b':
    217                 qstring_append(str, "\b");
    218                 ptr++;
    219                 break;
    220             case 'f':
    221                 qstring_append(str, "\f");
    222                 ptr++;
    223                 break;
    224             case 'n':
    225                 qstring_append(str, "\n");
    226                 ptr++;
    227                 break;
    228             case 'r':
    229                 qstring_append(str, "\r");
    230                 ptr++;
    231                 break;
    232             case 't':
    233                 qstring_append(str, "\t");
    234                 ptr++;
    235                 break;
    236             case 'u': {
    237                 uint16_t unicode_char = 0;
    238                 char utf8_char[4];
    239                 int i = 0;
    240 
    241                 ptr++;
    242 
    243                 for (i = 0; i < 4; i++) {
    244                     if (qemu_isxdigit(*ptr)) {
    245                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
    246                     } else {
    247                         parse_error(ctxt, token,
    248                                     "invalid hex escape sequence in string");
    249                         goto out;
    250                     }
    251                     ptr++;
    252                 }
    253 
    254                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
    255                 qstring_append(str, utf8_char);
    256             }   break;
    257             default:
    258                 parse_error(ctxt, token, "invalid escape sequence in string");
    259                 goto out;
    260             }
    261         } else {
    262             char dummy[2];
    263 
    264             dummy[0] = *ptr++;
    265             dummy[1] = 0;
    266 
    267             qstring_append(str, dummy);
    268         }
    269     }
    270 
    271     return str;
    272 
    273 out:
    274     QDECREF(str);
    275     return NULL;
    276 }
    277 
    278 static QObject *parser_context_pop_token(JSONParserContext *ctxt)
    279 {
    280     QObject *token;
    281     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
    282     token = ctxt->tokens.buf[ctxt->tokens.pos];
    283     ctxt->tokens.pos++;
    284     return token;
    285 }
    286 
    287 /* Note: parser_context_{peek|pop}_token do not increment the
    288  * token object's refcount. In both cases the references will continue
    289  * to be tracked and cleaned up in parser_context_free(), so do not
    290  * attempt to free the token object.
    291  */
    292 static QObject *parser_context_peek_token(JSONParserContext *ctxt)
    293 {
    294     QObject *token;
    295     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
    296     token = ctxt->tokens.buf[ctxt->tokens.pos];
    297     return token;
    298 }
    299 
    300 static JSONParserContext parser_context_save(JSONParserContext *ctxt)
    301 {
    302     JSONParserContext saved_ctxt = {0};
    303     saved_ctxt.tokens.pos = ctxt->tokens.pos;
    304     saved_ctxt.tokens.count = ctxt->tokens.count;
    305     saved_ctxt.tokens.buf = ctxt->tokens.buf;
    306     return saved_ctxt;
    307 }
    308 
    309 static void parser_context_restore(JSONParserContext *ctxt,
    310                                    JSONParserContext saved_ctxt)
    311 {
    312     ctxt->tokens.pos = saved_ctxt.tokens.pos;
    313     ctxt->tokens.count = saved_ctxt.tokens.count;
    314     ctxt->tokens.buf = saved_ctxt.tokens.buf;
    315 }
    316 
    317 static void tokens_append_from_iter(QObject *obj, void *opaque)
    318 {
    319     JSONParserContext *ctxt = opaque;
    320     g_assert(ctxt->tokens.pos < ctxt->tokens.count);
    321     ctxt->tokens.buf[ctxt->tokens.pos++] = obj;
    322     qobject_incref(obj);
    323 }
    324 
    325 static JSONParserContext *parser_context_new(QList *tokens)
    326 {
    327     JSONParserContext *ctxt;
    328     size_t count;
    329 
    330     if (!tokens) {
    331         return NULL;
    332     }
    333 
    334     count = qlist_size(tokens);
    335     if (count == 0) {
    336         return NULL;
    337     }
    338 
    339     ctxt = g_malloc0(sizeof(JSONParserContext));
    340     ctxt->tokens.pos = 0;
    341     ctxt->tokens.count = count;
    342     ctxt->tokens.buf = g_malloc(count * sizeof(QObject *));
    343     qlist_iter(tokens, tokens_append_from_iter, ctxt);
    344     ctxt->tokens.pos = 0;
    345 
    346     return ctxt;
    347 }
    348 
    349 /* to support error propagation, ctxt->err must be freed separately */
    350 static void parser_context_free(JSONParserContext *ctxt)
    351 {
    352     int i;
    353     if (ctxt) {
    354         for (i = 0; i < ctxt->tokens.count; i++) {
    355             qobject_decref(ctxt->tokens.buf[i]);
    356         }
    357         g_free(ctxt->tokens.buf);
    358         g_free(ctxt);
    359     }
    360 }
    361 
    362 /**
    363  * Parsing rules
    364  */
    365 static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
    366 {
    367     QObject *key = NULL, *token = NULL, *value, *peek;
    368     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    369 
    370     peek = parser_context_peek_token(ctxt);
    371     if (peek == NULL) {
    372         parse_error(ctxt, NULL, "premature EOI");
    373         goto out;
    374     }
    375 
    376     key = parse_value(ctxt, ap);
    377     if (!key || qobject_type(key) != QTYPE_QSTRING) {
    378         parse_error(ctxt, peek, "key is not a string in object");
    379         goto out;
    380     }
    381 
    382     token = parser_context_pop_token(ctxt);
    383     if (token == NULL) {
    384         parse_error(ctxt, NULL, "premature EOI");
    385         goto out;
    386     }
    387 
    388     if (!token_is_operator(token, ':')) {
    389         parse_error(ctxt, token, "missing : in object pair");
    390         goto out;
    391     }
    392 
    393     value = parse_value(ctxt, ap);
    394     if (value == NULL) {
    395         parse_error(ctxt, token, "Missing value in dict");
    396         goto out;
    397     }
    398 
    399     qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
    400 
    401     qobject_decref(key);
    402 
    403     return 0;
    404 
    405 out:
    406     parser_context_restore(ctxt, saved_ctxt);
    407     qobject_decref(key);
    408 
    409     return -1;
    410 }
    411 
    412 static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
    413 {
    414     QDict *dict = NULL;
    415     QObject *token, *peek;
    416     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    417 
    418     token = parser_context_pop_token(ctxt);
    419     if (token == NULL) {
    420         goto out;
    421     }
    422 
    423     if (!token_is_operator(token, '{')) {
    424         goto out;
    425     }
    426     token = NULL;
    427 
    428     dict = qdict_new();
    429 
    430     peek = parser_context_peek_token(ctxt);
    431     if (peek == NULL) {
    432         parse_error(ctxt, NULL, "premature EOI");
    433         goto out;
    434     }
    435 
    436     if (!token_is_operator(peek, '}')) {
    437         if (parse_pair(ctxt, dict, ap) == -1) {
    438             goto out;
    439         }
    440 
    441         token = parser_context_pop_token(ctxt);
    442         if (token == NULL) {
    443             parse_error(ctxt, NULL, "premature EOI");
    444             goto out;
    445         }
    446 
    447         while (!token_is_operator(token, '}')) {
    448             if (!token_is_operator(token, ',')) {
    449                 parse_error(ctxt, token, "expected separator in dict");
    450                 goto out;
    451             }
    452             token = NULL;
    453 
    454             if (parse_pair(ctxt, dict, ap) == -1) {
    455                 goto out;
    456             }
    457 
    458             token = parser_context_pop_token(ctxt);
    459             if (token == NULL) {
    460                 parse_error(ctxt, NULL, "premature EOI");
    461                 goto out;
    462             }
    463         }
    464         token = NULL;
    465     } else {
    466         token = parser_context_pop_token(ctxt);
    467         token = NULL;
    468     }
    469 
    470     return QOBJECT(dict);
    471 
    472 out:
    473     parser_context_restore(ctxt, saved_ctxt);
    474     QDECREF(dict);
    475     return NULL;
    476 }
    477 
    478 static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
    479 {
    480     QList *list = NULL;
    481     QObject *token, *peek;
    482     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    483 
    484     token = parser_context_pop_token(ctxt);
    485     if (token == NULL) {
    486         goto out;
    487     }
    488 
    489     if (!token_is_operator(token, '[')) {
    490         token = NULL;
    491         goto out;
    492     }
    493     token = NULL;
    494 
    495     list = qlist_new();
    496 
    497     peek = parser_context_peek_token(ctxt);
    498     if (peek == NULL) {
    499         parse_error(ctxt, NULL, "premature EOI");
    500         goto out;
    501     }
    502 
    503     if (!token_is_operator(peek, ']')) {
    504         QObject *obj;
    505 
    506         obj = parse_value(ctxt, ap);
    507         if (obj == NULL) {
    508             parse_error(ctxt, token, "expecting value");
    509             goto out;
    510         }
    511 
    512         qlist_append_obj(list, obj);
    513 
    514         token = parser_context_pop_token(ctxt);
    515         if (token == NULL) {
    516             parse_error(ctxt, NULL, "premature EOI");
    517             goto out;
    518         }
    519 
    520         while (!token_is_operator(token, ']')) {
    521             if (!token_is_operator(token, ',')) {
    522                 parse_error(ctxt, token, "expected separator in list");
    523                 goto out;
    524             }
    525 
    526             token = NULL;
    527 
    528             obj = parse_value(ctxt, ap);
    529             if (obj == NULL) {
    530                 parse_error(ctxt, token, "expecting value");
    531                 goto out;
    532             }
    533 
    534             qlist_append_obj(list, obj);
    535 
    536             token = parser_context_pop_token(ctxt);
    537             if (token == NULL) {
    538                 parse_error(ctxt, NULL, "premature EOI");
    539                 goto out;
    540             }
    541         }
    542 
    543         token = NULL;
    544     } else {
    545         token = parser_context_pop_token(ctxt);
    546         token = NULL;
    547     }
    548 
    549     return QOBJECT(list);
    550 
    551 out:
    552     parser_context_restore(ctxt, saved_ctxt);
    553     QDECREF(list);
    554     return NULL;
    555 }
    556 
    557 static QObject *parse_keyword(JSONParserContext *ctxt)
    558 {
    559     QObject *token, *ret;
    560     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    561 
    562     token = parser_context_pop_token(ctxt);
    563     if (token == NULL) {
    564         goto out;
    565     }
    566 
    567     if (token_get_type(token) != JSON_KEYWORD) {
    568         goto out;
    569     }
    570 
    571     if (token_is_keyword(token, "true")) {
    572         ret = QOBJECT(qbool_from_int(true));
    573     } else if (token_is_keyword(token, "false")) {
    574         ret = QOBJECT(qbool_from_int(false));
    575     } else {
    576         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
    577         goto out;
    578     }
    579 
    580     return ret;
    581 
    582 out:
    583     parser_context_restore(ctxt, saved_ctxt);
    584 
    585     return NULL;
    586 }
    587 
    588 static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
    589 {
    590     QObject *token = NULL, *obj;
    591     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    592 
    593     if (ap == NULL) {
    594         goto out;
    595     }
    596 
    597     token = parser_context_pop_token(ctxt);
    598     if (token == NULL) {
    599         goto out;
    600     }
    601 
    602     if (token_is_escape(token, "%p")) {
    603         obj = va_arg(*ap, QObject *);
    604     } else if (token_is_escape(token, "%i")) {
    605         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
    606     } else if (token_is_escape(token, "%d")) {
    607         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
    608     } else if (token_is_escape(token, "%ld")) {
    609         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
    610     } else if (token_is_escape(token, "%lld") ||
    611                token_is_escape(token, "%I64d")) {
    612         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
    613     } else if (token_is_escape(token, "%s")) {
    614         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
    615     } else if (token_is_escape(token, "%f")) {
    616         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
    617     } else {
    618         goto out;
    619     }
    620 
    621     return obj;
    622 
    623 out:
    624     parser_context_restore(ctxt, saved_ctxt);
    625 
    626     return NULL;
    627 }
    628 
    629 static QObject *parse_literal(JSONParserContext *ctxt)
    630 {
    631     QObject *token, *obj;
    632     JSONParserContext saved_ctxt = parser_context_save(ctxt);
    633 
    634     token = parser_context_pop_token(ctxt);
    635     if (token == NULL) {
    636         goto out;
    637     }
    638 
    639     switch (token_get_type(token)) {
    640     case JSON_STRING:
    641         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
    642         break;
    643     case JSON_INTEGER: {
    644         /* A possibility exists that this is a whole-valued float where the
    645          * fractional part was left out due to being 0 (.0). It's not a big
    646          * deal to treat these as ints in the parser, so long as users of the
    647          * resulting QObject know to expect a QInt in place of a QFloat in
    648          * cases like these.
    649          *
    650          * However, in some cases these values will overflow/underflow a
    651          * QInt/int64 container, thus we should assume these are to be handled
    652          * as QFloats/doubles rather than silently changing their values.
    653          *
    654          * strtoll() indicates these instances by setting errno to ERANGE
    655          */
    656         int64_t value;
    657 
    658         errno = 0; /* strtoll doesn't set errno on success */
    659         value = strtoll(token_get_value(token), NULL, 10);
    660         if (errno != ERANGE) {
    661             obj = QOBJECT(qint_from_int(value));
    662             break;
    663         }
    664         /* fall through to JSON_FLOAT */
    665     }
    666     case JSON_FLOAT:
    667         /* FIXME dependent on locale */
    668         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
    669         break;
    670     default:
    671         goto out;
    672     }
    673 
    674     return obj;
    675 
    676 out:
    677     parser_context_restore(ctxt, saved_ctxt);
    678 
    679     return NULL;
    680 }
    681 
    682 static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
    683 {
    684     QObject *obj;
    685 
    686     obj = parse_object(ctxt, ap);
    687     if (obj == NULL) {
    688         obj = parse_array(ctxt, ap);
    689     }
    690     if (obj == NULL) {
    691         obj = parse_escape(ctxt, ap);
    692     }
    693     if (obj == NULL) {
    694         obj = parse_keyword(ctxt);
    695     }
    696     if (obj == NULL) {
    697         obj = parse_literal(ctxt);
    698     }
    699 
    700     return obj;
    701 }
    702 
    703 QObject *json_parser_parse(QList *tokens, va_list *ap)
    704 {
    705     return json_parser_parse_err(tokens, ap, NULL);
    706 }
    707 
    708 QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
    709 {
    710     JSONParserContext *ctxt = parser_context_new(tokens);
    711     QObject *result;
    712 
    713     if (!ctxt) {
    714         return NULL;
    715     }
    716 
    717     result = parse_value(ctxt, ap);
    718 
    719     error_propagate(errp, ctxt->err);
    720 
    721     parser_context_free(ctxt);
    722 
    723     return result;
    724 }
    725