Home | History | Annotate | Download | only in qemu
      1 /*
      2  * JSON lexer
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori (at) us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include "qstring.h"
     15 #include "qlist.h"
     16 #include "qdict.h"
     17 #include "qint.h"
     18 #include "qemu-common.h"
     19 #include "json-lexer.h"
     20 
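/*
 * Token grammar recognized by the state machine below, written as plain
 * regular expressions (one token class per entry):
 *
 *   double-quoted string: "([^\\"]|\\["'\\/bfnrt]|\\u[0-9a-fA-F]{4})*"
 *   single-quoted string: '([^\\']|\\["'\\/bfnrt]|\\u[0-9a-fA-F]{4})*'
 *   number:               -?(0(\.[0-9]+([eE][-+]?[0-9]+)?)?
 *                           |[1-9][0-9]*(\.[0-9]+)?([eE][-+]?[0-9]+)?)
 *   operator:             [{}\[\],:]
 *   keyword:              [a-z]+
 *   escape:               %(d|i|p|s|f|ld|lld|I64d)
 *   whitespace:           [ \t\r\n]+
 */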
     29 
     30 #undef ERROR
     31 
     32 enum json_lexer_state {
     33     ERROR = 0,
     34     IN_DONE_STRING,
     35     IN_DQ_UCODE3,
     36     IN_DQ_UCODE2,
     37     IN_DQ_UCODE1,
     38     IN_DQ_UCODE0,
     39     IN_DQ_STRING_ESCAPE,
     40     IN_DQ_STRING,
     41     IN_SQ_UCODE3,
     42     IN_SQ_UCODE2,
     43     IN_SQ_UCODE1,
     44     IN_SQ_UCODE0,
     45     IN_SQ_STRING_ESCAPE,
     46     IN_SQ_STRING,
     47     IN_ZERO,
     48     IN_DIGITS,
     49     IN_DIGIT,
     50     IN_EXP_E,
     51     IN_MANTISSA,
     52     IN_MANTISSA_DIGITS,
     53     IN_NONZERO_NUMBER,
     54     IN_NEG_NONZERO_NUMBER,
     55     IN_KEYWORD,
     56     IN_ESCAPE,
     57     IN_ESCAPE_L,
     58     IN_ESCAPE_LL,
     59     IN_ESCAPE_I,
     60     IN_ESCAPE_I6,
     61     IN_ESCAPE_I64,
     62     IN_ESCAPE_DONE,
     63     IN_WHITESPACE,
     64     IN_OPERATOR_DONE,
     65     IN_START,
     66 };
     67 
     68 #define TERMINAL(state) [0 ... 0x7F] = (state)
     69 
     70 static const uint8_t json_lexer[][256] =  {
     71     [IN_DONE_STRING] = {
     72         TERMINAL(JSON_STRING),
     73     },
     74 
     75     /* double quote string */
     76     [IN_DQ_UCODE3] = {
     77         ['0' ... '9'] = IN_DQ_STRING,
     78         ['a' ... 'f'] = IN_DQ_STRING,
     79         ['A' ... 'F'] = IN_DQ_STRING,
     80     },
     81     [IN_DQ_UCODE2] = {
     82         ['0' ... '9'] = IN_DQ_UCODE3,
     83         ['a' ... 'f'] = IN_DQ_UCODE3,
     84         ['A' ... 'F'] = IN_DQ_UCODE3,
     85     },
     86     [IN_DQ_UCODE1] = {
     87         ['0' ... '9'] = IN_DQ_UCODE2,
     88         ['a' ... 'f'] = IN_DQ_UCODE2,
     89         ['A' ... 'F'] = IN_DQ_UCODE2,
     90     },
     91     [IN_DQ_UCODE0] = {
     92         ['0' ... '9'] = IN_DQ_UCODE1,
     93         ['a' ... 'f'] = IN_DQ_UCODE1,
     94         ['A' ... 'F'] = IN_DQ_UCODE1,
     95     },
     96     [IN_DQ_STRING_ESCAPE] = {
     97         ['b'] = IN_DQ_STRING,
     98         ['f'] =  IN_DQ_STRING,
     99         ['n'] =  IN_DQ_STRING,
    100         ['r'] =  IN_DQ_STRING,
    101         ['t'] =  IN_DQ_STRING,
    102         ['\''] = IN_DQ_STRING,
    103         ['\"'] = IN_DQ_STRING,
    104         ['u'] = IN_DQ_UCODE0,
    105     },
    106     [IN_DQ_STRING] = {
    107         [1 ... 0xFF] = IN_DQ_STRING,
    108         ['\\'] = IN_DQ_STRING_ESCAPE,
    109         ['"'] = IN_DONE_STRING,
    110     },
    111 
    112     /* single quote string */
    113     [IN_SQ_UCODE3] = {
    114         ['0' ... '9'] = IN_SQ_STRING,
    115         ['a' ... 'f'] = IN_SQ_STRING,
    116         ['A' ... 'F'] = IN_SQ_STRING,
    117     },
    118     [IN_SQ_UCODE2] = {
    119         ['0' ... '9'] = IN_SQ_UCODE3,
    120         ['a' ... 'f'] = IN_SQ_UCODE3,
    121         ['A' ... 'F'] = IN_SQ_UCODE3,
    122     },
    123     [IN_SQ_UCODE1] = {
    124         ['0' ... '9'] = IN_SQ_UCODE2,
    125         ['a' ... 'f'] = IN_SQ_UCODE2,
    126         ['A' ... 'F'] = IN_SQ_UCODE2,
    127     },
    128     [IN_SQ_UCODE0] = {
    129         ['0' ... '9'] = IN_SQ_UCODE1,
    130         ['a' ... 'f'] = IN_SQ_UCODE1,
    131         ['A' ... 'F'] = IN_SQ_UCODE1,
    132     },
    133     [IN_SQ_STRING_ESCAPE] = {
    134         ['b'] = IN_SQ_STRING,
    135         ['f'] =  IN_SQ_STRING,
    136         ['n'] =  IN_SQ_STRING,
    137         ['r'] =  IN_SQ_STRING,
    138         ['t'] =  IN_SQ_STRING,
    139         ['\''] = IN_SQ_STRING,
    140         ['\"'] = IN_SQ_STRING,
    141         ['u'] = IN_SQ_UCODE0,
    142     },
    143     [IN_SQ_STRING] = {
    144         [1 ... 0xFF] = IN_SQ_STRING,
    145         ['\\'] = IN_SQ_STRING_ESCAPE,
    146         ['\''] = IN_DONE_STRING,
    147     },
    148 
    149     /* Zero */
    150     [IN_ZERO] = {
    151         TERMINAL(JSON_INTEGER),
    152         ['0' ... '9'] = ERROR,
    153         ['.'] = IN_MANTISSA,
    154     },
    155 
    156     /* Float */
    157     [IN_DIGITS] = {
    158         TERMINAL(JSON_FLOAT),
    159         ['0' ... '9'] = IN_DIGITS,
    160     },
    161 
    162     [IN_DIGIT] = {
    163         ['0' ... '9'] = IN_DIGITS,
    164     },
    165 
    166     [IN_EXP_E] = {
    167         ['-'] = IN_DIGIT,
    168         ['+'] = IN_DIGIT,
    169         ['0' ... '9'] = IN_DIGITS,
    170     },
    171 
    172     [IN_MANTISSA_DIGITS] = {
    173         TERMINAL(JSON_FLOAT),
    174         ['0' ... '9'] = IN_MANTISSA_DIGITS,
    175         ['e'] = IN_EXP_E,
    176         ['E'] = IN_EXP_E,
    177     },
    178 
    179     [IN_MANTISSA] = {
    180         ['0' ... '9'] = IN_MANTISSA_DIGITS,
    181     },
    182 
    183     /* Number */
    184     [IN_NONZERO_NUMBER] = {
    185         TERMINAL(JSON_INTEGER),
    186         ['0' ... '9'] = IN_NONZERO_NUMBER,
    187         ['e'] = IN_EXP_E,
    188         ['E'] = IN_EXP_E,
    189         ['.'] = IN_MANTISSA,
    190     },
    191 
    192     [IN_NEG_NONZERO_NUMBER] = {
    193         ['0'] = IN_ZERO,
    194         ['1' ... '9'] = IN_NONZERO_NUMBER,
    195     },
    196 
    197     /* keywords */
    198     [IN_KEYWORD] = {
    199         TERMINAL(JSON_KEYWORD),
    200         ['a' ... 'z'] = IN_KEYWORD,
    201     },
    202 
    203     /* whitespace */
    204     [IN_WHITESPACE] = {
    205         TERMINAL(JSON_SKIP),
    206         [' '] = IN_WHITESPACE,
    207         ['\t'] = IN_WHITESPACE,
    208         ['\r'] = IN_WHITESPACE,
    209         ['\n'] = IN_WHITESPACE,
    210     },
    211 
    212     /* operator */
    213     [IN_OPERATOR_DONE] = {
    214         TERMINAL(JSON_OPERATOR),
    215     },
    216 
    217     /* escape */
    218     [IN_ESCAPE_DONE] = {
    219         TERMINAL(JSON_ESCAPE),
    220     },
    221 
    222     [IN_ESCAPE_LL] = {
    223         ['d'] = IN_ESCAPE_DONE,
    224     },
    225 
    226     [IN_ESCAPE_L] = {
    227         ['d'] = IN_ESCAPE_DONE,
    228         ['l'] = IN_ESCAPE_LL,
    229     },
    230 
    231     [IN_ESCAPE_I64] = {
    232         ['d'] = IN_ESCAPE_DONE,
    233     },
    234 
    235     [IN_ESCAPE_I6] = {
    236         ['4'] = IN_ESCAPE_I64,
    237     },
    238 
    239     [IN_ESCAPE_I] = {
    240         ['6'] = IN_ESCAPE_I6,
    241     },
    242 
    243     [IN_ESCAPE] = {
    244         ['d'] = IN_ESCAPE_DONE,
    245         ['i'] = IN_ESCAPE_DONE,
    246         ['p'] = IN_ESCAPE_DONE,
    247         ['s'] = IN_ESCAPE_DONE,
    248         ['f'] = IN_ESCAPE_DONE,
    249         ['l'] = IN_ESCAPE_L,
    250         ['I'] = IN_ESCAPE_I,
    251     },
    252 
    253     /* top level rule */
    254     [IN_START] = {
    255         ['"'] = IN_DQ_STRING,
    256         ['\''] = IN_SQ_STRING,
    257         ['0'] = IN_ZERO,
    258         ['1' ... '9'] = IN_NONZERO_NUMBER,
    259         ['-'] = IN_NEG_NONZERO_NUMBER,
    260         ['{'] = IN_OPERATOR_DONE,
    261         ['}'] = IN_OPERATOR_DONE,
    262         ['['] = IN_OPERATOR_DONE,
    263         [']'] = IN_OPERATOR_DONE,
    264         [','] = IN_OPERATOR_DONE,
    265         [':'] = IN_OPERATOR_DONE,
    266         ['a' ... 'z'] = IN_KEYWORD,
    267         ['%'] = IN_ESCAPE,
    268         [' '] = IN_WHITESPACE,
    269         ['\t'] = IN_WHITESPACE,
    270         ['\r'] = IN_WHITESPACE,
    271         ['\n'] = IN_WHITESPACE,
    272     },
    273 };
    274 
    275 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
    276 {
    277     lexer->emit = func;
    278     lexer->state = IN_START;
    279     lexer->token = qstring_new();
    280 }
    281 
    282 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
    283 {
    284     char buf[2];
    285 
    286     lexer->x++;
    287     if (ch == '\n') {
    288         lexer->x = 0;
    289         lexer->y++;
    290     }
    291 
    292     lexer->state = json_lexer[lexer->state][(uint8_t)ch];
    293 
    294     switch (lexer->state) {
    295     case JSON_OPERATOR:
    296     case JSON_ESCAPE:
    297     case JSON_INTEGER:
    298     case JSON_FLOAT:
    299     case JSON_KEYWORD:
    300     case JSON_STRING:
    301         lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
    302     case JSON_SKIP:
    303         lexer->state = json_lexer[IN_START][(uint8_t)ch];
    304         QDECREF(lexer->token);
    305         lexer->token = qstring_new();
    306         break;
    307     case ERROR:
    308         return -EINVAL;
    309     default:
    310         break;
    311     }
    312 
    313     buf[0] = ch;
    314     buf[1] = 0;
    315 
    316     qstring_append(lexer->token, buf);
    317 
    318     return 0;
    319 }
    320 
    321 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
    322 {
    323     size_t i;
    324 
    325     for (i = 0; i < size; i++) {
    326         int err;
    327 
    328         err = json_lexer_feed_char(lexer, buffer[i]);
    329         if (err < 0) {
    330             return err;
    331         }
    332     }
    333 
    334     return 0;
    335 }
    336 
    337 int json_lexer_flush(JSONLexer *lexer)
    338 {
    339     return json_lexer_feed_char(lexer, 0);
    340 }
    341 
    342 void json_lexer_destroy(JSONLexer *lexer)
    343 {
    344     QDECREF(lexer->token);
    345 }
    346