Home | History | Annotate | Download | only in qemu
      1 /*
      2  * JSON lexer
      3  *
      4  * Copyright IBM, Corp. 2009
      5  *
      6  * Authors:
      7  *  Anthony Liguori   <aliguori (at) us.ibm.com>
      8  *
      9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
     10  * See the COPYING.LIB file in the top-level directory.
     11  *
     12  */
     13 
     14 #include "qstring.h"
     15 #include "qlist.h"
     16 #include "qdict.h"
     17 #include "qint.h"
     18 #include "qemu-common.h"
     19 #include "json-lexer.h"
     20 
/*
 * Tokens recognised by the lexer:
 *
 * \"([^\\\"]|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*\"
 * '([^\\']|\\[\"'\\/bfnrt]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])*'
 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
 *     (an exponent is not accepted directly after a lone 0)
 * [{}\[\],:]
 * [a-z]+
 * %((l|ll|I64)?d|[ipsf])
 * [ \t\r\n]+   (skipped)
 *
 */
     29 
     30 enum json_lexer_state {
     31     IN_ERROR = 0,
     32     IN_DQ_UCODE3,
     33     IN_DQ_UCODE2,
     34     IN_DQ_UCODE1,
     35     IN_DQ_UCODE0,
     36     IN_DQ_STRING_ESCAPE,
     37     IN_DQ_STRING,
     38     IN_SQ_UCODE3,
     39     IN_SQ_UCODE2,
     40     IN_SQ_UCODE1,
     41     IN_SQ_UCODE0,
     42     IN_SQ_STRING_ESCAPE,
     43     IN_SQ_STRING,
     44     IN_ZERO,
     45     IN_DIGITS,
     46     IN_DIGIT,
     47     IN_EXP_E,
     48     IN_MANTISSA,
     49     IN_MANTISSA_DIGITS,
     50     IN_NONZERO_NUMBER,
     51     IN_NEG_NONZERO_NUMBER,
     52     IN_KEYWORD,
     53     IN_ESCAPE,
     54     IN_ESCAPE_L,
     55     IN_ESCAPE_LL,
     56     IN_ESCAPE_I,
     57     IN_ESCAPE_I6,
     58     IN_ESCAPE_I64,
     59     IN_WHITESPACE,
     60     IN_START,
     61 };
     62 
     63 #define TERMINAL(state) [0 ... 0x7F] = (state)
     64 
     65 /* Return whether TERMINAL is a terminal state and the transition to it
     66    from OLD_STATE required lookahead.  This happens whenever the table
     67    below uses the TERMINAL macro.  */
     68 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
     69             (json_lexer[(old_state)][0] == (terminal))
     70 
     71 static const uint8_t json_lexer[][256] =  {
     72     /* double quote string */
     73     [IN_DQ_UCODE3] = {
     74         ['0' ... '9'] = IN_DQ_STRING,
     75         ['a' ... 'f'] = IN_DQ_STRING,
     76         ['A' ... 'F'] = IN_DQ_STRING,
     77     },
     78     [IN_DQ_UCODE2] = {
     79         ['0' ... '9'] = IN_DQ_UCODE3,
     80         ['a' ... 'f'] = IN_DQ_UCODE3,
     81         ['A' ... 'F'] = IN_DQ_UCODE3,
     82     },
     83     [IN_DQ_UCODE1] = {
     84         ['0' ... '9'] = IN_DQ_UCODE2,
     85         ['a' ... 'f'] = IN_DQ_UCODE2,
     86         ['A' ... 'F'] = IN_DQ_UCODE2,
     87     },
     88     [IN_DQ_UCODE0] = {
     89         ['0' ... '9'] = IN_DQ_UCODE1,
     90         ['a' ... 'f'] = IN_DQ_UCODE1,
     91         ['A' ... 'F'] = IN_DQ_UCODE1,
     92     },
     93     [IN_DQ_STRING_ESCAPE] = {
     94         ['b'] = IN_DQ_STRING,
     95         ['f'] =  IN_DQ_STRING,
     96         ['n'] =  IN_DQ_STRING,
     97         ['r'] =  IN_DQ_STRING,
     98         ['t'] =  IN_DQ_STRING,
     99         ['/'] = IN_DQ_STRING,
    100         ['\\'] = IN_DQ_STRING,
    101         ['\''] = IN_DQ_STRING,
    102         ['\"'] = IN_DQ_STRING,
    103         ['u'] = IN_DQ_UCODE0,
    104     },
    105     [IN_DQ_STRING] = {
    106         [1 ... 0xFF] = IN_DQ_STRING,
    107         ['\\'] = IN_DQ_STRING_ESCAPE,
    108         ['"'] = JSON_STRING,
    109     },
    110 
    111     /* single quote string */
    112     [IN_SQ_UCODE3] = {
    113         ['0' ... '9'] = IN_SQ_STRING,
    114         ['a' ... 'f'] = IN_SQ_STRING,
    115         ['A' ... 'F'] = IN_SQ_STRING,
    116     },
    117     [IN_SQ_UCODE2] = {
    118         ['0' ... '9'] = IN_SQ_UCODE3,
    119         ['a' ... 'f'] = IN_SQ_UCODE3,
    120         ['A' ... 'F'] = IN_SQ_UCODE3,
    121     },
    122     [IN_SQ_UCODE1] = {
    123         ['0' ... '9'] = IN_SQ_UCODE2,
    124         ['a' ... 'f'] = IN_SQ_UCODE2,
    125         ['A' ... 'F'] = IN_SQ_UCODE2,
    126     },
    127     [IN_SQ_UCODE0] = {
    128         ['0' ... '9'] = IN_SQ_UCODE1,
    129         ['a' ... 'f'] = IN_SQ_UCODE1,
    130         ['A' ... 'F'] = IN_SQ_UCODE1,
    131     },
    132     [IN_SQ_STRING_ESCAPE] = {
    133         ['b'] = IN_SQ_STRING,
    134         ['f'] =  IN_SQ_STRING,
    135         ['n'] =  IN_SQ_STRING,
    136         ['r'] =  IN_SQ_STRING,
    137         ['t'] =  IN_SQ_STRING,
    138         ['/'] = IN_DQ_STRING,
    139         ['\\'] = IN_DQ_STRING,
    140         ['\''] = IN_SQ_STRING,
    141         ['\"'] = IN_SQ_STRING,
    142         ['u'] = IN_SQ_UCODE0,
    143     },
    144     [IN_SQ_STRING] = {
    145         [1 ... 0xFF] = IN_SQ_STRING,
    146         ['\\'] = IN_SQ_STRING_ESCAPE,
    147         ['\''] = JSON_STRING,
    148     },
    149 
    150     /* Zero */
    151     [IN_ZERO] = {
    152         TERMINAL(JSON_INTEGER),
    153         ['0' ... '9'] = IN_ERROR,
    154         ['.'] = IN_MANTISSA,
    155     },
    156 
    157     /* Float */
    158     [IN_DIGITS] = {
    159         TERMINAL(JSON_FLOAT),
    160         ['0' ... '9'] = IN_DIGITS,
    161     },
    162 
    163     [IN_DIGIT] = {
    164         ['0' ... '9'] = IN_DIGITS,
    165     },
    166 
    167     [IN_EXP_E] = {
    168         ['-'] = IN_DIGIT,
    169         ['+'] = IN_DIGIT,
    170         ['0' ... '9'] = IN_DIGITS,
    171     },
    172 
    173     [IN_MANTISSA_DIGITS] = {
    174         TERMINAL(JSON_FLOAT),
    175         ['0' ... '9'] = IN_MANTISSA_DIGITS,
    176         ['e'] = IN_EXP_E,
    177         ['E'] = IN_EXP_E,
    178     },
    179 
    180     [IN_MANTISSA] = {
    181         ['0' ... '9'] = IN_MANTISSA_DIGITS,
    182     },
    183 
    184     /* Number */
    185     [IN_NONZERO_NUMBER] = {
    186         TERMINAL(JSON_INTEGER),
    187         ['0' ... '9'] = IN_NONZERO_NUMBER,
    188         ['e'] = IN_EXP_E,
    189         ['E'] = IN_EXP_E,
    190         ['.'] = IN_MANTISSA,
    191     },
    192 
    193     [IN_NEG_NONZERO_NUMBER] = {
    194         ['0'] = IN_ZERO,
    195         ['1' ... '9'] = IN_NONZERO_NUMBER,
    196     },
    197 
    198     /* keywords */
    199     [IN_KEYWORD] = {
    200         TERMINAL(JSON_KEYWORD),
    201         ['a' ... 'z'] = IN_KEYWORD,
    202     },
    203 
    204     /* whitespace */
    205     [IN_WHITESPACE] = {
    206         TERMINAL(JSON_SKIP),
    207         [' '] = IN_WHITESPACE,
    208         ['\t'] = IN_WHITESPACE,
    209         ['\r'] = IN_WHITESPACE,
    210         ['\n'] = IN_WHITESPACE,
    211     },
    212 
    213     /* escape */
    214     [IN_ESCAPE_LL] = {
    215         ['d'] = JSON_ESCAPE,
    216     },
    217 
    218     [IN_ESCAPE_L] = {
    219         ['d'] = JSON_ESCAPE,
    220         ['l'] = IN_ESCAPE_LL,
    221     },
    222 
    223     [IN_ESCAPE_I64] = {
    224         ['d'] = JSON_ESCAPE,
    225     },
    226 
    227     [IN_ESCAPE_I6] = {
    228         ['4'] = IN_ESCAPE_I64,
    229     },
    230 
    231     [IN_ESCAPE_I] = {
    232         ['6'] = IN_ESCAPE_I6,
    233     },
    234 
    235     [IN_ESCAPE] = {
    236         ['d'] = JSON_ESCAPE,
    237         ['i'] = JSON_ESCAPE,
    238         ['p'] = JSON_ESCAPE,
    239         ['s'] = JSON_ESCAPE,
    240         ['f'] = JSON_ESCAPE,
    241         ['l'] = IN_ESCAPE_L,
    242         ['I'] = IN_ESCAPE_I,
    243     },
    244 
    245     /* top level rule */
    246     [IN_START] = {
    247         ['"'] = IN_DQ_STRING,
    248         ['\''] = IN_SQ_STRING,
    249         ['0'] = IN_ZERO,
    250         ['1' ... '9'] = IN_NONZERO_NUMBER,
    251         ['-'] = IN_NEG_NONZERO_NUMBER,
    252         ['{'] = JSON_OPERATOR,
    253         ['}'] = JSON_OPERATOR,
    254         ['['] = JSON_OPERATOR,
    255         [']'] = JSON_OPERATOR,
    256         [','] = JSON_OPERATOR,
    257         [':'] = JSON_OPERATOR,
    258         ['a' ... 'z'] = IN_KEYWORD,
    259         ['%'] = IN_ESCAPE,
    260         [' '] = IN_WHITESPACE,
    261         ['\t'] = IN_WHITESPACE,
    262         ['\r'] = IN_WHITESPACE,
    263         ['\n'] = IN_WHITESPACE,
    264     },
    265 };
    266 
    267 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
    268 {
    269     lexer->emit = func;
    270     lexer->state = IN_START;
    271     lexer->token = qstring_new();
    272     lexer->x = lexer->y = 0;
    273 }
    274 
    275 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
    276 {
    277     int char_consumed, new_state;
    278 
    279     lexer->x++;
    280     if (ch == '\n') {
    281         lexer->x = 0;
    282         lexer->y++;
    283     }
    284 
    285     do {
    286         new_state = json_lexer[lexer->state][(uint8_t)ch];
    287         char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
    288         if (char_consumed) {
    289             qstring_append_chr(lexer->token, ch);
    290         }
    291 
    292         switch (new_state) {
    293         case JSON_OPERATOR:
    294         case JSON_ESCAPE:
    295         case JSON_INTEGER:
    296         case JSON_FLOAT:
    297         case JSON_KEYWORD:
    298         case JSON_STRING:
    299             lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
    300         case JSON_SKIP:
    301             QDECREF(lexer->token);
    302             lexer->token = qstring_new();
    303             new_state = IN_START;
    304             break;
    305         case IN_ERROR:
    306             return -EINVAL;
    307         default:
    308             break;
    309         }
    310         lexer->state = new_state;
    311     } while (!char_consumed);
    312     return 0;
    313 }
    314 
    315 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
    316 {
    317     size_t i;
    318 
    319     for (i = 0; i < size; i++) {
    320         int err;
    321 
    322         err = json_lexer_feed_char(lexer, buffer[i]);
    323         if (err < 0) {
    324             return err;
    325         }
    326     }
    327 
    328     return 0;
    329 }
    330 
    331 int json_lexer_flush(JSONLexer *lexer)
    332 {
    333     return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
    334 }
    335 
    336 void json_lexer_destroy(JSONLexer *lexer)
    337 {
    338     QDECREF(lexer->token);
    339 }
    340