Home | History | Annotate | Download | only in gas
      1 /*
      2  * GAS-compatible re2c lexer
      3  *
      4  *  Copyright (C) 2005-2007  Peter Johnson
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  * 3. Neither the name of the author nor the names of other contributors
     15  *    may be used to endorse or promote products derived from this
     16  *    software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
     19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
     22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     28  * POSSIBILITY OF SUCH DAMAGE.
     29  */
     30 #include <util.h>
     31 
     32 #include <libyasm.h>
     33 
     34 #include "modules/parsers/gas/gas-parser.h"
     35 
     36 
     37 #define BSIZE   8192
     38 
     39 #define YYCURSOR        cursor
     40 #define YYLIMIT         (s->lim)
     41 #define YYMARKER        (s->ptr)
     42 #define YYFILL(n)       {cursor = fill(parser_gas, cursor);}
     43 
     44 #define RETURN(i)       do {s->cur = cursor; parser_gas->tokch = s->tok[0]; \
     45                          return i;} while (0)
     46 
     47 #define SCANINIT()      {s->tok = cursor;}
     48 
     49 #define TOK             ((char *)s->tok)
     50 #define TOKLEN          (size_t)(cursor-s->tok)
     51 
     52 /* Bridge function to convert byte-oriented parser with line-oriented
     53  * preprocessor.
     54  */
     55 static size_t
     56 preproc_input(yasm_parser_gas *parser_gas, /*@out@*/ YYCTYPE *buf,
     57               size_t max_size)
     58 {
     59     size_t tot=0;
     60     while (max_size > 0) {
     61         size_t n;
     62 
     63         if (!parser_gas->line) {
     64             parser_gas->line = yasm_preproc_get_line(parser_gas->preproc);
     65             if (!parser_gas->line)
     66                 return tot; /* EOF */
     67             parser_gas->linepos = parser_gas->line;
     68             parser_gas->lineleft = strlen(parser_gas->line) + 1;
     69             parser_gas->line[parser_gas->lineleft-1] = '\n';
     70         }
     71 
     72         n = parser_gas->lineleft<max_size ? parser_gas->lineleft : max_size;
     73         strncpy((char *)buf+tot, parser_gas->linepos, n);
     74 
     75         if (n == parser_gas->lineleft) {
     76             yasm_xfree(parser_gas->line);
     77             parser_gas->line = NULL;
     78         } else {
     79             parser_gas->lineleft -= n;
     80             parser_gas->linepos += n;
     81         }
     82 
     83         tot += n;
     84         max_size -= n;
     85     }
     86     return tot;
     87 }
#if 0
/* Disabled (excluded from compilation by the surrounding #if 0).
 * Input callback that forwards the read request straight to
 * yasm_preproc_input(), treating d as the yasm_preproc to read from.
 */
static size_t
fill_input(void *d, unsigned char *buf, size_t max)
{
    return yasm_preproc_input((yasm_preproc *)d, (char *)buf, max);
}
#endif
     95 static YYCTYPE *
     96 fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor)
     97 {
     98     yasm_scanner *s = &parser_gas->s;
     99     int first = 0;
    100     if(!s->eof){
    101         size_t cnt = s->tok - s->bot;
    102         if(cnt){
    103             memmove(s->bot, s->tok, (size_t)(s->lim - s->tok));
    104             s->tok = s->bot;
    105             s->ptr -= cnt;
    106             cursor -= cnt;
    107             s->lim -= cnt;
    108         }
    109         if (!s->bot)
    110             first = 1;
    111         if((s->top - s->lim) < BSIZE){
    112             YYCTYPE *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE);
    113             memcpy(buf, s->tok, (size_t)(s->lim - s->tok));
    114             s->tok = buf;
    115             s->ptr = &buf[s->ptr - s->bot];
    116             cursor = &buf[cursor - s->bot];
    117             s->lim = &buf[s->lim - s->bot];
    118             s->top = &s->lim[BSIZE];
    119             if (s->bot)
    120                 yasm_xfree(s->bot);
    121             s->bot = buf;
    122         }
    123         if((cnt = preproc_input(parser_gas, s->lim, BSIZE)) == 0) {
    124             s->eof = &s->lim[cnt]; *s->eof++ = '\n';
    125         }
    126         s->lim += cnt;
    127         if (first && parser_gas->save_input) {
    128             int i;
    129             YYCTYPE *saveline;
    130             parser_gas->save_last ^= 1;
    131             saveline = parser_gas->save_line[parser_gas->save_last];
    132             /* save next line into cur_line */
    133             for (i=0; i<79 && &s->tok[i] < s->lim && s->tok[i] != '\n'; i++)
    134                 saveline[i] = s->tok[i];
    135             saveline[i] = '\0';
    136         }
    137     }
    138     return cursor;
    139 }
    140 
    141 static YYCTYPE *
    142 save_line(yasm_parser_gas *parser_gas, YYCTYPE *cursor)
    143 {
    144     yasm_scanner *s = &parser_gas->s;
    145     int i = 0;
    146     YYCTYPE *saveline;
    147 
    148     parser_gas->save_last ^= 1;
    149     saveline = parser_gas->save_line[parser_gas->save_last];
    150 
    151     /* save next line into cur_line */
    152     if ((YYLIMIT - YYCURSOR) < 80)
    153         YYFILL(80);
    154     for (i=0; i<79 && &cursor[i] < s->lim && cursor[i] != '\n'; i++)
    155         saveline[i] = cursor[i];
    156     saveline[i] = '\0';
    157     return cursor;
    158 }
    159 
/* starting size of string buffer; strbuf_append() also grows the buffer by
 * this many bytes at a time
 */
#define STRBUF_ALLOC_SIZE       128

/* string buffer used when parsing strings/character constants; a fresh one
 * is allocated for each string and handed to the parser through
 * lvalp->str.contents
 */
static YYCTYPE *strbuf = NULL;

/* allocated size of strbuf in bytes (including room for the terminating
 * NUL character)
 */
static size_t strbuf_size = 0;
    168 
    169 static void
    170 strbuf_append(size_t count, YYCTYPE *cursor, yasm_scanner *s, int ch)
    171 {
    172     if (count >= strbuf_size) {
    173         strbuf = yasm_xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
    174         strbuf_size += STRBUF_ALLOC_SIZE;
    175     }
    176     strbuf[count] = ch;
    177 }
    178 
/* Named character classes shared by the re2c rule blocks in this file. */
/*!re2c
  any = [\000-\377];
  digit = [0-9];
  iletter = [a-zA-Z];
  bindigit = [01];
  octdigit = [0-7];
  hexdigit = [0-9a-fA-F];
  ws = [ \t\r];
  dquot = ["];
*/
    189 
    190 
/* Lexer entry point: scan and return the next token of GAS-syntax input.
 *
 * lvalp receives the semantic value of the token: intn for INTNUM, flt for
 * FLTNUM, str (contents and len) for ID, LABEL and STRING, and arch_data
 * for REG, REGGROUP and SEGREG.  parser_gas carries the scanner state.
 *
 * If a token was previously peeked (peek_token != NONE), it is returned
 * first without scanning.  Returns the token type, or 0 at end of input;
 * single-character operators and newline are returned as their character
 * value.
 */
int
gas_parser_lex(YYSTYPE *lvalp, yasm_parser_gas *parser_gas)
{
    yasm_scanner *s = &parser_gas->s;
    YYCTYPE *cursor = s->cur;
    size_t count;       /* number of characters stored in strbuf so far */
    YYCTYPE savech;     /* character temporarily replaced by a NUL */

    /* Handle one token of lookahead */
    if (parser_gas->peek_token != NONE) {
        int tok = parser_gas->peek_token;
        *lvalp = parser_gas->peek_tokval;  /* structure copy */
        parser_gas->tokch = parser_gas->peek_tokch;
        parser_gas->peek_token = NONE;
        return tok;
    }

    /* Catch EOF */
    if (s->eof && cursor == s->eof)
        return 0;

    /* Jump to proper "exclusive" states */
    switch (parser_gas->state) {
        case COMMENT:
            goto comment;
        case SECTION_DIRECTIVE:
            goto section_directive;
        case NASM_FILENAME:
            goto nasm_filename;
        default:
            break;
    }

scan:
    SCANINIT();

    /*!re2c
        /* standard decimal integer */
        ([1-9] digit*) | "0" {
            /* temporarily NUL-terminate the token in place so it can be
             * passed as a C string, then restore the overwritten character
             */
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->intn = yasm_intnum_create_dec(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* 0b10010011 - binary number */
        '0b' bindigit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            /* skip 0 and b */
            lvalp->intn = yasm_intnum_create_bin(TOK+2);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* 0777 - octal number */
        "0" octdigit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->intn = yasm_intnum_create_oct(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* 0xAA - hexadecimal number */
        '0x' hexdigit+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            /* skip 0 and x */
            lvalp->intn = yasm_intnum_create_hex(TOK+2);
            s->tok[TOKLEN] = savech;
            RETURN(INTNUM);
        }

        /* floating point value */
        [-+]? digit* "." digit+ ('e' [-+]? digit+)? {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->flt = yasm_floatnum_create(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(FLTNUM);
        }
        [-+]? digit+ "." digit* ('e' [-+]? digit+)? {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            lvalp->flt = yasm_floatnum_create(TOK);
            s->tok[TOKLEN] = savech;
            RETURN(FLTNUM);
        }
        "0" [DdEeFfTt] [-+]? digit* ("." digit*)? ('e' [-+]? digit+)? {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            /* skip the 0 and the type letter */
            lvalp->flt = yasm_floatnum_create(TOK+2);
            s->tok[TOKLEN] = savech;
            RETURN(FLTNUM);
        }

        /* character constant values */
        ['] {
            goto charconst;
        }

        /* string constant values */
        dquot {
            goto stringconst;
        }

        /* operators (a single < or > returns the same token as << or >>) */
        "<<"                    { RETURN(LEFT_OP); }
        ">>"                    { RETURN(RIGHT_OP); }
        "<"                     { RETURN(LEFT_OP); }
        ">"                     { RETURN(RIGHT_OP); }
        [-+|^!*&/~$():@=,]      { RETURN(s->tok[0]); }
        ";"     {
            parser_gas->state = INITIAL;
            RETURN(s->tok[0]);
        }

        /* identifier */
        [a-zA-Z_.][a-zA-Z0-9_$.]* {
            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
            lvalp->str.len = TOKLEN;
            RETURN(ID);
        }

        /* identifier with @ */
        [a-zA-Z_.]([a-zA-Z0-9_$.]*[@][a-zA-Z0-9_$.]*)+ {
            /* if @ not part of ID, move the scanner cursor to the first @ */
            if (!((yasm_objfmt_base *)p_object->objfmt)->module->id_at_ok)
                cursor = (unsigned char *)strchr(TOK, '@');
            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
            lvalp->str.len = TOKLEN;
            RETURN(ID);
        }

        /* register or segment register */
        [%][a-zA-Z0-9]+ {
            savech = s->tok[TOKLEN];
            s->tok[TOKLEN] = '\0';
            /* "%line" is a line marker when the NASM preprocessor is used */
            if (parser_gas->is_nasm_preproc && strcmp(TOK+1, "line") == 0) {
                s->tok[TOKLEN] = savech;
                RETURN(NASM_LINE_MARKER);
            }

            /* look the name up without its leading % */
            switch (yasm_arch_parse_check_regtmod
                    (p_object->arch, TOK+1, TOKLEN-1, &lvalp->arch_data)) {
                case YASM_ARCH_REG:
                    s->tok[TOKLEN] = savech;
                    RETURN(REG);
                case YASM_ARCH_REGGROUP:
                    s->tok[TOKLEN] = savech;
                    RETURN(REGGROUP);
                case YASM_ARCH_SEGREG:
                    s->tok[TOKLEN] = savech;
                    RETURN(SEGREG);
                default:
                    break;
            }
            /* not recognized: report it, but still return a REG token
             * (with arch_data cleared)
             */
            yasm_error_set(YASM_ERROR_GENERAL,
                           N_("Unrecognized register name `%s'"), s->tok);
            s->tok[TOKLEN] = savech;
            lvalp->arch_data = 0;
            RETURN(REG);
        }

        /* local label */
        [0-9] ':' {
            /* increment label index */
            parser_gas->local[s->tok[0]-'0']++;
            /* build local label name */
            lvalp->str.contents = yasm_xmalloc(30);
            lvalp->str.len =
                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
                        parser_gas->local[s->tok[0]-'0']);
            RETURN(LABEL);
        }

        /* local label forward reference */
        [0-9] 'f' {
            /* build local label name (index of the next definition) */
            lvalp->str.contents = yasm_xmalloc(30);
            lvalp->str.len =
                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
                        parser_gas->local[s->tok[0]-'0']+1);
            RETURN(ID);
        }

        /* local label backward reference */
        [0-9] 'b' {
            /* build local label name (index of the latest definition) */
            lvalp->str.contents = yasm_xmalloc(30);
            lvalp->str.len =
                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
                        parser_gas->local[s->tok[0]-'0']);
            RETURN(ID);
        }

        "/*"                    { parser_gas->state = COMMENT; goto comment; }
        "#"                     {
            if (parser_gas->is_cpp_preproc)
            {
                RETURN(CPP_LINE_MARKER);
            } else
                goto line_comment;
        }
        "//"                    { goto line_comment; }

        ws+                     { goto scan; }

        "\n"                    {
            if (parser_gas->save_input)
                cursor = save_line(parser_gas, cursor);
            parser_gas->state = INITIAL;
            RETURN(s->tok[0]);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto scan;
        }
    */

    /* C-style comment; nesting not supported */
comment:
    SCANINIT();

    /*!re2c
        /* End of comment */
        "*/"    { parser_gas->state = INITIAL; goto scan; }

        "\n"                    {
            if (parser_gas->save_input)
                cursor = save_line(parser_gas, cursor);
            RETURN(s->tok[0]);
        }

        any     {
            if (cursor == s->eof)
                return 0;
            goto comment;
        }
    */

    /* Single line comment. */
line_comment:
    /*!re2c
        (any \ [\n])*   { goto scan; }
    */

    /* .section directive (the section name portion thereof) */
section_directive:
    SCANINIT();

    /*!re2c
        [a-zA-Z0-9_$.-]+ {
            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
            lvalp->str.len = TOKLEN;
            parser_gas->state = INITIAL;
            RETURN(ID);
        }

        dquot                   { goto stringconst; }

        ws+                     { goto section_directive; }

        ","                     {
            parser_gas->state = INITIAL;
            RETURN(s->tok[0]);
        }

        "\n"                    {
            if (parser_gas->save_input)
                cursor = save_line(parser_gas, cursor);
            parser_gas->state = INITIAL;
            RETURN(s->tok[0]);
        }

        any {
            yasm_warn_set(YASM_WARN_UNREC_CHAR,
                          N_("ignoring unrecognized character `%s'"),
                          yasm__conv_unprint(s->tok[0]));
            goto section_directive;
        }
    */

    /* filename portion of nasm preproc %line */
nasm_filename:
    /* fresh buffer; it is handed to the caller through lvalp->str.contents */
    strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE);
    strbuf_size = STRBUF_ALLOC_SIZE;
    count = 0;

nasm_filename_scan:
    SCANINIT();

    /*!re2c
        "\n" {
            strbuf_append(count++, cursor, s, '\0');
            lvalp->str.contents = (char *)strbuf;
            lvalp->str.len = count;
            parser_gas->state = INITIAL;
            RETURN(STRING);
        }

        ws+ { goto nasm_filename_scan; }

        any {
            if (cursor == s->eof) {
                strbuf_append(count++, cursor, s, '\0');
                lvalp->str.contents = (char *)strbuf;
                lvalp->str.len = count;
                parser_gas->state = INITIAL;
                RETURN(STRING);
            }
            strbuf_append(count++, cursor, s, s->tok[0]);
            goto nasm_filename_scan;
        }
    */

    /* character constant values */
charconst:
    /*TODO*/
    /* not implemented: control falls through into stringconst below */

    /* string constant values */
stringconst:
    /* fresh buffer; it is handed to the caller through lvalp->str.contents */
    strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE);
    strbuf_size = STRBUF_ALLOC_SIZE;
    count = 0;

stringconst_scan:
    SCANINIT();

    /*!re2c
        /* Handle escaped character by copying both and continuing. */
        "\\".   {
            if (cursor == s->eof) {
                yasm_error_set(YASM_ERROR_SYNTAX,
                               N_("unexpected end of file in string"));
                lvalp->str.contents = (char *)strbuf;
                lvalp->str.len = count;
                RETURN(STRING);
            }
            strbuf_append(count++, cursor, s, '\\');
            strbuf_append(count++, cursor, s, s->tok[1]);
            goto stringconst_scan;
        }

        dquot   {
            /* terminate, then unescape in place (count is passed by
             * address to yasm_unescape_cstring)
             */
            strbuf_append(count, cursor, s, '\0');
            yasm_unescape_cstring(strbuf, &count);
            lvalp->str.contents = (char *)strbuf;
            lvalp->str.len = count;
            RETURN(STRING);
        }

        any     {
            if (cursor == s->eof) {
                yasm_error_set(YASM_ERROR_SYNTAX,
                               N_("unexpected end of file in string"));
                lvalp->str.contents = (char *)strbuf;
                lvalp->str.len = count;
                RETURN(STRING);
            }
            strbuf_append(count++, cursor, s, s->tok[0]);
            goto stringconst_scan;
        }
    */
}
    560