1 /* 2 * GAS-compatible re2c lexer 3 * 4 * Copyright (C) 2005-2007 Peter Johnson 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Neither the name of the author nor the names of other contributors 15 * may be used to endorse or promote products derived from this 16 * software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS'' 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <util.h> 31 32 #include <libyasm.h> 33 34 #include "modules/parsers/gas/gas-parser.h" 35 36 37 #define BSIZE 8192 38 39 #define YYCURSOR cursor 40 #define YYLIMIT (s->lim) 41 #define YYMARKER (s->ptr) 42 #define YYFILL(n) {cursor = fill(parser_gas, cursor);} 43 44 #define RETURN(i) do {s->cur = cursor; parser_gas->tokch = s->tok[0]; \ 45 return i;} while (0) 46 47 #define SCANINIT() {s->tok = cursor;} 48 49 #define TOK ((char *)s->tok) 50 #define TOKLEN (size_t)(cursor-s->tok) 51 52 /* Bridge function to convert byte-oriented parser with line-oriented 53 * preprocessor. 54 */ 55 static size_t 56 preproc_input(yasm_parser_gas *parser_gas, /*@out@*/ YYCTYPE *buf, 57 size_t max_size) 58 { 59 size_t tot=0; 60 while (max_size > 0) { 61 size_t n; 62 63 if (!parser_gas->line) { 64 parser_gas->line = yasm_preproc_get_line(parser_gas->preproc); 65 if (!parser_gas->line) 66 return tot; /* EOF */ 67 parser_gas->linepos = parser_gas->line; 68 parser_gas->lineleft = strlen(parser_gas->line) + 1; 69 parser_gas->line[parser_gas->lineleft-1] = '\n'; 70 } 71 72 n = parser_gas->lineleft<max_size ? parser_gas->lineleft : max_size; 73 strncpy((char *)buf+tot, parser_gas->linepos, n); 74 75 if (n == parser_gas->lineleft) { 76 yasm_xfree(parser_gas->line); 77 parser_gas->line = NULL; 78 } else { 79 parser_gas->lineleft -= n; 80 parser_gas->linepos += n; 81 } 82 83 tot += n; 84 max_size -= n; 85 } 86 return tot; 87 } 88 #if 0 89 static size_t 90 fill_input(void *d, unsigned char *buf, size_t max) 91 { 92 return yasm_preproc_input((yasm_preproc *)d, (char *)buf, max); 93 } 94 #endif 95 static YYCTYPE * 96 fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor) 97 { 98 yasm_scanner *s = &parser_gas->s; 99 int first = 0; 100 if(!s->eof){ 101 size_t cnt = s->tok - s->bot; 102 if(cnt){ 103 memmove(s->bot, s->tok, (size_t)(s->lim - s->tok)); 104 s->tok = s->bot; 105 s->ptr -= cnt; 106 cursor -= cnt; 107 s->lim -= cnt; 108 } 109 if (!s->bot) 110 first = 1; 111 if((s->top - s->lim) < BSIZE){ 112 YYCTYPE *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE); 113 memcpy(buf, s->tok, (size_t)(s->lim - s->tok)); 114 s->tok = buf; 115 s->ptr = &buf[s->ptr - s->bot]; 116 cursor = &buf[cursor - s->bot]; 117 s->lim = &buf[s->lim - s->bot]; 118 s->top = &s->lim[BSIZE]; 119 if (s->bot) 120 yasm_xfree(s->bot); 121 s->bot = buf; 122 } 123 if((cnt = preproc_input(parser_gas, s->lim, BSIZE)) == 0) { 124 s->eof = &s->lim[cnt]; *s->eof++ = '\n'; 125 } 126 s->lim += cnt; 127 if (first && parser_gas->save_input) { 128 int i; 129 YYCTYPE *saveline; 130 parser_gas->save_last ^= 1; 131 saveline = parser_gas->save_line[parser_gas->save_last]; 132 /* save next line into cur_line */ 133 for (i=0; i<79 && &s->tok[i] < s->lim && s->tok[i] != '\n'; i++) 134 saveline[i] = s->tok[i]; 135 saveline[i] = '\0'; 136 } 137 } 138 return cursor; 139 } 140 141 static YYCTYPE * 142 save_line(yasm_parser_gas *parser_gas, YYCTYPE *cursor) 143 { 144 yasm_scanner *s = &parser_gas->s; 145 int i = 0; 146 YYCTYPE *saveline; 147 148 parser_gas->save_last ^= 1; 149 saveline = parser_gas->save_line[parser_gas->save_last]; 150 151 /* save next line into cur_line */ 152 if ((YYLIMIT - YYCURSOR) < 80) 153 YYFILL(80); 154 for (i=0; i<79 && &cursor[i] < s->lim && cursor[i] != '\n'; i++) 155 saveline[i] = cursor[i]; 156 saveline[i] = '\0'; 157 return cursor; 158 } 159 160 /* starting size of string buffer */ 161 #define STRBUF_ALLOC_SIZE 128 162 163 /* string buffer used when parsing strings/character constants */ 164 static YYCTYPE *strbuf = NULL; 165 166 /* length of strbuf (including terminating NULL character) */ 167 static size_t strbuf_size = 0; 168 169 static void 170 strbuf_append(size_t count, YYCTYPE *cursor, yasm_scanner *s, int ch) 171 { 172 if (count >= strbuf_size) { 173 strbuf = yasm_xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE); 174 strbuf_size += STRBUF_ALLOC_SIZE; 175 } 176 strbuf[count] = ch; 177 } 178 179 /*!re2c 180 any = [\000-\377]; 181 digit = [0-9]; 182 iletter = [a-zA-Z]; 183 bindigit = [01]; 184 octdigit = [0-7]; 185 hexdigit = [0-9a-fA-F]; 186 ws = [ \t\r]; 187 dquot = ["]; 188 */ 189 190 191 int 192 gas_parser_lex(YYSTYPE *lvalp, yasm_parser_gas *parser_gas) 193 { 194 yasm_scanner *s = &parser_gas->s; 195 YYCTYPE *cursor = s->cur; 196 size_t count; 197 YYCTYPE savech; 198 199 /* Handle one token of lookahead */ 200 if (parser_gas->peek_token != NONE) { 201 int tok = parser_gas->peek_token; 202 *lvalp = parser_gas->peek_tokval; /* structure copy */ 203 parser_gas->tokch = parser_gas->peek_tokch; 204 parser_gas->peek_token = NONE; 205 return tok; 206 } 207 208 /* Catch EOF */ 209 if (s->eof && cursor == s->eof) 210 return 0; 211 212 /* Jump to proper "exclusive" states */ 213 switch (parser_gas->state) { 214 case COMMENT: 215 goto comment; 216 case SECTION_DIRECTIVE: 217 goto section_directive; 218 case NASM_FILENAME: 219 goto nasm_filename; 220 default: 221 break; 222 } 223 224 scan: 225 SCANINIT(); 226 227 /*!re2c 228 /* standard decimal integer */ 229 ([1-9] digit*) | "0" { 230 savech = s->tok[TOKLEN]; 231 s->tok[TOKLEN] = '\0'; 232 lvalp->intn = yasm_intnum_create_dec(TOK); 233 s->tok[TOKLEN] = savech; 234 RETURN(INTNUM); 235 } 236 237 /* 0b10010011 - binary number */ 238 '0b' bindigit+ { 239 savech = s->tok[TOKLEN]; 240 s->tok[TOKLEN] = '\0'; 241 lvalp->intn = yasm_intnum_create_bin(TOK+2); 242 s->tok[TOKLEN] = savech; 243 RETURN(INTNUM); 244 } 245 246 /* 0777 - octal number */ 247 "0" octdigit+ { 248 savech = s->tok[TOKLEN]; 249 s->tok[TOKLEN] = '\0'; 250 lvalp->intn = yasm_intnum_create_oct(TOK); 251 s->tok[TOKLEN] = savech; 252 RETURN(INTNUM); 253 } 254 255 /* 0xAA - hexidecimal number */ 256 '0x' hexdigit+ { 257 savech = s->tok[TOKLEN]; 258 s->tok[TOKLEN] = '\0'; 259 /* skip 0 and x */ 260 lvalp->intn = yasm_intnum_create_hex(TOK+2); 261 s->tok[TOKLEN] = savech; 262 RETURN(INTNUM); 263 } 264 265 /* floating point value */ 266 [-+]? digit* "." digit+ ('e' [-+]? digit+)? { 267 savech = s->tok[TOKLEN]; 268 s->tok[TOKLEN] = '\0'; 269 lvalp->flt = yasm_floatnum_create(TOK); 270 s->tok[TOKLEN] = savech; 271 RETURN(FLTNUM); 272 } 273 [-+]? digit+ "." digit* ('e' [-+]? digit+)? { 274 savech = s->tok[TOKLEN]; 275 s->tok[TOKLEN] = '\0'; 276 lvalp->flt = yasm_floatnum_create(TOK); 277 s->tok[TOKLEN] = savech; 278 RETURN(FLTNUM); 279 } 280 "0" [DdEeFfTt] [-+]? digit* ("." digit*)? ('e' [-+]? digit+)? { 281 savech = s->tok[TOKLEN]; 282 s->tok[TOKLEN] = '\0'; 283 lvalp->flt = yasm_floatnum_create(TOK+2); 284 s->tok[TOKLEN] = savech; 285 RETURN(FLTNUM); 286 } 287 288 /* character constant values */ 289 ['] { 290 goto charconst; 291 } 292 293 /* string constant values */ 294 dquot { 295 goto stringconst; 296 } 297 298 /* operators */ 299 "<<" { RETURN(LEFT_OP); } 300 ">>" { RETURN(RIGHT_OP); } 301 "<" { RETURN(LEFT_OP); } 302 ">" { RETURN(RIGHT_OP); } 303 [-+|^!*&/~$():@=,] { RETURN(s->tok[0]); } 304 ";" { 305 parser_gas->state = INITIAL; 306 RETURN(s->tok[0]); 307 } 308 309 /* identifier */ 310 [a-zA-Z_.][a-zA-Z0-9_$.]* { 311 lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN); 312 lvalp->str.len = TOKLEN; 313 RETURN(ID); 314 } 315 316 /* identifier with @ */ 317 [a-zA-Z_.]([a-zA-Z0-9_$.]*[@][a-zA-Z0-9_$.]*)+ { 318 /* if @ not part of ID, move the scanner cursor to the first @ */ 319 if (!((yasm_objfmt_base *)p_object->objfmt)->module->id_at_ok) 320 cursor = (unsigned char *)strchr(TOK, '@'); 321 lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN); 322 lvalp->str.len = TOKLEN; 323 RETURN(ID); 324 } 325 326 /* register or segment register */ 327 [%][a-zA-Z0-9]+ { 328 savech = s->tok[TOKLEN]; 329 s->tok[TOKLEN] = '\0'; 330 if (parser_gas->is_nasm_preproc && strcmp(TOK+1, "line") == 0) { 331 s->tok[TOKLEN] = savech; 332 RETURN(NASM_LINE_MARKER); 333 } 334 335 switch (yasm_arch_parse_check_regtmod 336 (p_object->arch, TOK+1, TOKLEN-1, &lvalp->arch_data)) { 337 case YASM_ARCH_REG: 338 s->tok[TOKLEN] = savech; 339 RETURN(REG); 340 case YASM_ARCH_REGGROUP: 341 s->tok[TOKLEN] = savech; 342 RETURN(REGGROUP); 343 case YASM_ARCH_SEGREG: 344 s->tok[TOKLEN] = savech; 345 RETURN(SEGREG); 346 default: 347 break; 348 } 349 yasm_error_set(YASM_ERROR_GENERAL, 350 N_("Unrecognized register name `%s'"), s->tok); 351 s->tok[TOKLEN] = savech; 352 lvalp->arch_data = 0; 353 RETURN(REG); 354 } 355 356 /* local label */ 357 [0-9] ':' { 358 /* increment label index */ 359 parser_gas->local[s->tok[0]-'0']++; 360 /* build local label name */ 361 lvalp->str.contents = yasm_xmalloc(30); 362 lvalp->str.len = 363 sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], 364 parser_gas->local[s->tok[0]-'0']); 365 RETURN(LABEL); 366 } 367 368 /* local label forward reference */ 369 [0-9] 'f' { 370 /* build local label name */ 371 lvalp->str.contents = yasm_xmalloc(30); 372 lvalp->str.len = 373 sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], 374 parser_gas->local[s->tok[0]-'0']+1); 375 RETURN(ID); 376 } 377 378 /* local label backward reference */ 379 [0-9] 'b' { 380 /* build local label name */ 381 lvalp->str.contents = yasm_xmalloc(30); 382 lvalp->str.len = 383 sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0], 384 parser_gas->local[s->tok[0]-'0']); 385 RETURN(ID); 386 } 387 388 "/*" { parser_gas->state = COMMENT; goto comment; } 389 "#" { 390 if (parser_gas->is_cpp_preproc) 391 { 392 RETURN(CPP_LINE_MARKER); 393 } else 394 goto line_comment; 395 } 396 "//" { goto line_comment; } 397 398 ws+ { goto scan; } 399 400 "\n" { 401 if (parser_gas->save_input) 402 cursor = save_line(parser_gas, cursor); 403 parser_gas->state = INITIAL; 404 RETURN(s->tok[0]); 405 } 406 407 any { 408 yasm_warn_set(YASM_WARN_UNREC_CHAR, 409 N_("ignoring unrecognized character `%s'"), 410 yasm__conv_unprint(s->tok[0])); 411 goto scan; 412 } 413 */ 414 415 /* C-style comment; nesting not supported */ 416 comment: 417 SCANINIT(); 418 419 /*!re2c 420 /* End of comment */ 421 "*/" { parser_gas->state = INITIAL; goto scan; } 422 423 "\n" { 424 if (parser_gas->save_input) 425 cursor = save_line(parser_gas, cursor); 426 RETURN(s->tok[0]); 427 } 428 429 any { 430 if (cursor == s->eof) 431 return 0; 432 goto comment; 433 } 434 */ 435 436 /* Single line comment. */ 437 line_comment: 438 /*!re2c 439 (any \ [\n])* { goto scan; } 440 */ 441 442 /* .section directive (the section name portion thereof) */ 443 section_directive: 444 SCANINIT(); 445 446 /*!re2c 447 [a-zA-Z0-9_$.-]+ { 448 lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN); 449 lvalp->str.len = TOKLEN; 450 parser_gas->state = INITIAL; 451 RETURN(ID); 452 } 453 454 dquot { goto stringconst; } 455 456 ws+ { goto section_directive; } 457 458 "," { 459 parser_gas->state = INITIAL; 460 RETURN(s->tok[0]); 461 } 462 463 "\n" { 464 if (parser_gas->save_input) 465 cursor = save_line(parser_gas, cursor); 466 parser_gas->state = INITIAL; 467 RETURN(s->tok[0]); 468 } 469 470 any { 471 yasm_warn_set(YASM_WARN_UNREC_CHAR, 472 N_("ignoring unrecognized character `%s'"), 473 yasm__conv_unprint(s->tok[0])); 474 goto section_directive; 475 } 476 */ 477 478 /* filename portion of nasm preproc %line */ 479 nasm_filename: 480 strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE); 481 strbuf_size = STRBUF_ALLOC_SIZE; 482 count = 0; 483 484 nasm_filename_scan: 485 SCANINIT(); 486 487 /*!re2c 488 "\n" { 489 strbuf_append(count++, cursor, s, '\0'); 490 lvalp->str.contents = (char *)strbuf; 491 lvalp->str.len = count; 492 parser_gas->state = INITIAL; 493 RETURN(STRING); 494 } 495 496 ws+ { goto nasm_filename_scan; } 497 498 any { 499 if (cursor == s->eof) { 500 strbuf_append(count++, cursor, s, '\0'); 501 lvalp->str.contents = (char *)strbuf; 502 lvalp->str.len = count; 503 parser_gas->state = INITIAL; 504 RETURN(STRING); 505 } 506 strbuf_append(count++, cursor, s, s->tok[0]); 507 goto nasm_filename_scan; 508 } 509 */ 510 511 /* character constant values */ 512 charconst: 513 /*TODO*/ 514 515 /* string constant values */ 516 stringconst: 517 strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE); 518 strbuf_size = STRBUF_ALLOC_SIZE; 519 count = 0; 520 521 stringconst_scan: 522 SCANINIT(); 523 524 /*!re2c 525 /* Handle escaped character by copying both and continuing. */ 526 "\\". { 527 if (cursor == s->eof) { 528 yasm_error_set(YASM_ERROR_SYNTAX, 529 N_("unexpected end of file in string")); 530 lvalp->str.contents = (char *)strbuf; 531 lvalp->str.len = count; 532 RETURN(STRING); 533 } 534 strbuf_append(count++, cursor, s, '\\'); 535 strbuf_append(count++, cursor, s, s->tok[1]); 536 goto stringconst_scan; 537 } 538 539 dquot { 540 strbuf_append(count, cursor, s, '\0'); 541 yasm_unescape_cstring(strbuf, &count); 542 lvalp->str.contents = (char *)strbuf; 543 lvalp->str.len = count; 544 RETURN(STRING); 545 } 546 547 any { 548 if (cursor == s->eof) { 549 yasm_error_set(YASM_ERROR_SYNTAX, 550 N_("unexpected end of file in string")); 551 lvalp->str.contents = (char *)strbuf; 552 lvalp->str.len = count; 553 RETURN(STRING); 554 } 555 strbuf_append(count++, cursor, s, s->tok[0]); 556 goto stringconst_scan; 557 } 558 */ 559 } 560