1 /* Bison Grammar Scanner -*- C -*- 2 3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. 4 5 This file is part of Bison, the GNU Compiler Compiler. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20 02110-1301 USA 21 */ 22 23 %option debug nodefault nounput noyywrap never-interactive 24 %option prefix="gram_" outfile="lex.yy.c" 25 26 %{ 27 /* Work around a bug in flex 2.5.31. See Debian bug 333231 28 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */ 29 #undef gram_wrap 30 #define gram_wrap() 1 31 32 #include "system.h" 33 34 #include <mbswidth.h> 35 #include <quote.h> 36 37 #include "complain.h" 38 #include "files.h" 39 #include "getargs.h" 40 #include "gram.h" 41 #include "quotearg.h" 42 #include "reader.h" 43 #include "uniqstr.h" 44 45 #define YY_USER_INIT \ 46 do \ 47 { \ 48 scanner_cursor.file = current_file; \ 49 scanner_cursor.line = 1; \ 50 scanner_cursor.column = 1; \ 51 code_start = scanner_cursor; \ 52 } \ 53 while (0) 54 55 /* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */ 56 int gram_get_lineno (void); 57 FILE *gram_get_in (void); 58 FILE *gram_get_out (void); 59 int gram_get_leng (void); 60 char *gram_get_text (void); 61 void gram_set_lineno (int); 62 void gram_set_in (FILE *); 63 void gram_set_out (FILE *); 64 int gram_get_debug (void); 65 void gram_set_debug (int); 66 int gram_lex_destroy (void); 67 68 /* Location of scanner cursor. */ 69 boundary scanner_cursor; 70 71 static void adjust_location (location *, char const *, size_t); 72 #define YY_USER_ACTION adjust_location (loc, yytext, yyleng); 73 74 static size_t no_cr_read (FILE *, char *, size_t); 75 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size)) 76 77 78 /* OBSTACK_FOR_STRING -- Used to store all the characters that we need to 79 keep (to construct ID, STRINGS etc.). Use the following macros to 80 use it. 81 82 Use STRING_GROW to append what has just been matched, and 83 STRING_FINISH to end the string (it puts the ending 0). 84 STRING_FINISH also stores this string in LAST_STRING, which can be 85 used, and which is used by STRING_FREE to free the last string. */ 86 87 static struct obstack obstack_for_string; 88 89 /* A string representing the most recently saved token. */ 90 char *last_string; 91 92 /* The location of the most recently saved token, if it was a 93 BRACED_CODE token; otherwise, this has an unspecified value. */ 94 location last_braced_code_loc; 95 96 #define STRING_GROW \ 97 obstack_grow (&obstack_for_string, yytext, yyleng) 98 99 #define STRING_FINISH \ 100 do { \ 101 obstack_1grow (&obstack_for_string, '\0'); \ 102 last_string = obstack_finish (&obstack_for_string); \ 103 } while (0) 104 105 #define STRING_FREE \ 106 obstack_free (&obstack_for_string, last_string) 107 108 void 109 scanner_last_string_free (void) 110 { 111 STRING_FREE; 112 } 113 114 /* Within well-formed rules, RULE_LENGTH is the number of values in 115 the current rule so far, which says where to find `$0' with respect 116 to the top of the stack. It is not the same as the rule->length in 117 the case of mid rule actions. 118 119 Outside of well-formed rules, RULE_LENGTH has an undefined value. */ 120 static int rule_length; 121 122 static void rule_length_overflow (location) __attribute__ ((__noreturn__)); 123 124 /* Increment the rule length by one, checking for overflow. */ 125 static inline void 126 increment_rule_length (location loc) 127 { 128 rule_length++; 129 130 /* Don't allow rule_length == INT_MAX, since that might cause 131 confusion with strtol if INT_MAX == LONG_MAX. */ 132 if (rule_length == INT_MAX) 133 rule_length_overflow (loc); 134 } 135 136 static void handle_dollar (int token_type, char *cp, location loc); 137 static void handle_at (int token_type, char *cp, location loc); 138 static void handle_syncline (char *, location); 139 static unsigned long int scan_integer (char const *p, int base, location loc); 140 static int convert_ucn_to_byte (char const *hex_text); 141 static void unexpected_eof (boundary, char const *); 142 static void unexpected_newline (boundary, char const *); 143 144 %} 145 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT 146 %x SC_STRING SC_CHARACTER 147 %x SC_AFTER_IDENTIFIER 148 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER 149 %x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE 150 151 letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] 152 id {letter}({letter}|[0-9])* 153 directive %{letter}({letter}|[0-9]|-)* 154 int [0-9]+ 155 156 /* POSIX says that a tag must be both an id and a C union member, but 157 historically almost any character is allowed in a tag. We disallow 158 NUL and newline, as this simplifies our implementation. */ 159 tag [^\0\n>]+ 160 161 /* Zero or more instances of backslash-newline. Following GCC, allow 162 white space between the backslash and the newline. */ 163 splice (\\[ \f\t\v]*\n)* 164 165 %% 166 %{ 167 /* Nesting level of the current code in braces. */ 168 int braces_level IF_LINT (= 0); 169 170 /* Parent context state, when applicable. */ 171 int context_state IF_LINT (= 0); 172 173 /* Token type to return, when applicable. */ 174 int token_type IF_LINT (= 0); 175 176 /* Location of most recent identifier, when applicable. */ 177 location id_loc IF_LINT (= empty_location); 178 179 /* Where containing code started, when applicable. Its initial 180 value is relevant only when yylex is invoked in the SC_EPILOGUE 181 start condition. */ 182 boundary code_start = scanner_cursor; 183 184 /* Where containing comment or string or character literal started, 185 when applicable. */ 186 boundary token_start IF_LINT (= scanner_cursor); 187 %} 188 189 190 /*-----------------------. 191 | Scanning white space. | 192 `-----------------------*/ 193 194 <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE> 195 { 196 /* Comments and white space. */ 197 "," warn_at (*loc, _("stray `,' treated as white space")); 198 [ \f\n\t\v] | 199 "//".* ; 200 "/*" { 201 token_start = loc->start; 202 context_state = YY_START; 203 BEGIN SC_YACC_COMMENT; 204 } 205 206 /* #line directives are not documented, and may be withdrawn or 207 modified in future versions of Bison. */ 208 ^"#line "{int}" \"".*"\"\n" { 209 handle_syncline (yytext + sizeof "#line " - 1, *loc); 210 } 211 } 212 213 214 /*----------------------------. 215 | Scanning Bison directives. | 216 `----------------------------*/ 217 <INITIAL> 218 { 219 "%binary" return PERCENT_NONASSOC; 220 "%debug" return PERCENT_DEBUG; 221 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC; 222 "%define" return PERCENT_DEFINE; 223 "%defines" return PERCENT_DEFINES; 224 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE; 225 "%dprec" return PERCENT_DPREC; 226 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE; 227 "%expect" return PERCENT_EXPECT; 228 "%expect"[-_]"rr" return PERCENT_EXPECT_RR; 229 "%file-prefix" return PERCENT_FILE_PREFIX; 230 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC; 231 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE; 232 "%glr-parser" return PERCENT_GLR_PARSER; 233 "%left" return PERCENT_LEFT; 234 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE; 235 "%locations" return PERCENT_LOCATIONS; 236 "%merge" return PERCENT_MERGE; 237 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX; 238 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC; 239 "%no"[-_]"lines" return PERCENT_NO_LINES; 240 "%nonassoc" return PERCENT_NONASSOC; 241 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER; 242 "%nterm" return PERCENT_NTERM; 243 "%output" return PERCENT_OUTPUT; 244 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE; 245 "%prec" rule_length--; return PERCENT_PREC; 246 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE; 247 "%pure"[-_]"parser" return PERCENT_PURE_PARSER; 248 "%require" return PERCENT_REQUIRE; 249 "%right" return PERCENT_RIGHT; 250 "%skeleton" return PERCENT_SKELETON; 251 "%start" return PERCENT_START; 252 "%term" return PERCENT_TOKEN; 253 "%token" return PERCENT_TOKEN; 254 "%token"[-_]"table" return PERCENT_TOKEN_TABLE; 255 "%type" return PERCENT_TYPE; 256 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE; 257 "%verbose" return PERCENT_VERBOSE; 258 "%yacc" return PERCENT_YACC; 259 260 {directive} { 261 complain_at (*loc, _("invalid directive: %s"), quote (yytext)); 262 } 263 264 "=" return EQUAL; 265 "|" rule_length = 0; return PIPE; 266 ";" return SEMICOLON; 267 268 {id} { 269 val->symbol = symbol_get (yytext, *loc); 270 id_loc = *loc; 271 increment_rule_length (*loc); 272 BEGIN SC_AFTER_IDENTIFIER; 273 } 274 275 {int} { 276 val->integer = scan_integer (yytext, 10, *loc); 277 return INT; 278 } 279 0[xX][0-9abcdefABCDEF]+ { 280 val->integer = scan_integer (yytext, 16, *loc); 281 return INT; 282 } 283 284 /* Characters. We don't check there is only one. */ 285 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER; 286 287 /* Strings. */ 288 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING; 289 290 /* Prologue. */ 291 "%{" code_start = loc->start; BEGIN SC_PROLOGUE; 292 293 /* Code in between braces. */ 294 "{" { 295 if (current_rule && current_rule->action) 296 grammar_midrule_action (); 297 STRING_GROW; 298 token_type = BRACED_CODE; 299 braces_level = 0; 300 code_start = loc->start; 301 BEGIN SC_BRACED_CODE; 302 } 303 304 /* A type. */ 305 "<"{tag}">" { 306 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2); 307 STRING_FINISH; 308 val->uniqstr = uniqstr_new (last_string); 309 STRING_FREE; 310 return TYPE; 311 } 312 313 "%%" { 314 static int percent_percent_count; 315 if (++percent_percent_count == 2) 316 BEGIN SC_EPILOGUE; 317 return PERCENT_PERCENT; 318 } 319 320 . { 321 complain_at (*loc, _("invalid character: %s"), quote (yytext)); 322 } 323 324 <<EOF>> { 325 loc->start = loc->end = scanner_cursor; 326 yyterminate (); 327 } 328 } 329 330 331 /*-----------------------------------------------------------------. 332 | Scanning after an identifier, checking whether a colon is next. | 333 `-----------------------------------------------------------------*/ 334 335 <SC_AFTER_IDENTIFIER> 336 { 337 ":" { 338 rule_length = 0; 339 *loc = id_loc; 340 BEGIN INITIAL; 341 return ID_COLON; 342 } 343 . { 344 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); 345 yyless (0); 346 *loc = id_loc; 347 BEGIN INITIAL; 348 return ID; 349 } 350 <<EOF>> { 351 *loc = id_loc; 352 BEGIN INITIAL; 353 return ID; 354 } 355 } 356 357 358 /*---------------------------------------------------------------. 359 | Scanning a Yacc comment. The initial `/ *' is already eaten. | 360 `---------------------------------------------------------------*/ 361 362 <SC_YACC_COMMENT> 363 { 364 "*/" BEGIN context_state; 365 .|\n ; 366 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state; 367 } 368 369 370 /*------------------------------------------------------------. 371 | Scanning a C comment. The initial `/ *' is already eaten. | 372 `------------------------------------------------------------*/ 373 374 <SC_COMMENT> 375 { 376 "*"{splice}"/" STRING_GROW; BEGIN context_state; 377 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state; 378 } 379 380 381 /*--------------------------------------------------------------. 382 | Scanning a line comment. The initial `//' is already eaten. | 383 `--------------------------------------------------------------*/ 384 385 <SC_LINE_COMMENT> 386 { 387 "\n" STRING_GROW; BEGIN context_state; 388 {splice} STRING_GROW; 389 <<EOF>> BEGIN context_state; 390 } 391 392 393 /*------------------------------------------------. 394 | Scanning a Bison string, including its escapes. | 395 | The initial quote is already eaten. | 396 `------------------------------------------------*/ 397 398 <SC_ESCAPED_STRING> 399 { 400 "\"" { 401 STRING_FINISH; 402 loc->start = token_start; 403 val->chars = last_string; 404 increment_rule_length (*loc); 405 BEGIN INITIAL; 406 return STRING; 407 } 408 \n unexpected_newline (token_start, "\""); BEGIN INITIAL; 409 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL; 410 } 411 412 /*----------------------------------------------------------. 413 | Scanning a Bison character literal, decoding its escapes. | 414 | The initial quote is already eaten. | 415 `----------------------------------------------------------*/ 416 417 <SC_ESCAPED_CHARACTER> 418 { 419 "'" { 420 unsigned char last_string_1; 421 STRING_GROW; 422 STRING_FINISH; 423 loc->start = token_start; 424 val->symbol = symbol_get (quotearg_style (escape_quoting_style, 425 last_string), 426 *loc); 427 symbol_class_set (val->symbol, token_sym, *loc, false); 428 last_string_1 = last_string[1]; 429 symbol_user_token_number_set (val->symbol, last_string_1, *loc); 430 STRING_FREE; 431 increment_rule_length (*loc); 432 BEGIN INITIAL; 433 return ID; 434 } 435 \n unexpected_newline (token_start, "'"); BEGIN INITIAL; 436 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL; 437 } 438 439 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING> 440 { 441 \0 complain_at (*loc, _("invalid null character")); 442 } 443 444 445 /*----------------------------. 446 | Decode escaped characters. | 447 `----------------------------*/ 448 449 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER> 450 { 451 \\[0-7]{1,3} { 452 unsigned long int c = strtoul (yytext + 1, NULL, 8); 453 if (UCHAR_MAX < c) 454 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); 455 else if (! c) 456 complain_at (*loc, _("invalid null character: %s"), quote (yytext)); 457 else 458 obstack_1grow (&obstack_for_string, c); 459 } 460 461 \\x[0-9abcdefABCDEF]+ { 462 verify (UCHAR_MAX < ULONG_MAX); 463 unsigned long int c = strtoul (yytext + 2, NULL, 16); 464 if (UCHAR_MAX < c) 465 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); 466 else if (! c) 467 complain_at (*loc, _("invalid null character: %s"), quote (yytext)); 468 else 469 obstack_1grow (&obstack_for_string, c); 470 } 471 472 \\a obstack_1grow (&obstack_for_string, '\a'); 473 \\b obstack_1grow (&obstack_for_string, '\b'); 474 \\f obstack_1grow (&obstack_for_string, '\f'); 475 \\n obstack_1grow (&obstack_for_string, '\n'); 476 \\r obstack_1grow (&obstack_for_string, '\r'); 477 \\t obstack_1grow (&obstack_for_string, '\t'); 478 \\v obstack_1grow (&obstack_for_string, '\v'); 479 480 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */ 481 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]); 482 483 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} { 484 int c = convert_ucn_to_byte (yytext); 485 if (c < 0) 486 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); 487 else if (! c) 488 complain_at (*loc, _("invalid null character: %s"), quote (yytext)); 489 else 490 obstack_1grow (&obstack_for_string, c); 491 } 492 \\(.|\n) { 493 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext)); 494 STRING_GROW; 495 } 496 } 497 498 /*--------------------------------------------. 499 | Scanning user-code characters and strings. | 500 `--------------------------------------------*/ 501 502 <SC_CHARACTER,SC_STRING> 503 { 504 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW; 505 } 506 507 <SC_CHARACTER> 508 { 509 "'" STRING_GROW; BEGIN context_state; 510 \n unexpected_newline (token_start, "'"); BEGIN context_state; 511 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state; 512 } 513 514 <SC_STRING> 515 { 516 "\"" STRING_GROW; BEGIN context_state; 517 \n unexpected_newline (token_start, "\""); BEGIN context_state; 518 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state; 519 } 520 521 522 /*---------------------------------------------------. 523 | Strings, comments etc. can be found in user code. | 524 `---------------------------------------------------*/ 525 526 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> 527 { 528 "'" { 529 STRING_GROW; 530 context_state = YY_START; 531 token_start = loc->start; 532 BEGIN SC_CHARACTER; 533 } 534 "\"" { 535 STRING_GROW; 536 context_state = YY_START; 537 token_start = loc->start; 538 BEGIN SC_STRING; 539 } 540 "/"{splice}"*" { 541 STRING_GROW; 542 context_state = YY_START; 543 token_start = loc->start; 544 BEGIN SC_COMMENT; 545 } 546 "/"{splice}"/" { 547 STRING_GROW; 548 context_state = YY_START; 549 BEGIN SC_LINE_COMMENT; 550 } 551 } 552 553 554 /*---------------------------------------------------------------. 555 | Scanning after %union etc., possibly followed by white space. | 556 | For %union only, allow arbitrary C code to appear before the | 557 | following brace, as an extension to POSIX. | 558 `---------------------------------------------------------------*/ 559 560 <SC_PRE_CODE> 561 { 562 . { 563 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION; 564 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); 565 yyless (0); 566 567 if (valid) 568 { 569 braces_level = -1; 570 code_start = loc->start; 571 BEGIN SC_BRACED_CODE; 572 } 573 else 574 { 575 complain_at (*loc, _("missing `{' in %s"), 576 token_name (token_type)); 577 obstack_sgrow (&obstack_for_string, "{}"); 578 STRING_FINISH; 579 val->chars = last_string; 580 BEGIN INITIAL; 581 return token_type; 582 } 583 } 584 585 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL; 586 } 587 588 589 /*---------------------------------------------------------------. 590 | Scanning some code in braces (%union and actions). The initial | 591 | "{" is already eaten. | 592 `---------------------------------------------------------------*/ 593 594 <SC_BRACED_CODE> 595 { 596 "{"|"<"{splice}"%" STRING_GROW; braces_level++; 597 "%"{splice}">" STRING_GROW; braces_level--; 598 "}" { 599 bool outer_brace = --braces_level < 0; 600 601 /* As an undocumented Bison extension, append `;' before the last 602 brace in braced code, so that the user code can omit trailing 603 `;'. But do not append `;' if emulating Yacc, since Yacc does 604 not append one. 605 606 FIXME: Bison should warn if a semicolon seems to be necessary 607 here, and should omit the semicolon if it seems unnecessary 608 (e.g., after ';', '{', or '}', each followed by comments or 609 white space). Such a warning shouldn't depend on --yacc; it 610 should depend on a new --pedantic option, which would cause 611 Bison to warn if it detects an extension to POSIX. --pedantic 612 should also diagnose other Bison extensions like %yacc. 613 Perhaps there should also be a GCC-style --pedantic-errors 614 option, so that such warnings are diagnosed as errors. */ 615 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag) 616 obstack_1grow (&obstack_for_string, ';'); 617 618 obstack_1grow (&obstack_for_string, '}'); 619 620 if (outer_brace) 621 { 622 STRING_FINISH; 623 loc->start = code_start; 624 val->chars = last_string; 625 increment_rule_length (*loc); 626 last_braced_code_loc = *loc; 627 BEGIN INITIAL; 628 return token_type; 629 } 630 } 631 632 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly 633 (as `<' `<%'). */ 634 "<"{splice}"<" STRING_GROW; 635 636 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc); 637 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc); 638 639 "$" { 640 warn_at (*loc, _("stray `$'")); 641 obstack_sgrow (&obstack_for_string, "$]["); 642 } 643 "@" { 644 warn_at (*loc, _("stray `@'")); 645 obstack_sgrow (&obstack_for_string, "@@"); 646 } 647 648 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL; 649 } 650 651 652 /*--------------------------------------------------------------. 653 | Scanning some prologue: from "%{" (already scanned) to "%}". | 654 `--------------------------------------------------------------*/ 655 656 <SC_PROLOGUE> 657 { 658 "%}" { 659 STRING_FINISH; 660 loc->start = code_start; 661 val->chars = last_string; 662 BEGIN INITIAL; 663 return PROLOGUE; 664 } 665 666 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL; 667 } 668 669 670 /*---------------------------------------------------------------. 671 | Scanning the epilogue (everything after the second "%%", which | 672 | has already been eaten). | 673 `---------------------------------------------------------------*/ 674 675 <SC_EPILOGUE> 676 { 677 <<EOF>> { 678 STRING_FINISH; 679 loc->start = code_start; 680 val->chars = last_string; 681 BEGIN INITIAL; 682 return EPILOGUE; 683 } 684 } 685 686 687 /*-----------------------------------------. 688 | Escape M4 quoting characters in C code. | 689 `-----------------------------------------*/ 690 691 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> 692 { 693 \$ obstack_sgrow (&obstack_for_string, "$]["); 694 \@ obstack_sgrow (&obstack_for_string, "@@"); 695 \[ obstack_sgrow (&obstack_for_string, "@{"); 696 \] obstack_sgrow (&obstack_for_string, "@}"); 697 } 698 699 700 /*-----------------------------------------------------. 701 | By default, grow the string obstack with the input. | 702 `-----------------------------------------------------*/ 703 704 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. | 705 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW; 706 707 %% 708 709 /* Keeps track of the maximum number of semantic values to the left of 710 a handle (those referenced by $0, $-1, etc.) are required by the 711 semantic actions of this grammar. */ 712 int max_left_semantic_context = 0; 713 714 /* If BUF is null, add BUFSIZE (which in this case must be less than 715 INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to 716 COLUMN. If an overflow occurs, or might occur but is undetectable, 717 return INT_MAX. Assume COLUMN is nonnegative. */ 718 719 static inline int 720 add_column_width (int column, char const *buf, size_t bufsize) 721 { 722 size_t width; 723 unsigned int remaining_columns = INT_MAX - column; 724 725 if (buf) 726 { 727 if (INT_MAX / 2 <= bufsize) 728 return INT_MAX; 729 width = mbsnwidth (buf, bufsize, 0); 730 } 731 else 732 width = bufsize; 733 734 return width <= remaining_columns ? column + width : INT_MAX; 735 } 736 737 /* Set *LOC and adjust scanner cursor to account for token TOKEN of 738 size SIZE. */ 739 740 static void 741 adjust_location (location *loc, char const *token, size_t size) 742 { 743 int line = scanner_cursor.line; 744 int column = scanner_cursor.column; 745 char const *p0 = token; 746 char const *p = token; 747 char const *lim = token + size; 748 749 loc->start = scanner_cursor; 750 751 for (p = token; p < lim; p++) 752 switch (*p) 753 { 754 case '\n': 755 line += line < INT_MAX; 756 column = 1; 757 p0 = p + 1; 758 break; 759 760 case '\t': 761 column = add_column_width (column, p0, p - p0); 762 column = add_column_width (column, NULL, 8 - ((column - 1) & 7)); 763 p0 = p + 1; 764 break; 765 766 default: 767 break; 768 } 769 770 scanner_cursor.line = line; 771 scanner_cursor.column = column = add_column_width (column, p0, p - p0); 772 773 loc->end = scanner_cursor; 774 775 if (line == INT_MAX && loc->start.line != INT_MAX) 776 warn_at (*loc, _("line number overflow")); 777 if (column == INT_MAX && loc->start.column != INT_MAX) 778 warn_at (*loc, _("column number overflow")); 779 } 780 781 782 /* Read bytes from FP into buffer BUF of size SIZE. Return the 783 number of bytes read. Remove '\r' from input, treating \r\n 784 and isolated \r as \n. */ 785 786 static size_t 787 no_cr_read (FILE *fp, char *buf, size_t size) 788 { 789 size_t bytes_read = fread (buf, 1, size, fp); 790 if (bytes_read) 791 { 792 char *w = memchr (buf, '\r', bytes_read); 793 if (w) 794 { 795 char const *r = ++w; 796 char const *lim = buf + bytes_read; 797 798 for (;;) 799 { 800 /* Found an '\r'. Treat it like '\n', but ignore any 801 '\n' that immediately follows. */ 802 w[-1] = '\n'; 803 if (r == lim) 804 { 805 int ch = getc (fp); 806 if (ch != '\n' && ungetc (ch, fp) != ch) 807 break; 808 } 809 else if (*r == '\n') 810 r++; 811 812 /* Copy until the next '\r'. */ 813 do 814 { 815 if (r == lim) 816 return w - buf; 817 } 818 while ((*w++ = *r++) != '\r'); 819 } 820 821 return w - buf; 822 } 823 } 824 825 return bytes_read; 826 } 827 828 829 /*------------------------------------------------------------------. 830 | TEXT is pointing to a wannabee semantic value (i.e., a `$'). | 831 | | 832 | Possible inputs: $[<TYPENAME>]($|integer) | 833 | | 834 | Output to OBSTACK_FOR_STRING a reference to this semantic value. | 835 `------------------------------------------------------------------*/ 836 837 static inline bool 838 handle_action_dollar (char *text, location loc) 839 { 840 const char *type_name = NULL; 841 char *cp = text + 1; 842 843 if (! current_rule) 844 return false; 845 846 /* Get the type name if explicit. */ 847 if (*cp == '<') 848 { 849 type_name = ++cp; 850 while (*cp != '>') 851 ++cp; 852 *cp = '\0'; 853 ++cp; 854 } 855 856 if (*cp == '$') 857 { 858 if (!type_name) 859 type_name = symbol_list_n_type_name_get (current_rule, loc, 0); 860 if (!type_name && typed) 861 complain_at (loc, _("$$ of `%s' has no declared type"), 862 current_rule->sym->tag); 863 if (!type_name) 864 type_name = ""; 865 obstack_fgrow1 (&obstack_for_string, 866 "]b4_lhs_value([%s])[", type_name); 867 current_rule->used = true; 868 } 869 else 870 { 871 long int num = strtol (cp, NULL, 10); 872 873 if (1 - INT_MAX + rule_length <= num && num <= rule_length) 874 { 875 int n = num; 876 if (max_left_semantic_context < 1 - n) 877 max_left_semantic_context = 1 - n; 878 if (!type_name && 0 < n) 879 type_name = symbol_list_n_type_name_get (current_rule, loc, n); 880 if (!type_name && typed) 881 complain_at (loc, _("$%d of `%s' has no declared type"), 882 n, current_rule->sym->tag); 883 if (!type_name) 884 type_name = ""; 885 obstack_fgrow3 (&obstack_for_string, 886 "]b4_rhs_value(%d, %d, [%s])[", 887 rule_length, n, type_name); 888 symbol_list_n_used_set (current_rule, n, true); 889 } 890 else 891 complain_at (loc, _("integer out of range: %s"), quote (text)); 892 } 893 894 return true; 895 } 896 897 898 /*----------------------------------------------------------------. 899 | Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE | 900 | (are we in an action?). | 901 `----------------------------------------------------------------*/ 902 903 static void 904 handle_dollar (int token_type, char *text, location loc) 905 { 906 switch (token_type) 907 { 908 case BRACED_CODE: 909 if (handle_action_dollar (text, loc)) 910 return; 911 break; 912 913 case PERCENT_DESTRUCTOR: 914 case PERCENT_INITIAL_ACTION: 915 case PERCENT_PRINTER: 916 if (text[1] == '$') 917 { 918 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar["); 919 return; 920 } 921 break; 922 923 default: 924 break; 925 } 926 927 complain_at (loc, _("invalid value: %s"), quote (text)); 928 } 929 930 931 /*------------------------------------------------------. 932 | TEXT is a location token (i.e., a `@...'). Output to | 933 | OBSTACK_FOR_STRING a reference to this location. | 934 `------------------------------------------------------*/ 935 936 static inline bool 937 handle_action_at (char *text, location loc) 938 { 939 char *cp = text + 1; 940 locations_flag = true; 941 942 if (! current_rule) 943 return false; 944 945 if (*cp == '$') 946 obstack_sgrow (&obstack_for_string, "]b4_lhs_location["); 947 else 948 { 949 long int num = strtol (cp, NULL, 10); 950 951 if (1 - INT_MAX + rule_length <= num && num <= rule_length) 952 { 953 int n = num; 954 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[", 955 rule_length, n); 956 } 957 else 958 complain_at (loc, _("integer out of range: %s"), quote (text)); 959 } 960 961 return true; 962 } 963 964 965 /*----------------------------------------------------------------. 966 | Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE | 967 | (are we in an action?). | 968 `----------------------------------------------------------------*/ 969 970 static void 971 handle_at (int token_type, char *text, location loc) 972 { 973 switch (token_type) 974 { 975 case BRACED_CODE: 976 handle_action_at (text, loc); 977 return; 978 979 case PERCENT_INITIAL_ACTION: 980 case PERCENT_DESTRUCTOR: 981 case PERCENT_PRINTER: 982 if (text[1] == '$') 983 { 984 obstack_sgrow (&obstack_for_string, "]b4_at_dollar["); 985 return; 986 } 987 break; 988 989 default: 990 break; 991 } 992 993 complain_at (loc, _("invalid value: %s"), quote (text)); 994 } 995 996 997 /*------------------------------------------------------. 998 | Scan NUMBER for a base-BASE integer at location LOC. | 999 `------------------------------------------------------*/ 1000 1001 static unsigned long int 1002 scan_integer (char const *number, int base, location loc) 1003 { 1004 verify (INT_MAX < ULONG_MAX); 1005 unsigned long int num = strtoul (number, NULL, base); 1006 1007 if (INT_MAX < num) 1008 { 1009 complain_at (loc, _("integer out of range: %s"), quote (number)); 1010 num = INT_MAX; 1011 } 1012 1013 return num; 1014 } 1015 1016 1017 /*------------------------------------------------------------------. 1018 | Convert universal character name UCN to a single-byte character, | 1019 | and return that character. Return -1 if UCN does not correspond | 1020 | to a single-byte character. | 1021 `------------------------------------------------------------------*/ 1022 1023 static int 1024 convert_ucn_to_byte (char const *ucn) 1025 { 1026 verify (UCHAR_MAX <= INT_MAX); 1027 unsigned long int code = strtoul (ucn + 2, NULL, 16); 1028 1029 /* FIXME: Currently we assume Unicode-compatible unibyte characters 1030 on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On 1031 non-ASCII hosts we support only the portable C character set. 1032 These limitations should be removed once we add support for 1033 multibyte characters. */ 1034 1035 if (UCHAR_MAX < code) 1036 return -1; 1037 1038 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e) 1039 { 1040 /* A non-ASCII host. Use CODE to index into a table of the C 1041 basic execution character set, which is guaranteed to exist on 1042 all Standard C platforms. This table also includes '$', '@', 1043 and '`', which are not in the basic execution character set but 1044 which are unibyte characters on all the platforms that we know 1045 about. */ 1046 static signed char const table[] = 1047 { 1048 '\0', -1, -1, -1, -1, -1, -1, '\a', 1049 '\b', '\t', '\n', '\v', '\f', '\r', -1, -1, 1050 -1, -1, -1, -1, -1, -1, -1, -1, 1051 -1, -1, -1, -1, -1, -1, -1, -1, 1052 ' ', '!', '"', '#', '$', '%', '&', '\'', 1053 '(', ')', '*', '+', ',', '-', '.', '/', 1054 '0', '1', '2', '3', '4', '5', '6', '7', 1055 '8', '9', ':', ';', '<', '=', '>', '?', 1056 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 1057 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 1058 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 1059 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', 1060 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 1061 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 1062 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 1063 'x', 'y', 'z', '{', '|', '}', '~' 1064 }; 1065 1066 code = code < sizeof table ? table[code] : -1; 1067 } 1068 #endif 1069 1070 return code; 1071 } 1072 1073 1074 /*----------------------------------------------------------------. 1075 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. | 1076 `----------------------------------------------------------------*/ 1077 1078 static void 1079 handle_syncline (char *args, location loc) 1080 { 1081 char *after_num; 1082 unsigned long int lineno = strtoul (args, &after_num, 10); 1083 char *file = strchr (after_num, '"') + 1; 1084 *strchr (file, '"') = '\0'; 1085 if (INT_MAX <= lineno) 1086 { 1087 warn_at (loc, _("line number overflow")); 1088 lineno = INT_MAX; 1089 } 1090 scanner_cursor.file = current_file = uniqstr_new (file); 1091 scanner_cursor.line = lineno; 1092 scanner_cursor.column = 1; 1093 } 1094 1095 1096 /*---------------------------------. 1097 | Report a rule that is too long. | 1098 `---------------------------------*/ 1099 1100 static void 1101 rule_length_overflow (location loc) 1102 { 1103 fatal_at (loc, _("rule is too long")); 1104 } 1105 1106 1107 /*----------------------------------------------------------------. 1108 | For a token or comment starting at START, report message MSGID, | 1109 | which should say that an end marker was found before | 1110 | the expected TOKEN_END. | 1111 `----------------------------------------------------------------*/ 1112 1113 static void 1114 unexpected_end (boundary start, char const *msgid, char const *token_end) 1115 { 1116 location loc; 1117 loc.start = start; 1118 loc.end = scanner_cursor; 1119 complain_at (loc, _(msgid), token_end); 1120 } 1121 1122 1123 /*------------------------------------------------------------------------. 1124 | Report an unexpected EOF in a token or comment starting at START. | 1125 | An end of file was encountered and the expected TOKEN_END was missing. | 1126 `------------------------------------------------------------------------*/ 1127 1128 static void 1129 unexpected_eof (boundary start, char const *token_end) 1130 { 1131 unexpected_end (start, N_("missing `%s' at end of file"), token_end); 1132 } 1133 1134 1135 /*----------------------------------------. 1136 | Likewise, but for unexpected newlines. | 1137 `----------------------------------------*/ 1138 1139 static void 1140 unexpected_newline (boundary start, char const *token_end) 1141 { 1142 unexpected_end (start, N_("missing `%s' at end of line"), token_end); 1143 } 1144 1145 1146 /*-------------------------. 1147 | Initialize the scanner. | 1148 `-------------------------*/ 1149 1150 void 1151 scanner_initialize (void) 1152 { 1153 obstack_init (&obstack_for_string); 1154 } 1155 1156 1157 /*-----------------------------------------------. 1158 | Free all the memory allocated to the scanner. | 1159 `-----------------------------------------------*/ 1160 1161 void 1162 scanner_free (void) 1163 { 1164 obstack_free (&obstack_for_string, 0); 1165 /* Reclaim Flex's buffers. */ 1166 yy_delete_buffer (YY_CURRENT_BUFFER); 1167 } 1168