1 /* Input parser for Bison 2 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003, 4 2005, 2006 Free Software Foundation, Inc. 5 6 This file is part of Bison, the GNU Compiler Compiler. 7 8 Bison is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 Bison is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Bison; see the file COPYING. If not, write to 20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 Boston, MA 02110-1301, USA. */ 22 23 #include <config.h> 24 #include "system.h" 25 26 #include <quotearg.h> 27 28 #include "complain.h" 29 #include "conflicts.h" 30 #include "files.h" 31 #include "getargs.h" 32 #include "gram.h" 33 #include "muscle_tab.h" 34 #include "reader.h" 35 #include "symlist.h" 36 #include "symtab.h" 37 38 static void check_and_convert_grammar (void); 39 40 static symbol_list *grammar = NULL; 41 static bool start_flag = false; 42 merger_list *merge_functions; 43 44 /* Was %union seen? */ 45 bool typed = false; 46 47 /* Should rules have a default precedence? */ 48 bool default_prec = true; 49 50 /*-----------------------. 52 | Set the start symbol. | 53 `-----------------------*/ 54 55 void 56 grammar_start_symbol_set (symbol *sym, location loc) 57 { 58 if (start_flag) 59 complain_at (loc, _("multiple %s declarations"), "%start"); 60 else 61 { 62 start_flag = true; 63 startsymbol = sym; 64 startsymbol_location = loc; 65 } 66 } 67 68 69 /*----------------------------------------------------------------. 70 | There are two prologues: one before %union, one after. Augment | 71 | the current one. | 72 `----------------------------------------------------------------*/ 73 74 void 75 prologue_augment (const char *prologue, location loc) 76 { 77 struct obstack *oout = 78 !typed ? &pre_prologue_obstack : &post_prologue_obstack; 79 80 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line); 81 MUSCLE_OBSTACK_SGROW (oout, 82 quotearg_style (c_quoting_style, loc.start.file)); 83 obstack_sgrow (oout, "]])[\n"); 84 obstack_sgrow (oout, prologue); 85 } 86 87 88 90 /*-------------------------------------------------------------------. 91 | Return the merger index for a merging function named NAME, whose | 92 | arguments have type TYPE. Records the function, if new, in | 93 | MERGER_LIST. | 94 `-------------------------------------------------------------------*/ 95 96 static int 97 get_merge_function (uniqstr name, uniqstr type, location loc) 98 { 99 merger_list *syms; 100 merger_list head; 101 int n; 102 103 if (! glr_parser) 104 return 0; 105 106 if (type == NULL) 107 type = uniqstr_new (""); 108 109 head.next = merge_functions; 110 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1) 111 if (UNIQSTR_EQ (name, syms->next->name)) 112 break; 113 if (syms->next == NULL) 114 { 115 syms->next = xmalloc (sizeof syms->next[0]); 116 syms->next->name = uniqstr_new (name); 117 syms->next->type = uniqstr_new (type); 118 syms->next->next = NULL; 119 merge_functions = head.next; 120 } 121 else if (!UNIQSTR_EQ (type, syms->next->type)) 122 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"), 123 name, type, syms->next->type); 124 return n; 125 } 126 127 /*--------------------------------------. 128 | Free all merge-function definitions. | 129 `--------------------------------------*/ 130 131 void 132 free_merger_functions (void) 133 { 134 merger_list *L0 = merge_functions; 135 while (L0) 136 { 137 merger_list *L1 = L0->next; 138 free (L0); 139 L0 = L1; 140 } 141 } 142 143 144 /*-------------------------------------------------------------------. 146 | Parse the input grammar into a one symbol_list structure. Each | 147 | rule is represented by a sequence of symbols: the left hand side | 148 | followed by the contents of the right hand side, followed by a | 149 | null pointer instead of a symbol to terminate the rule. The next | 150 | symbol is the lhs of the following rule. | 151 | | 152 | All actions are copied out, labelled by the rule number they apply | 153 | to. | 154 `-------------------------------------------------------------------*/ 155 156 /* The (currently) last symbol of GRAMMAR. */ 157 static symbol_list *grammar_end = NULL; 158 159 /* Append SYM to the grammar. */ 160 static void 161 grammar_symbol_append (symbol *sym, location loc) 162 { 163 symbol_list *p = symbol_list_new (sym, loc); 164 165 if (grammar_end) 166 grammar_end->next = p; 167 else 168 grammar = p; 169 170 grammar_end = p; 171 172 /* A null SYM stands for an end of rule; it is not an actual 173 part of it. */ 174 if (sym) 175 ++nritems; 176 } 177 178 /* The rule currently being defined, and the previous rule. 179 CURRENT_RULE points to the first LHS of the current rule, while 180 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */ 181 symbol_list *current_rule = NULL; 182 static symbol_list *previous_rule_end = NULL; 183 184 185 /*----------------------------------------------. 186 | Create a new rule for LHS in to the GRAMMAR. | 187 `----------------------------------------------*/ 188 189 void 190 grammar_current_rule_begin (symbol *lhs, location loc) 191 { 192 if (!start_flag) 193 { 194 startsymbol = lhs; 195 startsymbol_location = loc; 196 start_flag = true; 197 } 198 199 /* Start a new rule and record its lhs. */ 200 ++nrules; 201 previous_rule_end = grammar_end; 202 grammar_symbol_append (lhs, loc); 203 current_rule = grammar_end; 204 205 /* Mark the rule's lhs as a nonterminal if not already so. */ 206 if (lhs->class == unknown_sym) 207 { 208 lhs->class = nterm_sym; 209 lhs->number = nvars; 210 ++nvars; 211 } 212 else if (lhs->class == token_sym) 213 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag); 214 } 215 216 217 /*----------------------------------------------------------------------. 218 | A symbol should be used if it has a destructor, or if it is a | 219 | mid-rule symbol (i.e., the generated LHS replacing a mid-rule | 220 | action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". | 221 `----------------------------------------------------------------------*/ 222 223 static bool 224 symbol_should_be_used (symbol_list const *s) 225 { 226 return (s->sym->destructor 227 || (s->midrule && s->midrule->used)); 228 } 229 230 /*----------------------------------------------------------------. 231 | Check that the rule R is properly defined. For instance, there | 232 | should be no type clash on the default action. | 233 `----------------------------------------------------------------*/ 234 235 static void 236 grammar_rule_check (const symbol_list *r) 237 { 238 /* Type check. 239 240 If there is an action, then there is nothing we can do: the user 241 is allowed to shoot herself in the foot. 242 243 Don't worry about the default action if $$ is untyped, since $$'s 244 value can't be used. */ 245 if (!r->action && r->sym->type_name) 246 { 247 symbol *first_rhs = r->next->sym; 248 /* If $$ is being set in default way, report if any type mismatch. */ 249 if (first_rhs) 250 { 251 char const *lhs_type = r->sym->type_name; 252 const char *rhs_type = 253 first_rhs->type_name ? first_rhs->type_name : ""; 254 if (!UNIQSTR_EQ (lhs_type, rhs_type)) 255 warn_at (r->location, 256 _("type clash on default action: <%s> != <%s>"), 257 lhs_type, rhs_type); 258 } 259 /* Warn if there is no default for $$ but we need one. */ 260 else 261 warn_at (r->location, 262 _("empty rule for typed nonterminal, and no action")); 263 } 264 265 /* Check that symbol values that should be used are in fact used. */ 266 { 267 symbol_list const *l = r; 268 int n = 0; 269 for (; l && l->sym; l = l->next, ++n) 270 if (! (l->used 271 || !symbol_should_be_used (l) 272 /* The default action, $$ = $1, `uses' both. */ 273 || (!r->action && (n == 0 || n == 1)))) 274 { 275 if (n) 276 warn_at (r->location, _("unused value: $%d"), n); 277 else 278 warn_at (r->location, _("unset value: $$")); 279 } 280 } 281 } 282 283 284 /*-------------------------------------. 285 | End the currently being grown rule. | 286 `-------------------------------------*/ 287 288 void 289 grammar_current_rule_end (location loc) 290 { 291 /* Put an empty link in the list to mark the end of this rule */ 292 grammar_symbol_append (NULL, grammar_end->location); 293 current_rule->location = loc; 294 grammar_rule_check (current_rule); 295 } 296 297 298 /*-------------------------------------------------------------------. 299 | The previous action turns out the be a mid-rule action. Attach it | 300 | to the current rule, i.e., create a dummy symbol, attach it this | 301 | mid-rule action, and append this dummy nonterminal to the current | 302 | rule. | 303 `-------------------------------------------------------------------*/ 304 305 void 306 grammar_midrule_action (void) 307 { 308 /* Since the action was written out with this rule's number, we must 309 give the new rule this number by inserting the new rule before 310 it. */ 311 312 /* Make a DUMMY nonterminal, whose location is that of the midrule 313 action. Create the MIDRULE. */ 314 location dummy_location = current_rule->action_location; 315 symbol *dummy = dummy_symbol_get (dummy_location); 316 symbol_list *midrule = symbol_list_new (dummy, dummy_location); 317 318 /* Make a new rule, whose body is empty, before the current one, so 319 that the action just read can belong to it. */ 320 ++nrules; 321 ++nritems; 322 /* Attach its location and actions to that of the DUMMY. */ 323 midrule->location = dummy_location; 324 midrule->action = current_rule->action; 325 midrule->action_location = dummy_location; 326 current_rule->action = NULL; 327 /* If $$ was used in the action, the LHS of the enclosing rule was 328 incorrectly flagged as used. */ 329 midrule->used = current_rule->used; 330 current_rule->used = false; 331 332 if (previous_rule_end) 333 previous_rule_end->next = midrule; 334 else 335 grammar = midrule; 336 337 /* End the dummy's rule. */ 338 midrule->next = symbol_list_new (NULL, dummy_location); 339 grammar_rule_check (midrule); 340 midrule->next->next = current_rule; 341 342 previous_rule_end = midrule->next; 343 344 /* Insert the dummy nonterminal replacing the midrule action into 345 the current rule. Bind it to its dedicated rule. */ 346 grammar_current_rule_symbol_append (dummy, dummy_location); 347 grammar_end->midrule = midrule; 348 } 349 350 /* Set the precedence symbol of the current rule to PRECSYM. */ 351 352 void 353 grammar_current_rule_prec_set (symbol *precsym, location loc) 354 { 355 if (current_rule->ruleprec) 356 complain_at (loc, _("only one %s allowed per rule"), "%prec"); 357 current_rule->ruleprec = precsym; 358 } 359 360 /* Attach dynamic precedence DPREC to the current rule. */ 361 362 void 363 grammar_current_rule_dprec_set (int dprec, location loc) 364 { 365 if (! glr_parser) 366 warn_at (loc, _("%s affects only GLR parsers"), "%dprec"); 367 if (dprec <= 0) 368 complain_at (loc, _("%s must be followed by positive number"), "%dprec"); 369 else if (current_rule->dprec != 0) 370 complain_at (loc, _("only one %s allowed per rule"), "%dprec"); 371 current_rule->dprec = dprec; 372 } 373 374 /* Attach a merge function NAME with argument type TYPE to current 375 rule. */ 376 377 void 378 grammar_current_rule_merge_set (uniqstr name, location loc) 379 { 380 if (! glr_parser) 381 warn_at (loc, _("%s affects only GLR parsers"), "%merge"); 382 if (current_rule->merger != 0) 383 complain_at (loc, _("only one %s allowed per rule"), "%merge"); 384 current_rule->merger = 385 get_merge_function (name, current_rule->sym->type_name, loc); 386 } 387 388 /* Attach SYM to the current rule. If needed, move the previous 389 action as a mid-rule action. */ 390 391 void 392 grammar_current_rule_symbol_append (symbol *sym, location loc) 393 { 394 if (current_rule->action) 395 grammar_midrule_action (); 396 grammar_symbol_append (sym, loc); 397 } 398 399 /* Attach an ACTION to the current rule. */ 400 401 void 402 grammar_current_rule_action_append (const char *action, location loc) 403 { 404 /* There's no need to invoke grammar_midrule_action here, since the 405 scanner already did it if necessary. */ 406 current_rule->action = action; 407 current_rule->action_location = loc; 408 } 409 410 411 /*---------------------------------------------------------------. 413 | Convert the rules into the representation using RRHS, RLHS and | 414 | RITEM. | 415 `---------------------------------------------------------------*/ 416 417 static void 418 packgram (void) 419 { 420 unsigned int itemno = 0; 421 rule_number ruleno = 0; 422 symbol_list *p = grammar; 423 424 ritem = xnmalloc (nritems + 1, sizeof *ritem); 425 426 /* This sentinel is used by build_relations in gram.c. */ 427 *ritem++ = 0; 428 429 rules = xnmalloc (nrules, sizeof *rules); 430 431 while (p) 432 { 433 symbol *ruleprec = p->ruleprec; 434 rules[ruleno].user_number = ruleno; 435 rules[ruleno].number = ruleno; 436 rules[ruleno].lhs = p->sym; 437 rules[ruleno].rhs = ritem + itemno; 438 rules[ruleno].prec = NULL; 439 rules[ruleno].dprec = p->dprec; 440 rules[ruleno].merger = p->merger; 441 rules[ruleno].precsym = NULL; 442 rules[ruleno].location = p->location; 443 rules[ruleno].useful = true; 444 rules[ruleno].action = p->action; 445 rules[ruleno].action_location = p->action_location; 446 447 p = p->next; 448 while (p && p->sym) 449 { 450 /* item_number = symbol_number. 451 But the former needs to contain more: negative rule numbers. */ 452 ritem[itemno++] = symbol_number_as_item_number (p->sym->number); 453 /* A rule gets by default the precedence and associativity 454 of the last token in it. */ 455 if (p->sym->class == token_sym && default_prec) 456 rules[ruleno].prec = p->sym; 457 if (p) 458 p = p->next; 459 } 460 461 /* If this rule has a %prec, 462 the specified symbol's precedence replaces the default. */ 463 if (ruleprec) 464 { 465 rules[ruleno].precsym = ruleprec; 466 rules[ruleno].prec = ruleprec; 467 } 468 ritem[itemno++] = rule_number_as_item_number (ruleno); 469 ++ruleno; 470 471 if (p) 472 p = p->next; 473 } 474 475 assert (itemno == nritems); 476 477 if (trace_flag & trace_sets) 478 ritem_print (stderr); 479 } 480 481 /*------------------------------------------------------------------. 483 | Read in the grammar specification and record it in the format | 484 | described in gram.h. All actions are copied into ACTION_OBSTACK, | 485 | in each case forming the body of a C function (YYACTION) which | 486 | contains a switch statement to decide which action to execute. | 487 `------------------------------------------------------------------*/ 488 489 void 490 reader (void) 491 { 492 /* Initialize the symbol table. */ 493 symbols_new (); 494 495 /* Construct the accept symbol. */ 496 accept = symbol_get ("$accept", empty_location); 497 accept->class = nterm_sym; 498 accept->number = nvars++; 499 500 /* Construct the error token */ 501 errtoken = symbol_get ("error", empty_location); 502 errtoken->class = token_sym; 503 errtoken->number = ntokens++; 504 505 /* Construct a token that represents all undefined literal tokens. 506 It is always token number 2. */ 507 undeftoken = symbol_get ("$undefined", empty_location); 508 undeftoken->class = token_sym; 509 undeftoken->number = ntokens++; 510 511 /* Initialize the obstacks. */ 512 obstack_init (&pre_prologue_obstack); 513 obstack_init (&post_prologue_obstack); 514 515 gram_in = xfopen (grammar_file, "r"); 516 517 gram__flex_debug = trace_flag & trace_scan; 518 gram_debug = trace_flag & trace_parse; 519 scanner_initialize (); 520 gram_parse (); 521 522 if (! complaint_issued) 523 check_and_convert_grammar (); 524 525 xfclose (gram_in); 526 } 527 528 529 /*-------------------------------------------------------------. 530 | Check the grammar that has just been read, and convert it to | 531 | internal form. | 532 `-------------------------------------------------------------*/ 533 534 static void 535 check_and_convert_grammar (void) 536 { 537 /* Grammar has been read. Do some checking. */ 538 if (nrules == 0) 539 fatal (_("no rules in the input grammar")); 540 541 /* Report any undefined symbols and consider them nonterminals. */ 542 symbols_check_defined (); 543 544 /* If the user did not define her ENDTOKEN, do it now. */ 545 if (!endtoken) 546 { 547 endtoken = symbol_get ("$end", empty_location); 548 endtoken->class = token_sym; 549 endtoken->number = 0; 550 /* Value specified by POSIX. */ 551 endtoken->user_token_number = 0; 552 } 553 554 /* Insert the initial rule, whose line is that of the first rule 555 (not that of the start symbol): 556 557 accept: %start EOF. */ 558 { 559 symbol_list *p = symbol_list_new (accept, empty_location); 560 p->location = grammar->location; 561 p->next = symbol_list_new (startsymbol, empty_location); 562 p->next->next = symbol_list_new (endtoken, empty_location); 563 p->next->next->next = symbol_list_new (NULL, empty_location); 564 p->next->next->next->next = grammar; 565 nrules += 1; 566 nritems += 3; 567 grammar = p; 568 } 569 570 assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars); 571 572 /* Assign the symbols their symbol numbers. Write #defines for the 573 token symbols into FDEFINES if requested. */ 574 symbols_pack (); 575 576 /* Convert the grammar into the format described in gram.h. */ 577 packgram (); 578 579 /* The grammar as a symbol_list is no longer needed. */ 580 LIST_FREE (symbol_list, grammar); 581 } 582