Home | History | Annotate | Download | only in src
      1 /* Input parser for Bison
      2 
      3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
      4    2005, 2006 Free Software Foundation, Inc.
      5 
      6    This file is part of Bison, the GNU Compiler Compiler.
      7 
      8    Bison is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 2, or (at your option)
     11    any later version.
     12 
     13    Bison is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with Bison; see the file COPYING.  If not, write to
     20    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21    Boston, MA 02110-1301, USA.  */
     22 
     23 #include <config.h>
     24 #include "system.h"
     25 
     26 #include <quotearg.h>
     27 
     28 #include "complain.h"
     29 #include "conflicts.h"
     30 #include "files.h"
     31 #include "getargs.h"
     32 #include "gram.h"
     33 #include "muscle_tab.h"
     34 #include "reader.h"
     35 #include "symlist.h"
     36 #include "symtab.h"
     37 
     38 static void check_and_convert_grammar (void);
     39 
     40 static symbol_list *grammar = NULL;
     41 static bool start_flag = false;
     42 merger_list *merge_functions;
     43 
     44 /* Was %union seen?  */
     45 bool typed = false;
     46 
     47 /* Should rules have a default precedence?  */
     48 bool default_prec = true;
     49 
     50 /*-----------------------.
     52 | Set the start symbol.  |
     53 `-----------------------*/
     54 
     55 void
     56 grammar_start_symbol_set (symbol *sym, location loc)
     57 {
     58   if (start_flag)
     59     complain_at (loc, _("multiple %s declarations"), "%start");
     60   else
     61     {
     62       start_flag = true;
     63       startsymbol = sym;
     64       startsymbol_location = loc;
     65     }
     66 }
     67 
     68 
     69 /*----------------------------------------------------------------.
     70 | There are two prologues: one before %union, one after.  Augment |
     71 | the current one.                                                |
     72 `----------------------------------------------------------------*/
     73 
     74 void
     75 prologue_augment (const char *prologue, location loc)
     76 {
     77   struct obstack *oout =
     78     !typed ? &pre_prologue_obstack : &post_prologue_obstack;
     79 
     80   obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
     81   MUSCLE_OBSTACK_SGROW (oout,
     82 			quotearg_style (c_quoting_style, loc.start.file));
     83   obstack_sgrow (oout, "]])[\n");
     84   obstack_sgrow (oout, prologue);
     85 }
     86 
     87 
     88 
     90 /*-------------------------------------------------------------------.
     91 | Return the merger index for a merging function named NAME, whose   |
     92 | arguments have type TYPE.  Records the function, if new, in        |
     93 | MERGER_LIST.							     |
     94 `-------------------------------------------------------------------*/
     95 
     96 static int
     97 get_merge_function (uniqstr name, uniqstr type, location loc)
     98 {
     99   merger_list *syms;
    100   merger_list head;
    101   int n;
    102 
    103   if (! glr_parser)
    104     return 0;
    105 
    106   if (type == NULL)
    107     type = uniqstr_new ("");
    108 
    109   head.next = merge_functions;
    110   for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
    111     if (UNIQSTR_EQ (name, syms->next->name))
    112       break;
    113   if (syms->next == NULL)
    114     {
    115       syms->next = xmalloc (sizeof syms->next[0]);
    116       syms->next->name = uniqstr_new (name);
    117       syms->next->type = uniqstr_new (type);
    118       syms->next->next = NULL;
    119       merge_functions = head.next;
    120     }
    121   else if (!UNIQSTR_EQ (type, syms->next->type))
    122     warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
    123 	     name, type, syms->next->type);
    124   return n;
    125 }
    126 
    127 /*--------------------------------------.
    128 | Free all merge-function definitions.	|
    129 `--------------------------------------*/
    130 
    131 void
    132 free_merger_functions (void)
    133 {
    134   merger_list *L0 = merge_functions;
    135   while (L0)
    136     {
    137       merger_list *L1 = L0->next;
    138       free (L0);
    139       L0 = L1;
    140     }
    141 }
    142 
    143 
    144 /*-------------------------------------------------------------------.
    146 | Parse the input grammar into a one symbol_list structure.  Each    |
    147 | rule is represented by a sequence of symbols: the left hand side   |
    148 | followed by the contents of the right hand side, followed by a     |
    149 | null pointer instead of a symbol to terminate the rule.  The next  |
    150 | symbol is the lhs of the following rule.                           |
    151 |                                                                    |
    152 | All actions are copied out, labelled by the rule number they apply |
    153 | to.                                                                |
    154 `-------------------------------------------------------------------*/
    155 
    156 /* The (currently) last symbol of GRAMMAR. */
    157 static symbol_list *grammar_end = NULL;
    158 
    159 /* Append SYM to the grammar.  */
    160 static void
    161 grammar_symbol_append (symbol *sym, location loc)
    162 {
    163   symbol_list *p = symbol_list_new (sym, loc);
    164 
    165   if (grammar_end)
    166     grammar_end->next = p;
    167   else
    168     grammar = p;
    169 
    170   grammar_end = p;
    171 
    172   /* A null SYM stands for an end of rule; it is not an actual
    173      part of it.  */
    174   if (sym)
    175     ++nritems;
    176 }
    177 
    178 /* The rule currently being defined, and the previous rule.
    179    CURRENT_RULE points to the first LHS of the current rule, while
    180    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */
    181 symbol_list *current_rule = NULL;
    182 static symbol_list *previous_rule_end = NULL;
    183 
    184 
    185 /*----------------------------------------------.
    186 | Create a new rule for LHS in to the GRAMMAR.  |
    187 `----------------------------------------------*/
    188 
    189 void
    190 grammar_current_rule_begin (symbol *lhs, location loc)
    191 {
    192   if (!start_flag)
    193     {
    194       startsymbol = lhs;
    195       startsymbol_location = loc;
    196       start_flag = true;
    197     }
    198 
    199   /* Start a new rule and record its lhs.  */
    200   ++nrules;
    201   previous_rule_end = grammar_end;
    202   grammar_symbol_append (lhs, loc);
    203   current_rule = grammar_end;
    204 
    205   /* Mark the rule's lhs as a nonterminal if not already so.  */
    206   if (lhs->class == unknown_sym)
    207     {
    208       lhs->class = nterm_sym;
    209       lhs->number = nvars;
    210       ++nvars;
    211     }
    212   else if (lhs->class == token_sym)
    213     complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
    214 }
    215 
    216 
    217 /*----------------------------------------------------------------------.
    218 | A symbol should be used if it has a destructor, or if it is a         |
    219 | mid-rule symbol (i.e., the generated LHS replacing a mid-rule         |
    220 | action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }".  |
    221 `----------------------------------------------------------------------*/
    222 
    223 static bool
    224 symbol_should_be_used (symbol_list const *s)
    225 {
    226   return (s->sym->destructor
    227 	  || (s->midrule && s->midrule->used));
    228 }
    229 
    230 /*----------------------------------------------------------------.
    231 | Check that the rule R is properly defined.  For instance, there |
    232 | should be no type clash on the default action.                  |
    233 `----------------------------------------------------------------*/
    234 
    235 static void
    236 grammar_rule_check (const symbol_list *r)
    237 {
    238   /* Type check.
    239 
    240      If there is an action, then there is nothing we can do: the user
    241      is allowed to shoot herself in the foot.
    242 
    243      Don't worry about the default action if $$ is untyped, since $$'s
    244      value can't be used.  */
    245   if (!r->action && r->sym->type_name)
    246     {
    247       symbol *first_rhs = r->next->sym;
    248       /* If $$ is being set in default way, report if any type mismatch.  */
    249       if (first_rhs)
    250 	{
    251 	  char const *lhs_type = r->sym->type_name;
    252 	  const char *rhs_type =
    253 	    first_rhs->type_name ? first_rhs->type_name : "";
    254 	  if (!UNIQSTR_EQ (lhs_type, rhs_type))
    255 	    warn_at (r->location,
    256 		     _("type clash on default action: <%s> != <%s>"),
    257 		     lhs_type, rhs_type);
    258 	}
    259       /* Warn if there is no default for $$ but we need one.  */
    260       else
    261 	warn_at (r->location,
    262 		 _("empty rule for typed nonterminal, and no action"));
    263     }
    264 
    265   /* Check that symbol values that should be used are in fact used.  */
    266   {
    267     symbol_list const *l = r;
    268     int n = 0;
    269     for (; l && l->sym; l = l->next, ++n)
    270       if (! (l->used
    271 	     || !symbol_should_be_used (l)
    272 	     /* The default action, $$ = $1, `uses' both.  */
    273 	     || (!r->action && (n == 0 || n == 1))))
    274 	{
    275 	  if (n)
    276 	    warn_at (r->location, _("unused value: $%d"), n);
    277 	  else
    278 	    warn_at (r->location, _("unset value: $$"));
    279 	}
    280   }
    281 }
    282 
    283 
    284 /*-------------------------------------.
    285 | End the currently being grown rule.  |
    286 `-------------------------------------*/
    287 
    288 void
    289 grammar_current_rule_end (location loc)
    290 {
    291   /* Put an empty link in the list to mark the end of this rule  */
    292   grammar_symbol_append (NULL, grammar_end->location);
    293   current_rule->location = loc;
    294   grammar_rule_check (current_rule);
    295 }
    296 
    297 
    298 /*-------------------------------------------------------------------.
    299 | The previous action turns out the be a mid-rule action.  Attach it |
    300 | to the current rule, i.e., create a dummy symbol, attach it this   |
    301 | mid-rule action, and append this dummy nonterminal to the current  |
    302 | rule.                                                              |
    303 `-------------------------------------------------------------------*/
    304 
    305 void
    306 grammar_midrule_action (void)
    307 {
    308   /* Since the action was written out with this rule's number, we must
    309      give the new rule this number by inserting the new rule before
    310      it.  */
    311 
    312   /* Make a DUMMY nonterminal, whose location is that of the midrule
    313      action.  Create the MIDRULE.  */
    314   location dummy_location = current_rule->action_location;
    315   symbol *dummy = dummy_symbol_get (dummy_location);
    316   symbol_list *midrule = symbol_list_new (dummy, dummy_location);
    317 
    318   /* Make a new rule, whose body is empty, before the current one, so
    319      that the action just read can belong to it.  */
    320   ++nrules;
    321   ++nritems;
    322   /* Attach its location and actions to that of the DUMMY.  */
    323   midrule->location = dummy_location;
    324   midrule->action = current_rule->action;
    325   midrule->action_location = dummy_location;
    326   current_rule->action = NULL;
    327   /* If $$ was used in the action, the LHS of the enclosing rule was
    328      incorrectly flagged as used.  */
    329   midrule->used = current_rule->used;
    330   current_rule->used = false;
    331 
    332   if (previous_rule_end)
    333     previous_rule_end->next = midrule;
    334   else
    335     grammar = midrule;
    336 
    337   /* End the dummy's rule.  */
    338   midrule->next = symbol_list_new (NULL, dummy_location);
    339   grammar_rule_check (midrule);
    340   midrule->next->next = current_rule;
    341 
    342   previous_rule_end = midrule->next;
    343 
    344   /* Insert the dummy nonterminal replacing the midrule action into
    345      the current rule.  Bind it to its dedicated rule.  */
    346   grammar_current_rule_symbol_append (dummy, dummy_location);
    347   grammar_end->midrule = midrule;
    348 }
    349 
    350 /* Set the precedence symbol of the current rule to PRECSYM. */
    351 
    352 void
    353 grammar_current_rule_prec_set (symbol *precsym, location loc)
    354 {
    355   if (current_rule->ruleprec)
    356     complain_at (loc, _("only one %s allowed per rule"), "%prec");
    357   current_rule->ruleprec = precsym;
    358 }
    359 
    360 /* Attach dynamic precedence DPREC to the current rule. */
    361 
    362 void
    363 grammar_current_rule_dprec_set (int dprec, location loc)
    364 {
    365   if (! glr_parser)
    366     warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
    367   if (dprec <= 0)
    368     complain_at (loc, _("%s must be followed by positive number"), "%dprec");
    369   else if (current_rule->dprec != 0)
    370     complain_at (loc, _("only one %s allowed per rule"), "%dprec");
    371   current_rule->dprec = dprec;
    372 }
    373 
    374 /* Attach a merge function NAME with argument type TYPE to current
    375    rule. */
    376 
    377 void
    378 grammar_current_rule_merge_set (uniqstr name, location loc)
    379 {
    380   if (! glr_parser)
    381     warn_at (loc, _("%s affects only GLR parsers"), "%merge");
    382   if (current_rule->merger != 0)
    383     complain_at (loc, _("only one %s allowed per rule"), "%merge");
    384   current_rule->merger =
    385     get_merge_function (name, current_rule->sym->type_name, loc);
    386 }
    387 
    388 /* Attach SYM to the current rule.  If needed, move the previous
    389    action as a mid-rule action.  */
    390 
    391 void
    392 grammar_current_rule_symbol_append (symbol *sym, location loc)
    393 {
    394   if (current_rule->action)
    395     grammar_midrule_action ();
    396   grammar_symbol_append (sym, loc);
    397 }
    398 
    399 /* Attach an ACTION to the current rule.  */
    400 
    401 void
    402 grammar_current_rule_action_append (const char *action, location loc)
    403 {
    404   /* There's no need to invoke grammar_midrule_action here, since the
    405      scanner already did it if necessary.  */
    406   current_rule->action = action;
    407   current_rule->action_location = loc;
    408 }
    409 
    410 
    411 /*---------------------------------------------------------------.
    413 | Convert the rules into the representation using RRHS, RLHS and |
    414 | RITEM.                                                         |
    415 `---------------------------------------------------------------*/
    416 
    417 static void
    418 packgram (void)
    419 {
    420   unsigned int itemno = 0;
    421   rule_number ruleno = 0;
    422   symbol_list *p = grammar;
    423 
    424   ritem = xnmalloc (nritems + 1, sizeof *ritem);
    425 
    426   /* This sentinel is used by build_relations in gram.c.  */
    427   *ritem++ = 0;
    428 
    429   rules = xnmalloc (nrules, sizeof *rules);
    430 
    431   while (p)
    432     {
    433       symbol *ruleprec = p->ruleprec;
    434       rules[ruleno].user_number = ruleno;
    435       rules[ruleno].number = ruleno;
    436       rules[ruleno].lhs = p->sym;
    437       rules[ruleno].rhs = ritem + itemno;
    438       rules[ruleno].prec = NULL;
    439       rules[ruleno].dprec = p->dprec;
    440       rules[ruleno].merger = p->merger;
    441       rules[ruleno].precsym = NULL;
    442       rules[ruleno].location = p->location;
    443       rules[ruleno].useful = true;
    444       rules[ruleno].action = p->action;
    445       rules[ruleno].action_location = p->action_location;
    446 
    447       p = p->next;
    448       while (p && p->sym)
    449 	{
    450 	  /* item_number = symbol_number.
    451 	     But the former needs to contain more: negative rule numbers. */
    452 	  ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
    453 	  /* A rule gets by default the precedence and associativity
    454 	     of the last token in it.  */
    455 	  if (p->sym->class == token_sym && default_prec)
    456 	    rules[ruleno].prec = p->sym;
    457 	  if (p)
    458 	    p = p->next;
    459 	}
    460 
    461       /* If this rule has a %prec,
    462          the specified symbol's precedence replaces the default.  */
    463       if (ruleprec)
    464 	{
    465 	  rules[ruleno].precsym = ruleprec;
    466 	  rules[ruleno].prec = ruleprec;
    467 	}
    468       ritem[itemno++] = rule_number_as_item_number (ruleno);
    469       ++ruleno;
    470 
    471       if (p)
    472 	p = p->next;
    473     }
    474 
    475   assert (itemno == nritems);
    476 
    477   if (trace_flag & trace_sets)
    478     ritem_print (stderr);
    479 }
    480 
    481 /*------------------------------------------------------------------.
    483 | Read in the grammar specification and record it in the format     |
    484 | described in gram.h.  All actions are copied into ACTION_OBSTACK, |
    485 | in each case forming the body of a C function (YYACTION) which    |
    486 | contains a switch statement to decide which action to execute.    |
    487 `------------------------------------------------------------------*/
    488 
    489 void
    490 reader (void)
    491 {
    492   /* Initialize the symbol table.  */
    493   symbols_new ();
    494 
    495   /* Construct the accept symbol. */
    496   accept = symbol_get ("$accept", empty_location);
    497   accept->class = nterm_sym;
    498   accept->number = nvars++;
    499 
    500   /* Construct the error token */
    501   errtoken = symbol_get ("error", empty_location);
    502   errtoken->class = token_sym;
    503   errtoken->number = ntokens++;
    504 
    505   /* Construct a token that represents all undefined literal tokens.
    506      It is always token number 2.  */
    507   undeftoken = symbol_get ("$undefined", empty_location);
    508   undeftoken->class = token_sym;
    509   undeftoken->number = ntokens++;
    510 
    511   /* Initialize the obstacks. */
    512   obstack_init (&pre_prologue_obstack);
    513   obstack_init (&post_prologue_obstack);
    514 
    515   gram_in = xfopen (grammar_file, "r");
    516 
    517   gram__flex_debug = trace_flag & trace_scan;
    518   gram_debug = trace_flag & trace_parse;
    519   scanner_initialize ();
    520   gram_parse ();
    521 
    522   if (! complaint_issued)
    523     check_and_convert_grammar ();
    524 
    525   xfclose (gram_in);
    526 }
    527 
    528 
    529 /*-------------------------------------------------------------.
    530 | Check the grammar that has just been read, and convert it to |
    531 | internal form.					       |
    532 `-------------------------------------------------------------*/
    533 
    534 static void
    535 check_and_convert_grammar (void)
    536 {
    537   /* Grammar has been read.  Do some checking.  */
    538   if (nrules == 0)
    539     fatal (_("no rules in the input grammar"));
    540 
    541   /* Report any undefined symbols and consider them nonterminals.  */
    542   symbols_check_defined ();
    543 
    544   /* If the user did not define her ENDTOKEN, do it now. */
    545   if (!endtoken)
    546     {
    547       endtoken = symbol_get ("$end", empty_location);
    548       endtoken->class = token_sym;
    549       endtoken->number = 0;
    550       /* Value specified by POSIX.  */
    551       endtoken->user_token_number = 0;
    552     }
    553 
    554   /* Insert the initial rule, whose line is that of the first rule
    555      (not that of the start symbol):
    556 
    557      accept: %start EOF.  */
    558   {
    559     symbol_list *p = symbol_list_new (accept, empty_location);
    560     p->location = grammar->location;
    561     p->next = symbol_list_new (startsymbol, empty_location);
    562     p->next->next = symbol_list_new (endtoken, empty_location);
    563     p->next->next->next = symbol_list_new (NULL, empty_location);
    564     p->next->next->next->next = grammar;
    565     nrules += 1;
    566     nritems += 3;
    567     grammar = p;
    568   }
    569 
    570   assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
    571 
    572   /* Assign the symbols their symbol numbers.  Write #defines for the
    573      token symbols into FDEFINES if requested.  */
    574   symbols_pack ();
    575 
    576   /* Convert the grammar into the format described in gram.h.  */
    577   packgram ();
    578 
    579   /* The grammar as a symbol_list is no longer needed. */
    580   LIST_FREE (symbol_list, grammar);
    581 }
    582