Home | History | Annotate | Download | only in src
      1 /* Grammar reduction for Bison.
      2 
      3    Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006 Free
      4    Software Foundation, Inc.
      5 
      6    This file is part of Bison, the GNU Compiler Compiler.
      7 
      8    Bison is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 2, or (at your option)
     11    any later version.
     12 
     13    Bison is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with Bison; see the file COPYING.  If not, write to
     20    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21    Boston, MA 02110-1301, USA.  */
     22 
     23 
     24 /* Reduce the grammar: Find and eliminate unreachable terminals,
     25    nonterminals, and productions.  David S. Bakin.  */
     26 
     27 /* Don't eliminate unreachable terminals: They may be used by the
     28    user's parser.  */
     29 
     30 #include <config.h>
     31 #include "system.h"
     32 
     33 #include <bitset.h>
     34 #include <quotearg.h>
     35 
     36 #include "complain.h"
     37 #include "files.h"
     38 #include "getargs.h"
     39 #include "gram.h"
     40 #include "reader.h"
     41 #include "reduce.h"
     42 #include "symtab.h"
     43 
     44 /* Set of all nonterminals which are not useless.  */
     45 static bitset N;
     46 
     47 /* Set of all rules which have no useless nonterminals in their RHS.  */
     48 static bitset P;
     49 
     50 /* Set of all accessible symbols.  */
     51 static bitset V;
     52 
     53 /* Set of symbols used to define rule precedence (so they are
     54    `useless', but no warning should be issued).  */
     55 static bitset V1;
     56 
     57 static rule_number nuseful_productions;
     58 rule_number nuseless_productions;
     59 static int nuseful_nonterminals;
     60 symbol_number nuseless_nonterminals;
     61 
     62 /*-------------------------------------------------------------------.
     64 | Another way to do this would be with a set for each production and |
     65 | then do subset tests against N0, but even for the C grammar the    |
     66 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
     67 `-------------------------------------------------------------------*/
     68 
     69 static bool
     70 useful_production (rule_number r, bitset N0)
     71 {
     72   item_number *rhsp;
     73 
     74   /* A production is useful if all of the nonterminals in its appear
     75      in the set of useful nonterminals.  */
     76 
     77   for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
     78     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
     79       return false;
     80   return true;
     81 }
     82 
     83 
     84 /*---------------------------------------------------------.
     85 | Remember that rules are 1-origin, symbols are 0-origin.  |
     86 `---------------------------------------------------------*/
     87 
     88 static void
     89 useless_nonterminals (void)
     90 {
     91   bitset Np, Ns;
     92   rule_number r;
     93 
     94   /* N is set as built.  Np is set being built this iteration. P is
     95      set of all productions which have a RHS all in N.  */
     96 
     97   Np = bitset_create (nvars, BITSET_FIXED);
     98 
     99 
    100   /* The set being computed is a set of nonterminals which can derive
    101      the empty string or strings consisting of all terminals. At each
    102      iteration a nonterminal is added to the set if there is a
    103      production with that nonterminal as its LHS for which all the
    104      nonterminals in its RHS are already in the set.  Iterate until
    105      the set being computed remains unchanged.  Any nonterminals not
    106      in the set at that point are useless in that they will never be
    107      used in deriving a sentence of the language.
    108 
    109      This iteration doesn't use any special traversal over the
    110      productions.  A set is kept of all productions for which all the
    111      nonterminals in the RHS are in useful.  Only productions not in
    112      this set are scanned on each iteration.  At the end, this set is
    113      saved to be used when finding useful productions: only
    114      productions in this set will appear in the final grammar.  */
    115 
    116   while (1)
    117     {
    118       bitset_copy (Np, N);
    119       for (r = 0; r < nrules; r++)
    120 	if (!bitset_test (P, r)
    121 	    && useful_production (r, N))
    122 	  {
    123 	    bitset_set (Np, rules[r].lhs->number - ntokens);
    124 	    bitset_set (P, r);
    125 	  }
    126       if (bitset_equal_p (N, Np))
    127 	break;
    128       Ns = Np;
    129       Np = N;
    130       N = Ns;
    131     }
    132   bitset_free (N);
    133   N = Np;
    134 }
    135 
    136 
    137 static void
    138 inaccessable_symbols (void)
    139 {
    140   bitset Vp, Vs, Pp;
    141 
    142   /* Find out which productions are reachable and which symbols are
    143      used.  Starting with an empty set of productions and a set of
    144      symbols which only has the start symbol in it, iterate over all
    145      productions until the set of productions remains unchanged for an
    146      iteration.  For each production which has a LHS in the set of
    147      reachable symbols, add the production to the set of reachable
    148      productions, and add all of the nonterminals in the RHS of the
    149      production to the set of reachable symbols.
    150 
    151      Consider only the (partially) reduced grammar which has only
    152      nonterminals in N and productions in P.
    153 
    154      The result is the set P of productions in the reduced grammar,
    155      and the set V of symbols in the reduced grammar.
    156 
    157      Although this algorithm also computes the set of terminals which
    158      are reachable, no terminal will be deleted from the grammar. Some
    159      terminals might not be in the grammar but might be generated by
    160      semantic routines, and so the user might want them available with
    161      specified numbers.  (Is this true?)  However, the nonreachable
    162      terminals are printed (if running in verbose mode) so that the
    163      user can know.  */
    164 
    165   Vp = bitset_create (nsyms, BITSET_FIXED);
    166   Pp = bitset_create (nrules, BITSET_FIXED);
    167 
    168   /* If the start symbol isn't useful, then nothing will be useful. */
    169   if (bitset_test (N, accept->number - ntokens))
    170     {
    171       bitset_set (V, accept->number);
    172 
    173       while (1)
    174 	{
    175 	  rule_number r;
    176 	  bitset_copy (Vp, V);
    177 	  for (r = 0; r < nrules; r++)
    178 	    {
    179 	      if (!bitset_test (Pp, r)
    180 		  && bitset_test (P, r)
    181 		  && bitset_test (V, rules[r].lhs->number))
    182 		{
    183 		  item_number *rhsp;
    184 		  for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
    185 		    if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
    186 		      bitset_set (Vp, *rhsp);
    187 		  bitset_set (Pp, r);
    188 		}
    189 	    }
    190 	  if (bitset_equal_p (V, Vp))
    191 	    break;
    192 	  Vs = Vp;
    193 	  Vp = V;
    194 	  V = Vs;
    195 	}
    196     }
    197 
    198   bitset_free (V);
    199   V = Vp;
    200 
    201   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
    202   bitset_set (V, endtoken->number);		/* end-of-input token */
    203   bitset_set (V, errtoken->number);		/* error token */
    204   bitset_set (V, undeftoken->number);		/* some undefined token */
    205 
    206   bitset_free (P);
    207   P = Pp;
    208 
    209   nuseful_productions = bitset_count (P);
    210   nuseless_productions = nrules - nuseful_productions;
    211 
    212   nuseful_nonterminals = 0;
    213   {
    214     symbol_number i;
    215     for (i = ntokens; i < nsyms; i++)
    216       if (bitset_test (V, i))
    217 	nuseful_nonterminals++;
    218   }
    219   nuseless_nonterminals = nvars - nuseful_nonterminals;
    220 
    221   /* A token that was used in %prec should not be warned about.  */
    222   {
    223     rule_number r;
    224     for (r = 0; r < nrules; ++r)
    225       if (rules[r].precsym != 0)
    226 	bitset_set (V1, rules[r].precsym->number);
    227   }
    228 }
    229 
    230 
    231 /*-------------------------------------------------------------------.
    232 | Put the useless productions at the end of RULES, and adjust NRULES |
    233 | accordingly.                                                       |
    234 `-------------------------------------------------------------------*/
    235 
    236 static void
    237 reduce_grammar_tables (void)
    238 {
    239   /* Report and flag useless productions.  */
    240   {
    241     rule_number r;
    242     for (r = 0; r < nrules; r++)
    243       rules[r].useful = bitset_test (P, r);
    244     grammar_rules_never_reduced_report (_("useless rule"));
    245   }
    246 
    247   /* Map the nonterminals to their new index: useful first, useless
    248      afterwards.  Kept for later report.  */
    249   {
    250     int useful = 0;
    251     int useless = nrules - nuseless_productions;
    252     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
    253     rule_number r;
    254     for (r = 0; r < nrules; ++r)
    255       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
    256     free (rules);
    257     rules = rules_sorted;
    258 
    259     /* Renumber the rules markers in RITEMS.  */
    260     for (r = 0; r < nrules; ++r)
    261       {
    262 	item_number *rhsp = rules[r].rhs;
    263 	for (/* Nothing. */; *rhsp >= 0; ++rhsp)
    264 	  /* Nothing. */;
    265 	*rhsp = rule_number_as_item_number (r);
    266 	rules[r].number = r;
    267       }
    268     nrules -= nuseless_productions;
    269   }
    270 
    271   /* Adjust NRITEMS.  */
    272   {
    273     rule_number r;
    274     int length;
    275     for (r = nrules; r < nrules + nuseless_productions; ++r)
    276       {
    277 	length = rule_rhs_length (&rules[r]);
    278 	nritems -= length + 1;
    279       }
    280   }
    281 }
    282 
    283 
    284 /*------------------------------.
    285 | Remove useless nonterminals.  |
    286 `------------------------------*/
    287 
    288 static void
    289 nonterminals_reduce (void)
    290 {
    291   symbol_number i, n;
    292 
    293   /* Map the nonterminals to their new index: useful first, useless
    294      afterwards.  Kept for later report.  */
    295 
    296   symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
    297   n = ntokens;
    298   for (i = ntokens; i < nsyms; i++)
    299     if (bitset_test (V, i))
    300       nontermmap[i - ntokens] = n++;
    301   for (i = ntokens; i < nsyms; i++)
    302     if (!bitset_test (V, i))
    303       {
    304 	nontermmap[i - ntokens] = n++;
    305 	warn_at (symbols[i]->location, _("useless nonterminal: %s"),
    306 		 symbols[i]->tag);
    307       }
    308 
    309 
    310   /* Shuffle elements of tables indexed by symbol number.  */
    311   {
    312     symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
    313 
    314     for (i = ntokens; i < nsyms; i++)
    315       symbols[i]->number = nontermmap[i - ntokens];
    316     for (i = ntokens; i < nsyms; i++)
    317       symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
    318     for (i = ntokens; i < nsyms; i++)
    319       symbols[i] = symbols_sorted[i - ntokens];
    320     free (symbols_sorted);
    321   }
    322 
    323   {
    324     rule_number r;
    325     for (r = 0; r < nrules; ++r)
    326       {
    327 	item_number *rhsp;
    328 	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
    329 	  if (ISVAR (*rhsp))
    330 	    *rhsp =  symbol_number_as_item_number (nontermmap[*rhsp
    331 							      - ntokens]);
    332       }
    333     accept->number = nontermmap[accept->number - ntokens];
    334   }
    335 
    336   nsyms -= nuseless_nonterminals;
    337   nvars -= nuseless_nonterminals;
    338 
    339   free (nontermmap);
    340 }
    341 
    342 
    343 /*------------------------------------------------------------------.
    344 | Output the detailed results of the reductions.  For FILE.output.  |
    345 `------------------------------------------------------------------*/
    346 
    347 void
    348 reduce_output (FILE *out)
    349 {
    350   if (nuseless_nonterminals > 0)
    351     {
    352       int i;
    353       fprintf (out, "%s\n\n", _("Useless nonterminals"));
    354       for (i = 0; i < nuseless_nonterminals; ++i)
    355 	fprintf (out, "   %s\n", symbols[nsyms + i]->tag);
    356       fputs ("\n\n", out);
    357     }
    358 
    359   {
    360     bool b = false;
    361     int i;
    362     for (i = 0; i < ntokens; i++)
    363       if (!bitset_test (V, i) && !bitset_test (V1, i))
    364 	{
    365 	  if (!b)
    366 	    fprintf (out, "%s\n\n", _("Terminals which are not used"));
    367 	  b = true;
    368 	  fprintf (out, "   %s\n", symbols[i]->tag);
    369 	}
    370     if (b)
    371       fputs ("\n\n", out);
    372   }
    373 
    374   if (nuseless_productions > 0)
    375     grammar_rules_partial_print (out, _("Useless rules"),
    376 				 rule_useless_p);
    377 }
    378 
    379 
    381 
    382 
    383 
    384 /*-------------------------------.
    385 | Report the results to STDERR.  |
    386 `-------------------------------*/
    387 
    388 static void
    389 reduce_print (void)
    390 {
    391   if (yacc_flag && nuseless_productions)
    392     fprintf (stderr, ngettext ("%d rule never reduced\n",
    393 			       "%d rules never reduced\n",
    394 			       nuseless_productions),
    395 	     nuseless_productions);
    396 
    397   fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
    398 
    399   if (nuseless_nonterminals > 0)
    400     fprintf (stderr, ngettext ("%d useless nonterminal",
    401 			       "%d useless nonterminals",
    402 			       nuseless_nonterminals),
    403 	     nuseless_nonterminals);
    404 
    405   if (nuseless_nonterminals > 0 && nuseless_productions > 0)
    406     fprintf (stderr, _(" and "));
    407 
    408   if (nuseless_productions > 0)
    409     fprintf (stderr, ngettext ("%d useless rule",
    410 			       "%d useless rules",
    411 			       nuseless_productions),
    412 	     nuseless_productions);
    413   fprintf (stderr, "\n");
    414 }
    415 
    416 void
    418 reduce_grammar (void)
    419 {
    420   bool reduced;
    421 
    422   /* Allocate the global sets used to compute the reduced grammar */
    423 
    424   N = bitset_create (nvars, BITSET_FIXED);
    425   P =  bitset_create (nrules, BITSET_FIXED);
    426   V = bitset_create (nsyms, BITSET_FIXED);
    427   V1 = bitset_create (nsyms, BITSET_FIXED);
    428 
    429   useless_nonterminals ();
    430   inaccessable_symbols ();
    431 
    432   reduced = (nuseless_nonterminals + nuseless_productions > 0);
    433   if (!reduced)
    434     return;
    435 
    436   reduce_print ();
    437 
    438   if (!bitset_test (N, accept->number - ntokens))
    439     fatal_at (startsymbol_location,
    440 	      _("start symbol %s does not derive any sentence"),
    441 	      startsymbol->tag);
    442 
    443   /* First reduce the nonterminals, as they renumber themselves in the
    444      whole grammar.  If you change the order, nonterms would be
    445      renumbered only in the reduced grammar.  */
    446   if (nuseless_nonterminals > 0)
    447     nonterminals_reduce ();
    448   if (nuseless_productions > 0)
    449     reduce_grammar_tables ();
    450 
    451   if (trace_flag & trace_grammar)
    452     {
    453       grammar_dump (stderr, "Reduced Grammar");
    454 
    455       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
    456 , and %d productions.\n",
    457 	       grammar_file, ntokens, nvars, nrules);
    458     }
    459 }
    460 
    461 
    462 /*-----------------------------------------------------------.
    463 | Free the global sets used to compute the reduced grammar.  |
    464 `-----------------------------------------------------------*/
    465 
    466 void
    467 reduce_free (void)
    468 {
    469   bitset_free (N);
    470   bitset_free (V);
    471   bitset_free (V1);
    472   bitset_free (P);
    473 }
    474