Home | History | Annotate | Download | only in src
      1 /* Grammar reduction for Bison.
      2 
      3    Copyright (C) 1988-1989, 2000-2003, 2005-2012 Free Software
      4    Foundation, Inc.
      5 
      6    This file is part of Bison, the GNU Compiler Compiler.
      7 
      8    This program is free software: you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation, either version 3 of the License, or
     11    (at your option) any later version.
     12 
     13    This program is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     20 
     21 
     22 /* Reduce the grammar: Find and eliminate unreachable terminals,
     23    nonterminals, and productions.  David S. Bakin.  */
     24 
     25 /* Don't eliminate unreachable terminals: They may be used by the
     26    user's parser.  */
     27 
     28 #include <config.h>
     29 #include "system.h"
     30 
     31 #include <bitset.h>
     32 
     33 #include "complain.h"
     34 #include "files.h"
     35 #include "getargs.h"
     36 #include "gram.h"
     37 #include "print-xml.h"
     38 #include "reader.h"
     39 #include "reduce.h"
     40 #include "symtab.h"
     41 
     42 /* Set of all nonterminals which are not useless.  */
     43 static bitset N;
     44 
     45 /* Set of all rules which have no useless nonterminals in their RHS.  */
     46 static bitset P;
     47 
     48 /* Set of all accessible symbols.  */
     49 static bitset V;
     50 
     51 /* Set of symbols used to define rule precedence (so they are
     52    `useless', but no warning should be issued).  */
     53 static bitset V1;
     54 
     55 static rule_number nuseful_productions;
     56 rule_number nuseless_productions;
     57 static int nuseful_nonterminals;
     58 symbol_number nuseless_nonterminals;
     59 
     60 /*-------------------------------------------------------------------.
     62 | Another way to do this would be with a set for each production and |
     63 | then do subset tests against N0, but even for the C grammar the    |
     64 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
     65 `-------------------------------------------------------------------*/
     66 
     67 static bool
     68 useful_production (rule_number r, bitset N0)
     69 {
     70   item_number *rhsp;
     71 
     72   /* A production is useful if all of the nonterminals in its appear
     73      in the set of useful nonterminals.  */
     74 
     75   for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
     76     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
     77       return false;
     78   return true;
     79 }
     80 
     81 
     82 /*---------------------------------------------------------.
     83 | Remember that rules are 1-origin, symbols are 0-origin.  |
     84 `---------------------------------------------------------*/
     85 
     86 static void
     87 useless_nonterminals (void)
     88 {
     89   bitset Np, Ns;
     90   rule_number r;
     91 
     92   /* N is set as built.  Np is set being built this iteration. P is
     93      set of all productions which have a RHS all in N.  */
     94 
     95   Np = bitset_create (nvars, BITSET_FIXED);
     96 
     97 
     98   /* The set being computed is a set of nonterminals which can derive
     99      the empty string or strings consisting of all terminals. At each
    100      iteration a nonterminal is added to the set if there is a
    101      production with that nonterminal as its LHS for which all the
    102      nonterminals in its RHS are already in the set.  Iterate until
    103      the set being computed remains unchanged.  Any nonterminals not
    104      in the set at that point are useless in that they will never be
    105      used in deriving a sentence of the language.
    106 
    107      This iteration doesn't use any special traversal over the
    108      productions.  A set is kept of all productions for which all the
    109      nonterminals in the RHS are in useful.  Only productions not in
    110      this set are scanned on each iteration.  At the end, this set is
    111      saved to be used when finding useful productions: only
    112      productions in this set will appear in the final grammar.  */
    113 
    114   while (1)
    115     {
    116       bitset_copy (Np, N);
    117       for (r = 0; r < nrules; r++)
    118 	if (!bitset_test (P, r)
    119 	    && useful_production (r, N))
    120 	  {
    121 	    bitset_set (Np, rules[r].lhs->number - ntokens);
    122 	    bitset_set (P, r);
    123 	  }
    124       if (bitset_equal_p (N, Np))
    125 	break;
    126       Ns = Np;
    127       Np = N;
    128       N = Ns;
    129     }
    130   bitset_free (N);
    131   N = Np;
    132 }
    133 
    134 
    135 static void
    136 inaccessable_symbols (void)
    137 {
    138   bitset Vp, Vs, Pp;
    139 
    140   /* Find out which productions are reachable and which symbols are
    141      used.  Starting with an empty set of productions and a set of
    142      symbols which only has the start symbol in it, iterate over all
    143      productions until the set of productions remains unchanged for an
    144      iteration.  For each production which has a LHS in the set of
    145      reachable symbols, add the production to the set of reachable
    146      productions, and add all of the nonterminals in the RHS of the
    147      production to the set of reachable symbols.
    148 
    149      Consider only the (partially) reduced grammar which has only
    150      nonterminals in N and productions in P.
    151 
    152      The result is the set P of productions in the reduced grammar,
    153      and the set V of symbols in the reduced grammar.
    154 
    155      Although this algorithm also computes the set of terminals which
    156      are reachable, no terminal will be deleted from the grammar. Some
    157      terminals might not be in the grammar but might be generated by
    158      semantic routines, and so the user might want them available with
    159      specified numbers.  (Is this true?)  However, the nonreachable
    160      terminals are printed (if running in verbose mode) so that the
    161      user can know.  */
    162 
    163   Vp = bitset_create (nsyms, BITSET_FIXED);
    164   Pp = bitset_create (nrules, BITSET_FIXED);
    165 
    166   /* If the start symbol isn't useful, then nothing will be useful. */
    167   if (bitset_test (N, accept->number - ntokens))
    168     {
    169       bitset_set (V, accept->number);
    170 
    171       while (1)
    172 	{
    173 	  rule_number r;
    174 	  bitset_copy (Vp, V);
    175 	  for (r = 0; r < nrules; r++)
    176 	    {
    177 	      if (!bitset_test (Pp, r)
    178 		  && bitset_test (P, r)
    179 		  && bitset_test (V, rules[r].lhs->number))
    180 		{
    181 		  item_number *rhsp;
    182 		  for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
    183 		    if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
    184 		      bitset_set (Vp, *rhsp);
    185 		  bitset_set (Pp, r);
    186 		}
    187 	    }
    188 	  if (bitset_equal_p (V, Vp))
    189 	    break;
    190 	  Vs = Vp;
    191 	  Vp = V;
    192 	  V = Vs;
    193 	}
    194     }
    195 
    196   bitset_free (V);
    197   V = Vp;
    198 
    199   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
    200   bitset_set (V, endtoken->number);		/* end-of-input token */
    201   bitset_set (V, errtoken->number);		/* error token */
    202   bitset_set (V, undeftoken->number);		/* some undefined token */
    203 
    204   bitset_free (P);
    205   P = Pp;
    206 
    207   nuseful_productions = bitset_count (P);
    208   nuseless_productions = nrules - nuseful_productions;
    209 
    210   nuseful_nonterminals = 0;
    211   {
    212     symbol_number i;
    213     for (i = ntokens; i < nsyms; i++)
    214       if (bitset_test (V, i))
    215 	nuseful_nonterminals++;
    216   }
    217   nuseless_nonterminals = nvars - nuseful_nonterminals;
    218 
    219   /* A token that was used in %prec should not be warned about.  */
    220   {
    221     rule_number r;
    222     for (r = 0; r < nrules; ++r)
    223       if (rules[r].precsym != 0)
    224 	bitset_set (V1, rules[r].precsym->number);
    225   }
    226 }
    227 
    228 
    229 /*-------------------------------------------------------------------.
    230 | Put the useless productions at the end of RULES, and adjust NRULES |
    231 | accordingly.                                                       |
    232 `-------------------------------------------------------------------*/
    233 
    234 static void
    235 reduce_grammar_tables (void)
    236 {
    237   /* Report and flag useless productions.  */
    238   {
    239     rule_number r;
    240     for (r = 0; r < nrules; r++)
    241       rules[r].useful = bitset_test (P, r);
    242     grammar_rules_useless_report (_("rule useless in grammar"));
    243   }
    244 
    245   /* Map the nonterminals to their new index: useful first, useless
    246      afterwards.  Kept for later report.  */
    247   {
    248     int useful = 0;
    249     int useless = nrules - nuseless_productions;
    250     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
    251     rule_number r;
    252     for (r = 0; r < nrules; ++r)
    253       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
    254     free (rules);
    255     rules = rules_sorted;
    256 
    257     /* Renumber the rules markers in RITEMS.  */
    258     for (r = 0; r < nrules; ++r)
    259       {
    260 	item_number *rhsp = rules[r].rhs;
    261 	for (/* Nothing. */; *rhsp >= 0; ++rhsp)
    262 	  /* Nothing. */;
    263 	*rhsp = rule_number_as_item_number (r);
    264 	rules[r].number = r;
    265       }
    266     nrules -= nuseless_productions;
    267   }
    268 
    269   /* Adjust NRITEMS.  */
    270   {
    271     rule_number r;
    272     int length;
    273     for (r = nrules; r < nrules + nuseless_productions; ++r)
    274       {
    275 	length = rule_rhs_length (&rules[r]);
    276 	nritems -= length + 1;
    277       }
    278   }
    279 }
    280 
    281 
    282 /*------------------------------.
    283 | Remove useless nonterminals.  |
    284 `------------------------------*/
    285 
    286 static void
    287 nonterminals_reduce (void)
    288 {
    289   symbol_number i, n;
    290 
    291   /* Map the nonterminals to their new index: useful first, useless
    292      afterwards.  Kept for later report.  */
    293 
    294   symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
    295   n = ntokens;
    296   for (i = ntokens; i < nsyms; i++)
    297     if (bitset_test (V, i))
    298       nontermmap[i - ntokens] = n++;
    299   for (i = ntokens; i < nsyms; i++)
    300     if (!bitset_test (V, i))
    301       {
    302 	nontermmap[i - ntokens] = n++;
    303 	warn_at (symbols[i]->location, _("nonterminal useless in grammar: %s"),
    304 		 symbols[i]->tag);
    305       }
    306 
    307 
    308   /* Shuffle elements of tables indexed by symbol number.  */
    309   {
    310     symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
    311 
    312     for (i = ntokens; i < nsyms; i++)
    313       symbols[i]->number = nontermmap[i - ntokens];
    314     for (i = ntokens; i < nsyms; i++)
    315       symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
    316     for (i = ntokens; i < nsyms; i++)
    317       symbols[i] = symbols_sorted[i - ntokens];
    318     free (symbols_sorted);
    319   }
    320 
    321   {
    322     rule_number r;
    323     for (r = 0; r < nrules; ++r)
    324       {
    325 	item_number *rhsp;
    326 	for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
    327 	  if (ISVAR (*rhsp))
    328 	    *rhsp =  symbol_number_as_item_number (nontermmap[*rhsp
    329 							      - ntokens]);
    330       }
    331     accept->number = nontermmap[accept->number - ntokens];
    332   }
    333 
    334   nsyms -= nuseless_nonterminals;
    335   nvars -= nuseless_nonterminals;
    336 
    337   free (nontermmap);
    338 }
    339 
    340 
    341 /*------------------------------------------------------------------.
    342 | Output the detailed results of the reductions.  For FILE.output.  |
    343 `------------------------------------------------------------------*/
    344 
    345 void
    346 reduce_output (FILE *out)
    347 {
    348   if (nuseless_nonterminals > 0)
    349     {
    350       int i;
    351       fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
    352       for (i = 0; i < nuseless_nonterminals; ++i)
    353 	fprintf (out, "   %s\n", symbols[nsyms + i]->tag);
    354       fputs ("\n\n", out);
    355     }
    356 
    357   {
    358     bool b = false;
    359     int i;
    360     for (i = 0; i < ntokens; i++)
    361       if (reduce_token_unused_in_grammar (i))
    362 	{
    363 	  if (!b)
    364 	    fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
    365 	  b = true;
    366 	  fprintf (out, "   %s\n", symbols[i]->tag);
    367 	}
    368     if (b)
    369       fputs ("\n\n", out);
    370   }
    371 
    372   if (nuseless_productions > 0)
    373     grammar_rules_partial_print (out, _("Rules useless in grammar"),
    374 				 rule_useless_in_grammar_p);
    375 }
    376 
    377 
    379 /*-------------------------------.
    380 | Report the results to STDERR.  |
    381 `-------------------------------*/
    382 
    383 static void
    384 reduce_print (void)
    385 {
    386   if (nuseless_nonterminals > 0)
    387     warn (ngettext ("%d nonterminal useless in grammar",
    388                     "%d nonterminals useless in grammar",
    389                     nuseless_nonterminals),
    390           nuseless_nonterminals);
    391   if (nuseless_productions > 0)
    392     warn (ngettext ("%d rule useless in grammar",
    393                     "%d rules useless in grammar",
    394                     nuseless_productions),
    395           nuseless_productions);
    396 }
    397 
    398 void
    400 reduce_grammar (void)
    401 {
    402   bool reduced;
    403 
    404   /* Allocate the global sets used to compute the reduced grammar */
    405 
    406   N = bitset_create (nvars, BITSET_FIXED);
    407   P =  bitset_create (nrules, BITSET_FIXED);
    408   V = bitset_create (nsyms, BITSET_FIXED);
    409   V1 = bitset_create (nsyms, BITSET_FIXED);
    410 
    411   useless_nonterminals ();
    412   inaccessable_symbols ();
    413 
    414   reduced = (nuseless_nonterminals + nuseless_productions > 0);
    415   if (!reduced)
    416     return;
    417 
    418   reduce_print ();
    419 
    420   if (!bitset_test (N, accept->number - ntokens))
    421     fatal_at (startsymbol_location,
    422 	      _("start symbol %s does not derive any sentence"),
    423 	      startsymbol->tag);
    424 
    425   /* First reduce the nonterminals, as they renumber themselves in the
    426      whole grammar.  If you change the order, nonterms would be
    427      renumbered only in the reduced grammar.  */
    428   if (nuseless_nonterminals > 0)
    429     nonterminals_reduce ();
    430   if (nuseless_productions > 0)
    431     reduce_grammar_tables ();
    432 
    433   if (trace_flag & trace_grammar)
    434     {
    435       grammar_dump (stderr, "Reduced Grammar");
    436 
    437       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
    438 , and %d productions.\n",
    439 	       grammar_file, ntokens, nvars, nrules);
    440     }
    441 }
    442 
    443 bool
    444 reduce_token_unused_in_grammar (symbol_number i)
    445 {
    446   aver (i < ntokens);
    447   return !bitset_test (V, i) && !bitset_test (V1, i);
    448 }
    449 
    450 bool
    451 reduce_nonterminal_useless_in_grammar (symbol_number i)
    452 {
    453   aver (ntokens <= i && i < nsyms + nuseless_nonterminals);
    454   return nsyms <= i;
    455 }
    456 
    457 /*-----------------------------------------------------------.
    458 | Free the global sets used to compute the reduced grammar.  |
    459 `-----------------------------------------------------------*/
    460 
    461 void
    462 reduce_free (void)
    463 {
    464   bitset_free (N);
    465   bitset_free (V);
    466   bitset_free (V1);
    467   bitset_free (P);
    468 }
    469