1 /* Grammar reduction for Bison. 2 3 Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006 Free 4 Software Foundation, Inc. 5 6 This file is part of Bison, the GNU Compiler Compiler. 7 8 Bison is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 Bison is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with Bison; see the file COPYING. If not, write to 20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 Boston, MA 02110-1301, USA. */ 22 23 24 /* Reduce the grammar: Find and eliminate unreachable terminals, 25 nonterminals, and productions. David S. Bakin. */ 26 27 /* Don't eliminate unreachable terminals: They may be used by the 28 user's parser. */ 29 30 #include <config.h> 31 #include "system.h" 32 33 #include <bitset.h> 34 #include <quotearg.h> 35 36 #include "complain.h" 37 #include "files.h" 38 #include "getargs.h" 39 #include "gram.h" 40 #include "reader.h" 41 #include "reduce.h" 42 #include "symtab.h" 43 44 /* Set of all nonterminals which are not useless. */ 45 static bitset N; 46 47 /* Set of all rules which have no useless nonterminals in their RHS. */ 48 static bitset P; 49 50 /* Set of all accessible symbols. */ 51 static bitset V; 52 53 /* Set of symbols used to define rule precedence (so they are 54 `useless', but no warning should be issued). */ 55 static bitset V1; 56 57 static rule_number nuseful_productions; 58 rule_number nuseless_productions; 59 static int nuseful_nonterminals; 60 symbol_number nuseless_nonterminals; 61 62 /*-------------------------------------------------------------------. 64 | Another way to do this would be with a set for each production and | 65 | then do subset tests against N0, but even for the C grammar the | 66 | whole reducing process takes only 2 seconds on my 8Mhz AT. | 67 `-------------------------------------------------------------------*/ 68 69 static bool 70 useful_production (rule_number r, bitset N0) 71 { 72 item_number *rhsp; 73 74 /* A production is useful if all of the nonterminals in its appear 75 in the set of useful nonterminals. */ 76 77 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp) 78 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens)) 79 return false; 80 return true; 81 } 82 83 84 /*---------------------------------------------------------. 85 | Remember that rules are 1-origin, symbols are 0-origin. | 86 `---------------------------------------------------------*/ 87 88 static void 89 useless_nonterminals (void) 90 { 91 bitset Np, Ns; 92 rule_number r; 93 94 /* N is set as built. Np is set being built this iteration. P is 95 set of all productions which have a RHS all in N. */ 96 97 Np = bitset_create (nvars, BITSET_FIXED); 98 99 100 /* The set being computed is a set of nonterminals which can derive 101 the empty string or strings consisting of all terminals. At each 102 iteration a nonterminal is added to the set if there is a 103 production with that nonterminal as its LHS for which all the 104 nonterminals in its RHS are already in the set. Iterate until 105 the set being computed remains unchanged. Any nonterminals not 106 in the set at that point are useless in that they will never be 107 used in deriving a sentence of the language. 108 109 This iteration doesn't use any special traversal over the 110 productions. A set is kept of all productions for which all the 111 nonterminals in the RHS are in useful. Only productions not in 112 this set are scanned on each iteration. At the end, this set is 113 saved to be used when finding useful productions: only 114 productions in this set will appear in the final grammar. */ 115 116 while (1) 117 { 118 bitset_copy (Np, N); 119 for (r = 0; r < nrules; r++) 120 if (!bitset_test (P, r) 121 && useful_production (r, N)) 122 { 123 bitset_set (Np, rules[r].lhs->number - ntokens); 124 bitset_set (P, r); 125 } 126 if (bitset_equal_p (N, Np)) 127 break; 128 Ns = Np; 129 Np = N; 130 N = Ns; 131 } 132 bitset_free (N); 133 N = Np; 134 } 135 136 137 static void 138 inaccessable_symbols (void) 139 { 140 bitset Vp, Vs, Pp; 141 142 /* Find out which productions are reachable and which symbols are 143 used. Starting with an empty set of productions and a set of 144 symbols which only has the start symbol in it, iterate over all 145 productions until the set of productions remains unchanged for an 146 iteration. For each production which has a LHS in the set of 147 reachable symbols, add the production to the set of reachable 148 productions, and add all of the nonterminals in the RHS of the 149 production to the set of reachable symbols. 150 151 Consider only the (partially) reduced grammar which has only 152 nonterminals in N and productions in P. 153 154 The result is the set P of productions in the reduced grammar, 155 and the set V of symbols in the reduced grammar. 156 157 Although this algorithm also computes the set of terminals which 158 are reachable, no terminal will be deleted from the grammar. Some 159 terminals might not be in the grammar but might be generated by 160 semantic routines, and so the user might want them available with 161 specified numbers. (Is this true?) However, the nonreachable 162 terminals are printed (if running in verbose mode) so that the 163 user can know. */ 164 165 Vp = bitset_create (nsyms, BITSET_FIXED); 166 Pp = bitset_create (nrules, BITSET_FIXED); 167 168 /* If the start symbol isn't useful, then nothing will be useful. */ 169 if (bitset_test (N, accept->number - ntokens)) 170 { 171 bitset_set (V, accept->number); 172 173 while (1) 174 { 175 rule_number r; 176 bitset_copy (Vp, V); 177 for (r = 0; r < nrules; r++) 178 { 179 if (!bitset_test (Pp, r) 180 && bitset_test (P, r) 181 && bitset_test (V, rules[r].lhs->number)) 182 { 183 item_number *rhsp; 184 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++) 185 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens)) 186 bitset_set (Vp, *rhsp); 187 bitset_set (Pp, r); 188 } 189 } 190 if (bitset_equal_p (V, Vp)) 191 break; 192 Vs = Vp; 193 Vp = V; 194 V = Vs; 195 } 196 } 197 198 bitset_free (V); 199 V = Vp; 200 201 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */ 202 bitset_set (V, endtoken->number); /* end-of-input token */ 203 bitset_set (V, errtoken->number); /* error token */ 204 bitset_set (V, undeftoken->number); /* some undefined token */ 205 206 bitset_free (P); 207 P = Pp; 208 209 nuseful_productions = bitset_count (P); 210 nuseless_productions = nrules - nuseful_productions; 211 212 nuseful_nonterminals = 0; 213 { 214 symbol_number i; 215 for (i = ntokens; i < nsyms; i++) 216 if (bitset_test (V, i)) 217 nuseful_nonterminals++; 218 } 219 nuseless_nonterminals = nvars - nuseful_nonterminals; 220 221 /* A token that was used in %prec should not be warned about. */ 222 { 223 rule_number r; 224 for (r = 0; r < nrules; ++r) 225 if (rules[r].precsym != 0) 226 bitset_set (V1, rules[r].precsym->number); 227 } 228 } 229 230 231 /*-------------------------------------------------------------------. 232 | Put the useless productions at the end of RULES, and adjust NRULES | 233 | accordingly. | 234 `-------------------------------------------------------------------*/ 235 236 static void 237 reduce_grammar_tables (void) 238 { 239 /* Report and flag useless productions. */ 240 { 241 rule_number r; 242 for (r = 0; r < nrules; r++) 243 rules[r].useful = bitset_test (P, r); 244 grammar_rules_never_reduced_report (_("useless rule")); 245 } 246 247 /* Map the nonterminals to their new index: useful first, useless 248 afterwards. Kept for later report. */ 249 { 250 int useful = 0; 251 int useless = nrules - nuseless_productions; 252 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted); 253 rule_number r; 254 for (r = 0; r < nrules; ++r) 255 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r]; 256 free (rules); 257 rules = rules_sorted; 258 259 /* Renumber the rules markers in RITEMS. */ 260 for (r = 0; r < nrules; ++r) 261 { 262 item_number *rhsp = rules[r].rhs; 263 for (/* Nothing. */; *rhsp >= 0; ++rhsp) 264 /* Nothing. */; 265 *rhsp = rule_number_as_item_number (r); 266 rules[r].number = r; 267 } 268 nrules -= nuseless_productions; 269 } 270 271 /* Adjust NRITEMS. */ 272 { 273 rule_number r; 274 int length; 275 for (r = nrules; r < nrules + nuseless_productions; ++r) 276 { 277 length = rule_rhs_length (&rules[r]); 278 nritems -= length + 1; 279 } 280 } 281 } 282 283 284 /*------------------------------. 285 | Remove useless nonterminals. | 286 `------------------------------*/ 287 288 static void 289 nonterminals_reduce (void) 290 { 291 symbol_number i, n; 292 293 /* Map the nonterminals to their new index: useful first, useless 294 afterwards. Kept for later report. */ 295 296 symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap); 297 n = ntokens; 298 for (i = ntokens; i < nsyms; i++) 299 if (bitset_test (V, i)) 300 nontermmap[i - ntokens] = n++; 301 for (i = ntokens; i < nsyms; i++) 302 if (!bitset_test (V, i)) 303 { 304 nontermmap[i - ntokens] = n++; 305 warn_at (symbols[i]->location, _("useless nonterminal: %s"), 306 symbols[i]->tag); 307 } 308 309 310 /* Shuffle elements of tables indexed by symbol number. */ 311 { 312 symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted); 313 314 for (i = ntokens; i < nsyms; i++) 315 symbols[i]->number = nontermmap[i - ntokens]; 316 for (i = ntokens; i < nsyms; i++) 317 symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i]; 318 for (i = ntokens; i < nsyms; i++) 319 symbols[i] = symbols_sorted[i - ntokens]; 320 free (symbols_sorted); 321 } 322 323 { 324 rule_number r; 325 for (r = 0; r < nrules; ++r) 326 { 327 item_number *rhsp; 328 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp) 329 if (ISVAR (*rhsp)) 330 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp 331 - ntokens]); 332 } 333 accept->number = nontermmap[accept->number - ntokens]; 334 } 335 336 nsyms -= nuseless_nonterminals; 337 nvars -= nuseless_nonterminals; 338 339 free (nontermmap); 340 } 341 342 343 /*------------------------------------------------------------------. 344 | Output the detailed results of the reductions. For FILE.output. | 345 `------------------------------------------------------------------*/ 346 347 void 348 reduce_output (FILE *out) 349 { 350 if (nuseless_nonterminals > 0) 351 { 352 int i; 353 fprintf (out, "%s\n\n", _("Useless nonterminals")); 354 for (i = 0; i < nuseless_nonterminals; ++i) 355 fprintf (out, " %s\n", symbols[nsyms + i]->tag); 356 fputs ("\n\n", out); 357 } 358 359 { 360 bool b = false; 361 int i; 362 for (i = 0; i < ntokens; i++) 363 if (!bitset_test (V, i) && !bitset_test (V1, i)) 364 { 365 if (!b) 366 fprintf (out, "%s\n\n", _("Terminals which are not used")); 367 b = true; 368 fprintf (out, " %s\n", symbols[i]->tag); 369 } 370 if (b) 371 fputs ("\n\n", out); 372 } 373 374 if (nuseless_productions > 0) 375 grammar_rules_partial_print (out, _("Useless rules"), 376 rule_useless_p); 377 } 378 379 381 382 383 384 /*-------------------------------. 385 | Report the results to STDERR. | 386 `-------------------------------*/ 387 388 static void 389 reduce_print (void) 390 { 391 if (yacc_flag && nuseless_productions) 392 fprintf (stderr, ngettext ("%d rule never reduced\n", 393 "%d rules never reduced\n", 394 nuseless_productions), 395 nuseless_productions); 396 397 fprintf (stderr, "%s: %s: ", grammar_file, _("warning")); 398 399 if (nuseless_nonterminals > 0) 400 fprintf (stderr, ngettext ("%d useless nonterminal", 401 "%d useless nonterminals", 402 nuseless_nonterminals), 403 nuseless_nonterminals); 404 405 if (nuseless_nonterminals > 0 && nuseless_productions > 0) 406 fprintf (stderr, _(" and ")); 407 408 if (nuseless_productions > 0) 409 fprintf (stderr, ngettext ("%d useless rule", 410 "%d useless rules", 411 nuseless_productions), 412 nuseless_productions); 413 fprintf (stderr, "\n"); 414 } 415 416 void 418 reduce_grammar (void) 419 { 420 bool reduced; 421 422 /* Allocate the global sets used to compute the reduced grammar */ 423 424 N = bitset_create (nvars, BITSET_FIXED); 425 P = bitset_create (nrules, BITSET_FIXED); 426 V = bitset_create (nsyms, BITSET_FIXED); 427 V1 = bitset_create (nsyms, BITSET_FIXED); 428 429 useless_nonterminals (); 430 inaccessable_symbols (); 431 432 reduced = (nuseless_nonterminals + nuseless_productions > 0); 433 if (!reduced) 434 return; 435 436 reduce_print (); 437 438 if (!bitset_test (N, accept->number - ntokens)) 439 fatal_at (startsymbol_location, 440 _("start symbol %s does not derive any sentence"), 441 startsymbol->tag); 442 443 /* First reduce the nonterminals, as they renumber themselves in the 444 whole grammar. If you change the order, nonterms would be 445 renumbered only in the reduced grammar. */ 446 if (nuseless_nonterminals > 0) 447 nonterminals_reduce (); 448 if (nuseless_productions > 0) 449 reduce_grammar_tables (); 450 451 if (trace_flag & trace_grammar) 452 { 453 grammar_dump (stderr, "Reduced Grammar"); 454 455 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\ 456 , and %d productions.\n", 457 grammar_file, ntokens, nvars, nrules); 458 } 459 } 460 461 462 /*-----------------------------------------------------------. 463 | Free the global sets used to compute the reduced grammar. | 464 `-----------------------------------------------------------*/ 465 466 void 467 reduce_free (void) 468 { 469 bitset_free (N); 470 bitset_free (V); 471 bitset_free (V1); 472 bitset_free (P); 473 } 474