Home | History | Annotate | Download | only in src
      1 /* Compute lookahead criteria for Bison.
      2 
      3    Copyright (C) 1984, 1986, 1989, 2000-2012 Free Software Foundation,
      4    Inc.
      5 
      6    This file is part of Bison, the GNU Compiler Compiler.
      7 
      8    This program is free software: you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation, either version 3 of the License, or
     11    (at your option) any later version.
     12 
     13    This program is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     20 
     21 
     22 /* Find which rules need lookahead in each state, and which lookahead
     23    tokens they accept.  */
     24 
     25 #include <config.h>
     26 #include "system.h"
     27 
     28 #include <bitset.h>
     29 #include <bitsetv.h>
     30 
     31 #include "LR0.h"
     32 #include "complain.h"
     33 #include "derives.h"
     34 #include "getargs.h"
     35 #include "gram.h"
     36 #include "lalr.h"
     37 #include "muscle-tab.h"
     38 #include "nullable.h"
     39 #include "reader.h"
     40 #include "relation.h"
     41 #include "symtab.h"
     42 
     43 goto_number *goto_map;
     44 goto_number ngotos;
     45 state_number *from_state;
     46 state_number *to_state;
     47 bitsetv goto_follows = NULL;
     48 
     49 /* Linked list of goto numbers.  */
     50 typedef struct goto_list
     51 {
     52   struct goto_list *next;
     53   goto_number value;
     54 } goto_list;
     55 
     56 
     57 /* LA is an NLA by NTOKENS matrix of bits.  LA[l, i] is 1 if the rule
     58    LArule[l] is applicable in the appropriate state when the next
     59    token is symbol i.  If LA[l, i] and LA[l, j] are both 1 for i != j,
     60    it is a conflict.  */
     61 
     62 static bitsetv LA = NULL;
     63 size_t nLA;
     64 
     65 
     66 static goto_number **includes;
     67 static goto_list **lookback;
     68 
     69 
     70 
     71 
     72 void
     73 set_goto_map (void)
     74 {
     75   state_number s;
     76   goto_number *temp_map;
     77 
     78   goto_map = xcalloc (nvars + 1, sizeof *goto_map);
     79   temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
     80 
     81   ngotos = 0;
     82   for (s = 0; s < nstates; ++s)
     83     {
     84       transitions *sp = states[s]->transitions;
     85       int i;
     86       for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
     87 	{
     88 	  ngotos++;
     89 
     90 	  /* Abort if (ngotos + 1) would overflow.  */
     91 	  aver (ngotos != GOTO_NUMBER_MAXIMUM);
     92 
     93 	  goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
     94 	}
     95     }
     96 
     97   {
     98     goto_number k = 0;
     99     int i;
    100     for (i = ntokens; i < nsyms; i++)
    101       {
    102 	temp_map[i - ntokens] = k;
    103 	k += goto_map[i - ntokens];
    104       }
    105 
    106     for (i = ntokens; i < nsyms; i++)
    107       goto_map[i - ntokens] = temp_map[i - ntokens];
    108 
    109     goto_map[nsyms - ntokens] = ngotos;
    110     temp_map[nsyms - ntokens] = ngotos;
    111   }
    112 
    113   from_state = xcalloc (ngotos, sizeof *from_state);
    114   to_state = xcalloc (ngotos, sizeof *to_state);
    115 
    116   for (s = 0; s < nstates; ++s)
    117     {
    118       transitions *sp = states[s]->transitions;
    119       int i;
    120       for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
    121 	{
    122 	  goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
    123 	  from_state[k] = s;
    124 	  to_state[k] = sp->states[i]->number;
    125 	}
    126     }
    127 
    128   free (temp_map);
    129 }
    130 
    131 
    132 goto_number
    133 map_goto (state_number s0, symbol_number sym)
    134 {
    135   goto_number high;
    136   goto_number low;
    137   goto_number middle;
    138   state_number s;
    139 
    140   low = goto_map[sym - ntokens];
    141   high = goto_map[sym - ntokens + 1] - 1;
    142 
    143   for (;;)
    144     {
    145       aver (low <= high);
    146       middle = (low + high) / 2;
    147       s = from_state[middle];
    148       if (s == s0)
    149 	return middle;
    150       else if (s < s0)
    151 	low = middle + 1;
    152       else
    153 	high = middle - 1;
    154     }
    155 }
    156 
    157 
    158 static void
    159 initialize_F (void)
    160 {
    161   goto_number **reads = xnmalloc (ngotos, sizeof *reads);
    162   goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
    163   goto_number nedges = 0;
    164 
    165   goto_number i;
    166 
    167   goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
    168 
    169   for (i = 0; i < ngotos; i++)
    170     {
    171       state_number stateno = to_state[i];
    172       transitions *sp = states[stateno]->transitions;
    173 
    174       int j;
    175       FOR_EACH_SHIFT (sp, j)
    176 	bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j));
    177 
    178       for (; j < sp->num; j++)
    179 	{
    180 	  symbol_number sym = TRANSITION_SYMBOL (sp, j);
    181 	  if (nullable[sym - ntokens])
    182 	    edge[nedges++] = map_goto (stateno, sym);
    183 	}
    184 
    185       if (nedges == 0)
    186 	reads[i] = NULL;
    187       else
    188 	{
    189 	  reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
    190 	  memcpy (reads[i], edge, nedges * sizeof edge[0]);
    191 	  reads[i][nedges] = END_NODE;
    192 	  nedges = 0;
    193 	}
    194     }
    195 
    196   relation_digraph (reads, ngotos, &goto_follows);
    197 
    198   for (i = 0; i < ngotos; i++)
    199     free (reads[i]);
    200 
    201   free (reads);
    202   free (edge);
    203 }
    204 
    205 
    206 static void
    207 add_lookback_edge (state *s, rule *r, goto_number gotono)
    208 {
    209   int ri = state_reduction_find (s, r);
    210   goto_list *sp = xmalloc (sizeof *sp);
    211   sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
    212   sp->value = gotono;
    213   lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
    214 }
    215 
    216 
    217 
    218 static void
    219 build_relations (void)
    220 {
    221   goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
    222   state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
    223   goto_number i;
    224 
    225   includes = xnmalloc (ngotos, sizeof *includes);
    226 
    227   for (i = 0; i < ngotos; i++)
    228     {
    229       int nedges = 0;
    230       symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
    231       rule **rulep;
    232 
    233       for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
    234 	{
    235 	  bool done;
    236 	  int length = 1;
    237 	  item_number const *rp;
    238 	  state *s = states[from_state[i]];
    239 	  states1[0] = s->number;
    240 
    241 	  for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
    242 	    {
    243 	      s = transitions_to (s->transitions,
    244 				  item_number_as_symbol_number (*rp));
    245 	      states1[length++] = s->number;
    246 	    }
    247 
    248 	  if (!s->consistent)
    249 	    add_lookback_edge (s, *rulep, i);
    250 
    251 	  length--;
    252 	  done = false;
    253 	  while (!done)
    254 	    {
    255 	      done = true;
    256 	      /* Each rhs ends in a rule number, and there is a
    257 		 sentinel (ritem[-1]=0) before the first rhs, so it is safe to
    258 		 decrement RP here.  */
    259 	      rp--;
    260 	      if (ISVAR (*rp))
    261 		{
    262 		  /* Downcasting from item_number to symbol_number.  */
    263 		  edge[nedges++] = map_goto (states1[--length],
    264 					     item_number_as_symbol_number (*rp));
    265 		  if (nullable[*rp - ntokens])
    266 		    done = false;
    267 		}
    268 	    }
    269 	}
    270 
    271       if (nedges == 0)
    272 	includes[i] = NULL;
    273       else
    274 	{
    275 	  int j;
    276 	  includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
    277 	  for (j = 0; j < nedges; j++)
    278 	    includes[i][j] = edge[j];
    279 	  includes[i][nedges] = END_NODE;
    280 	}
    281     }
    282 
    283   free (edge);
    284   free (states1);
    285 
    286   relation_transpose (&includes, ngotos);
    287 }
    288 
    289 
    290 
    291 static void
    292 compute_FOLLOWS (void)
    293 {
    294   goto_number i;
    295 
    296   relation_digraph (includes, ngotos, &goto_follows);
    297 
    298   for (i = 0; i < ngotos; i++)
    299     free (includes[i]);
    300 
    301   free (includes);
    302 }
    303 
    304 
    305 static void
    306 compute_lookahead_tokens (void)
    307 {
    308   size_t i;
    309   goto_list *sp;
    310 
    311   for (i = 0; i < nLA; i++)
    312     for (sp = lookback[i]; sp; sp = sp->next)
    313       bitset_or (LA[i], LA[i], goto_follows[sp->value]);
    314 
    315   /* Free LOOKBACK. */
    316   for (i = 0; i < nLA; i++)
    317     LIST_FREE (goto_list, lookback[i]);
    318 
    319   free (lookback);
    320 }
    321 
    322 
    323 /*----------------------------------------------------.
    324 | Count the number of lookahead tokens required for S |
    325 | (N_LOOKAHEAD_TOKENS member).                        |
    326 `----------------------------------------------------*/
    327 
    328 static int
    329 state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept)
    330 {
    331   int n_lookahead_tokens = 0;
    332   reductions *rp = s->reductions;
    333   transitions *sp = s->transitions;
    334 
    335   /* Transitions are only disabled during conflict resolution, and that
    336      hasn't happened yet, so there should be no need to check that
    337      transition 0 hasn't been disabled before checking if it is a shift.
    338      However, this check was performed at one time, so we leave it as an
    339      aver.  */
    340   aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
    341 
    342   /* We need a lookahead either to distinguish different reductions
    343      (i.e., there are two or more), or to distinguish a reduction from a
    344      shift.  Otherwise, it is straightforward, and the state is
    345      `consistent'.  However, do not treat a state with any reductions as
    346      consistent unless it is the accepting state (because there is never
    347      a lookahead token that makes sense there, and so no lookahead token
    348      should be read) if the user has otherwise disabled default
    349      reductions.  */
    350   if (rp->num > 1
    351       || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))
    352       || (rp->num == 1 && rp->rules[0]->number != 0
    353           && default_reduction_only_for_accept))
    354     n_lookahead_tokens += rp->num;
    355   else
    356     s->consistent = 1;
    357 
    358   return n_lookahead_tokens;
    359 }
    360 
    361 
    362 /*----------------------------------------------------.
    363 | Compute LA, NLA, and the lookahead_tokens members.  |
    364 `----------------------------------------------------*/
    365 
    366 void
    367 initialize_LA (void)
    368 {
    369   state_number i;
    370   bitsetv pLA;
    371   bool default_reduction_only_for_accept;
    372   {
    373     char *default_reductions =
    374       muscle_percent_define_get ("lr.default-reductions");
    375     default_reduction_only_for_accept =
    376       0 == strcmp (default_reductions, "accepting");
    377     free (default_reductions);
    378   }
    379 
    380   /* Compute the total number of reductions requiring a lookahead.  */
    381   nLA = 0;
    382   for (i = 0; i < nstates; i++)
    383     nLA +=
    384       state_lookahead_tokens_count (states[i],
    385                                     default_reduction_only_for_accept);
    386   /* Avoid having to special case 0.  */
    387   if (!nLA)
    388     nLA = 1;
    389 
    390   pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
    391 
    392   /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
    393      require lookahead tokens.  */
    394   for (i = 0; i < nstates; i++)
    395     {
    396       int count =
    397         state_lookahead_tokens_count (states[i],
    398                                       default_reduction_only_for_accept);
    399       if (count)
    400 	{
    401 	  states[i]->reductions->lookahead_tokens = pLA;
    402 	  pLA += count;
    403 	}
    404     }
    405 }
    406 
    407 
    408 /*---------------------------------------------.
    409 | Output the lookahead tokens for each state.  |
    410 `---------------------------------------------*/
    411 
    412 static void
    413 lookahead_tokens_print (FILE *out)
    414 {
    415   state_number i;
    416   int j, k;
    417   fprintf (out, "Lookahead tokens: BEGIN\n");
    418   for (i = 0; i < nstates; ++i)
    419     {
    420       reductions *reds = states[i]->reductions;
    421       bitset_iterator iter;
    422       int n_lookahead_tokens = 0;
    423 
    424       if (reds->lookahead_tokens)
    425 	for (k = 0; k < reds->num; ++k)
    426 	  if (reds->lookahead_tokens[k])
    427 	    ++n_lookahead_tokens;
    428 
    429       fprintf (out, "State %d: %d lookahead tokens\n",
    430 	       i, n_lookahead_tokens);
    431 
    432       if (reds->lookahead_tokens)
    433 	for (j = 0; j < reds->num; ++j)
    434 	  BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
    435 	  {
    436 	    fprintf (out, "   on %d (%s) -> rule %d\n",
    437 		     k, symbols[k]->tag,
    438 		     reds->rules[j]->number);
    439 	  };
    440     }
    441   fprintf (out, "Lookahead tokens: END\n");
    442 }
    443 
    444 void
    445 lalr (void)
    446 {
    447   initialize_LA ();
    448   set_goto_map ();
    449   initialize_F ();
    450   lookback = xcalloc (nLA, sizeof *lookback);
    451   build_relations ();
    452   compute_FOLLOWS ();
    453   compute_lookahead_tokens ();
    454 
    455   if (trace_flag & trace_sets)
    456     lookahead_tokens_print (stderr);
    457 }
    458 
    459 
    460 void
    461 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
    462 {
    463   goto_number ngotos_reachable = 0;
    464   symbol_number nonterminal = 0;
    465   aver (nsyms == nvars + ntokens);
    466   {
    467     goto_number i;
    468     for (i = 0; i < ngotos; ++i)
    469       {
    470         while (i == goto_map[nonterminal])
    471           goto_map[nonterminal++] = ngotos_reachable;
    472         /* If old_to_new[from_state[i]] = nstates_old, remove this goto
    473            entry.  */
    474         if (old_to_new[from_state[i]] != nstates_old)
    475           {
    476             /* from_state[i] is not removed, so it and thus to_state[i] are
    477                reachable, so to_state[i] != nstates_old.  */
    478             aver (old_to_new[to_state[i]] != nstates_old);
    479             from_state[ngotos_reachable] = old_to_new[from_state[i]];
    480             to_state[ngotos_reachable] = old_to_new[to_state[i]];
    481             ++ngotos_reachable;
    482           }
    483       }
    484   }
    485   while (nonterminal <= nvars)
    486     {
    487       aver (ngotos == goto_map[nonterminal]);
    488       goto_map[nonterminal++] = ngotos_reachable;
    489     }
    490   ngotos = ngotos_reachable;
    491 }
    492 
    493 
    494 void
    495 lalr_free (void)
    496 {
    497   state_number s;
    498   for (s = 0; s < nstates; ++s)
    499     states[s]->reductions->lookahead_tokens = NULL;
    500   bitsetv_free (LA);
    501 }
    502