Home | History | Annotate | Download | only in src
      1 /* Symbol table manager for Bison.
      2 
      3    Copyright (C) 1984, 1989, 2000, 2001, 2002, 2004, 2005, 2006 Free
      4    Software Foundation, Inc.
      5 
      6    This file is part of Bison, the GNU Compiler Compiler.
      7 
      8    Bison is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 2, or (at your option)
     11    any later version.
     12 
     13    Bison is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with Bison; see the file COPYING.  If not, write to
     20    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21    Boston, MA 02110-1301, USA.  */
     22 
     23 #include <config.h>
     24 #include "system.h"
     25 
     26 #include <hash.h>
     27 #include <quotearg.h>
     28 
     29 #include "complain.h"
     30 #include "gram.h"
     31 #include "symtab.h"
     32 
     33 /*------------------------.
     34 | Distinguished symbols.  |
     35 `------------------------*/
     36 
     37 symbol *errtoken = NULL;
     38 symbol *undeftoken = NULL;
     39 symbol *endtoken = NULL;
     40 symbol *accept = NULL;
     41 symbol *startsymbol = NULL;
     42 location startsymbol_location;
     43 
     44 /*---------------------------------.
     45 | Create a new symbol, named TAG.  |
     46 `---------------------------------*/
     47 
     48 static symbol *
     49 symbol_new (uniqstr tag, location loc)
     50 {
     51   symbol *res = xmalloc (sizeof *res);
     52 
     53   uniqstr_assert (tag);
     54   res->tag = tag;
     55   res->location = loc;
     56 
     57   res->type_name = NULL;
     58   res->destructor = NULL;
     59   res->printer = NULL;
     60 
     61   res->number = NUMBER_UNDEFINED;
     62   res->prec = 0;
     63   res->assoc = undef_assoc;
     64   res->user_token_number = USER_NUMBER_UNDEFINED;
     65 
     66   res->alias = NULL;
     67   res->class = unknown_sym;
     68   res->declared = false;
     69 
     70   if (nsyms == SYMBOL_NUMBER_MAXIMUM)
     71     fatal (_("too many symbols in input grammar (limit is %d)"),
     72 	   SYMBOL_NUMBER_MAXIMUM);
     73   nsyms++;
     74   return res;
     75 }
     76 
     77 
     78 /*-----------------.
     79 | Print a symbol.  |
     80 `-----------------*/
     81 
     82 #define SYMBOL_ATTR_PRINT(Attr)				\
     83   if (s->Attr)						\
     84     fprintf (f, " %s { %s }", #Attr, s->Attr)
     85 
     86 void
     87 symbol_print (symbol *s, FILE *f)
     88 {
     89   if (s)
     90     {
     91       fprintf (f, "\"%s\"", s->tag);
     92       SYMBOL_ATTR_PRINT (type_name);
     93       SYMBOL_ATTR_PRINT (destructor);
     94       SYMBOL_ATTR_PRINT (printer);
     95     }
     96   else
     97     fprintf (f, "<NULL>");
     98 }
     99 
    100 #undef SYMBOL_ATTR_PRINT
    101 
    102 /*------------------------------------------------------------------.
    103 | Complain that S's WHAT is redeclared at SECOND, and was first set |
    104 | at FIRST.                                                         |
    105 `------------------------------------------------------------------*/
    106 
    107 static void
    108 redeclaration (symbol* s, const char *what, location first, location second)
    109 {
    110   complain_at (second, _("%s redeclaration for %s"), what, s->tag);
    111   complain_at (first, _("first declaration"));
    112 }
    113 
    114 
    115 /*-----------------------------------------------------------------.
    116 | Set the TYPE_NAME associated with SYM.  Does nothing if passed 0 |
    117 | as TYPE_NAME.                                                    |
    118 `-----------------------------------------------------------------*/
    119 
    120 void
    121 symbol_type_set (symbol *sym, uniqstr type_name, location loc)
    122 {
    123   if (type_name)
    124     {
    125       if (sym->type_name)
    126 	redeclaration (sym, "%type", sym->type_location, loc);
    127       uniqstr_assert (type_name);
    128       sym->type_name = type_name;
    129       sym->type_location = loc;
    130     }
    131 }
    132 
    133 
    134 /*------------------------------------------------------------------.
    135 | Set the DESTRUCTOR associated with SYM.  Do nothing if passed 0.  |
    136 `------------------------------------------------------------------*/
    137 
    138 void
    139 symbol_destructor_set (symbol *sym, const char *destructor, location loc)
    140 {
    141   if (destructor)
    142     {
    143       if (sym->destructor)
    144 	redeclaration (sym, "%destructor", sym->destructor_location, loc);
    145       sym->destructor = destructor;
    146       sym->destructor_location = loc;
    147     }
    148 }
    149 
    150 
    151 /*---------------------------------------------------------------.
    152 | Set the PRINTER associated with SYM.  Do nothing if passed 0.  |
    153 `---------------------------------------------------------------*/
    154 
    155 void
    156 symbol_printer_set (symbol *sym, const char *printer, location loc)
    157 {
    158   if (printer)
    159     {
    160       if (sym->printer)
    161 	redeclaration (sym, "%printer", sym->destructor_location, loc);
    162       sym->printer = printer;
    163       sym->printer_location = loc;
    164     }
    165 }
    166 
    167 
    168 /*-----------------------------------------------------------------.
    169 | Set the PRECEDENCE associated with SYM.  Does nothing if invoked |
    170 | with UNDEF_ASSOC as ASSOC.                                       |
    171 `-----------------------------------------------------------------*/
    172 
    173 void
    174 symbol_precedence_set (symbol *sym, int prec, assoc a, location loc)
    175 {
    176   if (a != undef_assoc)
    177     {
    178       if (sym->prec != 0)
    179 	redeclaration (sym, assoc_to_string (a), sym->prec_location, loc);
    180       sym->prec = prec;
    181       sym->assoc = a;
    182       sym->prec_location = loc;
    183     }
    184 
    185   /* Only terminals have a precedence. */
    186   symbol_class_set (sym, token_sym, loc, false);
    187 }
    188 
    189 
    190 /*------------------------------------.
    191 | Set the CLASS associated with SYM.  |
    192 `------------------------------------*/
    193 
    194 void
    195 symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
    196 {
    197   if (sym->class != unknown_sym && sym->class != class)
    198     {
    199       complain_at (loc, _("symbol %s redefined"), sym->tag);
    200       sym->declared = false;
    201     }
    202 
    203   if (class == nterm_sym && sym->class != nterm_sym)
    204     sym->number = nvars++;
    205   else if (class == token_sym && sym->number == NUMBER_UNDEFINED)
    206     sym->number = ntokens++;
    207 
    208   sym->class = class;
    209 
    210   if (declaring)
    211     {
    212       if (sym->declared)
    213 	warn_at (loc, _("symbol %s redeclared"), sym->tag);
    214       sym->declared = true;
    215     }
    216 }
    217 
    218 
    219 /*------------------------------------------------.
    220 | Set the USER_TOKEN_NUMBER associated with SYM.  |
    221 `------------------------------------------------*/
    222 
    223 void
    224 symbol_user_token_number_set (symbol *sym, int user_token_number, location loc)
    225 {
    226   assert (sym->class == token_sym);
    227 
    228   if (sym->user_token_number != USER_NUMBER_UNDEFINED
    229       && sym->user_token_number != user_token_number)
    230     complain_at (loc, _("redefining user token number of %s"), sym->tag);
    231 
    232   sym->user_token_number = user_token_number;
    233   /* User defined $end token? */
    234   if (user_token_number == 0)
    235     {
    236       endtoken = sym;
    237       endtoken->number = 0;
    238       /* It is always mapped to 0, so it was already counted in
    239 	 NTOKENS.  */
    240       --ntokens;
    241     }
    242 }
    243 
    244 
    245 /*----------------------------------------------------------.
    246 | If SYM is not defined, report an error, and consider it a |
    247 | nonterminal.                                              |
    248 `----------------------------------------------------------*/
    249 
    250 static inline bool
    251 symbol_check_defined (symbol *sym)
    252 {
    253   if (sym->class == unknown_sym)
    254     {
    255       complain_at
    256 	(sym->location,
    257 	 _("symbol %s is used, but is not defined as a token and has no rules"),
    258 	 sym->tag);
    259       sym->class = nterm_sym;
    260       sym->number = nvars++;
    261     }
    262 
    263   return true;
    264 }
    265 
    266 static bool
    267 symbol_check_defined_processor (void *sym, void *null ATTRIBUTE_UNUSED)
    268 {
    269   return symbol_check_defined (sym);
    270 }
    271 
    272 
    273 /*------------------------------------------------------------------.
    274 | Declare the new symbol SYM.  Make it an alias of SYMVAL, and type |
    275 | SYMVAL with SYM's type.                                           |
    276 `------------------------------------------------------------------*/
    277 
    278 void
    279 symbol_make_alias (symbol *sym, symbol *symval, location loc)
    280 {
    281   if (symval->alias)
    282     warn_at (loc, _("symbol `%s' used more than once as a literal string"),
    283 	     symval->tag);
    284   else if (sym->alias)
    285     warn_at (loc, _("symbol `%s' given more than one literal string"),
    286 	     sym->tag);
    287   else
    288     {
    289       symval->class = token_sym;
    290       symval->user_token_number = sym->user_token_number;
    291       sym->user_token_number = USER_NUMBER_ALIAS;
    292       symval->alias = sym;
    293       sym->alias = symval;
    294       /* sym and symval combined are only one symbol.  */
    295       nsyms--;
    296       ntokens--;
    297       assert (ntokens == sym->number || ntokens == symval->number);
    298       sym->number = symval->number =
    299 	(symval->number < sym->number) ? symval->number : sym->number;
    300       symbol_type_set (symval, sym->type_name, loc);
    301     }
    302 }
    303 
    304 
    305 /*---------------------------------------------------------.
    306 | Check that THIS, and its alias, have same precedence and |
    307 | associativity.                                           |
    308 `---------------------------------------------------------*/
    309 
    310 static inline void
    311 symbol_check_alias_consistency (symbol *this)
    312 {
    313   symbol *alias = this;
    314   symbol *orig  = this->alias;
    315 
    316   /* Check only those that _are_ the aliases.  */
    317   if (!(this->alias && this->user_token_number == USER_NUMBER_ALIAS))
    318     return;
    319 
    320   if (orig->type_name != alias->type_name)
    321     {
    322       if (orig->type_name)
    323 	symbol_type_set (alias, orig->type_name, orig->type_location);
    324       else
    325 	symbol_type_set (orig, alias->type_name, alias->type_location);
    326     }
    327 
    328 
    329   if (orig->destructor || alias->destructor)
    330     {
    331       if (orig->destructor)
    332 	symbol_destructor_set (alias, orig->destructor,
    333 			       orig->destructor_location);
    334       else
    335 	symbol_destructor_set (orig, alias->destructor,
    336 			       alias->destructor_location);
    337     }
    338 
    339   if (orig->printer || alias->printer)
    340     {
    341       if (orig->printer)
    342 	symbol_printer_set (alias, orig->printer, orig->printer_location);
    343       else
    344 	symbol_printer_set (orig, alias->printer, alias->printer_location);
    345     }
    346 
    347   if (alias->prec || orig->prec)
    348     {
    349       if (orig->prec)
    350 	symbol_precedence_set (alias, orig->prec, orig->assoc,
    351 			       orig->prec_location);
    352       else
    353 	symbol_precedence_set (orig, alias->prec, alias->assoc,
    354 			       alias->prec_location);
    355     }
    356 }
    357 
    358 static bool
    359 symbol_check_alias_consistency_processor (void *this,
    360 					  void *null ATTRIBUTE_UNUSED)
    361 {
    362   symbol_check_alias_consistency (this);
    363   return true;
    364 }
    365 
    366 
    367 /*-------------------------------------------------------------------.
    368 | Assign a symbol number, and write the definition of the token name |
    369 | into FDEFINES.  Put in SYMBOLS.                                    |
    370 `-------------------------------------------------------------------*/
    371 
    372 static inline bool
    373 symbol_pack (symbol *this)
    374 {
    375   if (this->class == nterm_sym)
    376     {
    377       this->number += ntokens;
    378     }
    379   else if (this->alias)
    380     {
    381       /* This symbol and its alias are a single token defn.
    382 	 Allocate a tokno, and assign to both check agreement of
    383 	 prec and assoc fields and make both the same */
    384       if (this->number == NUMBER_UNDEFINED)
    385 	{
    386 	  if (this == endtoken || this->alias == endtoken)
    387 	    this->number = this->alias->number = 0;
    388 	  else
    389 	    {
    390 	      assert (this->alias->number != NUMBER_UNDEFINED);
    391 	      this->number = this->alias->number;
    392 	    }
    393 	}
    394       /* Do not do processing below for USER_NUMBER_ALIASes.  */
    395       if (this->user_token_number == USER_NUMBER_ALIAS)
    396 	return true;
    397     }
    398   else /* this->class == token_sym */
    399     assert (this->number != NUMBER_UNDEFINED);
    400 
    401   symbols[this->number] = this;
    402   return true;
    403 }
    404 
    405 static bool
    406 symbol_pack_processor (void *this, void *null ATTRIBUTE_UNUSED)
    407 {
    408   return symbol_pack (this);
    409 }
    410 
    411 
    412 
    413 
    414 /*--------------------------------------------------.
    415 | Put THIS in TOKEN_TRANSLATIONS if it is a token.  |
    416 `--------------------------------------------------*/
    417 
    418 static inline bool
    419 symbol_translation (symbol *this)
    420 {
    421   /* Non-terminal? */
    422   if (this->class == token_sym
    423       && this->user_token_number != USER_NUMBER_ALIAS)
    424     {
    425       /* A token which translation has already been set? */
    426       if (token_translations[this->user_token_number] != undeftoken->number)
    427 	complain_at (this->location,
    428 		     _("tokens %s and %s both assigned number %d"),
    429 		     symbols[token_translations[this->user_token_number]]->tag,
    430 		     this->tag, this->user_token_number);
    431 
    432       token_translations[this->user_token_number] = this->number;
    433     }
    434 
    435   return true;
    436 }
    437 
    438 static bool
    439 symbol_translation_processor (void *this, void *null ATTRIBUTE_UNUSED)
    440 {
    441   return symbol_translation (this);
    442 }
    443 
    444 
    445 /*----------------------.
    446 | A symbol hash table.  |
    447 `----------------------*/
    448 
    449 /* Initial capacity of symbols hash table.  */
    450 #define HT_INITIAL_CAPACITY 257
    451 
    452 static struct hash_table *symbol_table = NULL;
    453 
    454 static inline bool
    455 hash_compare_symbol (const symbol *m1, const symbol *m2)
    456 {
    457   /* Since tags are unique, we can compare the pointers themselves.  */
    458   return UNIQSTR_EQ (m1->tag, m2->tag);
    459 }
    460 
    461 static bool
    462 hash_symbol_comparator (void const *m1, void const *m2)
    463 {
    464   return hash_compare_symbol (m1, m2);
    465 }
    466 
    467 static inline size_t
    468 hash_symbol (const symbol *m, size_t tablesize)
    469 {
    470   /* Since tags are unique, we can hash the pointer itself.  */
    471   return ((uintptr_t) m->tag) % tablesize;
    472 }
    473 
    474 static size_t
    475 hash_symbol_hasher (void const *m, size_t tablesize)
    476 {
    477   return hash_symbol (m, tablesize);
    478 }
    479 
    480 
    481 /*-------------------------------.
    482 | Create the symbol hash table.  |
    483 `-------------------------------*/
    484 
    485 void
    486 symbols_new (void)
    487 {
    488   symbol_table = hash_initialize (HT_INITIAL_CAPACITY,
    489 				  NULL,
    490 				  hash_symbol_hasher,
    491 				  hash_symbol_comparator,
    492 				  free);
    493 }
    494 
    495 
    496 /*----------------------------------------------------------------.
    497 | Find the symbol named KEY, and return it.  If it does not exist |
    498 | yet, create it.                                                 |
    499 `----------------------------------------------------------------*/
    500 
    501 symbol *
    502 symbol_get (const char *key, location loc)
    503 {
    504   symbol probe;
    505   symbol *entry;
    506 
    507   key = uniqstr_new (key);
    508   probe.tag = key;
    509   entry = hash_lookup (symbol_table, &probe);
    510 
    511   if (!entry)
    512     {
    513       /* First insertion in the hash. */
    514       entry = symbol_new (key, loc);
    515       hash_insert (symbol_table, entry);
    516     }
    517   return entry;
    518 }
    519 
    520 
    521 /*------------------------------------------------------------------.
    522 | Generate a dummy nonterminal, whose name cannot conflict with the |
    523 | user's names.                                                     |
    524 `------------------------------------------------------------------*/
    525 
    526 symbol *
    527 dummy_symbol_get (location loc)
    528 {
    529   /* Incremented for each generated symbol.  */
    530   static int dummy_count = 0;
    531   static char buf[256];
    532 
    533   symbol *sym;
    534 
    535   sprintf (buf, "@%d", ++dummy_count);
    536   sym = symbol_get (buf, loc);
    537   sym->class = nterm_sym;
    538   sym->number = nvars++;
    539   return sym;
    540 }
    541 
    542 
    543 /*-------------------.
    544 | Free the symbols.  |
    545 `-------------------*/
    546 
    547 void
    548 symbols_free (void)
    549 {
    550   hash_free (symbol_table);
    551   free (symbols);
    552 }
    553 
    554 
    555 /*---------------------------------------------------------------.
    556 | Look for undefined symbols, report an error, and consider them |
    557 | terminals.                                                     |
    558 `---------------------------------------------------------------*/
    559 
    560 static void
    561 symbols_do (Hash_processor processor, void *processor_data)
    562 {
    563   hash_do_for_each (symbol_table, processor, processor_data);
    564 }
    565 
    566 
    567 /*--------------------------------------------------------------.
    568 | Check that all the symbols are defined.  Report any undefined |
    569 | symbols and consider them nonterminals.                       |
    570 `--------------------------------------------------------------*/
    571 
    572 void
    573 symbols_check_defined (void)
    574 {
    575   symbols_do (symbol_check_defined_processor, NULL);
    576 }
    577 
    578 /*------------------------------------------------------------------.
    579 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
    580 | number.                                                           |
    581 `------------------------------------------------------------------*/
    582 
    583 static void
    584 symbols_token_translations_init (void)
    585 {
    586   bool num_256_available_p = true;
    587   int i;
    588 
    589   /* Find the highest user token number, and whether 256, the POSIX
    590      preferred user token number for the error token, is used.  */
    591   max_user_token_number = 0;
    592   for (i = 0; i < ntokens; ++i)
    593     {
    594       symbol *this = symbols[i];
    595       if (this->user_token_number != USER_NUMBER_UNDEFINED)
    596 	{
    597 	  if (this->user_token_number > max_user_token_number)
    598 	    max_user_token_number = this->user_token_number;
    599 	  if (this->user_token_number == 256)
    600 	    num_256_available_p = false;
    601 	}
    602     }
    603 
    604   /* If 256 is not used, assign it to error, to follow POSIX.  */
    605   if (num_256_available_p
    606       && errtoken->user_token_number == USER_NUMBER_UNDEFINED)
    607     errtoken->user_token_number = 256;
    608 
    609   /* Set the missing user numbers. */
    610   if (max_user_token_number < 256)
    611     max_user_token_number = 256;
    612 
    613   for (i = 0; i < ntokens; ++i)
    614     {
    615       symbol *this = symbols[i];
    616       if (this->user_token_number == USER_NUMBER_UNDEFINED)
    617 	this->user_token_number = ++max_user_token_number;
    618       if (this->user_token_number > max_user_token_number)
    619 	max_user_token_number = this->user_token_number;
    620     }
    621 
    622   token_translations = xnmalloc (max_user_token_number + 1,
    623 				 sizeof *token_translations);
    624 
    625   /* Initialize all entries for literal tokens to 2, the internal
    626      token number for $undefined, which represents all invalid inputs.
    627      */
    628   for (i = 0; i < max_user_token_number + 1; i++)
    629     token_translations[i] = undeftoken->number;
    630   symbols_do (symbol_translation_processor, NULL);
    631 }
    632 
    633 
    634 /*----------------------------------------------------------------.
    635 | Assign symbol numbers, and write definition of token names into |
    636 | FDEFINES.  Set up vectors SYMBOL_TABLE, TAGS of symbols.        |
    637 `----------------------------------------------------------------*/
    638 
    639 void
    640 symbols_pack (void)
    641 {
    642   symbols = xcalloc (nsyms, sizeof *symbols);
    643 
    644   symbols_do (symbol_check_alias_consistency_processor, NULL);
    645   symbols_do (symbol_pack_processor, NULL);
    646 
    647   symbols_token_translations_init ();
    648 
    649   if (startsymbol->class == unknown_sym)
    650     fatal_at (startsymbol_location,
    651 	      _("the start symbol %s is undefined"),
    652 	      startsymbol->tag);
    653   else if (startsymbol->class == token_sym)
    654     fatal_at (startsymbol_location,
    655 	      _("the start symbol %s is a token"),
    656 	      startsymbol->tag);
    657 }
    658