Home | History | Annotate | Download | only in src
      1 %{/* Bison Grammar Parser                             -*- C -*-
      2 
      3    Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
      4 
      5    This file is part of Bison, the GNU Compiler Compiler.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 2 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     20    02110-1301  USA
     21 */
     22 
     23 #include <config.h>
     24 #include "system.h"
     25 
     26 #include "complain.h"
     27 #include "conflicts.h"
     28 #include "files.h"
     29 #include "getargs.h"
     30 #include "gram.h"
     31 #include "muscle_tab.h"
     32 #include "quotearg.h"
     33 #include "reader.h"
     34 #include "symlist.h"
     35 #include "strverscmp.h"
     36 
/* Default location action of the generated parser: the location of a
   left-hand side is computed by lloc_default (defined after the
   second "%%") from the locations of the right-hand side.  */
#define YYLLOC_DEFAULT(Current, Rhs, N)  (Current) = lloc_default (Rhs, N)
static YYLTYPE lloc_default (YYLTYPE const *, int);

/* Locations are printed with location_print.  */
#define YY_LOCATION_PRINT(File, Loc) \
	  location_print (File, Loc)

/* Called by the "%require" declaration: compares the requested
   version with PACKAGE_VERSION.  */
static void version_check (location const *loc, char const *version);

/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
   FIXME: depends on the undocumented availability of YYLLOC.  */
#undef  yyerror
#define yyerror(Msg) \
	gram_error (&yylloc, Msg)
static void gram_error (location const *, char const *);

/* Called by the "%lex-param {...}" and "%parse-param {...}"
   declarations.  */
static void add_param (char const *, char *, location);

/* Class given to the symbols defined by the "%token" or "%nterm"
   declaration being parsed; reset to unknown_sym once it is done.  */
static symbol_class current_class = unknown_sym;
/* Type tag applied to the symbols currently being declared; set when
   a TYPE token is read and reset to NULL after the declaration.  */
static uniqstr current_type = 0;
/* Left-hand side of the rule being parsed and its location; both are
   set when an "identifier:" token is reduced in `rules'.  */
static symbol *current_lhs;
static location current_lhs_location;
/* Precedence level; incremented by each precedence declaration.  */
static int current_prec = 0;

/* Use the "fast" integer types for the YYTYPE_* macros whenever the
   corresponding limit macro is defined.
   NOTE(review): presumably these are consulted by the parser skeleton
   to choose the element types of its tables -- confirm.  */
#ifdef UINT_FAST8_MAX
# define YYTYPE_UINT8 uint_fast8_t
#endif
#ifdef INT_FAST8_MAX
# define YYTYPE_INT8 int_fast8_t
#endif
#ifdef UINT_FAST16_MAX
# define YYTYPE_UINT16 uint_fast16_t
#endif
#ifdef INT_FAST16_MAX
# define YYTYPE_INT16 int_fast16_t
#endif
     72 %}
     73 
/* Options for the parser generated from this grammar.
   NOTE(review): "%defines" is given twice below; the second
   occurrence looks redundant -- confirm before removing.  */
%debug
%verbose
%defines
%locations
%pure-parser
%error-verbose
%defines
%name-prefix="gram_"

/* Start every parse at line 1, column 0 of CURRENT_FILE.  */
%initial-action
{
  /* A grammar can begin with empty locations (e.g. the empty
     `declarations' rule), hence a default location is needed.  */
  @$.start.file   = @$.end.file   = current_file;
  @$.start.line   = @$.end.line   = 1;
  @$.start.column = @$.end.column = 0;
}
     91 
/* Semantic value type.  Which member a symbol uses is given by the
   %type declarations that follow the token definitions.  */
%union
{
  symbol *symbol;	/* ID, ID_COLON, symbol, string_as_id.  */
  symbol_list *list;	/* symbols.1.  */
  int integer;		/* INT.  */
  char *chars;		/* Strings, braced code, prologue, epilogue.  */
  assoc assoc;		/* precedence_declarator.  */
  uniqstr uniqstr;	/* TYPE.  */
};
    102 
/* Define the tokens together with their human representation.  The
   quoted alias is what the grammar rules below use to refer to the
   token.  GRAM_EOF is explicitly given token number 0.  */
%token GRAM_EOF 0 "end of file"
%token STRING     "string"
%token INT        "integer"

%token PERCENT_TOKEN       "%token"
%token PERCENT_NTERM       "%nterm"

/* A token whose alias ends in "{...}" has the braced code as its
   semantic value (see the %type <chars> declaration below).  */
%token PERCENT_TYPE        "%type"
%token PERCENT_DESTRUCTOR  "%destructor {...}"
%token PERCENT_PRINTER     "%printer {...}"

%token PERCENT_UNION       "%union {...}"

%token PERCENT_LEFT        "%left"
%token PERCENT_RIGHT       "%right"
%token PERCENT_NONASSOC    "%nonassoc"

/* Rule modifiers, used inside a right-hand side.  */
%token PERCENT_PREC          "%prec"
%token PERCENT_DPREC         "%dprec"
%token PERCENT_MERGE         "%merge"


/*----------------------.
| Global Declarations.  |
`----------------------*/

%token
  PERCENT_DEBUG           "%debug"
  PERCENT_DEFAULT_PREC    "%default-prec"
  PERCENT_DEFINE          "%define"
  PERCENT_DEFINES         "%defines"
  PERCENT_ERROR_VERBOSE   "%error-verbose"
  PERCENT_EXPECT          "%expect"
  PERCENT_EXPECT_RR	  "%expect-rr"
  PERCENT_FILE_PREFIX     "%file-prefix"
  PERCENT_GLR_PARSER      "%glr-parser"
  PERCENT_INITIAL_ACTION  "%initial-action {...}"
  PERCENT_LEX_PARAM       "%lex-param {...}"
  PERCENT_LOCATIONS       "%locations"
  PERCENT_NAME_PREFIX     "%name-prefix"
  PERCENT_NO_DEFAULT_PREC "%no-default-prec"
  PERCENT_NO_LINES        "%no-lines"
  PERCENT_NONDETERMINISTIC_PARSER
			  "%nondeterministic-parser"
  PERCENT_OUTPUT          "%output"
  PERCENT_PARSE_PARAM     "%parse-param {...}"
  PERCENT_PURE_PARSER     "%pure-parser"
  PERCENT_REQUIRE	  "%require"
  PERCENT_SKELETON        "%skeleton"
  PERCENT_START           "%start"
  PERCENT_TOKEN_TABLE     "%token-table"
  PERCENT_VERBOSE         "%verbose"
  PERCENT_YACC            "%yacc"
;

/* Punctuation, identifiers and code fragments.  */
%token TYPE            "type"
%token EQUAL           "="
%token SEMICOLON       ";"
%token PIPE            "|"
%token ID              "identifier"
%token ID_COLON        "identifier:"
%token PERCENT_PERCENT "%%"
%token PROLOGUE        "%{...%}"
%token EPILOGUE        "epilogue"
%token BRACED_CODE     "{...}"
    170 
/* Semantic value types of the symbols, each followed by the %printer
   used to display such values (on stderr) in parser traces.  */

/* Strings and code fragments are plain C strings.  */
%type <chars> STRING string_content
	      "%destructor {...}"
	      "%initial-action {...}"
	      "%lex-param {...}"
	      "%parse-param {...}"
	      "%printer {...}"
	      "%union {...}"
	      PROLOGUE EPILOGUE
/* Strings are printed between double quotes...  */
%printer { fprintf (stderr, "\"%s\"", $$); }
	      STRING string_content
/* ... and code fragments between braces, on lines of their own.  */
%printer { fprintf (stderr, "{\n%s\n}", $$); }
	      "%destructor {...}"
	      "%initial-action {...}"
	      "%lex-param {...}"
	      "%parse-param {...}"
	      "%printer {...}"
	      "%union {...}"
	      PROLOGUE EPILOGUE
%type <uniqstr> TYPE
%printer { fprintf (stderr, "<%s>", $$); } TYPE
%type <integer> INT
%printer { fprintf (stderr, "%d", $$); } INT
/* Symbols are printed by their tag; ID_COLON gets a trailing colon.  */
%type <symbol> ID symbol string_as_id
%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
%type <symbol> ID_COLON
%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
/* These two have no %printer.  */
%type <assoc> precedence_declarator
%type <list>  symbols.1
%%
    199 %%
    200 
/* A whole grammar file: the declarations, the "%%" separator, the
   rules, and an optional epilogue.  */
input:
  declarations "%%" grammar epilogue.opt
;
    204 
    205 
    206 	/*------------------------------------.
    207 	| Declarations: before the first %%.  |
    208 	`------------------------------------*/
    209 
/* Zero or more declarations.  */
declarations:
  /* Nothing */
| declarations declaration
;
    214 
/* One declaration of the first section.  Most alternatives simply
   record the option in the corresponding flag or variable.  The
   declarations that are also accepted in the rules section are
   factored out into grammar_declaration.  */
declaration:
  grammar_declaration
| PROLOGUE                                 { prologue_augment ($1, @1); }
| "%debug"                                 { debug_flag = true; }
| "%define" string_content
    {
      /* A %define without a value gives the variable the value "1".  */
      static char one[] = "1";
      muscle_insert ($2, one);
    }
| "%define" string_content string_content  { muscle_insert ($2, $3); }
| "%defines"                               { defines_flag = true; }
| "%error-verbose"                         { error_verbose = true; }
| "%expect" INT                            { expected_sr_conflicts = $2; }
| "%expect-rr" INT			   { expected_rr_conflicts = $2; }
| "%file-prefix" "=" string_content        { spec_file_prefix = $3; }
| "%glr-parser"
    {
      /* A GLR parser is also flagged as a nondeterministic parser.  */
      nondeterministic_parser = true;
      glr_parser = true;
    }
| "%initial-action {...}"
    {
      muscle_code_grow ("initial_action", $1, @1);
    }
| "%lex-param {...}"			   { add_param ("lex_param", $1, @1); }
| "%locations"                             { locations_flag = true; }
| "%name-prefix" "=" string_content        { spec_name_prefix = $3; }
| "%no-lines"                              { no_lines_flag = true; }
| "%nondeterministic-parser"		   { nondeterministic_parser = true; }
| "%output" "=" string_content             { spec_outfile = $3; }
| "%parse-param {...}"			   { add_param ("parse_param", $1, @1); }
| "%pure-parser"                           { pure_parser = true; }
| "%require" string_content                { version_check (&@2, $2); }
| "%skeleton" string_content               { skeleton = $2; }
| "%token-table"                           { token_table_flag = true; }
| "%verbose"                               { report_flag = report_states; }
| "%yacc"                                  { yacc_flag = true; }
| /*FIXME: Err?  What is this horror doing here? */ ";"
;
    254 
/* The declarations that are accepted both in the first section and,
   followed by a ";", in the rules section.  */
grammar_declaration:
  precedence_declaration
| symbol_declaration
| "%start" symbol
    {
      grammar_start_symbol_set ($2, @2);
    }
| "%union {...}"
    {
      char const *body = $1;

      /* TYPED is true once a first %union has been processed.  */
      if (typed)
	{
	  /* Concatenate the union bodies, turning the first one's
	     trailing '}' into '\n', and omitting the second one's '{'.  */
	  char *code = muscle_find ("stype");
	  code[strlen (code) - 1] = '\n';
	  body++;
	}

      typed = true;
      muscle_code_grow ("stype", body, @1);
    }
| "%destructor {...}" symbols.1
    {
      /* Give the same destructor code to every listed symbol.  */
      symbol_list *list;
      for (list = $2; list; list = list->next)
	symbol_destructor_set (list->sym, $1, @1);
      symbol_list_free ($2);
    }
| "%printer {...}" symbols.1
    {
      /* Give the same printer code to every listed symbol.  */
      symbol_list *list;
      for (list = $2; list; list = list->next)
	symbol_printer_set (list->sym, $1, @1);
      symbol_list_free ($2);
    }
| "%default-prec"
    {
      default_prec = true;
    }
| "%no-default-prec"
    {
      default_prec = false;
    }
;
    301 
/* "%nterm", "%token" and "%type" declarations.  For the first two, a
   mid-rule action sets CURRENT_CLASS before the definitions are
   parsed, so that symbol_def can apply it; the final action resets
   CURRENT_CLASS and CURRENT_TYPE.  */
symbol_declaration:
  "%nterm" { current_class = nterm_sym; } symbol_defs.1
    {
      current_class = unknown_sym;
      current_type = NULL;
    }
| "%token" { current_class = token_sym; } symbol_defs.1
    {
      current_class = unknown_sym;
      current_type = NULL;
    }
| "%type" TYPE symbols.1
    {
      /* Give type $2 to every listed symbol.  */
      symbol_list *list;
      for (list = $3; list; list = list->next)
	symbol_type_set (list->sym, $2, @2);
      symbol_list_free ($3);
    }
;
    321 
/* "%left", "%right" or "%nonassoc", an optional type, and symbols.
   Each declaration uses a new precedence level (CURRENT_PREC is
   incremented first); every listed symbol receives that level, the
   associativity $1, and the type stored in CURRENT_TYPE by type.opt.  */
precedence_declaration:
  precedence_declarator type.opt symbols.1
    {
      symbol_list *list;
      ++current_prec;
      for (list = $3; list; list = list->next)
	{
	  symbol_type_set (list->sym, current_type, @2);
	  symbol_precedence_set (list->sym, current_prec, $1, @1);
	}
      symbol_list_free ($3);
      current_type = NULL;
    }
;
    336 
/* The associativity named by a precedence keyword.  */
precedence_declarator:
  "%left"     { $$ = left_assoc; }
| "%right"    { $$ = right_assoc; }
| "%nonassoc" { $$ = non_assoc; }
;
    342 
/* An optional type.  It has no semantic value: the result is left in
   CURRENT_TYPE (NULL when the type is absent).  */
type.opt:
  /* Nothing. */ { current_type = NULL; }
| TYPE           { current_type = $1; }
;
    347 
/* One or more symbols, gathered in a symbol_list.  Used by "%type",
   "%destructor {...}", "%printer {...}" and the precedence
   declarations; the user of the list frees it with symbol_list_free.
   NOTE(review): each new symbol is added with symbol_list_prepend, so
   the list is presumably in reverse order of appearance -- confirm in
   symlist.c.  */

symbols.1:
  symbol            { $$ = symbol_list_new ($1, @1); }
| symbols.1 symbol  { $$ = symbol_list_prepend ($1, $2, @2); }
;
    354 
/* One item of a "%token" or "%nterm" declaration: either a type,
   which becomes CURRENT_TYPE for the definitions that follow, or an
   identifier optionally followed by a user token number and/or a
   string alias.  The identifier gets CURRENT_CLASS and CURRENT_TYPE.  */
symbol_def:
  TYPE
     {
       current_type = $1;
     }
| ID
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
     }
| ID INT
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
    }
| ID string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      /* The alias is located at the whole definition (@$).  */
      symbol_make_alias ($1, $2, @$);
    }
| ID INT string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
      symbol_make_alias ($1, $3, @$);
    }
;
    386 
/* One or more symbol definitions.  No list is built: each symbol_def
   acts on its symbol directly.  */
symbol_defs.1:
  symbol_def
| symbol_defs.1 symbol_def
;
    392 
    393 
    394 	/*------------------------------------------.
    395 	| The grammar section: between the two %%.  |
    396 	`------------------------------------------*/
    397 
/* The rules section: one or more rules or grammar declarations.  */
grammar:
  rules_or_grammar_declaration
| grammar rules_or_grammar_declaration
;
    402 
/* As a Bison extension, one can use the grammar declarations in the
   body of the grammar.  There, a declaration must be terminated by a
   ";".  After a syntax error, input is skipped up to the next ";" and
   parsing resumes normally (yyerrok).  */
rules_or_grammar_declaration:
  rules
| grammar_declaration ";"
| error ";"
    {
      yyerrok;
    }
;
    413 
/* All the rules of one left-hand side.  The mid-rule action saves the
   left-hand side and its location before the right-hand sides are
   parsed, because `rhs' needs them to begin each rule.  */
rules:
  ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
;
    417 
/* One or more right-hand sides separated by "|".  Each complete
   right-hand side ends the current rule.  A ";" after the right-hand
   sides is accepted and triggers no action.  */
rhses.1:
  rhs                { grammar_current_rule_end (@1); }
| rhses.1 "|" rhs    { grammar_current_rule_end (@3); }
| rhses.1 ";"
;
    423 
/* One right-hand side.  The empty alternative is reduced first and
   begins a new rule for CURRENT_LHS; every following item is then
   added to that rule as soon as it is parsed.  */
rhs:
  /* Nothing.  */
    { grammar_current_rule_begin (current_lhs, current_lhs_location); }
| rhs symbol
    { grammar_current_rule_symbol_append ($2, @2); }
| rhs action
| rhs "%prec" symbol
    { grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
    { grammar_current_rule_dprec_set ($3, @3); }
| rhs "%merge" TYPE
    { grammar_current_rule_merge_set ($3, @3); }
;
    437 
/* A symbol is denoted by an identifier or by a string.  */
symbol:
  ID              { $$ = $1; }
| string_as_id    { $$ = $1; }
;
    442 
/* Handle the semantics of an action specially, with a mid-rule
   action, so that grammar_current_rule_action_append is invoked
   immediately after the braced code is read by the scanner.

   This implementation relies on the LALR(1) parsing algorithm.
   If grammar_current_rule_action_append were executed in a normal
   action for this rule, then when the input grammar contains two
   successive actions, the scanner would have to read both actions
   before reducing this rule.  That wouldn't work, since the scanner
   relies on all preceding input actions being processed by
   grammar_current_rule_action_append before it scans the next
   action.

   Hence the mid-rule action is written *before* BRACED_CODE: it runs
   once BRACED_CODE is the lookahead token, and it does not use the
   token's semantic value but LAST_STRING and LAST_BRACED_CODE_LOC.
   NOTE(review): those two are declared elsewhere, presumably set by
   the scanner when it reads the braced code -- confirm.  */
action:
    { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
  BRACED_CODE
;
    459 
/* A string used as an ID: quote it.  The symbol is looked up under
   the C-quoted form of the string, and is given the token class.  */
string_as_id:
  STRING
    {
      $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
      symbol_class_set ($$, token_sym, @1, false);
    }
;
    468 
/* A string used for its contents.  Don't quote it: the value is the
   STRING token's value, unchanged.  */
string_content:
  STRING
    { $$ = $1; }
;
    474 
    475 
/* The optional epilogue: a second "%%" followed by the remaining
   text, which is appended to the "epilogue" muscle.  The scanner's
   last string is freed afterwards.  */
epilogue.opt:
  /* Nothing.  */
| "%%" EPILOGUE
    {
      muscle_code_grow ("epilogue", $2, @2);
      scanner_last_string_free ();
    }
;
    484 
    485 %%
    486 
    487 
    488 /* Return the location of the left-hand side of a rule whose
    489    right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
    490    the right-hand side, and return an empty location equal to the end
    491    boundary of RHS[0] if the right-hand side is empty.  */
    492 
    493 static YYLTYPE
    494 lloc_default (YYLTYPE const *rhs, int n)
    495 {
    496   int i;
    497   YYLTYPE loc;
    498 
    499   /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
    500      The bug is fixed in 7.4.2m, but play it safe for now.  */
    501   loc.start = rhs[n].end;
    502   loc.end = rhs[n].end;
    503 
    504   /* Ignore empty nonterminals the start of the the right-hand side.
    505      Do not bother to ignore them at the end of the right-hand side,
    506      since empty nonterminals have the same end as their predecessors.  */
    507   for (i = 1; i <= n; i++)
    508     if (! equal_boundaries (rhs[i].start, rhs[i].end))
    509       {
    510 	loc.start = rhs[i].start;
    511 	break;
    512       }
    513 
    514   return loc;
    515 }
    516 
    517 
    518 /* Add a lex-param or a parse-param (depending on TYPE) with
    519    declaration DECL and location LOC.  */
    520 
    521 static void
    522 add_param (char const *type, char *decl, location loc)
    523 {
    524   static char const alphanum[26 + 26 + 1 + 10] =
    525     "abcdefghijklmnopqrstuvwxyz"
    526     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    527     "_"
    528     "0123456789";
    529   char const *name_start = NULL;
    530   char *p;
    531 
    532   /* Stop on last actual character.  */
    533   for (p = decl; p[1]; p++)
    534     if ((p == decl
    535 	 || ! memchr (alphanum, p[-1], sizeof alphanum))
    536 	&& memchr (alphanum, p[0], sizeof alphanum - 10))
    537       name_start = p;
    538 
    539   /* Strip the surrounding '{' and '}', and any blanks just inside
    540      the braces.  */
    541   while (*--p == ' ' || *p == '\t')
    542     continue;
    543   p[1] = '\0';
    544   while (*++decl == ' ' || *decl == '\t')
    545     continue;
    546 
    547   if (! name_start)
    548     complain_at (loc, _("missing identifier in parameter declaration"));
    549   else
    550     {
    551       char *name;
    552       size_t name_len;
    553 
    554       for (name_len = 1;
    555 	   memchr (alphanum, name_start[name_len], sizeof alphanum);
    556 	   name_len++)
    557 	continue;
    558 
    559       name = xmalloc (name_len + 1);
    560       memcpy (name, name_start, name_len);
    561       name[name_len] = '\0';
    562       muscle_pair_list_grow (type, decl, name);
    563       free (name);
    564     }
    565 
    566   scanner_last_string_free ();
    567 }
    568 
    569 static void
    570 version_check (location const *loc, char const *version)
    571 {
    572   if (strverscmp (version, PACKAGE_VERSION) > 0)
    573     {
    574       complain_at (*loc, "require bison %s, but have %s",
    575 		   version, PACKAGE_VERSION);
    576       exit (63);
    577     }
    578 }
    579 
    580 static void
    581 gram_error (location const *loc, char const *msg)
    582 {
    583   complain_at (*loc, "%s", msg);
    584 }
    585 
    586 char const *
    587 token_name (int type)
    588 {
    589   return yytname[YYTRANSLATE (type)];
    590 }
    591