Home | History | Annotate | Download | only in java_cup
      1 
      2 /*================================================================*/
      3 /*
      4   JavaCup Specification for the JavaCup Specification Language
      5   by Scott Hudson, GVU Center, Georgia Tech, August 1995
      6 
      7   This JavaCup specification is used to implement JavaCup itself.
      8   It specifies the parser for the JavaCup specification language.
      9   (It also serves as a reasonable example of what a typical JavaCup
     10   spec looks like).
     11 
     12   The specification has the following parts:
     13     Package and import declarations
     14       These serve the same purpose as in a normal Java source file
     15       (and will appear in the generated code for the parser). In this
     16       case we are part of the java_cup package and we import both the
     17       java_cup runtime system and Hashtable from the standard Java
     18       utilities package.
     19 
     20     Action code
     21       This section provides code that is included with the class encapsulating
     22       the various pieces of user code embedded in the grammar (i.e., the
     23       semantic actions).  This provides a series of helper routines and
     24       data structures that the semantic actions use.
     25 
     26     Parser code
     27       This section provides code included in the parser class itself.  In
     28       this case we override the default error reporting routines.
     29 
     30     Init with and scan with
     31       These sections provide small bits of code that initialize, then
     32       indicate how to invoke the scanner.
     33 
     34     Symbols and grammar
     35       These sections declare all the terminal and non terminal symbols
     36       and the types of objects that they will be represented by at runtime,
     37       then indicate the start symbol of the grammar (spec), and finally provide
     38       the grammar itself (with embedded actions).
     39 
     40     Operation of the parser
     41       The parser acts primarily by accumulating data structures representing
     42       various parts of the specification.  Various small parts (e.g., single
     43       code strings) are stored as static variables of the emit class and
     44       in a few cases as variables declared in the action code section.
     45       Terminals, non terminals, and productions are maintained as collections
     46       accessible via static methods of those classes.  In addition, two
     47       symbol tables are kept:
     48     symbols   maintains the name to object mapping for all symbols
     49     non_terms maintains a separate mapping containing only the non terms
     50 
     51       Several intermediate working structures are also declared in the action
     52       code section.  These include: rhs_parts, rhs_pos, and lhs_nt which
     53       build up parts of the current production while it is being parsed.
     54 
     55   Author(s)
     56     Scott Hudson, GVU Center, Georgia Tech.
     57 
     58   Revisions
     59     v0.9a   First released version                     [SEH] 8/29/95
     60     v0.9b   Updated for beta language (throws clauses) [SEH] 11/25/95
     61 */
     62 /*================================================================*/
     63 
     64 package java_cup;
     65 import java_cup.runtime.*;
     66 import java.util.Hashtable;
     67 
     68 /*----------------------------------------------------------------*/
     69 
     70 action code {:
     71   /** give a symbol part a label by building a labeled copy of it */
     72   protected production_part add_lab(production_part part, String lab)
     73     throws internal_error
     74     {
     75       /* parts without a label, and action parts, are handed back untouched */
     76       boolean relabel = (lab != null) && !part.is_action();
     77 
     78       /* otherwise wrap the same underlying symbol again, now with the label */
     79       return relabel ? new symbol_part(((symbol_part)part).the_symbol(), lab) : part;
     80     }
     81 
     82   /** upper bound on the number of parts in one right hand side */
     83   protected final int MAX_RHS = 200;
     84 
     85   /** fixed-size buffer in which the parts of a right hand side are collected */
     86   protected production_part[] rhs_parts = new production_part[MAX_RHS];
     87 
     88   /** index of the next free slot in rhs_parts */
     89   protected int rhs_pos = 0;
     90 
     91   /** begin a fresh right hand side by rewinding to the first slot */
     92   protected void new_rhs() { rhs_pos = 0; }
     93 
     94   /** store one more part at the end of the right hand side being built */
     95   protected void add_rhs_part(production_part part) throws java.lang.Exception
     96     {
     97       /* the buffer cannot grow, so refuse to run past its end */
     98       if (!(rhs_pos < MAX_RHS))
     99         throw new Exception(
    100           "Internal Error: Productions limited to " + MAX_RHS + " symbols and actions");
    101       /* record the part in the next free slot and step past it */
    102       rhs_parts[rhs_pos++] = part;
    103     }
    104 
    105   /** dotted name (e.g. a package or type name) gathered one segment at a time */
    106   protected String multipart_name = new String();
    107 
    108   /** extend the accumulated multipart name by one more segment */
    109   protected void append_multipart(String name)
    110     {
    111       /* the first segment stands alone; every later one follows a dot */
    112       boolean first_segment = (multipart_name.length() == 0);
    113       String separator = first_segment ? "" : ".";
    114 
    115       /* glue separator and segment onto what has been gathered so far */
    116       multipart_name = multipart_name + separator + name;
    117     }
    118 
    119   /** table of declared symbols -- contains production parts indexed by name
              (terminals and non terminals alike, each wrapped in a symbol_part) */
    120   protected Hashtable symbols = new Hashtable();
    121 
    122   /** table of just non terminals -- contains non_terminals indexed by name */
    123   protected Hashtable non_terms = new Hashtable();
    124 
    125   /** declared start non_terminal (null until a start symbol has been chosen) */
    126   protected non_terminal start_nt = null;
    127 
    128   /** left hand side non terminal of the current production
              (null when the name on the left hand side was never declared) */
    129   protected non_terminal lhs_nt;
    130 
    131 :};
    132 
    133 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    134 
    135 parser code {:
    136 
    137   /* override error routines so that messages are emitted through the lexer */
    138 
    139   /** report an error that cannot be recovered from, then exit with status 1 */
    140   public void report_fatal_error(String message, Object info)
    141     {
    142       done_parsing();
    143       lexer.emit_error(message);
    144       System.err.println("Can't recover from previous error(s), giving up.");
    145       System.exit(1);
    146     }
    147 
    148   /** report an ordinary error by passing its message to the lexer; info is unused */
    149   public void report_error(String message, Object info)
    150     {
    151       lexer.emit_error(message);
    152     }
    153 :};
    154 
    155 /*----------------------------------------------------------------*/
    156 
    157 init with {: lexer.init(); :};               /* initialization code: calls lexer.init() */
    158 scan with {: return lexer.next_token(); :};  /* scanner hook: each token comes from lexer.next_token() */
    159 
    160 /*----------------------------------------------------------------*/
    161 
    162 terminal java_cup.runtime.token
            /* terminals declared with the plain runtime token type */
    163   PACKAGE, IMPORT, CODE, ACTION, PARSER, TERMINAL, NON, INIT, SCAN, WITH,
    164   START, SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR,
    165   DEBUG;
    166 
            /* terminals declared as str_token: the actions below read their str_val */
    167 terminal java_cup.runtime.str_token  ID, CODE_STRING;
    168 
    169 non terminal java_cup.runtime.symbol
            /* non terminals declared with the plain runtime symbol type */
    170   spec, package_spec, import_list, code_part, action_code_part,
    171   parser_code_part, symbol_list, start_spec, production_list,
    172   multipart_id, import_spec, import_id, init_code, scan_code, symbol,
    173   debug_grammar,
    174   type_id, term_name_list, non_term_name_list, production, prod_part_list,
    175   prod_part, new_term_id, new_non_term_id, rhs_list, rhs, empty;
    176 
            /* non terminals declared as str_token: their actions set RESULT.str_val */
    177 non terminal java_cup.runtime.str_token  nt_id, symbol_id, label_id, opt_label;
    178 
    179 /*----------------------------------------------------------------*/
    180 
    181 start with spec;   /* the grammar's start symbol is the spec non terminal */
    182 
    183 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    184 
    185 spec ::=
            /* A whole specification.  The action written ahead of package_spec
               seeds the two symbol tables before any other part is parsed. */
    186     {:
    187           /* declare "error" as a terminal */
    188           symbols.put("error", new symbol_part(terminal.error));
    189 
    190           /* declare start non terminal */
    191           non_terms.put("$START", non_terminal.START_nt);
    192     :}
    193     package_spec
    194     import_list
    195     code_part
    196         debug_grammar
    197         init_code
    198     scan_code
    199     symbol_list
    200     start_spec
    201     production_list
    202     |
    203     /* error recovery assuming something went wrong before symbols
    204        and we have TERMINAL or NON TERMINAL to sync on.  if we get
    205        an error after that, we recover inside symbol_list or
    206        production_list
    207     */
            /* NOTE(review): the table-seeding action above is written only in the
               first alternative -- confirm whether this recovery path needs it too */
    208     error
    209     symbol_list
    210     start_spec
    211     production_list
    212     ;
    213 
    214 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    215 
    216 package_spec ::=
            /* optional PACKAGE multipart_id SEMI clause; the name gathered by
               multipart_id is copied to emit.package_name before the SEMI */
    217     PACKAGE
    218     multipart_id
    219     {:
    220       /* save the package name */
    221       emit.package_name = multipart_name;
    222 
    223       /* reset the accumulated multipart name */
    224       multipart_name = new String();
    225     :}
    226     SEMI
    227     |
    228     empty
    229     ;
    230 
    231 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    232 
    233 import_list ::=
            /* zero or more import_spec entries, gathered left-recursively */
    234     import_list
    235     import_spec
    236     |
    237     empty
    238     ;
    239 
    240 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    241 
    242 import_spec ::=
            /* one IMPORT import_id SEMI clause; the name gathered by import_id
               is pushed onto emit.import_list before the SEMI */
    243     IMPORT
    244     import_id
    245     {:
    246       /* save this import on the imports list */
    247       emit.import_list.push(multipart_name);
    248 
    249       /* reset the accumulated multipart name */
    250       multipart_name = new String();
    251     :}
    252     SEMI
    253     ;
    254 
    255 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    256 
    257 code_part ::= action_code_part parser_code_part ;  /* both parts may be empty; action code comes first */
    258 
    259 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    260 
    261 action_code_part ::=
            /* optional ACTION CODE section; its code string is stored
               in emit.action_code */
    262     ACTION CODE CODE_STRING:user_code SEMI
    263     {:
    264       /* save the user included code string */
    265       emit.action_code = user_code.str_val;
    266     :}
    267     |
    268     empty
    269     ;
    270 
    271 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    272 
    273 parser_code_part ::=
            /* optional PARSER CODE section; its code string is stored
               in emit.parser_code */
    274     PARSER CODE CODE_STRING:user_code SEMI
    275     {:
    276       /* save the user included code string */
    277       emit.parser_code = user_code.str_val;
    278     :}
    279     |
    280     empty
    281     ;
    282 
    283 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    284 
    285 init_code ::=
            /* optional INIT WITH clause; its code string is stored
               in emit.init_code */
    286     INIT WITH CODE_STRING:user_code SEMI
    287     {:
    288       /* save the user code */
    289       emit.init_code = user_code.str_val;
    290     :}
    291     |
    292     empty
    293     ;
    294 
    295 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    296 
    297 scan_code ::=
            /* optional SCAN WITH clause; its code string is stored
               in emit.scan_code */
    298     SCAN WITH CODE_STRING:user_code SEMI
    299     {:
    300       /* save the user code */
    301       emit.scan_code = user_code.str_val;
    302     :}
    303     |
    304     empty
    305     ;
    306 
    307 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    308 
    309 debug_grammar ::=
            /* optional DEBUG WITH multipart_id SEMI clause; the name gathered by
               multipart_id is stored in emit.debug_grammar */
    310         DEBUG WITH multipart_id SEMI
    311     {:
    312       /* save the procedure name */
    313           emit.debug_grammar = multipart_name;
    314           /* reset the accumulated multipart name */
    315           multipart_name = new String();
    316     :}
    317     |
    318     empty
    319     ;
    320 
    321 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    322 
    323 symbol_list ::= symbol_list symbol | symbol;  /* one or more symbol declarations */
    324 
    325 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    326 
    327 symbol ::=
            /* One declaration of terminals or of non terminals: a type_id followed
               by a list of new names.  type_id leaves the type name in
               multipart_name, where new_term_id / new_non_term_id read it; every
               alternative clears multipart_name again before its SEMI. */
    328     TERMINAL
    329     type_id
    330     term_name_list
    331     {:
    332       /* reset the accumulated multipart name */
    333       multipart_name = new String();
    334     :}
    335     SEMI
    336     |
    337     NON
    338     TERMINAL
    339     type_id
    340     non_term_name_list
    341     {:
    342       /* reset the accumulated multipart name */
    343       multipart_name = new String();
    344     :}
    345     SEMI
    346     |
    347 
    348     /* error recovery productions -- sync on semicolon */
    349 
    350     TERMINAL
    351     error
    352     {:
    353       /* reset the accumulated multipart name */
    354       multipart_name = new String();
    355     :}
    356     SEMI
    357     |
    358     NON
    359     TERMINAL
    360     error
    361     {:
    362       /* reset the accumulated multipart name */
    363       multipart_name = new String();
    364     :}
    365     SEMI
    366     ;
    367 
    368 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    369 
    370 term_name_list ::= term_name_list COMMA new_term_id | new_term_id;  /* one or more COMMA-separated new terminal names */
    371 
    372 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    373 
    374 non_term_name_list ::=
            /* one or more COMMA-separated new non terminal names */
    375     non_term_name_list
    376     COMMA
    377     new_non_term_id
    378     |
    379     new_non_term_id
    380     ;
    381 
    382 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    383 
    384 start_spec ::=
    385     START WITH nt_id:start_name
    386     {:
    387       /* the start symbol must already be a declared non terminal */
    388       String wanted = start_name.str_val;
    389       non_terminal declared = (non_terminal)non_terms.get(wanted);
    390       if (declared != null)
    391         {
    392           /* remember it as the start non terminal of the grammar */
    393           start_nt = declared;
    394           /* build the special start production:  START_nt ::= start_nt EOF  */
    395           new_rhs();
    396           add_rhs_part(new symbol_part(start_nt));
    397           add_rhs_part(new symbol_part(terminal.EOF));
    398           emit.start_production =
    399             new production(non_terminal.START_nt, rhs_parts, rhs_pos);
    400           new_rhs();
    401         }
    402       else
    403         {
    404           lexer.emit_error("Start non terminal \"" + wanted +
    405                            "\" has not been declared");
    406         }
    407     :}
    408     SEMI
    409     |
    410     empty
    411     ;
    412 
    413 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    414 
    415 production_list ::= production_list production | production;  /* one or more productions */
    416 
    417 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    418 
    419 production ::=
    420     nt_id:lhs_id
    421     {:
    422       /* find the non terminal named on the left hand side (null if undeclared) */
    423       String lhs_name = lhs_id.str_val;
    424       lhs_nt = (non_terminal)non_terms.get(lhs_name);
    425 
    426       /* an undeclared name is reported only while lexer.error_count is still zero */
    427       boolean undeclared = (lhs_nt == null);
    428       if (undeclared && lexer.error_count == 0)
    429         {
    430           lexer.emit_error("LHS non terminal \"" + lhs_name + "\" has not been declared");
    431         }
    432 
    433       /* start accumulating right hand side parts afresh */
    434       new_rhs();
    435     :}
    436     COLON_COLON_EQUALS
    437     rhs_list
    438     SEMI
    439     |
    440     error
    441     {: lexer.emit_error("Syntax Error"); :}
    442     SEMI
    443     ;
    444 
    445 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    446 
    447 rhs_list ::= rhs_list BAR rhs | rhs;  /* one or more BAR-separated right hand sides */
    448 
    449 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    450 
    451 rhs ::=
    452     prod_part_list
    453     {:
    454       /* without a known left hand side there is nothing to build */
    455       if (lhs_nt != null)
    456         {
    457           /* turn the accumulated parts into a production for lhs_nt */
    458           production built = new production(lhs_nt, rhs_parts, rhs_pos);
    459 
    460           /* no start non terminal chosen so far: adopt the left hand side
    461              of this production and build the special start production
    462              START_nt ::= start_nt EOF  for it */
    463           boolean need_start = (start_nt == null);
    464           if (need_start)
    465             {
    466               start_nt = lhs_nt;
    467               new_rhs();
    468               add_rhs_part(new symbol_part(start_nt));
    469               add_rhs_part(new symbol_part(terminal.EOF));
    470               emit.start_production =
    471                 new production(non_terminal.START_nt, rhs_parts, rhs_pos);
    472               new_rhs();
    473             }
    474         }
    475 
    476       /* whatever happened above, clear the accumulated right hand side */
    477       new_rhs();
    478     :}
    479     ;
    480 
    481 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    482 
    483 prod_part_list ::= prod_part_list prod_part | empty;  /* zero or more production parts */
    484 
    485 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    486 
    487 prod_part ::=
    488     symbol_id:symid opt_label:labid
    489     {:
    490       /* resolve the name against the table of declared symbols */
    491       String sym_name = symid.str_val;
    492       production_part found = (production_part)symbols.get(sym_name);
    493 
    494       if (found != null)
    495         {
    496           /* declared: append it, carrying the label if one was given */
    497           add_rhs_part(add_lab(found, labid.str_val));
    498         }
    499       else if (lexer.error_count == 0)
    500         {
    501           /* undeclared: reported only while lexer.error_count is still zero */
    502           lexer.emit_error("Symbol \"" + sym_name +
    503                            "\" has not been declared");
    504         }
    505     :}
    506     |
    507     CODE_STRING:code_str
    508     {:
    509       /* an embedded code string becomes an action part of the right hand side */
    510       add_rhs_part(new action_part(code_str.str_val));
    511     :}
    512     ;
    513 
    514 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    515 
    516 opt_label ::=
            /* optional COLON label_id suffix; RESULT.str_val carries the label
               text, or null when no label was written */
    517     COLON label_id:labid
    518     {: RESULT.str_val = labid.str_val; :}
    519     |
    520     empty
    521     {: RESULT.str_val = null; :}
    522     ;
    523 
    524 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    525 
    526 multipart_id ::=
            /* DOT-separated identifier; every ID is handed to append_multipart,
               so the full name builds up in multipart_name rather than in RESULT */
    527     multipart_id DOT ID:another_id
    528     {: append_multipart(another_id.str_val); :}
    529     |
    530     ID:an_id
    531     {: append_multipart(an_id.str_val); :}
    532     ;
    533 
    534 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    535 
    536 import_id ::=
            /* a multipart_id, optionally followed by DOT STAR, in which case
               "*" is appended to multipart_name as one more segment */
    537     multipart_id DOT STAR
    538     {: append_multipart("*"); :}
    539     |
    540     multipart_id
    541     ;
    542 
    543 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    544 
    545 type_id ::= multipart_id;  /* a type name is a multipart_id; it is left in multipart_name */
    546 
    547 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    548 
    549 new_term_id ::=
    550     ID:term_id
    551     {:
    552       /* a name may be entered in the symbol table only once */
    553       String term_name = term_id.str_val;
    554       boolean is_new = (symbols.get(term_name) == null);
    555       if (is_new)
    556         {
    557           /* create the terminal with the accumulated type name and record it */
    558           terminal this_term = new terminal(term_name, multipart_name);
    559           symbols.put(term_name, new symbol_part(this_term));
    560         }
    561       else
    562         {
    563           lexer.emit_error("Symbol \"" + term_name + "\" has already been declared");
    564         }
    565     :}
    566     ;
    567 
    568 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    569 
    570 new_non_term_id ::=
    571     ID:non_term_id
    572     {:
    573       /* a name may be entered in the symbol table only once */
    574       String nt_name = non_term_id.str_val;
    575       if (symbols.get(nt_name) == null)
    576         {
    577           /* create the non terminal with the accumulated type name */
    578           non_terminal this_nt = new non_terminal(nt_name, multipart_name);
    579 
    580           /* enter it in the table of non terminals ... */
    581           non_terms.put(nt_name, this_nt);
    582 
    583           /* ... and, wrapped as a production part, in the symbol table */
    584           symbols.put(nt_name, new symbol_part(this_nt));
    585         }
    586       else
    587         {
    588           /* already present in the symbol table: report the redeclaration */
    589           lexer.emit_error("Symbol \"" + nt_name +
    590                            "\" has already been declared");
    591         }
    592     :}
    593     ;
    594 
    595 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    596 
    597 nt_id ::=
            /* an ID used where a non terminal name is expected; its text is
               passed up unchanged in RESULT.str_val */
    598     ID:the_id
    599     {: RESULT.str_val = the_id.str_val; :}
    600     ;
    601 
    602 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    603 
    604 symbol_id ::=
            /* an ID used as a symbol on a right hand side; text passed up unchanged */
    605     ID:the_id
    606     {: RESULT.str_val = the_id.str_val; :}
    607     ;
    608 
    609 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    610 
    611 label_id ::=
            /* an ID used as a label; text passed up unchanged */
    612     ID:the_id
    613     {: RESULT.str_val = the_id.str_val; :}
    614     ;
    615 
    616 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */
    617 
    618 empty ::= /* nothing */;  /* matches no input; used as the empty alternative of optional parts */
    619 
    620 /*----------------------------------------------------------------*/
    621