
/*================================================================*/
/*
  JavaCup Specification for the JavaCup Specification Language
  by Scott Hudson, GVU Center, Georgia Tech, August 1995

  This JavaCup specification is used to implement JavaCup itself.
  It specifies the parser for the JavaCup specification language.
  (It also serves as a reasonable example of what a typical JavaCup
  spec looks like).

  The specification has the following parts:
    Package and import declarations
      These serve the same purpose as in a normal Java source file
      (and will appear in the generated code for the parser). In this
      case we are part of the java_cup package and we import both the
      java_cup runtime system and Hashtable from the standard Java
      utilities package.

    Action code
      This section provides code that is included with the class encapsulating
      the various pieces of user code embedded in the grammar (i.e., the
      semantic actions).  This provides a series of helper routines and
      data structures that the semantic actions use.

    Parser code
      This section provides code included in the parser class itself.  In
      this case we override the default error reporting routines.

    Init with and scan with
      These sections provide small bits of code that initialize, then
      indicate how to invoke the scanner.

    Symbols and grammar
      These sections declare all the terminal and non terminal symbols
      and the types of objects that they will be represented by at runtime,
      then indicate the start symbol of the grammar (spec), and finally
      provide the grammar itself (with embedded actions).

    Operation of the parser
      The parser acts primarily by accumulating data structures representing
      various parts of the specification.  Various small parts (e.g., single
      code strings) are stored as static variables of the emit class and
      in a few cases as variables declared in the action code section.
      Terminals, non terminals, and productions are maintained as collections
      accessible via static methods of those classes.  In addition, two
      symbol tables are kept:
        symbols   maintains the name to object mapping for all symbols
        non_terms maintains a separate mapping containing only the non terms

      Several intermediate working structures are also declared in the action
      code section.  These include: rhs_parts, rhs_pos, and lhs_nt which
      build up parts of the current production while it is being parsed.

  Author(s)
    Scott Hudson, GVU Center, Georgia Tech.

  Revisions
    v0.9a   First released version                     [SEH] 8/29/95
    v0.9b   Updated for beta language (throws clauses) [SEH] 11/25/95
*/
/*================================================================*/

package java_cup;
import java_cup.runtime.*;
import java.util.Hashtable;

/*----------------------------------------------------------------*/

action code {:
  /** Attach a label to a production part.  A part is handed back untouched
   *  when no label is supplied or when the part is an action; otherwise a
   *  new symbol_part wrapping the same symbol and carrying the label is
   *  built.
   */
  protected production_part add_lab(production_part part, String lab)
    throws internal_error
    {
      /* only a non-action part with an actual label gets rebuilt */
      boolean needs_label = lab != null && !part.is_action();
      if (!needs_label) return part;

      return new symbol_part(((symbol_part)part).the_symbol(), lab);
    }

  /** upper bound on the number of parts in one right hand side */
  protected final int MAX_RHS = 200;

  /** buffer in which the parts of the current right hand side are collected */
  protected production_part[] rhs_parts = new production_part[MAX_RHS];

  /** index of the next free slot in rhs_parts */
  protected int rhs_pos = 0;

  /** begin collecting a fresh right hand side (rewinds rhs_pos only) */
  protected void new_rhs()
    {
      rhs_pos = 0;
    }

  /** Append one part to the right hand side being collected.
   *  Throws an Exception once MAX_RHS parts have already been stored.
   */
  protected void add_rhs_part(production_part part) throws java.lang.Exception
    {
      if (rhs_pos >= MAX_RHS)
        throw new Exception("Internal Error: Productions limited to " +
                            MAX_RHS + " symbols and actions");

      /* store, then advance to the next slot */
      rhs_parts[rhs_pos++] = part;
    }

  /** accumulator for dotted (multi part) names */
  protected String multipart_name = "";

  /** Add one more segment to the accumulated multipart name, preceded by
   *  a dot unless it is the first segment.
   */
  protected void append_multipart(String name)
    {
      /* the very first segment needs no separator */
      String separator = (multipart_name.length() == 0) ? "" : ".";

      multipart_name = multipart_name + separator + name;
    }

  /** every declared symbol -- production parts indexed by name */
  protected Hashtable symbols = new Hashtable();

  /** the non terminals only -- non_terminal objects indexed by name */
  protected Hashtable non_terms = new Hashtable();

  /** the start non_terminal (null until one is declared or defaulted) */
  protected non_terminal start_nt = null;

  /** non terminal on the left hand side of the production being parsed */
  protected non_terminal lhs_nt;

:};

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
*/

/* Code placed in the generated parser class: both error hooks are
   routed through the lexer's emit_error(). */
parser code {:

  /* override error routines */

  /* Fatal error: calls done_parsing(), reports the message via the lexer,
     prints a give-up notice on System.err and exits the process with
     status 1.  The info argument is not used. */
  public void report_fatal_error(
    String   message,
    Object   info)
    {
      done_parsing();
      lexer.emit_error(message);
      System.err.println("Can't recover from previous error(s), giving up.");
      System.exit(1);
    }

    /* Non-fatal error: just hands the message to the lexer.  The info
       argument is not used. */
    public void report_error(String message, Object info)
    {
      lexer.emit_error(message);
    }
:};

/*----------------------------------------------------------------*/

/* scanner hookup: initialize the lexer once, then pull one token per call */
init with {: lexer.init(); :};
scan with {: return lexer.next_token(); :};

/*----------------------------------------------------------------*/

/* terminals represented at runtime by java_cup.runtime.token */
terminal java_cup.runtime.token
  PACKAGE, IMPORT, CODE, ACTION, PARSER, TERMINAL, NON, INIT, SCAN, WITH,
  START, SEMI, COMMA, STAR, DOT, COLON, COLON_COLON_EQUALS, BAR,
  DEBUG;

/* terminals represented by str_token (the actions read their str_val) */
terminal java_cup.runtime.str_token  ID, CODE_STRING;

/* non terminals represented at runtime by java_cup.runtime.symbol */
non terminal java_cup.runtime.symbol
  spec, package_spec, import_list, code_part, action_code_part,
  parser_code_part, symbol_list, start_spec, production_list,
  multipart_id, import_spec, import_id, init_code, scan_code, symbol,
  debug_grammar,
  type_id, term_name_list, non_term_name_list, production, prod_part_list,
  prod_part, new_term_id, new_non_term_id, rhs_list, rhs, empty;

/* non terminals whose actions pass a string along in RESULT.str_val */
non terminal java_cup.runtime.str_token  nt_id, symbol_id, label_id, opt_label;

/*----------------------------------------------------------------*/

/* the grammar's start symbol */
start with spec;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
*/

/* A whole specification.  The leading action seeds the two symbol tables
   before any of the listed sections is parsed. */
spec ::=
        {:
          /* declare "error" as a terminal */
          symbols.put("error", new symbol_part(terminal.error));

          /* declare start non terminal */
          non_terms.put("$START", non_terminal.START_nt);
        :}
        package_spec
        import_list
        code_part
        debug_grammar
        init_code
        scan_code
        symbol_list
        start_spec
        production_list
        |
        /* error recovery assuming something went wrong before symbols
           and we have TERMINAL or NON TERMINAL to sync on.  if we get
           an error after that, we recover inside symbol_list or
           production_list
        */
        error
        symbol_list
        start_spec
        production_list
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional package declaration; the name is accumulated in multipart_name
   by multipart_id and copied to emit.package_name */
package_spec ::=
        PACKAGE
        multipart_id
        {:
          /* save the package name */
          emit.package_name = multipart_name;

          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* zero or more import declarations */
import_list ::=
        import_list
        import_spec
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* one import declaration; the accumulated name is pushed onto
   emit.import_list */
import_spec ::=
        IMPORT
        import_id
        {:
          /* save this import on the imports list */
          emit.import_list.push(multipart_name);

          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* the two user code sections, each of which may be empty */
code_part ::= action_code_part parser_code_part ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional "action code" section, stored in emit.action_code */
action_code_part ::=
        ACTION CODE CODE_STRING:user_code SEMI
        {:
          /* save the user included code string */
          emit.action_code = user_code.str_val;
        :}
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
*/

/* optional "parser code" section, stored in emit.parser_code */
parser_code_part ::=
        PARSER CODE CODE_STRING:user_code SEMI
        {:
          /* save the user included code string */
          emit.parser_code = user_code.str_val;
        :}
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional "init with" section, stored in emit.init_code */
init_code ::=
        INIT WITH CODE_STRING:user_code SEMI
        {:
          /* save the user code */
          emit.init_code = user_code.str_val;
        :}
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional "scan with" section, stored in emit.scan_code */
scan_code ::=
        SCAN WITH CODE_STRING:user_code SEMI
        {:
          /* save the user code */
          emit.scan_code = user_code.str_val;
        :}
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional "debug with" section; the dotted name accumulated by
   multipart_id is stored in emit.debug_grammar */
debug_grammar ::=
        DEBUG WITH multipart_id SEMI
        {:
          /* save the procedure name */
          emit.debug_grammar = multipart_name;
          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* one or more symbol declarations */
symbol_list ::= symbol_list symbol | symbol;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* A terminal or non terminal declaration.  Every alternative clears
   multipart_name (which holds the type name collected by type_id)
   before consuming the closing semicolon. */
symbol ::=
        TERMINAL
        type_id
        term_name_list
        {:
          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        |
        NON
        TERMINAL
        type_id
        non_term_name_list
        {:
          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        |

        /* error recovery productions -- sync on semicolon */

        TERMINAL
        error
        {:
          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        |
        NON
        TERMINAL
        error
        {:
          /* reset the accumulated multipart name */
          multipart_name = new String();
        :}
        SEMI
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
*/

/* comma separated list of newly declared terminal names */
term_name_list ::= term_name_list COMMA new_term_id | new_term_id;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* comma separated list of newly declared non terminal names */
non_term_name_list ::=
        non_term_name_list
        COMMA
        new_non_term_id
        |
        new_non_term_id
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional "start with" declaration */
start_spec ::=
        START WITH nt_id:start_name
        {:
          /* the name must already be in the non terminal table */
          non_terminal declared_nt =
            (non_terminal)non_terms.get(start_name.str_val);

          if (declared_nt != null)
            {
              /* remember it as the start non terminal */
              start_nt = declared_nt;

              /* build the special start production: start_nt followed by EOF */
              new_rhs();
              add_rhs_part(new symbol_part(start_nt));
              add_rhs_part(new symbol_part(terminal.EOF));
              emit.start_production =
                new production(non_terminal.START_nt, rhs_parts, rhs_pos);
              new_rhs();
            }
          else
            {
              lexer.emit_error( "Start non terminal \"" + start_name.str_val +
                                 "\" has not been declared");
            }
        :}
        SEMI
        |
        empty
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* one or more productions */
production_list ::= production_list production | production;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* a left hand side, "::=", and its list of right hand sides */
production ::=
        nt_id:lhs_id
        {:
          /* find the left hand side among the declared non terminals */
          lhs_nt = (non_terminal)non_terms.get(lhs_id.str_val);

          /* an undeclared name is reported only while the lexer's
             error count is still zero */
          if (lhs_nt == null && lexer.error_count == 0)
            lexer.emit_error("LHS non terminal \"" + lhs_id.str_val +
                             "\" has not been declared");

          /* start collecting right hand side parts from scratch */
          new_rhs();
        :}
        COLON_COLON_EQUALS
        rhs_list
        SEMI
        |
        error
        {: lexer.emit_error("Syntax Error"); :}
        SEMI
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* alternatives separated by BAR */
rhs_list ::= rhs_list BAR rhs | rhs;

/*. . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . */

/* one right hand side alternative of the current production */
rhs ::=
        prod_part_list
        {:
          if (lhs_nt != null)
            {
              /* build the production from the parts collected so far
                 (constructed only for its effect; the reference is not kept) */
              new production(lhs_nt, rhs_parts, rhs_pos);

              /* with no start non terminal chosen yet, the lhs of this
                 production becomes the start non terminal and gets the
                 special start production: start_nt followed by EOF */
              if (start_nt == null)
                {
                  start_nt = lhs_nt;

                  new_rhs();
                  add_rhs_part(new symbol_part(start_nt));
                  add_rhs_part(new symbol_part(terminal.EOF));
                  emit.start_production =
                    new production(non_terminal.START_nt, rhs_parts, rhs_pos);
                }
            }

          /* the accumulation is rewound whether or not anything was built */
          new_rhs();
        :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* zero or more production parts */
prod_part_list ::= prod_part_list prod_part | empty;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* a (possibly labeled) symbol reference or an embedded action string */
prod_part ::=
        symbol_id:symid opt_label:labid
        {:
          /* the name has to be present in the symbols table */
          production_part found =
            (production_part)symbols.get(symid.str_val);

          if (found != null)
            {
              /* record the part, carrying its label if one was given */
              add_rhs_part(add_lab(found, labid.str_val));
            }
          else if (lexer.error_count == 0)
            {
              /* undeclared symbol: reported only while the lexer's
                 error count is still zero */
              lexer.emit_error("Symbol \"" + symid.str_val +
                               "\" has not been declared");
            }
        :}
        |
        CODE_STRING:code_str
        {:
          /* an action string becomes an action_part of the right hand side */
          add_rhs_part(new action_part(code_str.str_val));
        :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* optional ":label" suffix; str_val is null when no label is present */
opt_label ::=
        COLON label_id:labid
        {: RESULT.str_val = labid.str_val; :}
        |
        empty
        {: RESULT.str_val = null; :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
*/

/* dotted name; each ID is appended to the accumulated multipart name */
multipart_id ::=
        multipart_id DOT ID:another_id
        {: append_multipart(another_id.str_val); :}
        |
        ID:an_id
        {: append_multipart(an_id.str_val); :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* imported name, optionally ending in ".*" */
import_id ::=
        multipart_id DOT STAR
        {: append_multipart("*"); :}
        |
        multipart_id
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* type name of a symbol declaration (left in multipart_name) */
type_id ::= multipart_id;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* a terminal name being declared */
new_term_id ::=
        ID:term_id
        {:
          /* a name may be entered in the symbols table only once */
          Object earlier_decl = symbols.get(term_id.str_val);

          if (earlier_decl == null)
            {
              /* wrap a new terminal (typed by the accumulated multipart
                 name) in a symbol_part and record it */
              symbols.put(term_id.str_val,
                new symbol_part(new terminal(term_id.str_val, multipart_name)));
            }
          else
            {
              lexer.emit_error("Symbol \"" + term_id.str_val +
                               "\" has already been declared");
            }
        :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* a non terminal name being declared */
new_non_term_id ::=
        ID:non_term_id
        {:
          /* a name may be entered in the symbols table only once */
          Object earlier_decl = symbols.get(non_term_id.str_val);

          if (earlier_decl == null)
            {
              /* create the non terminal, typed by the accumulated
                 multipart name */
              non_terminal this_nt =
                new non_terminal(non_term_id.str_val, multipart_name);

              /* record it among the non terminals ... */
              non_terms.put(non_term_id.str_val, this_nt);

              /* ... and, wrapped in a symbol_part, among all symbols */
              symbols.put(non_term_id.str_val, new symbol_part(this_nt));
            }
          else
            {
              lexer.emit_error( "Symbol \"" + non_term_id.str_val +
                                "\" has already been declared");
            }
        :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* reference to a non terminal by name; passes the ID text along */
nt_id ::=
        ID:the_id
        {: RESULT.str_val = the_id.str_val; :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . .
. . . . . . . . . */

/* reference to a symbol by name; copies the ID text into RESULT.str_val */
symbol_id ::=
        ID:the_id
        {: RESULT.str_val = the_id.str_val; :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* label name; copies the ID text into RESULT.str_val */
label_id ::=
        ID:the_id
        {: RESULT.str_val = the_id.str_val; :}
        ;

/*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . */

/* the empty production, used wherever a section or list may be absent */
empty ::= /* nothing */;

/*----------------------------------------------------------------*/
