1 2 package java_cup; 3 4 import java.util.Enumeration; 5 import java.util.Hashtable; 6 7 /** This class represents a production in the grammar. It contains 8 * a LHS non terminal, and an array of RHS symbols. As various 9 * transformations are done on the RHS of the production, it may shrink. 10 * As a result a separate length is always maintained to indicate how much 11 * of the RHS array is still valid.<p> 12 * 13 * I addition to construction and manipulation operations, productions provide 14 * methods for factoring out actions (see remove_embedded_actions()), for 15 * computing the nullability of the production (i.e., can it derive the empty 16 * string, see check_nullable()), and operations for computing its first 17 * set (i.e., the set of terminals that could appear at the beginning of some 18 * string derived from the production, see check_first_set()). 19 * 20 * @see java_cup.production_part 21 * @see java_cup.symbol_part 22 * @see java_cup.action_part 23 * @version last updated: 11/25/95 24 * @author Scott Hudson 25 */ 26 27 public class production { 28 29 /*-----------------------------------------------------------*/ 30 /*--- Constructor(s) ----------------------------------------*/ 31 /*-----------------------------------------------------------*/ 32 33 /** Full constructor. This constructor accepts a LHS non terminal, 34 * an array of RHS parts (including terminals, non terminals, and 35 * actions), and a string for a final reduce action. It does several 36 * manipulations in the process of creating a production object. 37 * After some validity checking it translates labels that appear in 38 * actions into code for accessing objects on the runtime parse stack. 39 * It them merges adjacent actions if they appear and moves any trailing 40 * action into the final reduce actions string. Next it removes any 41 * embedded actions by factoring them out with new action productions. 42 * Finally it assigns a unique index to the production.<p> 43 * 44 * Factoring out of actions is accomplished by creating new "hidden" 45 * non terminals. For example if the production was originally: <pre> 46 * A ::= B {action} C D 47 * </pre> 48 * then it is factored into two productions:<pre> 49 * A ::= B X C D 50 * X ::= {action} 51 * </pre> 52 * (where X is a unique new non terminal). This has the effect of placing 53 * all actions at the end where they can be handled as part of a reduce by 54 * the parser. 55 */ 56 public production( 57 non_terminal lhs_sym, 58 production_part rhs_parts[], 59 int rhs_l, 60 String action_str) 61 throws internal_error 62 { 63 int i; 64 action_part tail_action; 65 66 /* remember the length */ 67 if (rhs_l >= 0) 68 _rhs_length = rhs_l; 69 else if (rhs_parts != null) 70 _rhs_length = rhs_parts.length; 71 else 72 _rhs_length = 0; 73 74 /* make sure we have a valid left-hand-side */ 75 if (lhs_sym == null) 76 throw new internal_error( 77 "Attempt to construct a production with a null LHS"); 78 79 /* translate labels appearing in action strings */ 80 action_str = translate_labels( 81 rhs_parts, rhs_l, action_str, lhs_sym.stack_type()); 82 83 /* count use of lhs */ 84 lhs_sym.note_use(); 85 86 /* create the part for left-hand-side */ 87 _lhs = new symbol_part(lhs_sym); 88 89 /* merge adjacent actions (if any) */ 90 _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length); 91 92 /* strip off any trailing action */ 93 tail_action = strip_trailing_action(rhs_parts, _rhs_length); 94 if (tail_action != null) _rhs_length--; 95 96 /* allocate and copy over the right-hand-side */ 97 _rhs = new production_part[_rhs_length]; 98 for (i=0; i<_rhs_length; i++) 99 _rhs[i] = rhs_parts[i]; 100 101 /* count use of each rhs symbol */ 102 for (i=0; i<_rhs_length; i++) 103 if (!_rhs[i].is_action()) 104 ((symbol_part)_rhs[i]).the_symbol().note_use(); 105 106 /* merge any trailing action with action string parameter */ 107 if (action_str == null) action_str = ""; 108 if (tail_action != null && tail_action.code_string() != null) 109 action_str = tail_action.code_string() + action_str; 110 111 /* stash the action */ 112 _action = new action_part(action_str); 113 114 /* rewrite production to remove any embedded actions */ 115 remove_embedded_actions(); 116 117 /* assign an index */ 118 _index = next_index++; 119 120 /* put us in the global collection of productions */ 121 _all.put(new Integer(_index),this); 122 123 /* put us in the production list of the lhs non terminal */ 124 lhs_sym.add_production(this); 125 } 126 127 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 128 129 /** Constructor with no action string. */ 130 public production( 131 non_terminal lhs_sym, 132 production_part rhs_parts[], 133 int rhs_l) 134 throws internal_error 135 { 136 this(lhs_sym,rhs_parts,rhs_l,null); 137 } 138 139 /*-----------------------------------------------------------*/ 140 /*--- (Access to) Static (Class) Variables ------------------*/ 141 /*-----------------------------------------------------------*/ 142 143 /** Table of all productions. Elements are stored using their index as 144 * the key. 145 */ 146 protected static Hashtable _all = new Hashtable(); 147 148 /** Access to all productions. */ 149 public static Enumeration all() {return _all.elements();}; 150 151 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 152 153 /** Total number of productions. */ 154 public static int number() {return _all.size();}; 155 156 /** Static counter for assigning unique index numbers. */ 157 protected static int next_index; 158 159 /*-----------------------------------------------------------*/ 160 /*--- (Access to) Instance Variables ------------------------*/ 161 /*-----------------------------------------------------------*/ 162 163 /** The left hand side non-terminal. */ 164 protected symbol_part _lhs; 165 166 /** The left hand side non-terminal. */ 167 public symbol_part lhs() {return _lhs;} 168 169 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 170 171 /** A collection of parts for the right hand side. */ 172 protected production_part _rhs[]; 173 174 /** Access to the collection of parts for the right hand side. */ 175 public production_part rhs(int indx) throws internal_error 176 { 177 if (indx >= 0 && indx < _rhs_length) 178 return _rhs[indx]; 179 else 180 throw new internal_error( 181 "Index out of range for right hand side of production"); 182 } 183 184 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 185 186 /** How much of the right hand side array we are presently using. */ 187 protected int _rhs_length; 188 189 /** How much of the right hand side array we are presently using. */ 190 public int rhs_length() {return _rhs_length;} 191 192 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 193 194 /** An action_part containing code for the action to be performed when we 195 * reduce with this production. 196 */ 197 protected action_part _action; 198 199 /** An action_part containing code for the action to be performed when we 200 * reduce with this production. 201 */ 202 public action_part action() {return _action;} 203 204 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 205 206 /** Index number of the production. */ 207 protected int _index; 208 209 /** Index number of the production. */ 210 public int index() {return _index;} 211 212 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 213 214 /** Count of number of reductions using this production. */ 215 protected int _num_reductions = 0; 216 217 /** Count of number of reductions using this production. */ 218 public int num_reductions() {return _num_reductions;} 219 220 /** Increment the count of reductions with this non-terminal */ 221 public void note_reduction_use() {_num_reductions++;} 222 223 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 224 225 /** Is the nullability of the production known or unknown? */ 226 protected boolean _nullable_known = false; 227 228 /** Is the nullability of the production known or unknown? */ 229 public boolean nullable_known() {return _nullable_known;} 230 231 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 232 233 /** Nullability of the production (can it derive the empty string). */ 234 protected boolean _nullable = false; 235 236 /** Nullability of the production (can it derive the empty string). */ 237 public boolean nullable() {return _nullable;} 238 239 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 240 241 /** First set of the production. This is the set of terminals that 242 * could appear at the front of some string derived from this production. 243 */ 244 protected terminal_set _first_set = new terminal_set(); 245 246 /** First set of the production. This is the set of terminals that 247 * could appear at the front of some string derived from this production. 248 */ 249 public terminal_set first_set() {return _first_set;} 250 251 /*-----------------------------------------------------------*/ 252 /*--- Static Methods ----------------------------------------*/ 253 /*-----------------------------------------------------------*/ 254 255 /** Determine if a given character can be a label id starter. 256 * @param c the character in question. 257 */ 258 protected static boolean is_id_start(char c) 259 { 260 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_'); 261 262 //later need to handle non-8-bit chars here 263 } 264 265 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 266 267 /** Determine if a character can be in a label id. 268 * @param c the character in question. 269 */ 270 protected static boolean is_id_char(char c) 271 { 272 return is_id_start(c) || (c >= '0' && c <= '9'); 273 } 274 275 /*-----------------------------------------------------------*/ 276 /*--- General Methods ---------------------------------------*/ 277 /*-----------------------------------------------------------*/ 278 279 /** Determine the translation for one label id found within a code_string. 280 * Symbols appearing in the RHS correspond to objects found on the parse 281 * stack at runtime. The code to access them, becomes code to access an 282 * object at the appropriate offset from the top of the stack, and then 283 * cast that to the proper type. 284 * 285 * @param id_str the name of the id to be translated. 286 * @param act_pos the original position of the action it appears in. 287 * @param label_map a hash table mapping labels to positions in the RHS. 288 * @param type_map a hash table mapping labels to declared symbol types. 289 */ 290 protected String label_translate( 291 String id_str, /* the id string we are (possibly) translating */ 292 int act_pos, /* position of the action */ 293 Hashtable label_map, /* map from labels to positions in the RHS */ 294 Hashtable label_types)/* map from labels to stack types */ 295 { 296 Integer label_pos; 297 String label_type; 298 int offset; 299 300 /* look up the id */ 301 label_pos = (Integer)label_map.get(id_str); 302 303 /* if we don't find it, just return the id */ 304 if (label_pos == null) return id_str; 305 306 /* extract the type of the labeled symbol */ 307 label_type = (String)label_types.get(id_str); 308 309 /* is this for the LHS? */ 310 if (label_pos.intValue() == -1) 311 { 312 /* return the result object cast properly */ 313 return "((" + label_type + ")" + emit.pre("result") + ")"; 314 } 315 316 /* its a RHS label */ 317 318 /* if the label appears after the action, we have an error */ 319 if (label_pos.intValue() > act_pos) 320 { 321 /* emit an error message */ 322 System.err.println("*** Label \"" + id_str + 323 "\" appears in action before it appears in production"); 324 lexer.error_count++; 325 326 // later need to print the production this is in 327 328 /* just return the id unchanged */ 329 return id_str; 330 } 331 332 /* calculate the stack offset as the difference in position from 333 label to action minus one */ 334 offset = (act_pos - label_pos.intValue())-1; 335 336 /* translation is properly cast element at that offset from TOS */ 337 return "(/*"+id_str+"*/("+label_type+")" + 338 emit.pre("stack") + ".elementAt(" + emit.pre("top") +"-"+ offset + "))"; 339 340 } 341 342 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 343 344 /** Translate all the label names within an action string to appropriate code. 345 * @param act_string the string to be translated 346 * @param act_pos the position that the action originally held in the 347 * production. 348 * @param label_map a hash table mapping labels to positions in the RHS. 349 * @param type_map a hash table mapping labels to declared symbol types. 350 */ 351 protected String action_translate( 352 String act_string, /* the action string */ 353 int act_pos, /* the position of the action on the RHS */ 354 Hashtable label_map, /* map from labels to RHS positions */ 355 Hashtable label_types) /* map from labels to symbol stack types */ 356 { 357 int id_start; 358 int pos; 359 int len; 360 String id_str; 361 boolean in_id; 362 StringBuffer result; 363 char buffer[]; 364 365 /* if we have no string we are done */ 366 if (act_string == null || act_string.length()== 0) return act_string; 367 368 len = act_string.length(); 369 370 /* set up a place to put the result */ 371 result = new StringBuffer(len + 50); 372 373 /* extract string into array */ 374 buffer = new char[len + 1]; 375 act_string.getChars(0, len, buffer, 0); 376 377 /* put terminator in buffer so we can look one past the end */ 378 buffer[len] = '\0'; 379 380 /* walk down the input buffer looking for identifiers */ 381 in_id = false; 382 for (pos = id_start = 0; pos <= len; pos++) 383 { 384 /* are we currently working on an id? */ 385 if (in_id) 386 { 387 /* does this end the id? */ 388 if (!is_id_char(buffer[pos])) 389 { 390 /* extract the id string and translate it */ 391 id_str = new String(buffer, id_start, pos - id_start); 392 result.append( 393 label_translate(id_str, act_pos, label_map,label_types)); 394 395 /* copy over the ending character */ 396 if (buffer[pos] != '\0') 397 result.append(buffer, pos, 1); 398 399 /* and we are done with this id */ 400 in_id = false; 401 } 402 else 403 { 404 /* we are still in the id, so just keep going */ 405 } 406 } 407 else /* we are not inside an id */ 408 { 409 /* start a new id? */ 410 if (is_id_start(buffer[pos])) 411 { 412 /* start keeping these chars as an id */ 413 in_id = true; 414 id_start = pos; 415 } 416 else 417 { 418 /* just copy over the char */ 419 if (buffer[pos] != '\0') 420 result.append(buffer, pos, 1); 421 } 422 } 423 } 424 425 /* return the accumulated result */ 426 return result.toString(); 427 } 428 429 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 430 431 /** Translate label names to appropriate code within all action strings. 432 * @param rhs array of RHS parts. 433 * @param rhs_len how much of rhs to consider valid. 434 * @param final_action the final action string of the production. 435 * @param lhs_type the object type associated with the LHS symbol. 436 */ 437 protected String translate_labels( 438 production_part rhs[], 439 int rhs_len, 440 String final_action, 441 String lhs_type) 442 { 443 Hashtable label_map = new Hashtable(11); 444 Hashtable label_types = new Hashtable(11); 445 symbol_part part; 446 action_part act_part; 447 int pos; 448 449 /* walk down the parts and extract the labels */ 450 for (pos = 0; pos < rhs_len; pos++) 451 { 452 if (!rhs[pos].is_action()) 453 { 454 part = (symbol_part)rhs[pos]; 455 456 /* if it has a label enter it in the tables */ 457 if (part.label() != null) 458 { 459 label_map.put(part.label(), new Integer(pos)); 460 label_types.put(part.label(), part.the_symbol().stack_type()); 461 } 462 } 463 } 464 465 /* add a label for the LHS */ 466 label_map.put("RESULT", new Integer(-1)); 467 label_types.put("RESULT", lhs_type); 468 469 /* now walk across and do each action string */ 470 for (pos = 0; pos < rhs_len; pos++) 471 { 472 if (rhs[pos].is_action()) 473 { 474 act_part = (action_part)rhs[pos]; 475 act_part.set_code_string( 476 action_translate( 477 act_part.code_string(), pos, label_map, label_types)); 478 } 479 } 480 481 /* now do the final action string at the position after the last */ 482 return action_translate(final_action, rhs_len, label_map, label_types); 483 484 } 485 486 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 487 488 /** Helper routine to merge adjacent actions in a set of RHS parts 489 * @param rhs_parts array of RHS parts. 490 * @param len amount of that array that is valid. 491 * @return remaining valid length. 492 */ 493 protected int merge_adjacent_actions(production_part rhs_parts[], int len) 494 { 495 int from_loc, to_loc, merge_cnt; 496 497 /* bail out early if we have no work to do */ 498 if (rhs_parts == null || len == 0) return 0; 499 500 merge_cnt = 0; 501 to_loc = -1; 502 for (from_loc=0; from_loc<len; from_loc++) 503 { 504 /* do we go in the current position or one further */ 505 if (to_loc < 0 || !rhs_parts[to_loc].is_action() 506 || !rhs_parts[from_loc].is_action()) 507 { 508 /* next one */ 509 to_loc++; 510 511 /* clear the way for it */ 512 if (to_loc != from_loc) rhs_parts[to_loc] = null; 513 } 514 515 /* if this is not trivial? */ 516 if (to_loc != from_loc) 517 { 518 /* do we merge or copy? */ 519 if (rhs_parts[to_loc] != null && rhs_parts[to_loc].is_action() && 520 rhs_parts[from_loc].is_action()) 521 { 522 /* merge */ 523 rhs_parts[to_loc] = new action_part( 524 ((action_part)rhs_parts[to_loc]).code_string() + 525 ((action_part)rhs_parts[from_loc]).code_string()); 526 merge_cnt++; 527 } 528 else 529 { 530 /* copy */ 531 rhs_parts[to_loc] = rhs_parts[from_loc]; 532 } 533 } 534 } 535 536 /* return the used length */ 537 return len - merge_cnt; 538 } 539 540 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 541 542 /** Helper routine to strip a trailing action off rhs and return it 543 * @param rhs_parts array of RHS parts. 544 * @param len how many of those are valid. 545 * @return the removed action part. 546 */ 547 protected action_part strip_trailing_action( 548 production_part rhs_parts[], 549 int len) 550 { 551 action_part result; 552 553 /* bail out early if we have nothing to do */ 554 if (rhs_parts == null || len == 0) return null; 555 556 /* see if we have a trailing action */ 557 if (rhs_parts[len-1].is_action()) 558 { 559 /* snip it out and return it */ 560 result = (action_part)rhs_parts[len-1]; 561 rhs_parts[len-1] = null; 562 return result; 563 } 564 else 565 return null; 566 } 567 568 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 569 570 /** Remove all embedded actions from a production by factoring them 571 * out into individual action production using new non terminals. 572 * if the original production was: <pre> 573 * A ::= B {action1} C {action2} D 574 * </pre> 575 * then it will be factored into: <pre> 576 * A ::= B NT$1 C NT$2 D 577 * NT$1 ::= {action1} 578 * NT$2 ::= {action2} 579 * </pre> 580 * where NT$1 and NT$2 are new system created non terminals. 581 */ 582 protected void remove_embedded_actions() throws internal_error 583 { 584 non_terminal new_nt; 585 production new_prod; 586 587 /* walk over the production and process each action */ 588 for (int act_loc = 0; act_loc < rhs_length(); act_loc++) 589 if (rhs(act_loc).is_action()) 590 { 591 /* create a new non terminal for the action production */ 592 new_nt = non_terminal.create_new(); 593 594 /* create a new production with just the action */ 595 new_prod = new action_production(this, new_nt, null, 0, 596 ((action_part)rhs(act_loc)).code_string()); 597 598 /* replace the action with the generated non terminal */ 599 _rhs[act_loc] = new symbol_part(new_nt); 600 } 601 } 602 603 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 604 605 /** Check to see if the production (now) appears to be nullable. 606 * A production is nullable if its RHS could derive the empty string. 607 * This results when the RHS is empty or contains only non terminals 608 * which themselves are nullable. 609 */ 610 public boolean check_nullable() throws internal_error 611 { 612 production_part part; 613 symbol sym; 614 int pos; 615 616 /* if we already know bail out early */ 617 if (nullable_known()) return nullable(); 618 619 /* if we have a zero size RHS we are directly nullable */ 620 if (rhs_length() == 0) 621 { 622 /* stash and return the result */ 623 return set_nullable(true); 624 } 625 626 /* otherwise we need to test all of our parts */ 627 for (pos=0; pos<rhs_length(); pos++) 628 { 629 part = rhs(pos); 630 631 /* only look at non-actions */ 632 if (!part.is_action()) 633 { 634 sym = ((symbol_part)part).the_symbol(); 635 636 /* if its a terminal we are definitely not nullable */ 637 if (!sym.is_non_term()) 638 return set_nullable(false); 639 /* its a non-term, is it marked nullable */ 640 else if (!((non_terminal)sym).nullable()) 641 /* this one not (yet) nullable, so we aren't */ 642 return false; 643 } 644 } 645 646 /* if we make it here all parts are nullable */ 647 return set_nullable(true); 648 } 649 650 /** set (and return) nullability */ 651 boolean set_nullable(boolean v) 652 { 653 _nullable_known = true; 654 _nullable = v; 655 return v; 656 } 657 658 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 659 660 /** Update (and return) the first set based on current NT firsts. 661 * This assumes that nullability has already been computed for all non 662 * terminals and productions. 663 */ 664 public terminal_set check_first_set() throws internal_error 665 { 666 int part; 667 symbol sym; 668 669 /* walk down the right hand side till we get past all nullables */ 670 for (part=0; part<rhs_length(); part++) 671 { 672 /* only look at non-actions */ 673 if (!rhs(part).is_action()) 674 { 675 sym = ((symbol_part)rhs(part)).the_symbol(); 676 677 /* is it a non-terminal?*/ 678 if (sym.is_non_term()) 679 { 680 /* add in current firsts from that NT */ 681 _first_set.add(((non_terminal)sym).first_set()); 682 683 /* if its not nullable, we are done */ 684 if (!((non_terminal)sym).nullable()) 685 break; 686 } 687 else 688 { 689 /* its a terminal -- add that to the set */ 690 _first_set.add((terminal)sym); 691 692 /* we are done */ 693 break; 694 } 695 } 696 } 697 698 /* return our updated first set */ 699 return first_set(); 700 } 701 702 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 703 704 /** Equality comparison. */ 705 public boolean equals(production other) 706 { 707 if (other == null) return false; 708 return other._index == _index; 709 } 710 711 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 712 713 /** Generic equality comparison. */ 714 public boolean equals(Object other) 715 { 716 if (!(other instanceof production)) 717 return false; 718 else 719 return equals((production)other); 720 } 721 722 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 723 724 /** Produce a hash code. */ 725 public int hashCode() 726 { 727 /* just use a simple function of the index */ 728 return _index*13; 729 } 730 731 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 732 733 /** Convert to a string. */ 734 public String toString() 735 { 736 String result; 737 738 /* catch any internal errors */ 739 try { 740 result = "production [" + index() + "]: "; 741 result += ((lhs() != null) ? lhs().toString() : "$$NULL-LHS$$"); 742 result += " :: = "; 743 for (int i = 0; i<rhs_length(); i++) 744 result += rhs(i) + " "; 745 result += ";"; 746 if (action() != null && action().code_string() != null) 747 result += " {" + action().code_string() + "}"; 748 749 if (nullable_known()) 750 if (nullable()) 751 result += "[NULLABLE]"; 752 else 753 result += "[NOT NULLABLE]"; 754 } catch (internal_error e) { 755 /* crash on internal error since we can't throw it from here (because 756 superclass does not throw anything. */ 757 e.crash(); 758 result = null; 759 } 760 761 return result; 762 } 763 764 /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ 765 766 /** Convert to a simpler string. */ 767 public String to_simple_string() throws internal_error 768 { 769 String result; 770 771 result = ((lhs() != null) ? lhs().the_symbol().name() : "NULL_LHS"); 772 result += " ::= "; 773 for (int i = 0; i < rhs_length(); i++) 774 if (!rhs(i).is_action()) 775 result += ((symbol_part)rhs(i)).the_symbol().name() + " "; 776 777 return result; 778 } 779 780 /*-----------------------------------------------------------*/ 781 782 }; 783