1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.tool; 29 30 import org.antlr.analysis.*; 31 import org.antlr.misc.IntSet; 32 import org.antlr.misc.IntervalSet; 33 34 import java.util.ArrayList; 35 import java.util.Collection; 36 import java.util.Iterator; 37 import java.util.List; 38 39 /** Routines to construct StateClusters from EBNF grammar constructs. 40 * No optimization is done to remove unnecessary epsilon edges. 41 * 42 * TODO: add an optimization that reduces number of states and transitions 43 * will help with speed of conversion and make it easier to view NFA. For 44 * example, o-A->o-->o-B->o should be o-A->o-B->o 45 */ 46 public class NFAFactory { 47 /** This factory is attached to a specifc NFA that it is building. 48 * The NFA will be filled up with states and transitions. 49 */ 50 NFA nfa = null; 51 52 public Rule getCurrentRule() { 53 return currentRule; 54 } 55 56 public void setCurrentRule(Rule currentRule) { 57 this.currentRule = currentRule; 58 } 59 60 Rule currentRule = null; 61 62 public NFAFactory(NFA nfa) { 63 nfa.setFactory(this); 64 this.nfa = nfa; 65 } 66 67 public NFAState newState() { 68 NFAState n = new NFAState(nfa); 69 int state = nfa.getNewNFAStateNumber(); 70 n.stateNumber = state; 71 nfa.addState(n); 72 n.enclosingRule = currentRule; 73 return n; 74 } 75 76 /** Optimize an alternative (list of grammar elements). 77 * 78 * Walk the chain of elements (which can be complicated loop blocks...) 79 * and throw away any epsilon transitions used to link up simple elements. 80 * 81 * This only removes 195 states from the java.g's NFA, but every little 82 * bit helps. Perhaps I can improve in the future. 83 */ 84 public void optimizeAlternative(StateCluster alt) { 85 NFAState s = alt.left; 86 while ( s!=alt.right ) { 87 // if it's a block element, jump over it and continue 88 if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) { 89 s = nfa.getState(s.endOfBlockStateNumber); 90 continue; 91 } 92 Transition t = s.transition[0]; 93 if ( t instanceof RuleClosureTransition ) { 94 s = ((RuleClosureTransition) t).followState; 95 continue; 96 } 97 if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) { 98 // bypass epsilon transition and point to what the epsilon's 99 // target points to unless that epsilon transition points to 100 // a block or loop etc.. Also don't collapse epsilons that 101 // point at the last node of the alt. Don't collapse action edges 102 NFAState epsilonTarget = (NFAState)t.target; 103 if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER && 104 epsilonTarget.transition[0] !=null ) 105 { 106 s.setTransition0(epsilonTarget.transition[0]); 107 /* 108 System.out.println("### opt "+s.stateNumber+"->"+ 109 epsilonTarget.transition(0).target.stateNumber); 110 */ 111 } 112 } 113 s = (NFAState)t.target; 114 } 115 } 116 117 /** From label A build Graph o-A->o */ 118 public StateCluster build_Atom(int label, GrammarAST associatedAST) { 119 NFAState left = newState(); 120 NFAState right = newState(); 121 left.associatedASTNode = associatedAST; 122 right.associatedASTNode = associatedAST; 123 transitionBetweenStates(left, right, label); 124 StateCluster g = new StateCluster(left, right); 125 return g; 126 } 127 128 public StateCluster build_Atom(GrammarAST atomAST) { 129 int tokenType = nfa.grammar.getTokenType(atomAST.getText()); 130 return build_Atom(tokenType, atomAST); 131 } 132 133 /** From set build single edge graph o->o-set->o. To conform to 134 * what an alt block looks like, must have extra state on left. 135 */ 136 public StateCluster build_Set(IntSet set, GrammarAST associatedAST) { 137 NFAState left = newState(); 138 NFAState right = newState(); 139 left.associatedASTNode = associatedAST; 140 right.associatedASTNode = associatedAST; 141 Label label = new Label(set); 142 Transition e = new Transition(label,right); 143 left.addTransition(e); 144 StateCluster g = new StateCluster(left, right); 145 return g; 146 } 147 148 /** Can only complement block of simple alts; can complement build_Set() 149 * result, that is. Get set and complement, replace old with complement. 150 public StateCluster build_AlternativeBlockComplement(StateCluster blk) { 151 State s0 = blk.left; 152 IntSet set = getCollapsedBlockAsSet(s0); 153 if ( set!=null ) { 154 // if set is available, then structure known and blk is a set 155 set = nfa.grammar.complement(set); 156 Label label = s0.transition(0).target.transition(0).label; 157 label.setSet(set); 158 } 159 return blk; 160 } 161 */ 162 163 public StateCluster build_Range(int a, int b) { 164 NFAState left = newState(); 165 NFAState right = newState(); 166 Label label = new Label(IntervalSet.of(a, b)); 167 Transition e = new Transition(label,right); 168 left.addTransition(e); 169 StateCluster g = new StateCluster(left, right); 170 return g; 171 } 172 173 /** From char 'c' build StateCluster o-intValue(c)->o 174 */ 175 public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) { 176 int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText()); 177 return build_Atom(c, charLiteralAST); 178 } 179 180 /** From char 'c' build StateCluster o-intValue(c)->o 181 * can include unicode spec likes '\u0024' later. Accepts 182 * actual unicode 16-bit now, of course, by default. 183 * TODO not supplemental char clean! 184 */ 185 public StateCluster build_CharRange(String a, String b) { 186 int from = Grammar.getCharValueFromGrammarCharLiteral(a); 187 int to = Grammar.getCharValueFromGrammarCharLiteral(b); 188 return build_Range(from, to); 189 } 190 191 /** For a non-lexer, just build a simple token reference atom. 192 * For a lexer, a string is a sequence of char to match. That is, 193 * "fog" is treated as 'f' 'o' 'g' not as a single transition in 194 * the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states 195 * for n characters. 196 */ 197 public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) { 198 if ( nfa.grammar.type==Grammar.LEXER ) { 199 StringBuffer chars = 200 Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText()); 201 NFAState first = newState(); 202 NFAState last = null; 203 NFAState prev = first; 204 for (int i=0; i<chars.length(); i++) { 205 int c = chars.charAt(i); 206 NFAState next = newState(); 207 transitionBetweenStates(prev, next, c); 208 prev = last = next; 209 } 210 return new StateCluster(first, last); 211 } 212 213 // a simple token reference in non-Lexers 214 int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText()); 215 return build_Atom(tokenType, stringLiteralAST); 216 } 217 218 /** For reference to rule r, build 219 * 220 * o-e->(r) o 221 * 222 * where (r) is the start of rule r and the trailing o is not linked 223 * to from rule ref state directly (it's done thru the transition(0) 224 * RuleClosureTransition. 225 * 226 * If the rule r is just a list of tokens, it's block will be just 227 * a set on an edge o->o->o-set->o->o->o, could inline it rather than doing 228 * the rule reference, but i'm not doing this yet as I'm not sure 229 * it would help much in the NFA→DFA construction. 230 * 231 * TODO add to codegen: collapse alt blks that are sets into single matchSet 232 */ 233 public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) { 234 //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name); 235 NFAState left = newState(); 236 // left.setDescription("ref to "+ruleStart.getDescription()); 237 NFAState right = newState(); 238 // right.setDescription("NFAState following ref to "+ruleStart.getDescription()); 239 Transition e = new RuleClosureTransition(refDef,ruleStart,right); 240 left.addTransition(e); 241 StateCluster g = new StateCluster(left, right); 242 return g; 243 } 244 245 /** From an empty alternative build StateCluster o-e->o */ 246 public StateCluster build_Epsilon() { 247 NFAState left = newState(); 248 NFAState right = newState(); 249 transitionBetweenStates(left, right, Label.EPSILON); 250 StateCluster g = new StateCluster(left, right); 251 return g; 252 } 253 254 /** Build what amounts to an epsilon transition with a semantic 255 * predicate action. The pred is a pointer into the AST of 256 * the SEMPRED token. 257 */ 258 public StateCluster build_SemanticPredicate(GrammarAST pred) { 259 // don't count syn preds 260 if ( !pred.getText().toUpperCase() 261 .startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) ) 262 { 263 nfa.grammar.numberOfSemanticPredicates++; 264 } 265 NFAState left = newState(); 266 NFAState right = newState(); 267 Transition e = new Transition(new PredicateLabel(pred), right); 268 left.addTransition(e); 269 StateCluster g = new StateCluster(left, right); 270 return g; 271 } 272 273 /** Build what amounts to an epsilon transition with an action. 274 * The action goes into NFA though it is ignored during analysis. 275 * It slows things down a bit, but I must ignore predicates after 276 * having seen an action (5-5-2008). 277 */ 278 public StateCluster build_Action(GrammarAST action) { 279 NFAState left = newState(); 280 NFAState right = newState(); 281 Transition e = new Transition(new ActionLabel(action), right); 282 left.addTransition(e); 283 return new StateCluster(left, right); 284 } 285 286 /** add an EOF transition to any rule end NFAState that points to nothing 287 * (i.e., for all those rules not invoked by another rule). These 288 * are start symbols then. 289 * 290 * Return the number of grammar entry points; i.e., how many rules are 291 * not invoked by another rule (they can only be invoked from outside). 292 * These are the start rules. 293 */ 294 public int build_EOFStates(Collection<Rule> rules) { 295 int numberUnInvokedRules = 0; 296 for (Rule r : rules) { 297 NFAState endNFAState = r.stopState; 298 // Is this rule a start symbol? (no follow links) 299 if ( endNFAState.transition[0] ==null ) { 300 // if so, then don't let algorithm fall off the end of 301 // the rule, make it hit EOF/EOT. 302 build_EOFState(endNFAState); 303 // track how many rules have been invoked by another rule 304 numberUnInvokedRules++; 305 } 306 } 307 return numberUnInvokedRules; 308 } 309 310 /** set up an NFA NFAState that will yield eof tokens or, 311 * in the case of a lexer grammar, an EOT token when the conversion 312 * hits the end of a rule. 313 */ 314 private void build_EOFState(NFAState endNFAState) { 315 NFAState end = newState(); 316 int label = Label.EOF; 317 if ( nfa.grammar.type==Grammar.LEXER ) { 318 label = Label.EOT; 319 end.setEOTTargetState(true); 320 } 321 /* 322 System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+ 323 " loop on end of state "+endNFAState.getDescription()+ 324 " to state "+end.stateNumber); 325 */ 326 Transition toEnd = new Transition(label, end); 327 endNFAState.addTransition(toEnd); 328 } 329 330 /** From A B build A-e->B (that is, build an epsilon arc from right 331 * of A to left of B). 332 * 333 * As a convenience, return B if A is null or return A if B is null. 334 */ 335 public StateCluster build_AB(StateCluster A, StateCluster B) { 336 if ( A==null ) { 337 return B; 338 } 339 if ( B==null ) { 340 return A; 341 } 342 transitionBetweenStates(A.right, B.left, Label.EPSILON); 343 StateCluster g = new StateCluster(A.left, B.right); 344 return g; 345 } 346 347 /** From a set ('a'|'b') build 348 * 349 * o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts) 350 */ 351 public StateCluster build_AlternativeBlockFromSet(StateCluster set) { 352 if ( set==null ) { 353 return null; 354 } 355 356 // single alt, no decision, just return only alt state cluster 357 NFAState startOfAlt = newState(); // must have this no matter what 358 transitionBetweenStates(startOfAlt, set.left, Label.EPSILON); 359 360 return new StateCluster(startOfAlt,set.right); 361 } 362 363 /** From A|B|..|Z alternative block build 364 * 365 * o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts) 366 * | ^ 367 * o->o-B->o--| 368 * | | 369 * ... | 370 * | | 371 * o->o-Z->o--| 372 * 373 * So every alternative gets begin NFAState connected by epsilon 374 * and every alt right side points at a block end NFAState. There is a 375 * new NFAState in the NFAState in the StateCluster for each alt plus one for the 376 * end NFAState. 377 * 378 * Special case: only one alternative: don't make a block with alt 379 * begin/end. 380 * 381 * Special case: if just a list of tokens/chars/sets, then collapse 382 * to a single edge'd o-set->o graph. 383 * 384 * Set alt number (1..n) in the left-Transition NFAState. 385 */ 386 public StateCluster build_AlternativeBlock(List<StateCluster> alternativeStateClusters) 387 { 388 StateCluster result; 389 if ( alternativeStateClusters==null || alternativeStateClusters.isEmpty() ) { 390 return null; 391 } 392 393 // single alt case 394 if ( alternativeStateClusters.size()==1 ) { 395 // single alt, no decision, just return only alt state cluster 396 StateCluster g = alternativeStateClusters.get(0); 397 NFAState startOfAlt = newState(); // must have this no matter what 398 transitionBetweenStates(startOfAlt, g.left, Label.EPSILON); 399 400 //System.out.println("### opt saved start/stop end in (...)"); 401 return new StateCluster(startOfAlt,g.right); 402 } 403 404 // even if we can collapse for lookahead purposes, we will still 405 // need to predict the alts of this subrule in case there are actions 406 // etc... This is the decision that is pointed to from the AST node 407 // (always) 408 NFAState prevAlternative = null; // tracks prev so we can link to next alt 409 NFAState firstAlt = null; 410 NFAState blockEndNFAState = newState(); 411 blockEndNFAState.setDescription("end block"); 412 int altNum = 1; 413 for (StateCluster g : alternativeStateClusters) { 414 // add begin NFAState for this alt connected by epsilon 415 NFAState left = newState(); 416 left.setDescription("alt "+altNum+" of ()"); 417 transitionBetweenStates(left, g.left, Label.EPSILON); 418 transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON); 419 // Are we the first alternative? 420 if ( firstAlt==null ) { 421 firstAlt = left; // track extreme left node of StateCluster 422 } 423 else { 424 // if not first alternative, must link to this alt from previous 425 transitionBetweenStates(prevAlternative, left, Label.EPSILON); 426 } 427 prevAlternative = left; 428 altNum++; 429 } 430 431 // return StateCluster pointing representing entire block 432 // Points to first alt NFAState on left, block end on right 433 result = new StateCluster(firstAlt, blockEndNFAState); 434 435 firstAlt.decisionStateType = NFAState.BLOCK_START; 436 437 // set EOB markers for Jean 438 firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber; 439 440 return result; 441 } 442 443 /** From (A)? build either: 444 * 445 * o--A->o 446 * | ^ 447 * o---->| 448 * 449 * or, if A is a block, just add an empty alt to the end of the block 450 */ 451 public StateCluster build_Aoptional(StateCluster A) { 452 StateCluster g; 453 int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left); 454 if ( n==1 ) { 455 // no decision, just wrap in an optional path 456 //NFAState decisionState = newState(); 457 NFAState decisionState = A.left; // resuse left edge 458 decisionState.setDescription("only alt of ()? block"); 459 NFAState emptyAlt = newState(); 460 emptyAlt.setDescription("epsilon path of ()? block"); 461 NFAState blockEndNFAState; 462 blockEndNFAState = newState(); 463 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 464 blockEndNFAState.setDescription("end ()? block"); 465 //transitionBetweenStates(decisionState, A.left, Label.EPSILON); 466 transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON); 467 transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON); 468 469 // set EOB markers for Jean 470 decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 471 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 472 473 g = new StateCluster(decisionState, blockEndNFAState); 474 } 475 else { 476 // a decision block, add an empty alt 477 NFAState lastRealAlt = 478 nfa.grammar.getNFAStateForAltOfDecision(A.left, n); 479 NFAState emptyAlt = newState(); 480 emptyAlt.setDescription("epsilon path of ()? block"); 481 transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON); 482 transitionBetweenStates(emptyAlt, A.right, Label.EPSILON); 483 484 // set EOB markers for Jean (I think this is redundant here) 485 A.left.endOfBlockStateNumber = A.right.stateNumber; 486 A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 487 488 g = A; // return same block, but now with optional last path 489 } 490 g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START; 491 492 return g; 493 } 494 495 /** From (A)+ build 496 * 497 * |---| (Transition 2 from A.right points at alt 1) 498 * v | (follow of loop is Transition 1) 499 * o->o-A-o->o 500 * 501 * Meaning that the last NFAState in A points back to A's left Transition NFAState 502 * and we add a new begin/end NFAState. A can be single alternative or 503 * multiple. 504 * 505 * During analysis we'll call the follow link (transition 1) alt n+1 for 506 * an n-alt A block. 507 */ 508 public StateCluster build_Aplus(StateCluster A) { 509 NFAState left = newState(); 510 NFAState blockEndNFAState = newState(); 511 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 512 513 // don't reuse A.right as loopback if it's right edge of another block 514 if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 515 // nested A* so make another tail node to be the loop back 516 // instead of the usual A.right which is the EOB for inner loop 517 NFAState extraRightEdge = newState(); 518 transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 519 A.right = extraRightEdge; 520 } 521 522 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1 523 // turn A's block end into a loopback (acts like alt 2) 524 transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2 525 transitionBetweenStates(left, A.left, Label.EPSILON); 526 527 A.right.decisionStateType = NFAState.LOOPBACK; 528 A.left.decisionStateType = NFAState.BLOCK_START; 529 530 // set EOB markers for Jean 531 A.left.endOfBlockStateNumber = A.right.stateNumber; 532 533 StateCluster g = new StateCluster(left, blockEndNFAState); 534 return g; 535 } 536 537 /** From (A)* build 538 * 539 * |---| 540 * v | 541 * o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1) 542 * | ^ 543 * o---------| (optional branch is 2nd alt of optional block containing A+) 544 * 545 * Meaning that the last (end) NFAState in A points back to A's 546 * left side NFAState and we add 3 new NFAStates (the 547 * optional branch is built just like an optional subrule). 548 * See the Aplus() method for more on the loop back Transition. 549 * The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we 550 * can detect nested (A*)* loops and insert an extra node. Previously, 551 * two blocks shared same EOB node. 552 * 553 * There are 2 or 3 decision points in a A*. If A is not a block (i.e., 554 * it only has one alt), then there are two decisions: the optional bypass 555 * and then loopback. If A is a block of alts, then there are three 556 * decisions: bypass, loopback, and A's decision point. 557 * 558 * Note that the optional bypass must be outside the loop as (A|B)* is 559 * not the same thing as (A|B|)+. 560 * 561 * This is an accurate NFA representation of the meaning of (A)*, but 562 * for generating code, I don't need a DFA for the optional branch by 563 * virtue of how I generate code. The exit-loopback-branch decision 564 * is sufficient to let me make an appropriate enter, exit, loop 565 * determination. See codegen.g 566 */ 567 public StateCluster build_Astar(StateCluster A) { 568 NFAState bypassDecisionState = newState(); 569 bypassDecisionState.setDescription("enter loop path of ()* block"); 570 NFAState optionalAlt = newState(); 571 optionalAlt.setDescription("epsilon path of ()* block"); 572 NFAState blockEndNFAState = newState(); 573 blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK; 574 575 // don't reuse A.right as loopback if it's right edge of another block 576 if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) { 577 // nested A* so make another tail node to be the loop back 578 // instead of the usual A.right which is the EOB for inner loop 579 NFAState extraRightEdge = newState(); 580 transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON); 581 A.right = extraRightEdge; 582 } 583 584 // convert A's end block to loopback 585 A.right.setDescription("()* loopback"); 586 // Transition 1 to actual block of stuff 587 transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON); 588 // Transition 2 optional to bypass 589 transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON); 590 transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON); 591 // Transition 1 of end block exits 592 transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); 593 // Transition 2 of end block loops 594 transitionBetweenStates(A.right, A.left, Label.EPSILON); 595 596 bypassDecisionState.decisionStateType = NFAState.BYPASS; 597 A.left.decisionStateType = NFAState.BLOCK_START; 598 A.right.decisionStateType = NFAState.LOOPBACK; 599 600 // set EOB markers for Jean 601 A.left.endOfBlockStateNumber = A.right.stateNumber; 602 bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber; 603 604 StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState); 605 return g; 606 } 607 608 /** Build an NFA predictor for special rule called Tokens manually that 609 * predicts which token will succeed. The refs to the rules are not 610 * RuleRefTransitions as I want DFA conversion to stop at the EOT 611 * transition on the end of each token, rather than return to Tokens rule. 612 * If I used normal build_alternativeBlock for this, the RuleRefTransitions 613 * would save return address when jumping away from Tokens rule. 614 * 615 * All I do here is build n new states for n rules with an epsilon 616 * edge to the rule start states and then to the next state in the 617 * list: 618 * 619 * o->(A) (a state links to start of A and to next in list) 620 * | 621 * o->(B) 622 * | 623 * ... 624 * | 625 * o->(Z) 626 * 627 * This is the NFA created for the artificial rule created in 628 * Grammar.addArtificialMatchTokensRule(). 629 * 630 * 11/28/2005: removed so we can use normal rule construction for Tokens. 631 public NFAState build_ArtificialMatchTokensRuleNFA() { 632 int altNum = 1; 633 NFAState firstAlt = null; // the start state for the "rule" 634 NFAState prevAlternative = null; 635 Iterator iter = nfa.grammar.getRules().iterator(); 636 // TODO: add a single decision node/state for good description 637 while (iter.hasNext()) { 638 Rule r = (Rule) iter.next(); 639 String ruleName = r.name; 640 String modifier = nfa.grammar.getRuleModifier(ruleName); 641 if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) || 642 (modifier!=null && 643 modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) ) 644 { 645 continue; // don't loop to yourself or do nontoken rules 646 } 647 NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName); 648 NFAState left = newState(); 649 left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME); 650 transitionBetweenStates(left, ruleStartState, Label.EPSILON); 651 // Are we the first alternative? 652 if ( firstAlt==null ) { 653 firstAlt = left; // track extreme top left node as rule start 654 } 655 else { 656 // if not first alternative, must link to this alt from previous 657 transitionBetweenStates(prevAlternative, left, Label.EPSILON); 658 } 659 prevAlternative = left; 660 altNum++; 661 } 662 firstAlt.decisionStateType = NFAState.BLOCK_START; 663 664 return firstAlt; 665 } 666 */ 667 668 /** Build an atom with all possible values in its label */ 669 public StateCluster build_Wildcard(GrammarAST associatedAST) { 670 NFAState left = newState(); 671 NFAState right = newState(); 672 left.associatedASTNode = associatedAST; 673 right.associatedASTNode = associatedAST; 674 Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens 675 Transition e = new Transition(label,right); 676 left.addTransition(e); 677 StateCluster g = new StateCluster(left, right); 678 return g; 679 } 680 681 /** Build a subrule matching ^(. .*) (any tree or node). Let's use 682 * (^(. .+) | .) to be safe. 683 */ 684 public StateCluster build_WildcardTree(GrammarAST associatedAST) { 685 StateCluster wildRoot = build_Wildcard(associatedAST); 686 687 StateCluster down = build_Atom(Label.DOWN, associatedAST); 688 wildRoot = build_AB(wildRoot,down); // hook in; . DOWN 689 690 // make .+ 691 StateCluster wildChildren = build_Wildcard(associatedAST); 692 wildChildren = build_Aplus(wildChildren); 693 wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+ 694 695 StateCluster up = build_Atom(Label.UP, associatedAST); 696 wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP 697 698 // make optional . alt 699 StateCluster optionalNodeAlt = build_Wildcard(associatedAST); 700 701 List<StateCluster> alts = new ArrayList<StateCluster>(); 702 alts.add(wildRoot); 703 alts.add(optionalNodeAlt); 704 StateCluster blk = build_AlternativeBlock(alts); 705 706 return blk; 707 } 708 709 /** Given a collapsed block of alts (a set of atoms), pull out 710 * the set and return it. 711 */ 712 protected IntSet getCollapsedBlockAsSet(State blk) { 713 State s0 = blk; 714 if ( s0!=null && s0.transition(0)!=null ) { 715 State s1 = s0.transition(0).target; 716 if ( s1!=null && s1.transition(0)!=null ) { 717 Label label = s1.transition(0).label; 718 if ( label.isSet() ) { 719 return label.getSet(); 720 } 721 } 722 } 723 return null; 724 } 725 726 private void transitionBetweenStates(NFAState a, NFAState b, int label) { 727 Transition e = new Transition(label,b); 728 a.addTransition(e); 729 } 730 } 731