Home | History | Annotate | Download | only in tool
      1 /*
      2  * [The "BSD license"]
      3  *  Copyright (c) 2010 Terence Parr
      4  *  All rights reserved.
      5  *
      6  *  Redistribution and use in source and binary forms, with or without
      7  *  modification, are permitted provided that the following conditions
      8  *  are met:
      9  *  1. Redistributions of source code must retain the above copyright
     10  *      notice, this list of conditions and the following disclaimer.
     11  *  2. Redistributions in binary form must reproduce the above copyright
     12  *      notice, this list of conditions and the following disclaimer in the
     13  *      documentation and/or other materials provided with the distribution.
     14  *  3. The name of the author may not be used to endorse or promote products
     15  *      derived from this software without specific prior written permission.
     16  *
     17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 package org.antlr.tool;
     29 
     30 import org.antlr.analysis.*;
     31 import org.antlr.misc.IntSet;
     32 import org.antlr.misc.IntervalSet;
     33 
     34 import java.util.ArrayList;
     35 import java.util.Collection;
     36 import java.util.Iterator;
     37 import java.util.List;
     38 
     39 /** Routines to construct StateClusters from EBNF grammar constructs.
     40  *  No optimization is done to remove unnecessary epsilon edges.
     41  *
     42  *  TODO: add an optimization that reduces number of states and transitions
     43  *  will help with speed of conversion and make it easier to view NFA.  For
     44  *  example, o-A->o-->o-B->o should be o-A->o-B->o
     45  */
     46 public class NFAFactory {
     47 	/** This factory is attached to a specifc NFA that it is building.
     48      *  The NFA will be filled up with states and transitions.
     49      */
     50 	NFA nfa = null;
     51 
     52     public Rule getCurrentRule() {
     53         return currentRule;
     54     }
     55 
     56     public void setCurrentRule(Rule currentRule) {
     57         this.currentRule = currentRule;
     58     }
     59 
     60 	Rule currentRule = null;
     61 
     62 	public NFAFactory(NFA nfa) {
     63         nfa.setFactory(this);
     64 		this.nfa = nfa;
     65 	}
     66 
     67     public NFAState newState() {
     68         NFAState n = new NFAState(nfa);
     69         int state = nfa.getNewNFAStateNumber();
     70         n.stateNumber = state;
     71         nfa.addState(n);
     72 		n.enclosingRule = currentRule;
     73 		return n;
     74     }
     75 
     76 	/** Optimize an alternative (list of grammar elements).
     77 	 *
     78 	 *  Walk the chain of elements (which can be complicated loop blocks...)
     79 	 *  and throw away any epsilon transitions used to link up simple elements.
     80 	 *
     81 	 *  This only removes 195 states from the java.g's NFA, but every little
     82 	 *  bit helps.  Perhaps I can improve in the future.
     83 	 */
     84 	public void optimizeAlternative(StateCluster alt) {
     85 		NFAState s = alt.left;
     86 		while ( s!=alt.right ) {
     87 			// if it's a block element, jump over it and continue
     88 			if ( s.endOfBlockStateNumber!=State.INVALID_STATE_NUMBER ) {
     89 				s = nfa.getState(s.endOfBlockStateNumber);
     90 				continue;
     91 			}
     92 			Transition t = s.transition[0];
     93 			if ( t instanceof RuleClosureTransition ) {
     94 				s = ((RuleClosureTransition) t).followState;
     95 				continue;
     96 			}
     97 			if ( t.label.isEpsilon() && !t.label.isAction() && s.getNumberOfTransitions()==1 ) {
     98 				// bypass epsilon transition and point to what the epsilon's
     99 				// target points to unless that epsilon transition points to
    100 				// a block or loop etc..  Also don't collapse epsilons that
    101 				// point at the last node of the alt. Don't collapse action edges
    102 				NFAState epsilonTarget = (NFAState)t.target;
    103 				if ( epsilonTarget.endOfBlockStateNumber==State.INVALID_STATE_NUMBER &&
    104 					 epsilonTarget.transition[0] !=null )
    105 				{
    106 					s.setTransition0(epsilonTarget.transition[0]);
    107 					/*
    108 					System.out.println("### opt "+s.stateNumber+"->"+
    109 									   epsilonTarget.transition(0).target.stateNumber);
    110 					*/
    111 				}
    112 			}
    113 			s = (NFAState)t.target;
    114 		}
    115 	}
    116 
    117 	/** From label A build Graph o-A->o */
    118 	public StateCluster build_Atom(int label, GrammarAST associatedAST) {
    119 		NFAState left = newState();
    120 		NFAState right = newState();
    121 		left.associatedASTNode = associatedAST;
    122 		right.associatedASTNode = associatedAST;
    123 		transitionBetweenStates(left, right, label);
    124 		StateCluster g = new StateCluster(left, right);
    125 		return g;
    126 	}
    127 
    128 	public StateCluster build_Atom(GrammarAST atomAST) {
    129 		int tokenType = nfa.grammar.getTokenType(atomAST.getText());
    130 		return build_Atom(tokenType, atomAST);
    131 	}
    132 
    133 	/** From set build single edge graph o->o-set->o.  To conform to
    134      *  what an alt block looks like, must have extra state on left.
    135      */
    136 	public StateCluster build_Set(IntSet set, GrammarAST associatedAST) {
    137         NFAState left = newState();
    138         NFAState right = newState();
    139 		left.associatedASTNode = associatedAST;
    140 		right.associatedASTNode = associatedAST;
    141 		Label label = new Label(set);
    142 		Transition e = new Transition(label,right);
    143         left.addTransition(e);
    144 		StateCluster g = new StateCluster(left, right);
    145         return g;
    146 	}
    147 
    148     /** Can only complement block of simple alts; can complement build_Set()
    149      *  result, that is.  Get set and complement, replace old with complement.
    150     public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
    151         State s0 = blk.left;
    152         IntSet set = getCollapsedBlockAsSet(s0);
    153         if ( set!=null ) {
    154             // if set is available, then structure known and blk is a set
    155             set = nfa.grammar.complement(set);
    156             Label label = s0.transition(0).target.transition(0).label;
    157             label.setSet(set);
    158         }
    159         return blk;
    160     }
    161 	 */
    162 
    163     public StateCluster build_Range(int a, int b) {
    164         NFAState left = newState();
    165         NFAState right = newState();
    166 		Label label = new Label(IntervalSet.of(a, b));
    167 		Transition e = new Transition(label,right);
    168         left.addTransition(e);
    169         StateCluster g = new StateCluster(left, right);
    170         return g;
    171     }
    172 
    173 	/** From char 'c' build StateCluster o-intValue(c)->o
    174 	 */
    175 	public StateCluster build_CharLiteralAtom(GrammarAST charLiteralAST) {
    176         int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteralAST.getText());
    177 		return build_Atom(c, charLiteralAST);
    178 	}
    179 
    180 	/** From char 'c' build StateCluster o-intValue(c)->o
    181 	 *  can include unicode spec likes '\u0024' later.  Accepts
    182 	 *  actual unicode 16-bit now, of course, by default.
    183      *  TODO not supplemental char clean!
    184 	 */
    185 	public StateCluster build_CharRange(String a, String b) {
    186 		int from = Grammar.getCharValueFromGrammarCharLiteral(a);
    187 		int to = Grammar.getCharValueFromGrammarCharLiteral(b);
    188 		return build_Range(from, to);
    189 	}
    190 
    191     /** For a non-lexer, just build a simple token reference atom.
    192      *  For a lexer, a string is a sequence of char to match.  That is,
    193      *  "fog" is treated as 'f' 'o' 'g' not as a single transition in
    194      *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
    195      *  for n characters.
    196      */
    197     public StateCluster build_StringLiteralAtom(GrammarAST stringLiteralAST) {
    198         if ( nfa.grammar.type==Grammar.LEXER ) {
    199 			StringBuffer chars =
    200 				Grammar.getUnescapedStringFromGrammarStringLiteral(stringLiteralAST.getText());
    201             NFAState first = newState();
    202             NFAState last = null;
    203             NFAState prev = first;
    204             for (int i=0; i<chars.length(); i++) {
    205                 int c = chars.charAt(i);
    206                 NFAState next = newState();
    207                 transitionBetweenStates(prev, next, c);
    208                 prev = last = next;
    209             }
    210             return  new StateCluster(first, last);
    211         }
    212 
    213         // a simple token reference in non-Lexers
    214         int tokenType = nfa.grammar.getTokenType(stringLiteralAST.getText());
    215 		return build_Atom(tokenType, stringLiteralAST);
    216     }
    217 
    218     /** For reference to rule r, build
    219      *
    220      *  o-e-&gt;(r)  o
    221      *
    222      *  where (r) is the start of rule r and the trailing o is not linked
    223      *  to from rule ref state directly (it's done thru the transition(0)
    224      *  RuleClosureTransition.
    225      *
    226      *  If the rule r is just a list of tokens, it's block will be just
    227      *  a set on an edge o-&gt;o-&gt;o-set-&gt;o-&gt;o-&gt;o, could inline it rather than doing
    228      *  the rule reference, but i'm not doing this yet as I'm not sure
    229      *  it would help much in the NFA&rarr;DFA construction.
    230      *
    231      *  TODO add to codegen: collapse alt blks that are sets into single matchSet
    232      */
    233     public StateCluster build_RuleRef(Rule refDef, NFAState ruleStart) {
    234         //System.out.println("building ref to rule "+nfa.grammar.name+"."+refDef.name);
    235         NFAState left = newState();
    236         // left.setDescription("ref to "+ruleStart.getDescription());
    237         NFAState right = newState();
    238         // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
    239         Transition e = new RuleClosureTransition(refDef,ruleStart,right);
    240         left.addTransition(e);
    241         StateCluster g = new StateCluster(left, right);
    242         return g;
    243     }
    244 
    245     /** From an empty alternative build StateCluster o-e-&gt;o */
    246     public StateCluster build_Epsilon() {
    247         NFAState left = newState();
    248         NFAState right = newState();
    249         transitionBetweenStates(left, right, Label.EPSILON);
    250         StateCluster g = new StateCluster(left, right);
    251         return g;
    252     }
    253 
    254 	/** Build what amounts to an epsilon transition with a semantic
    255 	 *  predicate action.  The pred is a pointer into the AST of
    256 	 *  the SEMPRED token.
    257 	 */
    258 	public StateCluster build_SemanticPredicate(GrammarAST pred) {
    259 		// don't count syn preds
    260 		if ( !pred.getText().toUpperCase()
    261 				.startsWith(Grammar.SYNPRED_RULE_PREFIX.toUpperCase()) )
    262 		{
    263 			nfa.grammar.numberOfSemanticPredicates++;
    264 		}
    265 		NFAState left = newState();
    266 		NFAState right = newState();
    267 		Transition e = new Transition(new PredicateLabel(pred), right);
    268 		left.addTransition(e);
    269 		StateCluster g = new StateCluster(left, right);
    270 		return g;
    271 	}
    272 
    273 	/** Build what amounts to an epsilon transition with an action.
    274 	 *  The action goes into NFA though it is ignored during analysis.
    275 	 *  It slows things down a bit, but I must ignore predicates after
    276 	 *  having seen an action (5-5-2008).
    277 	 */
    278 	public StateCluster build_Action(GrammarAST action) {
    279 		NFAState left = newState();
    280 		NFAState right = newState();
    281 		Transition e = new Transition(new ActionLabel(action), right);
    282 		left.addTransition(e);
    283 		return new StateCluster(left, right);
    284 	}
    285 
    286 	/** add an EOF transition to any rule end NFAState that points to nothing
    287      *  (i.e., for all those rules not invoked by another rule).  These
    288      *  are start symbols then.
    289 	 *
    290 	 *  Return the number of grammar entry points; i.e., how many rules are
    291 	 *  not invoked by another rule (they can only be invoked from outside).
    292 	 *  These are the start rules.
    293      */
    294     public int build_EOFStates(Collection<Rule> rules) {
    295 		int numberUnInvokedRules = 0;
    296         for (Rule r : rules) {
    297 			NFAState endNFAState = r.stopState;
    298             // Is this rule a start symbol?  (no follow links)
    299 			if ( endNFAState.transition[0] ==null ) {
    300 				// if so, then don't let algorithm fall off the end of
    301 				// the rule, make it hit EOF/EOT.
    302 				build_EOFState(endNFAState);
    303 				// track how many rules have been invoked by another rule
    304 				numberUnInvokedRules++;
    305 			}
    306         }
    307 		return numberUnInvokedRules;
    308     }
    309 
    310     /** set up an NFA NFAState that will yield eof tokens or,
    311      *  in the case of a lexer grammar, an EOT token when the conversion
    312      *  hits the end of a rule.
    313      */
    314     private void build_EOFState(NFAState endNFAState) {
    315 		NFAState end = newState();
    316         int label = Label.EOF;
    317         if ( nfa.grammar.type==Grammar.LEXER ) {
    318             label = Label.EOT;
    319 			end.setEOTTargetState(true);
    320         }
    321 		/*
    322 		System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
    323 						   " loop on end of state "+endNFAState.getDescription()+
    324 						   " to state "+end.stateNumber);
    325 		*/
    326 		Transition toEnd = new Transition(label, end);
    327 		endNFAState.addTransition(toEnd);
    328 	}
    329 
    330     /** From A B build A-e-&gt;B (that is, build an epsilon arc from right
    331      *  of A to left of B).
    332      *
    333      *  As a convenience, return B if A is null or return A if B is null.
    334      */
    335     public StateCluster build_AB(StateCluster A, StateCluster B) {
    336         if ( A==null ) {
    337             return B;
    338         }
    339         if ( B==null ) {
    340             return A;
    341         }
    342 		transitionBetweenStates(A.right, B.left, Label.EPSILON);
    343 		StateCluster g = new StateCluster(A.left, B.right);
    344         return g;
    345     }
    346 
    347 	/** From a set ('a'|'b') build
    348      *
    349      *  o-&gt;o-'a'..'b'-&gt;o-&gt;o (last NFAState is blockEndNFAState pointed to by all alts)
    350 	 */
    351 	public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
    352 		if ( set==null ) {
    353 			return null;
    354 		}
    355 
    356 		// single alt, no decision, just return only alt state cluster
    357 		NFAState startOfAlt = newState(); // must have this no matter what
    358 		transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);
    359 
    360 		return new StateCluster(startOfAlt,set.right);
    361 	}
    362 
    363 	/** From A|B|..|Z alternative block build
    364      *
    365      *  o-&gt;o-A-&gt;o-&gt;o (last NFAState is blockEndNFAState pointed to by all alts)
    366      *  |          ^
    367      *  o-&gt;o-B-&gt;o--|
    368      *  |          |
    369      *  ...        |
    370      *  |          |
    371      *  o-&gt;o-Z-&gt;o--|
    372      *
    373      *  So every alternative gets begin NFAState connected by epsilon
    374      *  and every alt right side points at a block end NFAState.  There is a
    375      *  new NFAState in the NFAState in the StateCluster for each alt plus one for the
    376      *  end NFAState.
    377      *
    378      *  Special case: only one alternative: don't make a block with alt
    379      *  begin/end.
    380      *
    381      *  Special case: if just a list of tokens/chars/sets, then collapse
    382      *  to a single edge'd o-set-&gt;o graph.
    383      *
    384      *  Set alt number (1..n) in the left-Transition NFAState.
    385      */
    386     public StateCluster build_AlternativeBlock(List<StateCluster> alternativeStateClusters)
    387     {
    388         StateCluster result;
    389         if ( alternativeStateClusters==null || alternativeStateClusters.isEmpty() ) {
    390             return null;
    391         }
    392 
    393 		// single alt case
    394 		if ( alternativeStateClusters.size()==1 ) {
    395 			// single alt, no decision, just return only alt state cluster
    396 			StateCluster g = alternativeStateClusters.get(0);
    397 			NFAState startOfAlt = newState(); // must have this no matter what
    398 			transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);
    399 
    400 			//System.out.println("### opt saved start/stop end in (...)");
    401 			return new StateCluster(startOfAlt,g.right);
    402 		}
    403 
    404 		// even if we can collapse for lookahead purposes, we will still
    405         // need to predict the alts of this subrule in case there are actions
    406         // etc...  This is the decision that is pointed to from the AST node
    407         // (always)
    408         NFAState prevAlternative = null; // tracks prev so we can link to next alt
    409         NFAState firstAlt = null;
    410         NFAState blockEndNFAState = newState();
    411         blockEndNFAState.setDescription("end block");
    412         int altNum = 1;
    413         for (StateCluster g : alternativeStateClusters) {
    414             // add begin NFAState for this alt connected by epsilon
    415             NFAState left = newState();
    416             left.setDescription("alt "+altNum+" of ()");
    417 			transitionBetweenStates(left, g.left, Label.EPSILON);
    418 			transitionBetweenStates(g.right, blockEndNFAState, Label.EPSILON);
    419 			// Are we the first alternative?
    420 			if ( firstAlt==null ) {
    421 				firstAlt = left; // track extreme left node of StateCluster
    422 			}
    423 			else {
    424 				// if not first alternative, must link to this alt from previous
    425 				transitionBetweenStates(prevAlternative, left, Label.EPSILON);
    426 			}
    427 			prevAlternative = left;
    428 			altNum++;
    429 		}
    430 
    431 		// return StateCluster pointing representing entire block
    432 		// Points to first alt NFAState on left, block end on right
    433 		result = new StateCluster(firstAlt, blockEndNFAState);
    434 
    435 		firstAlt.decisionStateType = NFAState.BLOCK_START;
    436 
    437 		// set EOB markers for Jean
    438 		firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;
    439 
    440 		return result;
    441     }
    442 
    443     /** From (A)? build either:
    444      *
    445 	 *  o--A-&gt;o
    446 	 *  |     ^
    447 	 *  o----&gt;|
    448      *
    449      *  or, if A is a block, just add an empty alt to the end of the block
    450      */
    451     public StateCluster build_Aoptional(StateCluster A) {
    452         StateCluster g;
    453         int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
    454         if ( n==1 ) {
    455             // no decision, just wrap in an optional path
    456 			//NFAState decisionState = newState();
    457 			NFAState decisionState = A.left; // resuse left edge
    458 			decisionState.setDescription("only alt of ()? block");
    459 			NFAState emptyAlt = newState();
    460             emptyAlt.setDescription("epsilon path of ()? block");
    461             NFAState blockEndNFAState;
    462 			blockEndNFAState = newState();
    463 			transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
    464 			blockEndNFAState.setDescription("end ()? block");
    465             //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
    466             transitionBetweenStates(decisionState, emptyAlt, Label.EPSILON);
    467             transitionBetweenStates(emptyAlt, blockEndNFAState, Label.EPSILON);
    468 
    469 			// set EOB markers for Jean
    470 			decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
    471 			blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
    472 
    473             g = new StateCluster(decisionState, blockEndNFAState);
    474         }
    475         else {
    476             // a decision block, add an empty alt
    477             NFAState lastRealAlt =
    478                     nfa.grammar.getNFAStateForAltOfDecision(A.left, n);
    479             NFAState emptyAlt = newState();
    480             emptyAlt.setDescription("epsilon path of ()? block");
    481             transitionBetweenStates(lastRealAlt, emptyAlt, Label.EPSILON);
    482             transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);
    483 
    484 			// set EOB markers for Jean (I think this is redundant here)
    485 			A.left.endOfBlockStateNumber = A.right.stateNumber;
    486 			A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
    487 
    488             g = A; // return same block, but now with optional last path
    489         }
    490 		g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
    491 
    492         return g;
    493     }
    494 
    495     /** From (A)+ build
    496 	 *
    497      *     |---|    (Transition 2 from A.right points at alt 1)
    498 	 *     v   |    (follow of loop is Transition 1)
    499      *  o-&gt;o-A-o-&gt;o
    500      *
    501      *  Meaning that the last NFAState in A points back to A's left Transition NFAState
    502      *  and we add a new begin/end NFAState.  A can be single alternative or
    503      *  multiple.
    504 	 *
    505 	 *  During analysis we'll call the follow link (transition 1) alt n+1 for
    506 	 *  an n-alt A block.
    507      */
    508     public StateCluster build_Aplus(StateCluster A) {
    509         NFAState left = newState();
    510         NFAState blockEndNFAState = newState();
    511 		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
    512 
    513 		// don't reuse A.right as loopback if it's right edge of another block
    514 		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
    515 			// nested A* so make another tail node to be the loop back
    516 			// instead of the usual A.right which is the EOB for inner loop
    517 			NFAState extraRightEdge = newState();
    518 			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
    519 			A.right = extraRightEdge;
    520 		}
    521 
    522         transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON); // follow is Transition 1
    523 		// turn A's block end into a loopback (acts like alt 2)
    524 		transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
    525 		transitionBetweenStates(left, A.left, Label.EPSILON);
    526 
    527 		A.right.decisionStateType = NFAState.LOOPBACK;
    528 		A.left.decisionStateType = NFAState.BLOCK_START;
    529 
    530 		// set EOB markers for Jean
    531 		A.left.endOfBlockStateNumber = A.right.stateNumber;
    532 
    533         StateCluster g = new StateCluster(left, blockEndNFAState);
    534         return g;
    535     }
    536 
    537     /** From (A)* build
    538      *
    539 	 *     |---|
    540 	 *     v   |
    541 	 *  o-&gt;o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
    542      *  |         ^
    543      *  o---------| (optional branch is 2nd alt of optional block containing A+)
    544      *
    545      *  Meaning that the last (end) NFAState in A points back to A's
    546      *  left side NFAState and we add 3 new NFAStates (the
    547      *  optional branch is built just like an optional subrule).
    548      *  See the Aplus() method for more on the loop back Transition.
    549 	 *  The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
    550 	 *  can detect nested (A*)* loops and insert an extra node.  Previously,
    551 	 *  two blocks shared same EOB node.
    552      *
    553      *  There are 2 or 3 decision points in a A*.  If A is not a block (i.e.,
    554      *  it only has one alt), then there are two decisions: the optional bypass
    555      *  and then loopback.  If A is a block of alts, then there are three
    556      *  decisions: bypass, loopback, and A's decision point.
    557      *
    558      *  Note that the optional bypass must be outside the loop as (A|B)* is
    559      *  not the same thing as (A|B|)+.
    560      *
    561      *  This is an accurate NFA representation of the meaning of (A)*, but
    562      *  for generating code, I don't need a DFA for the optional branch by
    563      *  virtue of how I generate code.  The exit-loopback-branch decision
    564      *  is sufficient to let me make an appropriate enter, exit, loop
    565      *  determination.  See codegen.g
    566      */
    567     public StateCluster build_Astar(StateCluster A) {
    568 		NFAState bypassDecisionState = newState();
    569 		bypassDecisionState.setDescription("enter loop path of ()* block");
    570         NFAState optionalAlt = newState();
    571         optionalAlt.setDescription("epsilon path of ()* block");
    572         NFAState blockEndNFAState = newState();
    573 		blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
    574 
    575 		// don't reuse A.right as loopback if it's right edge of another block
    576 		if ( A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK ) {
    577 			// nested A* so make another tail node to be the loop back
    578 			// instead of the usual A.right which is the EOB for inner loop
    579 			NFAState extraRightEdge = newState();
    580 			transitionBetweenStates(A.right, extraRightEdge, Label.EPSILON);
    581 			A.right = extraRightEdge;
    582 		}
    583 
    584 		// convert A's end block to loopback
    585 		A.right.setDescription("()* loopback");
    586 		// Transition 1 to actual block of stuff
    587         transitionBetweenStates(bypassDecisionState, A.left, Label.EPSILON);
    588         // Transition 2 optional to bypass
    589         transitionBetweenStates(bypassDecisionState, optionalAlt, Label.EPSILON);
    590 		transitionBetweenStates(optionalAlt, blockEndNFAState, Label.EPSILON);
    591         // Transition 1 of end block exits
    592         transitionBetweenStates(A.right, blockEndNFAState, Label.EPSILON);
    593         // Transition 2 of end block loops
    594         transitionBetweenStates(A.right, A.left, Label.EPSILON);
    595 
    596 		bypassDecisionState.decisionStateType = NFAState.BYPASS;
    597 		A.left.decisionStateType = NFAState.BLOCK_START;
    598 		A.right.decisionStateType = NFAState.LOOPBACK;
    599 
    600 		// set EOB markers for Jean
    601 		A.left.endOfBlockStateNumber = A.right.stateNumber;
    602 		bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
    603 
    604         StateCluster g = new StateCluster(bypassDecisionState, blockEndNFAState);
    605         return g;
    606     }
    607 
    608     /** Build an NFA predictor for special rule called Tokens manually that
    609      *  predicts which token will succeed.  The refs to the rules are not
    610      *  RuleRefTransitions as I want DFA conversion to stop at the EOT
    611      *  transition on the end of each token, rather than return to Tokens rule.
    612      *  If I used normal build_alternativeBlock for this, the RuleRefTransitions
    613      *  would save return address when jumping away from Tokens rule.
    614      *
    615      *  All I do here is build n new states for n rules with an epsilon
    616      *  edge to the rule start states and then to the next state in the
    617      *  list:
    618      *
    619      *   o->(A)  (a state links to start of A and to next in list)
    620      *   |
    621      *   o->(B)
    622      *   |
    623      *   ...
    624      *   |
    625      *   o->(Z)
    626 	 *
    627 	 *  This is the NFA created for the artificial rule created in
    628 	 *  Grammar.addArtificialMatchTokensRule().
    629 	 *
    630 	 *  11/28/2005: removed so we can use normal rule construction for Tokens.
    631     public NFAState build_ArtificialMatchTokensRuleNFA() {
    632         int altNum = 1;
    633         NFAState firstAlt = null; // the start state for the "rule"
    634         NFAState prevAlternative = null;
    635         Iterator iter = nfa.grammar.getRules().iterator();
    636 		// TODO: add a single decision node/state for good description
    637         while (iter.hasNext()) {
    638 			Rule r = (Rule) iter.next();
    639             String ruleName = r.name;
    640 			String modifier = nfa.grammar.getRuleModifier(ruleName);
    641             if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
    642 				 (modifier!=null &&
    643 				  modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
    644 			{
    645                 continue; // don't loop to yourself or do nontoken rules
    646             }
    647             NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
    648             NFAState left = newState();
    649             left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
    650             transitionBetweenStates(left, ruleStartState, Label.EPSILON);
    651             // Are we the first alternative?
    652             if ( firstAlt==null ) {
    653                 firstAlt = left; // track extreme top left node as rule start
    654             }
    655             else {
    656                 // if not first alternative, must link to this alt from previous
    657                 transitionBetweenStates(prevAlternative, left, Label.EPSILON);
    658             }
    659             prevAlternative = left;
    660             altNum++;
    661         }
    662 		firstAlt.decisionStateType = NFAState.BLOCK_START;
    663 
    664         return firstAlt;
    665     }
    666 	 */
    667 
    668     /** Build an atom with all possible values in its label */
    669     public StateCluster build_Wildcard(GrammarAST associatedAST) {
    670         NFAState left = newState();
    671         NFAState right = newState();
    672         left.associatedASTNode = associatedAST;
    673         right.associatedASTNode = associatedAST;
    674         Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
    675         Transition e = new Transition(label,right);
    676         left.addTransition(e);
    677         StateCluster g = new StateCluster(left, right);
    678         return g;
    679     }
    680 
    681     /** Build a subrule matching ^(. .*) (any tree or node). Let's use
    682      *  (^(. .+) | .) to be safe.
    683      */
    684     public StateCluster build_WildcardTree(GrammarAST associatedAST) {
    685         StateCluster wildRoot = build_Wildcard(associatedAST);
    686 
    687         StateCluster down = build_Atom(Label.DOWN, associatedAST);
    688         wildRoot = build_AB(wildRoot,down); // hook in; . DOWN
    689 
    690         // make .+
    691         StateCluster wildChildren = build_Wildcard(associatedAST);
    692         wildChildren = build_Aplus(wildChildren);
    693         wildRoot = build_AB(wildRoot,wildChildren); // hook in; . DOWN .+
    694 
    695         StateCluster up = build_Atom(Label.UP, associatedAST);
    696         wildRoot = build_AB(wildRoot,up); // hook in; . DOWN .+ UP
    697 
    698         // make optional . alt
    699         StateCluster optionalNodeAlt = build_Wildcard(associatedAST);
    700 
    701         List<StateCluster> alts = new ArrayList<StateCluster>();
    702         alts.add(wildRoot);
    703         alts.add(optionalNodeAlt);
    704         StateCluster blk = build_AlternativeBlock(alts);
    705 
    706         return blk;
    707     }
    708 
    709     /** Given a collapsed block of alts (a set of atoms), pull out
    710      *  the set and return it.
    711      */
    712     protected IntSet getCollapsedBlockAsSet(State blk) {
    713         State s0 = blk;
    714         if ( s0!=null && s0.transition(0)!=null ) {
    715             State s1 = s0.transition(0).target;
    716             if ( s1!=null && s1.transition(0)!=null ) {
    717                 Label label = s1.transition(0).label;
    718                 if ( label.isSet() ) {
    719                     return label.getSet();
    720                 }
    721             }
    722         }
    723         return null;
    724     }
    725 
    726 	private void transitionBetweenStates(NFAState a, NFAState b, int label) {
    727 		Transition e = new Transition(label,b);
    728 		a.addTransition(e);
    729 	}
    730 }
    731