Home | History | Annotate | Download | only in analysis
      1 /*
      2  * [The "BSD license"]
      3  *  Copyright (c) 2010 Terence Parr
      4  *  All rights reserved.
      5  *
      6  *  Redistribution and use in source and binary forms, with or without
      7  *  modification, are permitted provided that the following conditions
      8  *  are met:
      9  *  1. Redistributions of source code must retain the above copyright
     10  *      notice, this list of conditions and the following disclaimer.
     11  *  2. Redistributions in binary form must reproduce the above copyright
     12  *      notice, this list of conditions and the following disclaimer in the
     13  *      documentation and/or other materials provided with the distribution.
     14  *  3. The name of the author may not be used to endorse or promote products
     15  *      derived from this software without specific prior written permission.
     16  *
     17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 package org.antlr.analysis;
     29 
     30 import org.antlr.misc.IntSet;
     31 import org.antlr.misc.MultiMap;
     32 import org.antlr.misc.OrderedHashSet;
     33 import org.antlr.misc.Utils;
     34 import org.antlr.tool.Grammar;
     35 
     36 import java.util.*;
     37 
     38 /** A DFA state represents a set of possible NFA configurations.
     39  *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
     40  *  to keep track of all possible states the NFA can be in after
     41  *  reading each input symbol.  That is to say, after reading
     42  *  input a1a2..an, the DFA is in a state that represents the
     43  *  subset T of the states of the NFA that are reachable from the
     44  *  NFA's start state along some path labeled a1a2..an."
     45  *  In conventional NFA→DFA conversion, therefore, the subset T
     46  *  would be a bitset representing the set of states the
     47  *  NFA could be in.  We need to track the alt predicted by each
     48  *  state as well, however.  More importantly, we need to maintain
     49  *  a stack of states, tracking the closure operations as they
     50  *  jump from rule to rule, emulating rule invocations (method calls).
     51  *  Recall that NFAs do not normally have a stack like a pushdown-machine
     52  *  so I have to add one to simulate the proper lookahead sequences for
     53  *  the underlying LL grammar from which the NFA was derived.
     54  *
     55  *  I use a list of NFAConfiguration objects.  An NFAConfiguration
     56  *  is both a state (ala normal conversion) and an NFAContext describing
     57  *  the chain of rules (if any) followed to arrive at that state.  There
     58  *  is also the semantic context, which is the "set" of predicates found
     59  *  on the path to this configuration.
     60  *
     61  *  A DFA state may have multiple references to a particular state,
     62  *  but with different NFAContexts (with same or different alts)
     63  *  meaning that state was reached via a different set of rule invocations.
     64  */
     65 public class DFAState extends State {
     66     public static final int INITIAL_NUM_TRANSITIONS = 4;
     67 	public static final int PREDICTED_ALT_UNSET = NFA.INVALID_ALT_NUMBER-1;
     68 
     69     /** We are part of what DFA?  Use this ref to get access to the
     70      *  context trees for an alt.
     71      */
     72     public DFA dfa;
     73 
     74     /** Track the transitions emanating from this DFA state.  The List
     75      *  elements are Transition objects.
     76      */
     77     protected List<Transition> transitions =
     78 		new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
     79 
     80 	/** When doing an acyclic DFA, this is the number of lookahead symbols
     81 	 *  consumed to reach this state.  This value may be nonzero for most
     82 	 *  dfa states, but it is only a valid value if the user has specified
     83 	 *  a max fixed lookahead.
     84 	 */
     85     protected int k;
     86 
     87     /** The NFA&rarr;DFA algorithm may terminate leaving some states
     88      *  without a path to an accept state, implying that upon certain
     89      *  input, the decision is not deterministic--no decision about
     90      *  predicting a unique alternative can be made.  Recall that an
     91      *  accept state is one in which a unique alternative is predicted.
     92      */
     93     protected int acceptStateReachable = DFA.REACHABLE_UNKNOWN;
     94 
     95     /** Rather than recheck every NFA configuration in a DFA state (after
     96      *  resolving) in findNewDFAStatesAndAddDFATransitions just check
     97      *  this boolean.  Saves a linear walk perhaps DFA state creation.
     98      *  Every little bit helps.
     99      */
    100     protected boolean resolvedWithPredicates = false;
    101 
    102 	/** If a closure operation finds that we tried to invoke the same
    103 	 *  rule too many times (stack would grow beyond a threshold), it
    104 	 *  marks the state has aborted and notifies the DecisionProbe.
    105 	 */
    106 	public boolean abortedDueToRecursionOverflow = false;
    107 
    108 	/** If we detect recursion on more than one alt, decision is non-LL(*),
    109 	 *  but try to isolate it to only those states whose closure operations
    110 	 *  detect recursion.  There may be other alts that are cool:
    111 	 *
    112 	 *  a : recur '.'
    113 	 *    | recur ';'
    114 	 *    | X Y  // LL(2) decision; don't abort and use k=1 plus backtracking
    115 	 *    | X Z
    116 	 *    ;
    117 	 *
    118 	 *  12/13/2007: Actually this has caused problems.  If k=*, must terminate
    119 	 *  and throw out entire DFA; retry with k=1.  Since recursive, do not
    120 	 *  attempt more closure ops as it may take forever.  Exception thrown
    121 	 *  now and we simply report the problem.  If synpreds exist, I'll retry
    122 	 *  with k=1.
    123 	 */
    124 	protected boolean abortedDueToMultipleRecursiveAlts = false;
    125 
    126 	/** Build up the hash code for this state as NFA configurations
    127      *  are added as it's monotonically increasing list of configurations.
    128      */
    129     protected int cachedHashCode;
    130 
    131 	protected int cachedUniquelyPredicatedAlt = PREDICTED_ALT_UNSET;
    132 
    133 	public int minAltInConfigurations=Integer.MAX_VALUE;
    134 
    135 	public boolean atLeastOneConfigurationHasAPredicate = false;
    136 
    137 	/** The set of NFA configurations (state,alt,context) for this DFA state */
    138     public OrderedHashSet<NFAConfiguration> nfaConfigurations =
    139 		new OrderedHashSet<NFAConfiguration>();
    140 
    141 	public List<NFAConfiguration> configurationsWithLabeledEdges =
    142 		new ArrayList<NFAConfiguration>();
    143 
    144 	/** Used to prevent the closure operation from looping to itself and
    145      *  hence looping forever.  Sensitive to the NFA state, the alt, and
    146      *  the stack context.  This just the nfa config set because we want to
    147 	 *  prevent closures only on states contributed by closure not reach
    148 	 *  operations.
    149 	 *
    150 	 *  Two configurations identical including semantic context are
    151 	 *  considered the same closure computation.  @see NFAToDFAConverter.closureBusy().
    152      */
    153 	protected Set<NFAConfiguration> closureBusy = new HashSet<NFAConfiguration>();
    154 
    155 	/** As this state is constructed (i.e., as NFA states are added), we
    156      *  can easily check for non-epsilon transitions because the only
    157      *  transition that could be a valid label is transition(0).  When we
    158      *  process this node eventually, we'll have to walk all states looking
    159      *  for all possible transitions.  That is of the order: size(label space)
    160      *  times size(nfa states), which can be pretty damn big.  It's better
    161      *  to simply track possible labels.
    162      */
    163     protected OrderedHashSet<Label> reachableLabels;
    164 
    165     public DFAState(DFA dfa) {
    166         this.dfa = dfa;
    167     }
    168 
    169 	public void reset() {
    170 		//nfaConfigurations = null; // getGatedPredicatesInNFAConfigurations needs
    171 		configurationsWithLabeledEdges = null;
    172 		closureBusy = null;
    173 		reachableLabels = null;
    174 	}
    175 
    176 	@Override
    177 	public Transition transition(int i) {
    178         return transitions.get(i);
    179     }
    180 
    181 	@Override
    182     public int getNumberOfTransitions() {
    183         return transitions.size();
    184     }
    185 
    186 	@Override
    187     public void addTransition(Transition t) {
    188         transitions.add(t);
    189     }
    190 
    191 	/** Add a transition from this state to target with label.  Return
    192 	 *  the transition number from 0..n-1.
    193 	 */
    194     public int addTransition(DFAState target, Label label) {
    195 		transitions.add( new Transition(label, target) );
    196 		return transitions.size()-1;
    197     }
    198 
    199     public Transition getTransition(int trans) {
    200         return transitions.get(trans);
    201     }
    202 
    203 	public void removeTransition(int trans) {
    204 		transitions.remove(trans);
    205 	}
    206 
    207     /** Add an NFA configuration to this DFA node.  Add uniquely
    208      *  an NFA state/alt/syntactic&amp;semantic context (chain of invoking state(s)
    209      *  and semantic predicate contexts).
    210      *
    211      *  I don't see how there could be two configurations with same
    212      *  state|alt|synCtx and different semantic contexts because the
    213      *  semantic contexts are computed along the path to a particular state
    214      *  so those two configurations would have to have the same predicate.
    215      *  Nonetheless, the addition of configurations is unique on all
    216      *  configuration info.  I guess I'm saying that syntactic context
    217      *  implies semantic context as the latter is computed according to the
    218      *  former.
    219      *
    220      *  As we add configurations to this DFA state, track the set of all possible
    221      *  transition labels so we can simply walk it later rather than doing a
    222      *  loop over all possible labels in the NFA.
    223      */
    224     public void addNFAConfiguration(NFAState state, NFAConfiguration c) {
    225 		if ( nfaConfigurations.contains(c) ) {
    226             return;
    227         }
    228 
    229         nfaConfigurations.add(c);
    230 
    231 		// track min alt rather than compute later
    232 		if ( c.alt < minAltInConfigurations ) {
    233 			minAltInConfigurations = c.alt;
    234 		}
    235 
    236 		if ( c.semanticContext!=SemanticContext.EMPTY_SEMANTIC_CONTEXT ) {
    237 			atLeastOneConfigurationHasAPredicate = true;
    238 		}
    239 
    240 		// update hashCode; for some reason using context.hashCode() also
    241         // makes the GC take like 70% of the CPU and is slow!
    242         cachedHashCode += c.state + c.alt;
    243 
    244 		// update reachableLabels
    245 		// We're adding an NFA state; check to see if it has a non-epsilon edge
    246 		if ( state.transition[0] != null ) {
    247 			Label label = state.transition[0].label;
    248 			if ( !(label.isEpsilon()||label.isSemanticPredicate()) ) {
    249 				// this NFA state has a non-epsilon edge, track for fast
    250 				// walking later when we do reach on this DFA state we're
    251 				// building.
    252 				configurationsWithLabeledEdges.add(c);
    253 				if ( state.transition[1] ==null ) {
    254 					// later we can check this to ignore o-A->o states in closure
    255 					c.singleAtomTransitionEmanating = true;
    256 				}
    257 				addReachableLabel(label);
    258 			}
    259 		}
    260     }
    261 
    262 	public NFAConfiguration addNFAConfiguration(NFAState state,
    263 												int alt,
    264 												NFAContext context,
    265 												SemanticContext semanticContext)
    266 	{
    267 		NFAConfiguration c = new NFAConfiguration(state.stateNumber,
    268 												  alt,
    269 												  context,
    270 												  semanticContext);
    271 		addNFAConfiguration(state, c);
    272 		return c;
    273 	}
    274 
    275 	/** Add label uniquely and disjointly; intersection with
    276      *  another set or int/char forces breaking up the set(s).
    277      *
    278      *  Example, if reachable list of labels is [a..z, {k,9}, 0..9],
    279      *  the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
    280      *
    281      *  As we add NFA configurations to a DFA state, we might as well track
    282      *  the set of all possible transition labels to make the DFA conversion
    283      *  more efficient.  W/o the reachable labels, we'd need to check the
    284      *  whole vocabulary space (could be 0..\uFFFF)!  The problem is that
    285      *  labels can be sets, which may overlap with int labels or other sets.
    286      *  As we need a deterministic set of transitions from any
    287      *  state in the DFA, we must make the reachable labels set disjoint.
    288      *  This operation amounts to finding the character classes for this
    289      *  DFA state whereas with tools like flex, that need to generate a
    290      *  homogeneous DFA, must compute char classes across all states.
    291      *  We are going to generate DFAs with heterogeneous states so we
    292      *  only care that the set of transitions out of a single state are
    293      *  unique. :)
    294      *
    295      *  The idea for adding a new set, t, is to look for overlap with the
    296      *  elements of existing list s.  Upon overlap, replace
    297      *  existing set s[i] with two new disjoint sets, s[i]-t and s[i]&amp;t.
    298      *  (if s[i]-t is nil, don't add).  The remainder is t-s[i], which is
    299      *  what you want to add to the set minus what was already there.  The
    300      *  remainder must then be compared against the i+1..n elements in s
    301      *  looking for another collision.  Each collision results in a smaller
    302      *  and smaller remainder.  Stop when you run out of s elements or
    303      *  remainder goes to nil.  If remainder is non nil when you run out of
    304      *  s elements, then add remainder to the end.
    305      *
    306      *  Single element labels are treated as sets to make the code uniform.
    307      */
    308     protected void addReachableLabel(Label label) {
    309 		if ( reachableLabels==null ) {
    310 			reachableLabels = new OrderedHashSet<Label>();
    311 		}
    312 		/*
    313 		System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar));
    314 		System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
    315 				"reachableLabels="+reachableLabels.toString());
    316 				*/
    317 		if ( reachableLabels.contains(label) ) { // exact label present
    318             return;
    319         }
    320         IntSet t = label.getSet();
    321         IntSet remainder = t; // remainder starts out as whole set to add
    322         int n = reachableLabels.size(); // only look at initial elements
    323         // walk the existing list looking for the collision
    324         for (int i=0; i<n; i++) {
    325 			Label rl = reachableLabels.get(i);
    326             /*
    327 			System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+
    328                     rl.toString(dfa.nfa.grammar)+"="+
    329                     intersection.toString(dfa.nfa.grammar));
    330             */
    331 			if ( !Label.intersect(label, rl) ) {
    332                 continue;
    333             }
    334 			//System.out.println(label+" collides with "+rl);
    335 
    336 			// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
    337             // (ignoring s_i-t if nil; don't put in list)
    338 
    339             // Replace existing s_i with intersection since we
    340             // know that will always be a non nil character class
    341 			IntSet s_i = rl.getSet();
    342 			IntSet intersection = s_i.and(t);
    343             reachableLabels.set(i, new Label(intersection));
    344 
    345             // Compute s_i-t to see what is in current set and not in incoming
    346             IntSet existingMinusNewElements = s_i.subtract(t);
    347 			//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
    348             if ( !existingMinusNewElements.isNil() ) {
    349                 // found a new character class, add to the end (doesn't affect
    350                 // outer loop duration due to n computation a priori.
    351                 Label newLabel = new Label(existingMinusNewElements);
    352                 reachableLabels.add(newLabel);
    353             }
    354 
    355 			/*
    356             System.out.println("after collision, " +
    357                     "reachableLabels="+reachableLabels.toString());
    358 					*/
    359 
    360             // anything left to add to the reachableLabels?
    361             remainder = t.subtract(s_i);
    362             if ( remainder.isNil() ) {
    363                 break; // nothing left to add to set.  done!
    364             }
    365 
    366             t = remainder;
    367         }
    368         if ( !remainder.isNil() ) {
    369 			/*
    370 			System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " +
    371 					"reachableLabels="+reachableLabels.toString());
    372 			System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar));
    373             */
    374 			Label newLabel = new Label(remainder);
    375             reachableLabels.add(newLabel);
    376         }
    377 		/*
    378 		System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
    379 				"reachableLabels="+reachableLabels.toString());
    380 				*/
    381     }
    382 
    383     public OrderedHashSet<Label> getReachableLabels() {
    384         return reachableLabels;
    385     }
    386 
    387 	public void setNFAConfigurations(OrderedHashSet<NFAConfiguration> configs) {
    388 		this.nfaConfigurations = configs;
    389 	}
    390 
    391     /** A decent hash for a DFA state is the sum of the NFA state/alt pairs.
    392      *  This is used when we add DFAState objects to the DFA.states Map and
    393      *  when we compare DFA states.  Computed in addNFAConfiguration()
    394      */
    395 	@Override
    396     public int hashCode() {
    397 		if ( cachedHashCode==0 ) {
    398 			// LL(1) algorithm doesn't use NFA configurations, which
    399 			// dynamically compute hashcode; must have something; use super
    400 			return super.hashCode();
    401 		}
    402 		return cachedHashCode;
    403     }
    404 
    405     /** Two DFAStates are equal if their NFA configuration sets are the
    406 	 *  same. This method is used to see if a DFA state already exists.
    407 	 *
    408      *  Because the number of alternatives and number of NFA configurations are
    409      *  finite, there is a finite number of DFA states that can be processed.
    410      *  This is necessary to show that the algorithm terminates.
    411 	 *
    412 	 *  Cannot test the DFA state numbers here because in DFA.addState we need
    413 	 *  to know if any other state exists that has this exact set of NFA
    414 	 *  configurations.  The DFAState state number is irrelevant.
    415      */
    416 	@Override
    417     public boolean equals(Object o) {
    418 		// compare set of NFA configurations in this set with other
    419         DFAState other = (DFAState)o;
    420 		return this.nfaConfigurations.equals(other.nfaConfigurations);
    421 	}
    422 
    423     /** Walk each configuration and if they are all the same alt, return
    424      *  that alt else return NFA.INVALID_ALT_NUMBER.  Ignore resolved
    425      *  configurations, but don't ignore resolveWithPredicate configs
    426      *  because this state should not be an accept state.  We need to add
    427      *  this to the work list and then have semantic predicate edges
    428      *  emanating from it.
    429      */
    430     public int getUniquelyPredictedAlt() {
    431 		if ( cachedUniquelyPredicatedAlt!=PREDICTED_ALT_UNSET ) {
    432 			return cachedUniquelyPredicatedAlt;
    433 		}
    434         int alt = NFA.INVALID_ALT_NUMBER;
    435 		int numConfigs = nfaConfigurations.size();
    436 		for (int i = 0; i < numConfigs; i++) {
    437 			NFAConfiguration configuration = nfaConfigurations.get(i);
    438 			// ignore anything we resolved; predicates will still result
    439 			// in transitions out of this state, so must count those
    440 			// configurations; i.e., don't ignore resolveWithPredicate configs
    441 			if ( configuration.resolved ) {
    442 				continue;
    443 			}
    444 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
    445 				alt = configuration.alt; // found first nonresolved alt
    446 			}
    447 			else if ( configuration.alt!=alt ) {
    448 				return NFA.INVALID_ALT_NUMBER;
    449 			}
    450 		}
    451 		this.cachedUniquelyPredicatedAlt = alt;
    452         return alt;
    453     }
    454 
    455 	/** Return the uniquely mentioned alt from the NFA configurations;
    456 	 *  Ignore the resolved bit etc...  Return INVALID_ALT_NUMBER
    457 	 *  if there is more than one alt mentioned.
    458 	 */
    459 	public int getUniqueAlt() {
    460 		int alt = NFA.INVALID_ALT_NUMBER;
    461 		int numConfigs = nfaConfigurations.size();
    462 		for (int i = 0; i < numConfigs; i++) {
    463 			NFAConfiguration configuration = nfaConfigurations.get(i);
    464 			if ( alt==NFA.INVALID_ALT_NUMBER ) {
    465 				alt = configuration.alt; // found first alt
    466 			}
    467 			else if ( configuration.alt!=alt ) {
    468 				return NFA.INVALID_ALT_NUMBER;
    469 			}
    470 		}
    471 		return alt;
    472 	}
    473 
    474 	/** When more than one alternative can match the same input, the first
    475 	 *  alternative is chosen to resolve the conflict.  The other alts
    476 	 *  are "turned off" by setting the "resolved" flag in the NFA
    477 	 *  configurations.  Return the set of disabled alternatives.  For
    478 	 *
    479 	 *  a : A | A | A ;
    480 	 *
    481 	 *  this method returns {2,3} as disabled.  This does not mean that
    482 	 *  the alternative is totally unreachable, it just means that for this
    483 	 *  DFA state, that alt is disabled.  There may be other accept states
    484 	 *  for that alt.
    485 	 */
    486 	public Set<Integer> getDisabledAlternatives() {
    487 		Set<Integer> disabled = new LinkedHashSet<Integer>();
    488 		int numConfigs = nfaConfigurations.size();
    489 		for (int i = 0; i < numConfigs; i++) {
    490 			NFAConfiguration configuration = nfaConfigurations.get(i);
    491 			if ( configuration.resolved ) {
    492 				disabled.add(Utils.integer(configuration.alt));
    493 			}
    494 		}
    495 		return disabled;
    496 	}
    497 
    498 	protected Set<Integer> getNonDeterministicAlts() {
    499 		int user_k = dfa.getUserMaxLookahead();
    500 		if ( user_k>0 && user_k==k ) {
    501 			// if fixed lookahead, then more than 1 alt is a nondeterminism
    502 			// if we have hit the max lookahead
    503 			return getAltSet();
    504 		}
    505 		else if ( abortedDueToMultipleRecursiveAlts || abortedDueToRecursionOverflow ) {
    506 			// if we had to abort for non-LL(*) state assume all alts are a problem
    507 			return getAltSet();
    508 		}
    509 		else {
    510 			return getConflictingAlts();
    511 		}
    512 	}
    513 
    514     /** Walk each NFA configuration in this DFA state looking for a conflict
    515      *  where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with
    516      *  context conflicting ctx predicts alts i and j.  Return an Integer set
    517 	 *  of the alternative numbers that conflict.  Two contexts conflict if
    518 	 *  they are equal or one is a stack suffix of the other or one is
    519 	 *  the empty context.
    520 	 *
    521      *  Use a hash table to record the lists of configs for each state
    522 	 *  as they are encountered.  We need only consider states for which
    523 	 *  there is more than one configuration.  The configurations' predicted
    524 	 *  alt must be different or must have different contexts to avoid a
    525 	 *  conflict.
    526 	 *
    527 	 *  Don't report conflicts for DFA states that have conflicting Tokens
    528 	 *  rule NFA states; they will be resolved in favor of the first rule.
    529      */
    530     protected Set<Integer> getConflictingAlts() {
    531 		// TODO this is called multiple times: cache result?
    532 		//System.out.println("getNondetAlts for DFA state "+stateNumber);
    533  		Set<Integer> nondeterministicAlts = new HashSet<Integer>();
    534 
    535 		// If only 1 NFA conf then no way it can be nondeterministic;
    536 		// save the overhead.  There are many o-a->o NFA transitions
    537 		// and so we save a hash map and iterator creation for each
    538 		// state.
    539 		int numConfigs = nfaConfigurations.size();
    540 		if ( numConfigs <=1 ) {
    541 			return null;
    542 		}
    543 
    544 		// First get a list of configurations for each state.
    545 		// Most of the time, each state will have one associated configuration.
    546 		MultiMap<Integer, NFAConfiguration> stateToConfigListMap =
    547 			new MultiMap<Integer, NFAConfiguration>();
    548 		for (int i = 0; i < numConfigs; i++) {
    549 			NFAConfiguration configuration = nfaConfigurations.get(i);
    550 			Integer stateI = Utils.integer(configuration.state);
    551 			stateToConfigListMap.map(stateI, configuration);
    552 		}
    553 		// potential conflicts are states with > 1 configuration and diff alts
    554 		Set<Integer> states = stateToConfigListMap.keySet();
    555 		int numPotentialConflicts = 0;
    556 		for (Integer stateI : states) {
    557 			boolean thisStateHasPotentialProblem = false;
    558 			List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI);
    559 			int alt=0;
    560 			int numConfigsForState = configsForState.size();
    561 			for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) {
    562 				NFAConfiguration c = configsForState.get(i);
    563 				if ( alt==0 ) {
    564 					alt = c.alt;
    565 				}
    566 				else if ( c.alt!=alt ) {
    567 					/*
    568 					System.out.println("potential conflict in state "+stateI+
    569 									   " configs: "+configsForState);
    570 					*/
    571 					// 11/28/2005: don't report closures that pinch back
    572 					// together in Tokens rule.  We want to silently resolve
    573 					// to the first token definition ala lex/flex by ignoring
    574 					// these conflicts.
    575 					// Also this ensures that lexers look for more and more
    576 					// characters (longest match) before resorting to predicates.
    577 					// TestSemanticPredicates.testLexerMatchesLongestThenTestPred()
    578 					// for example would terminate at state s1 and test predicate
    579 					// meaning input "ab" would test preds to decide what to
    580 					// do but it should match rule C w/o testing preds.
    581 					if ( dfa.nfa.grammar.type!=Grammar.LEXER ||
    582 						 !dfa.decisionNFAStartState.enclosingRule.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )
    583 					{
    584 						numPotentialConflicts++;
    585 						thisStateHasPotentialProblem = true;
    586 					}
    587 				}
    588 			}
    589 			if ( !thisStateHasPotentialProblem ) {
    590 				// remove NFA state's configurations from
    591 				// further checking; no issues with it
    592 				// (can't remove as it's concurrent modification; set to null)
    593 				stateToConfigListMap.put(stateI, null);
    594 			}
    595 		}
    596 
    597 		// a fast check for potential issues; most states have none
    598 		if ( numPotentialConflicts==0 ) {
    599 			return null;
    600 		}
    601 
    602 		// we have a potential problem, so now go through config lists again
    603 		// looking for different alts (only states with potential issues
    604 		// are left in the states set).  Now we will check context.
    605 		// For example, the list of configs for NFA state 3 in some DFA
    606 		// state might be:
    607 		//   [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2]
    608 		// I want to create a map from context to alts looking for overlap:
    609 		//   [28 18 $] -> 2
    610 		//   [28 $] -> 1
    611 		//   [$] -> 1,2
    612 		// Indeed a conflict exists as same state 3, same context [$], predicts
    613 		// alts 1 and 2.
    614 		// walk each state with potential conflicting configurations
    615 		for (Integer stateI : states) {
    616 			List<NFAConfiguration> configsForState = stateToConfigListMap.get(stateI);
    617 			// compare each configuration pair s, t to ensure:
    618 			// s.ctx different than t.ctx if s.alt != t.alt
    619 			int numConfigsForState = 0;
    620 			if ( configsForState!=null ) {
    621 				numConfigsForState = configsForState.size();
    622 			}
    623 			for (int i = 0; i < numConfigsForState; i++) {
    624 				NFAConfiguration s = configsForState.get(i);
    625 				for (int j = i+1; j < numConfigsForState; j++) {
    626 					NFAConfiguration t = configsForState.get(j);
    627 					// conflicts means s.ctx==t.ctx or s.ctx is a stack
    628 					// suffix of t.ctx or vice versa (if alts differ).
    629 					// Also a conflict if s.ctx or t.ctx is empty
    630 					if ( s.alt != t.alt && s.context.conflictsWith(t.context) ) {
    631 						nondeterministicAlts.add(Utils.integer(s.alt));
    632 						nondeterministicAlts.add(Utils.integer(t.alt));
    633 					}
    634 				}
    635 			}
    636 		}
    637 
    638 		if ( nondeterministicAlts.isEmpty() ) {
    639 			return null;
    640 		}
    641         return nondeterministicAlts;
    642     }
    643 
    644 	/** Get the set of all alts mentioned by all NFA configurations in this
    645 	 *  DFA state.
    646 	 */
    647 	public Set<Integer> getAltSet() {
    648 		int numConfigs = nfaConfigurations.size();
    649 		Set<Integer> alts = new HashSet<Integer>();
    650 		for (int i = 0; i < numConfigs; i++) {
    651 			NFAConfiguration configuration = nfaConfigurations.get(i);
    652 			alts.add(Utils.integer(configuration.alt));
    653 		}
    654 		if ( alts.isEmpty() ) {
    655 			return null;
    656 		}
    657 		return alts;
    658 	}
    659 
    660 	public Set<? extends SemanticContext> getGatedSyntacticPredicatesInNFAConfigurations() {
    661 		int numConfigs = nfaConfigurations.size();
    662 		Set<SemanticContext> synpreds = new HashSet<SemanticContext>();
    663 		for (int i = 0; i < numConfigs; i++) {
    664 			NFAConfiguration configuration = nfaConfigurations.get(i);
    665 			SemanticContext gatedPredExpr =
    666 				configuration.semanticContext.getGatedPredicateContext();
    667 			// if this is a manual syn pred (gated and syn pred), add
    668 			if ( gatedPredExpr!=null &&
    669 				 configuration.semanticContext.isSyntacticPredicate() )
    670 			{
    671 				synpreds.add(configuration.semanticContext);
    672 			}
    673 		}
    674 		if ( synpreds.isEmpty() ) {
    675 			return null;
    676 		}
    677 		return synpreds;
    678 	}
    679 
    680 	/** For gated productions, we need an OR'd list of all predicates for the
    681 	 *  target of an edge so we can gate the edge based upon the predicates
    682 	 *  associated with taking that path (if any).
    683 	 *
    684 	 *  For syntactic predicates, we only want to generate predicate
    685 	 *  evaluations as it transitions to an accept state; waste to
    686 	 *  do it earlier.  So, only add gated preds derived from manually-
    687 	 *  specified syntactic predicates if this is an accept state.
    688 	 *
    689 	 *  Also, since configurations w/o gated predicates are like true
    690 	 *  gated predicates, finding a configuration whose alt has no gated
    691 	 *  predicate implies we should evaluate the predicate to true. This
    692 	 *  means the whole edge has to be ungated. Consider:
    693 	 *
    694 	 *	 X : ('a' | {p}?=&gt; 'a')
    695 	 *	   | 'a' 'b'
    696 	 *	   ;
    697 	 *
    698 	 *  Here, you 'a' gets you from s0 to s1 but you can't test p because
    699 	 *  plain 'a' is ok.  It's also ok for starting alt 2.  Hence, you can't
    700 	 *  test p.  Even on the edge going to accept state for alt 1 of X, you
    701 	 *  can't test p.  You can get to the same place with and w/o the context.
    702 	 *  Therefore, it is never ok to test p in this situation.
    703 	 *
    704 	 *  TODO: cache this as it's called a lot; or at least set bit if &gt;1 present in state
    705 	 */
    706 	public SemanticContext getGatedPredicatesInNFAConfigurations() {
    707 		SemanticContext unionOfPredicatesFromAllAlts = null;
    708 		int numConfigs = nfaConfigurations.size();
    709 		for (int i = 0; i < numConfigs; i++) {
    710 			NFAConfiguration configuration = nfaConfigurations.get(i);
    711 			SemanticContext gatedPredExpr =
    712 				configuration.semanticContext.getGatedPredicateContext();
    713 			if ( gatedPredExpr==null ) {
    714 				// if we ever find a configuration w/o a gated predicate
    715 				// (even if it's a nongated predicate), we cannot gate
    716 				// the indident edges.
    717 				return null;
    718 			}
    719 			else if ( acceptState || !configuration.semanticContext.isSyntacticPredicate() ) {
    720 				// at this point we have a gated predicate and, due to elseif,
    721 				// we know it's an accept or not a syn pred.  In this case,
    722 				// it's safe to add the gated predicate to the union.  We
    723 				// only want to add syn preds if it's an accept state.  Other
    724 				// gated preds can be used with edges leading to accept states.
    725 				if ( unionOfPredicatesFromAllAlts==null ) {
    726 					unionOfPredicatesFromAllAlts = gatedPredExpr;
    727 				}
    728 				else {
    729 					unionOfPredicatesFromAllAlts =
    730 						SemanticContext.or(unionOfPredicatesFromAllAlts,gatedPredExpr);
    731 				}
    732 			}
    733 		}
    734 		if ( unionOfPredicatesFromAllAlts instanceof SemanticContext.TruePredicate ) {
    735 			return null;
    736 		}
    737 		return unionOfPredicatesFromAllAlts;
    738 	}
    739 
    740     /** Is an accept state reachable from this state? */
    741     public int getAcceptStateReachable() {
    742         return acceptStateReachable;
    743     }
    744 
    745     public void setAcceptStateReachable(int acceptStateReachable) {
    746         this.acceptStateReachable = acceptStateReachable;
    747     }
    748 
    749     public boolean isResolvedWithPredicates() {
    750         return resolvedWithPredicates;
    751     }
    752 
    753     /** Print all NFA states plus what alts they predict */
    754 	@Override
    755     public String toString() {
    756         StringBuilder buf = new StringBuilder();
    757         buf.append(stateNumber).append(":{");
    758 		for (int i = 0; i < nfaConfigurations.size(); i++) {
    759 			NFAConfiguration configuration = nfaConfigurations.get(i);
    760 			if ( i>0 ) {
    761 				buf.append(", ");
    762 			}
    763 			buf.append(configuration);
    764 		}
    765         buf.append("}");
    766         return buf.toString();
    767     }
    768 
    769 	public int getLookaheadDepth() {
    770 		return k;
    771 	}
    772 
    773 	public void setLookaheadDepth(int k) {
    774 		this.k = k;
    775 		if ( k > dfa.max_k ) { // track max k for entire DFA
    776 			dfa.max_k = k;
    777 		}
    778 	}
    779 
    780 }
    781