Home | History | Annotate | Download | only in analysis
      1 /*
      2  * [The "BSD license"]
      3  *  Copyright (c) 2010 Terence Parr
      4  *  All rights reserved.
      5  *
      6  *  Redistribution and use in source and binary forms, with or without
      7  *  modification, are permitted provided that the following conditions
      8  *  are met:
      9  *  1. Redistributions of source code must retain the above copyright
     10  *      notice, this list of conditions and the following disclaimer.
     11  *  2. Redistributions in binary form must reproduce the above copyright
     12  *      notice, this list of conditions and the following disclaimer in the
     13  *      documentation and/or other materials provided with the distribution.
     14  *  3. The name of the author may not be used to endorse or promote products
     15  *      derived from this software without specific prior written permission.
     16  *
     17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 package org.antlr.analysis;
     29 
     30 /** A tree node for tracking the call chains for NFAs that invoke
     31  *  other NFAs.  These trees only have to point upwards to their parents
     32  *  so we can walk back up the tree (i.e., pop stuff off the stack).  We
     33  *  never walk from stack down down through the children.
     34  *
     35  *  Each alt predicted in a decision has its own context tree,
     36  *  representing all possible return nodes.  The initial stack has
     37  *  EOF ("$") in it.  So, for m alternative productions, the lookahead
     38  *  DFA will have m NFAContext trees.
     39  *
     40  *  To "push" a new context, just do "new NFAContext(context-parent, state)"
     41  *  which will add itself to the parent.  The root is NFAContext(null, null).
     42  *
     43  *  The complete context for an NFA configuration is the set of invoking states
     44  *  on the path from this node thru the parent pointers to the root.
     45  */
     46 public class NFAContext {
     47 	/** This is similar to Bermudez's m constant in his LAR(m) where
     48 	 *  you bound the stack so your states don't explode.  The main difference
     49 	 *  is that I bound only recursion on the stack, not the simple stack size.
     50 	 *  This looser constraint will let the conversion roam further to find
     51 	 *  lookahead to resolve a decision.
     52 	 *
     53 	 *  Bermudez's m operates differently as it is his LR stack depth
     54 	 *  I'm pretty sure it therefore includes all stack symbols.  Here I
     55 	 *  restrict the size of an NFA configuration to be finite because a
     56 	 *  stack component may mention the same NFA invocation state at
     57 	 *  most m times.  Hence, the number of DFA states will not grow forever.
     58 	 *  With recursive rules like
     59 	 *
     60 	 *    e : '(' e ')' | INT ;
     61 	 *
     62 	 *  you could chase your tail forever if somebody said "s : e '.' | e ';' ;"
     63 	 *  This constant prevents new states from being created after a stack gets
     64 	 *  "too big".  Actually (12/14/2007) I realize that this example is
     65 	 *  trapped by the non-LL(*) detector for recursion in > 1 alt.  Here is
     66 	 *  an example that trips stack overflow:
     67 	 *
     68 	 *	  s : a Y | A A A A A X ; // force recursion past m=4
     69 	 *	  a : A a | Q;
     70 	 *
     71 	 *  If that were:
     72 	 *
     73 	 *	  s : a Y | A+ X ;
     74 	 *
     75 	 *  it could loop forever.
     76 	 *
     77 	 *  Imagine doing a depth-first search on the e DFA...as you chase an input
     78 	 *  sequence you can recurse to same rule such as e above.  You'd have a
     79 	 *  chain of ((((.  When you get do some point, you have to give up.  The
     80 	 *  states in the chain will have longer and longer NFA config stacks.
     81 	 *  Must limit size.
     82 	 *
     83 	 *  max=0 implies you cannot ever jump to another rule during closure.
     84 	 *  max=1 implies you can make as many calls as you want--you just
     85 	 *        can't ever visit a state that is on your rule invocation stack.
     86 	 * 		  I.e., you cannot ever recurse.
     87 	 *  max=2 implies you are able to recurse once (i.e., call a rule twice
     88 	 *  	  from the same place).
     89 	 *
     90 	 *  This tracks recursion to a rule specific to an invocation site!
     91 	 *  It does not detect multiple calls to a rule from different rule
     92 	 *  invocation states.  We are guaranteed to terminate because the
     93 	 *  stack can only grow as big as the number of NFA states * max.
     94 	 *
     95 	 *  I noticed that the Java grammar didn't work with max=1, but did with
     96 	 *  max=4.  Let's set to 4. Recursion is sometimes needed to resolve some
     97 	 *  fixed lookahead decisions.
     98 	 */
     99 	public static int MAX_SAME_RULE_INVOCATIONS_PER_NFA_CONFIG_STACK = 4;
    100 
    101     public NFAContext parent;
    102 
    103     /** The NFA state that invoked another rule's start state is recorded
    104      *  on the rule invocation context stack.
    105      */
    106     public NFAState invokingState;
    107 
    108     /** Computing the hashCode is very expensive and closureBusy()
    109      *  uses it to track when it's seen a state|ctx before to avoid
    110      *  infinite loops.  As we add new contexts, record the hash code
    111      *  as this.invokingState + parent.cachedHashCode.  Avoids walking
    112      *  up the tree for every hashCode().  Note that this caching works
    113      *  because a context is a monotonically growing tree of context nodes
    114      *  and nothing on the stack is ever modified...ctx just grows
    115      *  or shrinks.
    116      */
    117     protected int cachedHashCode;
    118 
    119     public NFAContext(NFAContext parent, NFAState invokingState) {
    120         this.parent = parent;
    121         this.invokingState = invokingState;
    122         if ( invokingState!=null ) {
    123             this.cachedHashCode = invokingState.stateNumber;
    124         }
    125         if ( parent!=null ) {
    126             this.cachedHashCode += parent.cachedHashCode;
    127         }
    128     }
    129 
    130 	/** Two contexts are equals() if both have
    131 	 *  same call stack; walk upwards to the root.
    132 	 *  Recall that the root sentinel node has no invokingStates and no parent.
    133 	 *  Note that you may be comparing contexts in different alt trees.
    134 	 *
    135 	 *  The hashCode is now cheap as it's computed once upon each context
    136 	 *  push on the stack.  Use it to make equals() more efficient.
    137 	 */
    138 	@Override
    139 	public boolean equals(Object o) {
    140 		NFAContext other = ((NFAContext)o);
    141 		if ( this.cachedHashCode != other.cachedHashCode ) {
    142 			return false; // can't be same if hash is different
    143 		}
    144 		if ( this==other ) {
    145 			return true;
    146 		}
    147 		// System.out.println("comparing "+this+" with "+other);
    148 		NFAContext sp = this;
    149 		while ( sp.parent!=null && other.parent!=null ) {
    150 			if ( sp.invokingState != other.invokingState ) {
    151 				return false;
    152 			}
    153 			sp = sp.parent;
    154 			other = other.parent;
    155 		}
    156 		if ( !(sp.parent==null && other.parent==null) ) {
    157 			return false; // both pointers must be at their roots after walk
    158 		}
    159 		return true;
    160 	}
    161 
    162 	/** Two contexts conflict() if they are equals() or one is a stack suffix
    163 	 *  of the other.  For example, contexts [21 12 $] and [21 9 $] do not
    164 	 *  conflict, but [21 $] and [21 12 $] do conflict.  Note that I should
    165 	 *  probably not show the $ in this case.  There is a dummy node for each
    166 	 *  stack that just means empty; $ is a marker that's all.
    167 	 *
    168 	 *  This is used in relation to checking conflicts associated with a
    169 	 *  single NFA state's configurations within a single DFA state.
    170 	 *  If there are configurations s and t within a DFA state such that
    171 	 *  s.state=t.state && s.alt != t.alt && s.ctx conflicts t.ctx then
    172 	 *  the DFA state predicts more than a single alt--it's nondeterministic.
    173 	 *  Two contexts conflict if they are the same or if one is a suffix
    174 	 *  of the other.
    175 	 *
    176 	 *  When comparing contexts, if one context has a stack and the other
    177 	 *  does not then they should be considered the same context.  The only
    178 	 *  way for an NFA state p to have an empty context and a nonempty context
    179 	 *  is the case when closure falls off end of rule without a call stack
    180 	 *  and re-enters the rule with a context.  This resolves the issue I
    181 	 *  discussed with Sriram Srinivasan Feb 28, 2005 about not terminating
    182 	 *  fast enough upon nondeterminism.
    183 	 */
    184 	public boolean conflictsWith(NFAContext other) {
    185 		return this.suffix(other); // || this.equals(other);
    186 	}
    187 
    188 	/** [$] suffix any context
    189 	 *  [21 $] suffix [21 12 $]
    190 	 *  [21 12 $] suffix [21 $]
    191 	 *  [21 18 $] suffix [21 18 12 9 $]
    192 	 *  [21 18 12 9 $] suffix [21 18 $]
    193 	 *  [21 12 $] not suffix [21 9 $]
    194 	 *
    195 	 *  Example "[21 $] suffix [21 12 $]" means: rule r invoked current rule
    196 	 *  from state 21.  Rule s invoked rule r from state 12 which then invoked
    197 	 *  current rule also via state 21.  While the context prior to state 21
    198 	 *  is different, the fact that both contexts emanate from state 21 implies
    199 	 *  that they are now going to track perfectly together.  Once they
    200 	 *  converged on state 21, there is no way they can separate.  In other
    201 	 *  words, the prior stack state is not consulted when computing where to
    202 	 *  go in the closure operation.  ?$ and ??$ are considered the same stack.
    203 	 *  If ? is popped off then $ and ?$ remain; they are now an empty and
    204 	 *  nonempty context comparison.  So, if one stack is a suffix of
    205 	 *  another, then it will still degenerate to the simple empty stack
    206 	 *  comparison case.
    207 	 */
    208 	protected boolean suffix(NFAContext other) {
    209 		NFAContext sp = this;
    210 		// if one of the contexts is empty, it never enters loop and returns true
    211 		while ( sp.parent!=null && other.parent!=null ) {
    212 			if ( sp.invokingState != other.invokingState ) {
    213 				return false;
    214 			}
    215 			sp = sp.parent;
    216 			other = other.parent;
    217 		}
    218 		//System.out.println("suffix");
    219 		return true;
    220 	}
    221 
    222     /** Walk upwards to the root of the call stack context looking
    223      *  for a particular invoking state.
    224 	public boolean contains(int state) {
    225         NFAContext sp = this;
    226 		int n = 0; // track recursive invocations of state
    227 		System.out.println("this.context is "+sp);
    228 		while ( sp.parent!=null ) {
    229             if ( sp.invokingState.stateNumber == state ) {
    230 				return true;
    231             }
    232             sp = sp.parent;
    233         }
    234         return false;
    235     }
    236 	 */
    237 
    238 	/** Given an NFA state number, how many times has the NFA-to-DFA
    239 	 *  conversion pushed that state on the stack?  In other words,
    240 	 *  the NFA state must be a rule invocation state and this method
    241 	 *  tells you how many times you've been to this state.  If none,
    242 	 *  then you have not called the target rule from this state before
    243 	 *  (though another NFA state could have called that target rule).
    244 	 *  If n=1, then you've been to this state before during this
    245 	 *  DFA construction and are going to invoke that rule again.
    246 	 *
    247 	 *  Note that many NFA states can invoke rule r, but we ignore recursion
    248 	 *  unless you hit the same rule invocation state again.
    249 	 */
    250 	public int recursionDepthEmanatingFromState(int state) {
    251 		NFAContext sp = this;
    252 		int n = 0; // track recursive invocations of target from this state
    253 		//System.out.println("this.context is "+sp);
    254 		while ( sp.parent!=null ) {
    255 			if ( sp.invokingState.stateNumber == state ) {
    256 				n++;
    257 			}
    258 			sp = sp.parent;
    259 		}
    260 		return n;
    261 	}
    262 
    263 	@Override
    264     public int hashCode() {
    265         return cachedHashCode;
    266         /*
    267         int h = 0;
    268         NFAContext sp = this;
    269         while ( sp.parent!=null ) {
    270             h += sp.invokingState.getStateNumber();
    271             sp = sp.parent;
    272         }
    273         return h;
    274         */
    275     }
    276 
    277 	/** A context is empty if there is no parent; meaning nobody pushed
    278 	 *  anything on the call stack.
    279 	 */
    280 	public boolean isEmpty() {
    281 		return parent==null;
    282 	}
    283 
    284 	@Override
    285     public String toString() {
    286         StringBuilder buf = new StringBuilder();
    287         NFAContext sp = this;
    288         buf.append("[");
    289         while ( sp.parent!=null ) {
    290             buf.append(sp.invokingState.stateNumber);
    291             buf.append(" ");
    292             sp = sp.parent;
    293         }
    294         buf.append("$]");
    295         return buf.toString();
    296     }
    297 }
    298