Home | History | Annotate | Download | only in runtime
      1 /*
      2  [The "BSD license"]
      3  Copyright (c) 2005-2009 Terence Parr
      4  All rights reserved.
      5 
      6  Redistribution and use in source and binary forms, with or without
      7  modification, are permitted provided that the following conditions
      8  are met:
      9  1. Redistributions of source code must retain the above copyright
     10      notice, this list of conditions and the following disclaimer.
     11  2. Redistributions in binary form must reproduce the above copyright
     12      notice, this list of conditions and the following disclaimer in the
     13      documentation and/or other materials provided with the distribution.
     14  3. The name of the author may not be used to endorse or promote products
     15      derived from this software without specific prior written permission.
     16 
     17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 package org.antlr.runtime;
     29 
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
     34 
     35 /** A generic recognizer that can handle recognizers generated from
     36  *  lexer, parser, and tree grammars.  This is all the parsing
     37  *  support code essentially; most of it is error recovery stuff and
     38  *  backtracking.
     39  */
     40 public abstract class BaseRecognizer {
	/** Memoization cache marker: this rule failed at this input index on a
	 *  prior (backtracking) attempt.
	 */
	public static final int MEMO_RULE_FAILED = -2;
	/** Memoization cache marker: no prior parse attempt recorded. */
	public static final int MEMO_RULE_UNKNOWN = -1;
	/** Initial size of the hand-rolled follow-set stack; see pushFollow(). */
	public static final int INITIAL_FOLLOW_STACK_SIZE = 100;

	// copies from Token object for convenience in actions
	public static final int DEFAULT_TOKEN_CHANNEL = Token.DEFAULT_CHANNEL;
	public static final int HIDDEN = Token.HIDDEN_CHANNEL;

	// method name skipped when reconstructing rule invocation stacks
	public static final String NEXT_TOKEN_RULE_NAME = "nextToken";

	/** State of a lexer, parser, or tree parser are collected into a state
	 *  object so the state can be shared.  This sharing is needed to
	 *  have one grammar import others and share same error variables
	 *  and other state variables.  It's a kind of explicit multiple
	 *  inheritance via delegation of methods and shared state.
	 */
	protected RecognizerSharedState state;
     58 
     59 	public BaseRecognizer() {
     60 		state = new RecognizerSharedState();
     61 	}
     62 
     63 	public BaseRecognizer(RecognizerSharedState state) {
     64 		if ( state==null ) {
     65 			state = new RecognizerSharedState();
     66 		}
     67 		this.state = state;
     68 	}
     69 
     70 	/** reset the parser's state; subclasses must rewinds the input stream */
     71 	public void reset() {
     72 		// wack everything related to error recovery
     73 		if ( state==null ) {
     74 			return; // no shared state work to do
     75 		}
     76 		state._fsp = -1;
     77 		state.errorRecovery = false;
     78 		state.lastErrorIndex = -1;
     79 		state.failed = false;
     80 		state.syntaxErrors = 0;
     81 		// wack everything related to backtracking and memoization
     82 		state.backtracking = 0;
     83 		for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { // wipe cache
     84 			state.ruleMemo[i] = null;
     85 		}
     86 	}
     87 
     88 
     89 	/** Match current input symbol against ttype.  Attempt
     90 	 *  single token insertion or deletion error recovery.  If
     91 	 *  that fails, throw MismatchedTokenException.
     92 	 *
     93 	 *  To turn off single token insertion or deletion error
     94 	 *  recovery, override recoverFromMismatchedToken() and have it
     95      *  throw an exception. See TreeParser.recoverFromMismatchedToken().
     96      *  This way any error in a rule will cause an exception and
     97      *  immediate exit from rule.  Rule would recover by resynchronizing
     98      *  to the set of symbols that can follow rule ref.
     99 	 */
    100 	public Object match(IntStream input, int ttype, BitSet follow)
    101 		throws RecognitionException
    102 	{
    103 		//System.out.println("match "+((TokenStream)input).LT(1));
    104 		Object matchedSymbol = getCurrentInputSymbol(input);
    105 		if ( input.LA(1)==ttype ) {
    106 			input.consume();
    107 			state.errorRecovery = false;
    108 			state.failed = false;
    109 			return matchedSymbol;
    110 		}
    111 		if ( state.backtracking>0 ) {
    112 			state.failed = true;
    113 			return matchedSymbol;
    114 		}
    115 		matchedSymbol = recoverFromMismatchedToken(input, ttype, follow);
    116 		return matchedSymbol;
    117 	}
    118 
    119 	/** Match the wildcard: in a symbol */
    120 	public void matchAny(IntStream input) {
    121 		state.errorRecovery = false;
    122 		state.failed = false;
    123 		input.consume();
    124 	}
    125 
    126 	public boolean mismatchIsUnwantedToken(IntStream input, int ttype) {
    127 		return input.LA(2)==ttype;
    128 	}
    129 
	/** Decide whether the current mismatch looks like a *missing* token:
	 *  if the current input symbol is consistent with what could come
	 *  after the expected token, error recovery is free to "insert" the
	 *  missing token and continue.  Returns false when follow is null
	 *  (no context to reason with).
	 *  NOTE(review): `follow.or(...)` appears to produce a combined set
	 *  that is then modified locally; assumes the caller's set is not
	 *  mutated — confirm against BitSet.or().
	 */
	public boolean mismatchIsMissingToken(IntStream input, BitSet follow) {
		if ( follow==null ) {
			// we have no information about the follow; we can only consume
			// a single token and hope for the best
			return false;
		}
		// compute what can follow this grammar element reference
		if ( follow.member(Token.EOR_TOKEN_TYPE) ) {
			BitSet viableTokensFollowingThisRule = computeContextSensitiveRuleFOLLOW();
			follow = follow.or(viableTokensFollowingThisRule);
			if ( state._fsp>=0 ) { // remove EOR if we're not the start symbol
				follow.remove(Token.EOR_TOKEN_TYPE);
			}
		}
		// if current token is consistent with what could come after set
		// then we know we're missing a token; error recovery is free to
		// "insert" the missing token

		//System.out.println("viable tokens="+follow.toString(getTokenNames()));
		//System.out.println("LT(1)="+((TokenStream)input).LT(1));

		// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
		// in follow set to indicate that the fall of the start symbol is
		// in the set (EOF can follow).
		if ( follow.member(input.LA(1)) || follow.member(Token.EOR_TOKEN_TYPE) ) {
			//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
			return true;
		}
		return false;
	}
    160 
    161 	/** Report a recognition problem.
    162 	 *
    163 	 *  This method sets errorRecovery to indicate the parser is recovering
    164 	 *  not parsing.  Once in recovery mode, no errors are generated.
    165 	 *  To get out of recovery mode, the parser must successfully match
    166 	 *  a token (after a resync).  So it will go:
    167 	 *
    168 	 * 		1. error occurs
    169 	 * 		2. enter recovery mode, report error
    170 	 * 		3. consume until token found in resynch set
    171 	 * 		4. try to resume parsing
    172 	 * 		5. next match() will reset errorRecovery mode
    173 	 *
    174 	 *  If you override, make sure to update syntaxErrors if you care about that.
    175 	 */
    176 	public void reportError(RecognitionException e) {
    177 		// if we've already reported an error and have not matched a token
    178 		// yet successfully, don't report any errors.
    179 		if ( state.errorRecovery ) {
    180 			//System.err.print("[SPURIOUS] ");
    181 			return;
    182 		}
    183 		state.syntaxErrors++; // don't count spurious
    184 		state.errorRecovery = true;
    185 
    186 		displayRecognitionError(this.getTokenNames(), e);
    187 	}
    188 
    189 	public void displayRecognitionError(String[] tokenNames,
    190 										RecognitionException e)
    191 	{
    192 		String hdr = getErrorHeader(e);
    193 		String msg = getErrorMessage(e, tokenNames);
    194 		emitErrorMessage(hdr+" "+msg);
    195 	}
    196 
    197 	/** What error message should be generated for the various
    198 	 *  exception types?
    199 	 *
    200 	 *  Not very object-oriented code, but I like having all error message
    201 	 *  generation within one method rather than spread among all of the
    202 	 *  exception classes. This also makes it much easier for the exception
    203 	 *  handling because the exception classes do not have to have pointers back
    204 	 *  to this object to access utility routines and so on. Also, changing
    205 	 *  the message for an exception type would be difficult because you
    206 	 *  would have to subclassing exception, but then somehow get ANTLR
    207 	 *  to make those kinds of exception objects instead of the default.
    208 	 *  This looks weird, but trust me--it makes the most sense in terms
    209 	 *  of flexibility.
    210 	 *
    211 	 *  For grammar debugging, you will want to override this to add
    212 	 *  more information such as the stack frame with
    213 	 *  getRuleInvocationStack(e, this.getClass().getName()) and,
    214 	 *  for no viable alts, the decision description and state etc...
    215 	 *
    216 	 *  Override this to change the message generated for one or more
    217 	 *  exception types.
    218 	 */
    219 	public String getErrorMessage(RecognitionException e, String[] tokenNames) {
    220 		String msg = e.getMessage();
    221 		if ( e instanceof UnwantedTokenException ) {
    222 			UnwantedTokenException ute = (UnwantedTokenException)e;
    223 			String tokenName="<unknown>";
    224 			if ( ute.expecting== Token.EOF ) {
    225 				tokenName = "EOF";
    226 			}
    227 			else {
    228 				tokenName = tokenNames[ute.expecting];
    229 			}
    230 			msg = "extraneous input "+getTokenErrorDisplay(ute.getUnexpectedToken())+
    231 				" expecting "+tokenName;
    232 		}
    233 		else if ( e instanceof MissingTokenException ) {
    234 			MissingTokenException mte = (MissingTokenException)e;
    235 			String tokenName="<unknown>";
    236 			if ( mte.expecting== Token.EOF ) {
    237 				tokenName = "EOF";
    238 			}
    239 			else {
    240 				tokenName = tokenNames[mte.expecting];
    241 			}
    242 			msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token);
    243 		}
    244 		else if ( e instanceof MismatchedTokenException ) {
    245 			MismatchedTokenException mte = (MismatchedTokenException)e;
    246 			String tokenName="<unknown>";
    247 			if ( mte.expecting== Token.EOF ) {
    248 				tokenName = "EOF";
    249 			}
    250 			else {
    251 				tokenName = tokenNames[mte.expecting];
    252 			}
    253 			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    254 				" expecting "+tokenName;
    255 		}
    256 		else if ( e instanceof MismatchedTreeNodeException ) {
    257 			MismatchedTreeNodeException mtne = (MismatchedTreeNodeException)e;
    258 			String tokenName="<unknown>";
    259 			if ( mtne.expecting==Token.EOF ) {
    260 				tokenName = "EOF";
    261 			}
    262 			else {
    263 				tokenName = tokenNames[mtne.expecting];
    264 			}
    265 			msg = "mismatched tree node: "+mtne.node+
    266 				" expecting "+tokenName;
    267 		}
    268 		else if ( e instanceof NoViableAltException ) {
    269 			//NoViableAltException nvae = (NoViableAltException)e;
    270 			// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    271 			// and "(decision="+nvae.decisionNumber+") and
    272 			// "state "+nvae.stateNumber
    273 			msg = "no viable alternative at input "+getTokenErrorDisplay(e.token);
    274 		}
    275 		else if ( e instanceof EarlyExitException ) {
    276 			//EarlyExitException eee = (EarlyExitException)e;
    277 			// for development, can add "(decision="+eee.decisionNumber+")"
    278 			msg = "required (...)+ loop did not match anything at input "+
    279 				getTokenErrorDisplay(e.token);
    280 		}
    281 		else if ( e instanceof MismatchedSetException ) {
    282 			MismatchedSetException mse = (MismatchedSetException)e;
    283 			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    284 				" expecting set "+mse.expecting;
    285 		}
    286 		else if ( e instanceof MismatchedNotSetException ) {
    287 			MismatchedNotSetException mse = (MismatchedNotSetException)e;
    288 			msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    289 				" expecting set "+mse.expecting;
    290 		}
    291 		else if ( e instanceof FailedPredicateException ) {
    292 			FailedPredicateException fpe = (FailedPredicateException)e;
    293 			msg = "rule "+fpe.ruleName+" failed predicate: {"+
    294 				fpe.predicateText+"}?";
    295 		}
    296 		return msg;
    297 	}
    298 
	/** Get number of recognition errors (lexer, parser, tree parser).  Each
	 *  recognizer tracks its own number.  So parser and lexer each have
	 *  separate count.  Does not count the spurious errors found between
	 *  an error and next valid token match.
	 *
	 *  See also reportError(), which increments this counter.
	 */
	public int getNumberOfSyntaxErrors() {
		return state.syntaxErrors;
	}
    309 
    310 	/** What is the error header, normally line/character position information? */
    311 	public String getErrorHeader(RecognitionException e) {
    312 		if ( getSourceName()!=null )
    313 			return getSourceName()+" line "+e.line+":"+e.charPositionInLine;
    314 
    315 		return "line "+e.line+":"+e.charPositionInLine;
    316 	}
    317 
    318 	/** How should a token be displayed in an error message? The default
    319 	 *  is to display just the text, but during development you might
    320 	 *  want to have a lot of information spit out.  Override in that case
    321 	 *  to use t.toString() (which, for CommonToken, dumps everything about
    322 	 *  the token). This is better than forcing you to override a method in
    323 	 *  your token objects because you don't have to go modify your lexer
    324 	 *  so that it creates a new Java type.
    325 	 */
    326 	public String getTokenErrorDisplay(Token t) {
    327 		String s = t.getText();
    328 		if ( s==null ) {
    329 			if ( t.getType()==Token.EOF ) {
    330 				s = "<EOF>";
    331 			}
    332 			else {
    333 				s = "<"+t.getType()+">";
    334 			}
    335 		}
    336 		s = s.replaceAll("\n","\\\\n");
    337 		s = s.replaceAll("\r","\\\\r");
    338 		s = s.replaceAll("\t","\\\\t");
    339 		return "'"+s+"'";
    340 	}
    341 
	/** Override this method to change where error messages go.
	 *  Default implementation writes to standard error.
	 */
	public void emitErrorMessage(String msg) {
		System.err.println(msg);
	}
    346 
    347 	/** Recover from an error found on the input stream.  This is
    348 	 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
    349 	 *  single token insertion and deletion, this will usually not
    350 	 *  handle mismatched symbol exceptions but there could be a mismatched
    351 	 *  token that the match() routine could not recover from.
    352 	 */
    353 	public void recover(IntStream input, RecognitionException re) {
    354 		if ( state.lastErrorIndex==input.index() ) {
    355 			// uh oh, another error at same token index; must be a case
    356 			// where LT(1) is in the recovery token set so nothing is
    357 			// consumed; consume a single token so at least to prevent
    358 			// an infinite loop; this is a failsafe.
    359 			input.consume();
    360 		}
    361 		state.lastErrorIndex = input.index();
    362 		BitSet followSet = computeErrorRecoverySet();
    363 		beginResync();
    364 		consumeUntil(input, followSet);
    365 		endResync();
    366 	}
    367 
	/** A hook to listen in on the token consumption during error recovery.
	 *  The DebugParser subclasses this to fire events to the listener.
	 *  Default implementation is a no-op.
	 */
	public void beginResync() {
	}

	/** Counterpart hook to beginResync(), called once error-recovery token
	 *  consumption is complete.  Default implementation is a no-op.
	 */
	public void endResync() {
	}
    376 
    377 	/*  Compute the error recovery set for the current rule.  During
    378 	 *  rule invocation, the parser pushes the set of tokens that can
    379 	 *  follow that rule reference on the stack; this amounts to
    380 	 *  computing FIRST of what follows the rule reference in the
    381 	 *  enclosing rule. This local follow set only includes tokens
    382 	 *  from within the rule; i.e., the FIRST computation done by
    383 	 *  ANTLR stops at the end of a rule.
    384 	 *
    385 	 *  EXAMPLE
    386 	 *
    387 	 *  When you find a "no viable alt exception", the input is not
    388 	 *  consistent with any of the alternatives for rule r.  The best
    389 	 *  thing to do is to consume tokens until you see something that
    390 	 *  can legally follow a call to r *or* any rule that called r.
    391 	 *  You don't want the exact set of viable next tokens because the
    392 	 *  input might just be missing a token--you might consume the
    393 	 *  rest of the input looking for one of the missing tokens.
    394 	 *
    395 	 *  Consider grammar:
    396 	 *
    397 	 *  a : '[' b ']'
    398 	 *    | '(' b ')'
    399 	 *    ;
    400 	 *  b : c '^' INT ;
    401 	 *  c : ID
    402 	 *    | INT
    403 	 *    ;
    404 	 *
    405 	 *  At each rule invocation, the set of tokens that could follow
    406 	 *  that rule is pushed on a stack.  Here are the various "local"
    407 	 *  follow sets:
    408 	 *
    409 	 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
    410 	 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
    411 	 *  FOLLOW(c_in_b) = FIRST('^') = '^'
    412 	 *
    413 	 *  Upon erroneous input "[]", the call chain is
    414 	 *
    415 	 *  a -> b -> c
    416 	 *
    417 	 *  and, hence, the follow context stack is:
    418 	 *
    419 	 *  depth  local follow set     after call to rule
    420 	 *    0         <EOF>                    a (from main())
    421 	 *    1          ']'                     b
	 *    2          '^'                     c
    423 	 *
    424 	 *  Notice that ')' is not included, because b would have to have
    425 	 *  been called from a different context in rule a for ')' to be
    426 	 *  included.
    427 	 *
    428 	 *  For error recovery, we cannot consider FOLLOW(c)
    429 	 *  (context-sensitive or otherwise).  We need the combined set of
    430 	 *  all context-sensitive FOLLOW sets--the set of all tokens that
    431 	 *  could follow any reference in the call chain.  We need to
    432 	 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    433 	 *  we resync'd to that token, we'd consume until EOF.  We need to
    434 	 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    435 	 *  In this case, for input "[]", LA(1) is in this set so we would
    436 	 *  not consume anything and after printing an error rule c would
    437 	 *  return normally.  It would not find the required '^' though.
    438 	 *  At this point, it gets a mismatched token error and throws an
    439 	 *  exception (since LA(1) is not in the viable following token
    440 	 *  set).  The rule exception handler tries to recover, but finds
    441 	 *  the same recovery set and doesn't consume anything.  Rule b
    442 	 *  exits normally returning to rule a.  Now it finds the ']' (and
    443 	 *  with the successful match exits errorRecovery mode).
    444 	 *
	 *  So, you can see that the parser walks up the call chain looking
    446 	 *  for the token that was a member of the recovery set.
    447 	 *
    448 	 *  Errors are not generated in errorRecovery mode.
    449 	 *
    450 	 *  ANTLR's error recovery mechanism is based upon original ideas:
    451 	 *
    452 	 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
    453 	 *
    454 	 *  and
    455 	 *
    456 	 *  "A note on error recovery in recursive descent parsers":
    457 	 *  http://portal.acm.org/citation.cfm?id=947902.947905
    458 	 *
    459 	 *  Later, Josef Grosch had some good ideas:
    460 	 *
    461 	 *  "Efficient and Comfortable Error Recovery in Recursive Descent
    462 	 *  Parsers":
    463 	 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    464 	 *
    465 	 *  Like Grosch I implemented local FOLLOW sets that are combined
    466 	 *  at run-time upon error to avoid overhead during parsing.
    467 	 */
	protected BitSet computeErrorRecoverySet() {
		// non-exact: union of every local FOLLOW set on the stack; see the
		// extensive comment above for the rationale
		return combineFollows(false);
	}
    471 
    472 	/** Compute the context-sensitive FOLLOW set for current rule.
    473 	 *  This is set of token types that can follow a specific rule
    474 	 *  reference given a specific call chain.  You get the set of
    475 	 *  viable tokens that can possibly come next (lookahead depth 1)
    476 	 *  given the current call chain.  Contrast this with the
    477 	 *  definition of plain FOLLOW for rule r:
    478 	 *
    479 	 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
    480 	 *
    481 	 *  where x in T* and alpha, beta in V*; T is set of terminals and
    482 	 *  V is the set of terminals and nonterminals.  In other words,
    483 	 *  FOLLOW(r) is the set of all tokens that can possibly follow
    484 	 *  references to r in *any* sentential form (context).  At
    485 	 *  runtime, however, we know precisely which context applies as
    486 	 *  we have the call chain.  We may compute the exact (rather
    487 	 *  than covering superset) set of following tokens.
    488 	 *
    489 	 *  For example, consider grammar:
    490 	 *
    491 	 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    492 	 *       | "return" expr '.'
    493 	 *       ;
    494 	 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    495 	 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    496 	 *       | '(' expr ')'
    497 	 *       ;
    498 	 *
    499 	 *  The FOLLOW sets are all inclusive whereas context-sensitive
    500 	 *  FOLLOW sets are precisely what could follow a rule reference.
	 *  For input "i=(3);", here is the derivation:
    502 	 *
    503 	 *  stat => ID '=' expr ';'
    504 	 *       => ID '=' atom ('+' atom)* ';'
    505 	 *       => ID '=' '(' expr ')' ('+' atom)* ';'
    506 	 *       => ID '=' '(' atom ')' ('+' atom)* ';'
    507 	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
    508 	 *       => ID '=' '(' INT ')' ';'
    509 	 *
    510 	 *  At the "3" token, you'd have a call chain of
    511 	 *
    512 	 *    stat -> expr -> atom -> expr -> atom
    513 	 *
    514 	 *  What can follow that specific nested ref to atom?  Exactly ')'
    515 	 *  as you can see by looking at the derivation of this specific
    516 	 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
    517 	 *
    518 	 *  You want the exact viable token set when recovering from a
    519 	 *  token mismatch.  Upon token mismatch, if LA(1) is member of
    520 	 *  the viable next token set, then you know there is most likely
    521 	 *  a missing token in the input stream.  "Insert" one by just not
    522 	 *  throwing an exception.
    523 	 */
	protected BitSet computeContextSensitiveRuleFOLLOW() {
		// exact: combineFollows stops accumulating once it reaches a stack
		// entry without EOR; see the comment above for the full rationale
		return combineFollows(true);
	}
    527 
    528 	// what is exact? it seems to only add sets from above on stack
    529 	// if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
    530 	// Why would we ever want them all?  Maybe no viable alt instead of
    531 	// mismatched token?
    532 	protected BitSet combineFollows(boolean exact) {
    533 		int top = state._fsp;
    534 		BitSet followSet = new BitSet();
    535 		for (int i=top; i>=0; i--) {
    536 			BitSet localFollowSet = (BitSet)state.following[i];
    537 			/*
    538 			System.out.println("local follow depth "+i+"="+
    539 							   localFollowSet.toString(getTokenNames())+")");
    540 			 */
    541 			followSet.orInPlace(localFollowSet);
    542 			if ( exact ) {
    543 				// can we see end of rule?
    544 				if ( localFollowSet.member(Token.EOR_TOKEN_TYPE) ) {
    545 					// Only leave EOR in set if at top (start rule); this lets
    546 					// us know if have to include follow(start rule); i.e., EOF
    547 					if ( i>0 ) {
    548 						followSet.remove(Token.EOR_TOKEN_TYPE);
    549 					}
    550 				}
    551 				else { // can't see end of rule, quit
    552 					break;
    553 				}
    554 			}
    555 		}
    556 		return followSet;
    557 	}
    558 
	/** Attempt to recover from a single missing or extra token.
	 *
	 *  EXTRA TOKEN
	 *
	 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
	 *  however, then assume LA(1) is some extra spurious token.  Delete it
	 *  and LA(2) as if we were doing a normal match(), which advances the
	 *  input.
	 *
	 *  MISSING TOKEN
	 *
	 *  If current token is consistent with what could come after
	 *  ttype then it is ok to "insert" the missing token, else throw
	 *  exception.  For example, input "i=(3;" is clearly missing the
	 *  ')'.  When the parser returns from the nested call to expr, it
	 *  will have call chain:
	 *
	 *    stat -> expr -> atom
	 *
	 *  and it will be trying to match the ')' at this point in the
	 *  derivation:
	 *
	 *       => ID '=' '(' INT ')' ('+' atom)* ';'
	 *                          ^
	 *  match() will see that ';' doesn't match ')' and report a
	 *  mismatched token error.  To recover, it sees that LA(1)==';'
	 *  is in the set of tokens that can follow the ')' token
	 *  reference in rule atom.  It can assume that you forgot the ')'.
	 *
	 *  Returns the symbol actually matched (deletion case) or the conjured
	 *  missing symbol (insertion case); throws MismatchedTokenException
	 *  when neither recovery applies.
	 */
	protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
		throws RecognitionException
	{
		RecognitionException e = null;
		// if next token is what we are looking for then "delete" this token
		if ( mismatchIsUnwantedToken(input, ttype) ) {
			e = new UnwantedTokenException(ttype, input);
			/*
			System.err.println("recoverFromMismatchedToken deleting "+
							   ((TokenStream)input).LT(1)+
							   " since "+((TokenStream)input).LT(2)+" is what we want");
			 */
			beginResync();
			input.consume(); // simply delete extra token
			endResync();
			// NOTE: report AFTER consuming so AW sees the token in the exception
			reportError(e);
			// we want to return the token we're actually matching
			Object matchedSymbol = getCurrentInputSymbol(input);
			input.consume(); // move past ttype token as if all were ok
			return matchedSymbol;
		}
		// can't recover with single token deletion, try insertion
		if ( mismatchIsMissingToken(input, follow) ) {
			Object inserted = getMissingSymbol(input, e, ttype, follow);
			e = new MissingTokenException(ttype, input, inserted);
			reportError(e);  // report after inserting so AW sees the token in the exception
			return inserted;
		}
		// even that didn't work; must throw the exception
		e = new MismatchedTokenException(ttype, input);
		throw e;
	}
    620 
    621 	/** Not currently used */
    622 	public Object recoverFromMismatchedSet(IntStream input,
    623 										   RecognitionException e,
    624 										   BitSet follow)
    625 		throws RecognitionException
    626 	{
    627 		if ( mismatchIsMissingToken(input, follow) ) {
    628 			// System.out.println("missing token");
    629 			reportError(e);
    630 			// we don't know how to conjure up a token for sets yet
    631 			return getMissingSymbol(input, e, Token.INVALID_TOKEN_TYPE, follow);
    632 		}
    633 		// TODO do single token deletion like above for Token mismatch
    634 		throw e;
    635 	}
    636 
	/** Match needs to return the current input symbol, which gets put
	 *  into the label for the associated token ref; e.g., x=ID.  Token
	 *  and tree parsers need to return different objects. Rather than test
	 *  for input stream type or change the IntStream interface, I use
	 *  a simple method to ask the recognizer to tell me what the current
	 *  input symbol is.
	 *
	 *  This is ignored for lexers.  Default implementation returns null;
	 *  token/tree parser subclasses override it.
	 */
	protected Object getCurrentInputSymbol(IntStream input) { return null; }
    647 
	/** Conjure up a missing token during error recovery.
	 *
	 *  The recognizer attempts to recover from single missing
	 *  symbols. But, actions might refer to that missing symbol.
	 *  For example, x=ID {f($x);}. The action clearly assumes
	 *  that there has been an identifier matched previously and that
	 *  $x points at that token. If that token is missing, but
	 *  the next token in the stream is what we want we assume that
	 *  this token is missing and we keep going. Because we
	 *  have to return some token to replace the missing token,
	 *  we have to conjure one up. This method gives the user control
	 *  over the tokens returned for missing tokens. Mostly,
	 *  you will want to create something special for identifier
	 *  tokens. For literals such as '{' and ',', the default
	 *  action in the parser or tree parser works. It simply creates
	 *  a CommonToken of the appropriate type. The text will be the token.
	 *  If you change what tokens must be created by the lexer,
	 *  override this method to create the appropriate tokens.
	 *
	 *  Default implementation returns null; parser/tree-parser
	 *  subclasses supply the real conjured symbol.
	 */
	protected Object getMissingSymbol(IntStream input,
									  RecognitionException e,
									  int expectedTokenType,
									  BitSet follow)
	{
		return null;
	}
    674 
    675 	public void consumeUntil(IntStream input, int tokenType) {
    676 		//System.out.println("consumeUntil "+tokenType);
    677 		int ttype = input.LA(1);
    678 		while (ttype != Token.EOF && ttype != tokenType) {
    679 			input.consume();
    680 			ttype = input.LA(1);
    681 		}
    682 	}
    683 
    684 	/** Consume tokens until one matches the given token set */
    685 	public void consumeUntil(IntStream input, BitSet set) {
    686 		//System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
    687 		int ttype = input.LA(1);
    688 		while (ttype != Token.EOF && !set.member(ttype) ) {
    689 			//System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
    690 			input.consume();
    691 			ttype = input.LA(1);
    692 		}
    693 	}
    694 
    695 	/** Push a rule's follow set using our own hardcoded stack */
    696 	protected void pushFollow(BitSet fset) {
    697 		if ( (state._fsp +1)>=state.following.length ) {
    698 			BitSet[] f = new BitSet[state.following.length*2];
    699 			System.arraycopy(state.following, 0, f, 0, state.following.length);
    700 			state.following = f;
    701 		}
    702 		state.following[++state._fsp] = fset;
    703 	}
    704 
    705 	/** Return List<String> of the rules in your parser instance
    706 	 *  leading up to a call to this method.  You could override if
    707 	 *  you want more details such as the file/line info of where
    708 	 *  in the parser java code a rule is invoked.
    709 	 *
    710 	 *  This is very useful for error messages and for context-sensitive
    711 	 *  error recovery.
    712 	 */
    713 	public List getRuleInvocationStack() {
    714 		String parserClassName = getClass().getName();
    715 		return getRuleInvocationStack(new Throwable(), parserClassName);
    716 	}
    717 
    718 	/** A more general version of getRuleInvocationStack where you can
    719 	 *  pass in, for example, a RecognitionException to get it's rule
    720 	 *  stack trace.  This routine is shared with all recognizers, hence,
    721 	 *  static.
    722 	 *
    723 	 *  TODO: move to a utility class or something; weird having lexer call this
    724 	 */
    725 	public static List getRuleInvocationStack(Throwable e,
    726 											  String recognizerClassName)
    727 	{
    728 		List rules = new ArrayList();
    729 		StackTraceElement[] stack = e.getStackTrace();
    730 		int i = 0;
    731 		for (i=stack.length-1; i>=0; i--) {
    732 			StackTraceElement t = stack[i];
    733 			if ( t.getClassName().startsWith("org.antlr.runtime.") ) {
    734 				continue; // skip support code such as this method
    735 			}
    736 			if ( t.getMethodName().equals(NEXT_TOKEN_RULE_NAME) ) {
    737 				continue;
    738 			}
    739 			if ( !t.getClassName().equals(recognizerClassName) ) {
    740 				continue; // must not be part of this parser
    741 			}
    742             rules.add(t.getMethodName());
    743 		}
    744 		return rules;
    745 	}
    746 
    /** Return the current backtracking nesting depth (state.backtracking). */
    public int getBacktrackingLevel() { return state.backtracking; }
    748 
    /** Set the current backtracking nesting depth directly. */
    public void setBacktrackingLevel(int n) { state.backtracking = n; }
    750 
    /** Return whether or not a backtracking attempt failed.  state.failed
     *  is also set by alreadyParsedRule() when memoization recorded a
     *  prior failure at this input position.
     */
    public boolean failed() { return state.failed; }
    753 
	/** Used to print out token names like ID during debugging and
	 *  error reporting.  The generated parsers implement a method
	 *  that overrides this to point to their String[] tokenNames.
	 *  This base implementation has no names and returns null.
	 */
	public String[] getTokenNames() {
		return null;
	}
    761 
	/** For debugging and other purposes, might want the grammar name.
	 *  Have ANTLR generate an implementation for this method.
	 *  This base implementation returns null; the value is used in
	 *  memoize()'s diagnostic output.
	 */
	public String getGrammarFileName() {
		return null;
	}
    768 
	// NOTE(review): presumably returns a description of the input source
	// (e.g. a file name) -- confirm against concrete subclasses.
	public abstract String getSourceName();
    770 
    771 	/** A convenience method for use most often with template rewrites.
    772 	 *  Convert a List<Token> to List<String>
    773 	 */
    774 	public List toStrings(List tokens) {
    775 		if ( tokens==null ) return null;
    776 		List strings = new ArrayList(tokens.size());
    777 		for (int i=0; i<tokens.size(); i++) {
    778 			strings.add(((Token)tokens.get(i)).getText());
    779 		}
    780 		return strings;
    781 	}
    782 
    783 	/** Given a rule number and a start token index number, return
    784 	 *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    785 	 *  start index.  If this rule has parsed input starting from the
    786 	 *  start index before, then return where the rule stopped parsing.
    787 	 *  It returns the index of the last token matched by the rule.
    788 	 *
    789 	 *  For now we use a hashtable and just the slow Object-based one.
    790 	 *  Later, we can make a special one for ints and also one that
    791 	 *  tosses out data after we commit past input position i.
    792 	 */
    793 	public int getRuleMemoization(int ruleIndex, int ruleStartIndex) {
    794 		if ( state.ruleMemo[ruleIndex]==null ) {
    795 			state.ruleMemo[ruleIndex] = new HashMap();
    796 		}
    797 		Integer stopIndexI =
    798 			(Integer)state.ruleMemo[ruleIndex].get(new Integer(ruleStartIndex));
    799 		if ( stopIndexI==null ) {
    800 			return MEMO_RULE_UNKNOWN;
    801 		}
    802 		return stopIndexI.intValue();
    803 	}
    804 
    805 	/** Has this rule already parsed input at the current index in the
    806 	 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
    807 	 *  If we attempted but failed to parse properly before, return
    808 	 *  MEMO_RULE_FAILED.
    809 	 *
    810 	 *  This method has a side-effect: if we have seen this input for
    811 	 *  this rule and successfully parsed before, then seek ahead to
    812 	 *  1 past the stop token matched for this rule last time.
    813 	 */
    814 	public boolean alreadyParsedRule(IntStream input, int ruleIndex) {
    815 		int stopIndex = getRuleMemoization(ruleIndex, input.index());
    816 		if ( stopIndex==MEMO_RULE_UNKNOWN ) {
    817 			return false;
    818 		}
    819 		if ( stopIndex==MEMO_RULE_FAILED ) {
    820 			//System.out.println("rule "+ruleIndex+" will never succeed");
    821 			state.failed=true;
    822 		}
    823 		else {
    824 			//System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+state.failed);
    825 			input.seek(stopIndex+1); // jump to one past stop token
    826 		}
    827 		return true;
    828 	}
    829 
    830 	/** Record whether or not this rule parsed the input at this position
    831 	 *  successfully.  Use a standard java hashtable for now.
    832 	 */
    833 	public void memoize(IntStream input,
    834 						int ruleIndex,
    835 						int ruleStartIndex)
    836 	{
    837 		int stopTokenIndex = state.failed?MEMO_RULE_FAILED:input.index()-1;
    838 		if ( state.ruleMemo==null ) {
    839 			System.err.println("!!!!!!!!! memo array is null for "+ getGrammarFileName());
    840 		}
    841 		if ( ruleIndex >= state.ruleMemo.length ) {
    842 			System.err.println("!!!!!!!!! memo size is "+state.ruleMemo.length+", but rule index is "+ruleIndex);
    843 		}
    844 		if ( state.ruleMemo[ruleIndex]!=null ) {
    845 			state.ruleMemo[ruleIndex].put(
    846 				new Integer(ruleStartIndex), new Integer(stopTokenIndex)
    847 			);
    848 		}
    849 	}
    850 
    851 	/** return how many rule/input-index pairs there are in total.
    852 	 *  TODO: this includes synpreds. :(
    853 	 */
    854 	public int getRuleMemoizationCacheSize() {
    855 		int n = 0;
    856 		for (int i = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) {
    857 			Map ruleMap = state.ruleMemo[i];
    858 			if ( ruleMap!=null ) {
    859 				n += ruleMap.size(); // how many input indexes are recorded?
    860 			}
    861 		}
    862 		return n;
    863 	}
    864 
    865 	public void traceIn(String ruleName, int ruleIndex, Object inputSymbol)  {
    866 		System.out.print("enter "+ruleName+" "+inputSymbol);
    867 		if ( state.backtracking>0 ) {
    868 			System.out.print(" backtracking="+state.backtracking);
    869 		}
    870 		System.out.println();
    871 	}
    872 
    873 	public void traceOut(String ruleName,
    874 						 int ruleIndex,
    875 						 Object inputSymbol)
    876 	{
    877 		System.out.print("exit "+ruleName+" "+inputSymbol);
    878 		if ( state.backtracking>0 ) {
    879             System.out.print(" backtracking="+state.backtracking);
    880             if ( state.failed ) System.out.print(" failed");
    881             else System.out.print(" succeeded");
    882         }
    883 		System.out.println();
    884 	}
    885 
    886 }
    887