      1 package org.antlr.runtime {
      2 	
      3 	/** A generic recognizer that can handle recognizers generated from
      4 	 *  lexer, parser, and tree grammars.  This is all the parsing
      5 	 *  support code, essentially; most of it handles error recovery and
      6 	 *  backtracking.
      7 	 */
      8 	public class BaseRecognizer {
      9 		public static const MEMO_RULE_FAILED:int = -2;
     10 		public static const MEMO_RULE_UNKNOWN:int = -1;
     11 		public static const INITIAL_FOLLOW_STACK_SIZE:int = 100;
     12 	
     13 		// copies from Token object for convenience in actions
     14 		public static const DEFAULT_TOKEN_CHANNEL:int = TokenConstants.DEFAULT_CHANNEL;
     15 		public static const HIDDEN:int = TokenConstants.HIDDEN_CHANNEL;
     16 	
     17 		public static const NEXT_TOKEN_RULE_NAME:String = "nextToken";
     18 	
      19 		/** The state of a lexer, parser, or tree parser is collected into a state
      20 		 *  object so the state can be shared.  This sharing is needed to
      21 		 *  let one grammar import others and share the same error variables
      22 		 *  and other state variables.  It's a kind of explicit multiple
     23 		 *  inheritance via delegation of methods and shared state.
     24 		 * 
     25 		 */
     26 		public var state:RecognizerSharedState;  // TODO - Place in private Namespace - cannot be private 
     27 	
     28 		public function BaseRecognizer(state:RecognizerSharedState = null) {
     29 			if ( state == null ) { // don't ever let us have a null state
     30 				state = new RecognizerSharedState();
     31 			}
     32 			this.state = state;			
     33 		}
     34 	
      35 	/** Reset the parser's state; subclasses must rewind the input stream. */
     36 		public function reset():void {
      37 			// whack everything related to error recovery
     38 			if (state == null) {
     39 			    return;
     40 			}
     41 			state._fsp = -1;
     42 			state.errorRecovery = false;
     43 			state.lastErrorIndex = -1;
     44 			state.failed = false;
     45 			state.syntaxErrors = 0;
      46 			// whack everything related to backtracking and memoization
     47 			state.backtracking = 0;
     48 			for (var i:int = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) { // wipe cache
     49 				state.ruleMemo[i] = null;
     50 			}
     51 		}
     52 	
     53     	/** Match current input symbol against ttype.  Attempt
     54 		 *  single token insertion or deletion error recovery.  If
     55 		 *  that fails, throw MismatchedTokenException.
     56 		 *
     57 		 *  To turn off single token insertion or deletion error
     58 		 *  recovery, override mismatchRecover() and have it call
     59 		 *  plain mismatch(), which does not recover.  Then any error
      60 		 *  in a rule will cause an exception and an immediate exit from the
      61 		 *  rule.  The rule would recover by resynchronizing to the set of
      62 		 *  symbols that can follow the rule reference.
     63 		 */
     64 		public function matchStream(input:IntStream, ttype:int, follow:BitSet):Object {
     65 			//System.out.println("match "+((TokenStream)input).LT(1));
     66 			var matchedSymbol:Object = getCurrentInputSymbol(input);
     67 			if ( input.LA(1)==ttype ) {
     68 				input.consume();
     69 				state.errorRecovery = false;
     70 				state.failed = false;
     71 				return matchedSymbol;
     72 			}
     73 			if ( state.backtracking>0 ) {
     74 				state.failed = true;
     75 				return matchedSymbol;
     76 			}
     77 			matchedSymbol = recoverFromMismatchedToken(input, ttype, follow);
     78 			return matchedSymbol;
     79 		}
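		// Illustrative sketch (not verbatim generated code): a rule such as
		//   stat : ID '=' expr ';' ;
		// typically matches ID through matchStream() and keeps the returned
		// symbol for labels/AST building.  ID, FOLLOW_ID_in_stat, and the Token
		// cast are assumptions for the example.
		//
		//     var id:Token = Token(matchStream(input, ID, FOLLOW_ID_in_stat));
		//     if ( state.failed ) return;   // while backtracking, bail out quietly
		//     trace("matched identifier: " + id.text);
		//
		// On a mismatch, matchStream() delegates to recoverFromMismatchedToken(),
		// so the caller may get back a conjured "missing" token rather than an
		// exception.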
     80 	
      81 		/** Match the wildcard token reference ('.'); consumes any single symbol. */
     82 		public function matchAnyStream(input:IntStream):void {
     83 			state.errorRecovery = false;
     84 			state.failed = false;
     85 			input.consume();
     86 		}
     87 
     88     	public function mismatchIsUnwantedToken(input:IntStream, ttype:int):Boolean {
     89     		return input.LA(2)==ttype;
     90     	}
     91 	
     92     	public function mismatchIsMissingToken(input:IntStream, follow:BitSet):Boolean {
     93     		if ( follow==null ) {
     94     			// we have no information about the follow; we can only consume
     95     			// a single token and hope for the best
     96     			return false;
     97     		}
     98      		// compute what can follow this grammar element reference
     99     		if ( follow.member(TokenConstants.EOR_TOKEN_TYPE) ) {
    100 				var viableTokensFollowingThisRule:BitSet = computeContextSensitiveRuleFOLLOW();
    101 				follow = follow.or(viableTokensFollowingThisRule);
    102 				if ( state._fsp>=0 ) { // remove EOR if we're not the start symbol
    103                     follow.remove(TokenConstants.EOR_TOKEN_TYPE);
    104                 }
    105     		}
    106     		// if current token is consistent with what could come after set
    107 			// then we know we're missing a token; error recovery is free to
    108 			// "insert" the missing token
    109 
    110 			//System.out.println("LT(1)="+((TokenStream)input).LT(1));
    111 	
     112 			// BitSet cannot handle negative numbers like -1 (EOF), so I leave EOR
     113 			// in the follow set to indicate that falling off the end of the start
     114 			// symbol is in the set (EOF can follow).
    115 			if ( follow.member(input.LA(1)) || follow.member(TokenConstants.EOR_TOKEN_TYPE) ) {
    116 				//System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
    117 				return true;
    118 			}
    119 			return false;
    120     	}
    121     
    122     	/** Factor out what to do upon token mismatch so tree parsers can behave
    123 		 *  differently.  Override and call mismatchRecover(input, ttype, follow)
     124 		 *  to get single token insertion and deletion.  Use this to turn off
    125 		 *  single token insertion and deletion. Override mismatchRecover
    126 		 *  to call this instead.
    127 		 */
    128 		protected function mismatch(input:IntStream, ttype:int, follow:BitSet):void
    129 		{
    130     		if ( mismatchIsUnwantedToken(input, ttype) ) {
    131     			throw new UnwantedTokenException(ttype, input);
    132     		}
    133     		else if ( mismatchIsMissingToken(input, follow) ) {
    134     			throw new MissingTokenException(ttype, input, null);
    135     		}
    136     		throw new MismatchedTokenException(ttype, input);
    137 		}
    138 	
    139     	/** Report a recognition problem.
    140     	 *
     141     	 *  This method sets errorRecovery to indicate the parser is recovering,
    142     	 *  not parsing.  Once in recovery mode, no errors are generated.
    143     	 *  To get out of recovery mode, the parser must successfully match
    144     	 *  a token (after a resync).  So it will go:
    145     	 *
    146     	 * 		1. error occurs
    147     	 * 		2. enter recovery mode, report error
    148     	 * 		3. consume until token found in resynch set
    149     	 * 		4. try to resume parsing
    150     	 * 		5. next match() will reset errorRecovery mode
    151     	 *
    152     	 *  If you override, make sure to update syntaxErrors if you care about that.
    153     	 */
    154 		public function reportError(e:RecognitionException):void {
    155 			// if we've already reported an error and have not matched a token
    156 			// yet successfully, don't report any errors.
    157 			if ( state.errorRecovery ) {
    158 				//System.err.print("[SPURIOUS] ");
    159 				return;
    160 			}
    161 	    	state.syntaxErrors++; // don't count spurious
    162 			state.errorRecovery = true;
    163 	
    164 			displayRecognitionError(this.tokenNames, e);
    165 		}
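		// Illustrative sketch of the intended calling pattern: a generated rule
		// catches RecognitionException, reports it once, then resynchronizes.
		// The rule body shown is an assumption, not literal generated output.
		//
		//     try {
		//         // ... rule body: matchStream(), pushFollow(), subrule calls ...
		//     }
		//     catch (re:RecognitionException) {
		//         reportError(re);            // no-op if already in errorRecovery
		//         recoverStream(input, re);   // consume until a token in the resync set
		//     }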
    166 	
    167 		public function displayRecognitionError(tokenNames:Array,
    168 											e:RecognitionException):void
    169 		{
    170 			var hdr:String = getErrorHeader(e);
    171 			var msg:String = getErrorMessage(e, tokenNames);
    172 			emitErrorMessage(hdr+" "+msg);
    173 		}
    174 	
    175 		/** What error message should be generated for the various
    176 		 *  exception types?
    177 		 *
    178 		 *  Not very object-oriented code, but I like having all error message
    179 		 *  generation within one method rather than spread among all of the
    180 		 *  exception classes. This also makes it much easier for the exception
    181 		 *  handling because the exception classes do not have to have pointers back
    182 		 *  to this object to access utility routines and so on. Also, changing
    183 		 *  the message for an exception type would be difficult because you
     184 		 *  would have to subclass the exception, but then somehow get ANTLR
    185 		 *  to make those kinds of exception objects instead of the default.
    186 		 *  This looks weird, but trust me--it makes the most sense in terms
    187 		 *  of flexibility.
    188 		 *
    189 		 *  For grammar debugging, you will want to override this to add
    190 		 *  more information such as the stack frame with
    191 		 *  getRuleInvocationStack(e, this.getClass().getName()) and,
    192 		 *  for no viable alts, the decision description and state etc...
    193 		 *
    194 		 *  Override this to change the message generated for one or more
    195 		 *  exception types.
    196 		 */
    197 		public function getErrorMessage(e:RecognitionException, tokenNames:Array):String {
    198 			var msg:String = e.message;
    199 			var tokenName:String = null;
    200     		if ( e is UnwantedTokenException ) {
    201     			var ute:UnwantedTokenException = UnwantedTokenException(e);
    202     			tokenName="<unknown>";
    203     			if ( ute.expecting== TokenConstants.EOF ) {
    204     				tokenName = "EOF";
    205     			}
    206     			else {
    207     				tokenName = tokenNames[ute.expecting];
    208     			}
    209     			msg = "extraneous input "+getTokenErrorDisplay(ute.unexpectedToken)+
    210     				" expecting "+tokenName;
    211     		}
    212     		else if ( e is MissingTokenException ) {
    213     			var mite:MissingTokenException = MissingTokenException(e);
    214     			tokenName="<unknown>";
    215     			if ( mite.expecting == TokenConstants.EOF ) {
    216     				tokenName = "EOF";
    217     			}
    218     			else {
    219     				tokenName = tokenNames[mite.expecting];
    220     			}
    221     			msg = "missing "+tokenName+" at "+getTokenErrorDisplay(e.token);
    222     		}  			
    223 			else if ( e is MismatchedTokenException ) {
    224 				var mte:MismatchedTokenException = MismatchedTokenException(e);
    225 				tokenName="<unknown>";
    226 				if ( mte.expecting== TokenConstants.EOF ) {
    227 					tokenName = "EOF";
    228 				}
    229 				else {
    230 					tokenName = tokenNames[mte.expecting];
    231 				}
    232 				msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    233 					" expecting "+tokenName;
    234 			}
    235 			else if ( e is MismatchedTreeNodeException ) {
    236 				var mtne:MismatchedTreeNodeException = MismatchedTreeNodeException(e);
    237 				tokenName="<unknown>";
    238 				if ( mtne.expecting==TokenConstants.EOF ) {
    239 					tokenName = "EOF";
    240 				}
    241 				else {
    242 					tokenName = tokenNames[mtne.expecting];
    243 				}
    244 				msg = "mismatched tree node: "+mtne.node+
    245 					" expecting "+tokenName;
    246 			}
    247 			else if ( e is NoViableAltException ) {
    248 				var nvae:NoViableAltException = NoViableAltException(e);
    249 				// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    250 				// and "(decision="+nvae.decisionNumber+") and
    251 				// "state "+nvae.stateNumber
    252 				msg = "no viable alternative at input "+getTokenErrorDisplay(e.token);
    253 			}
    254 			else if ( e is EarlyExitException ) {
    255 				var eee:EarlyExitException = EarlyExitException(e);
    256 				// for development, can add "(decision="+eee.decisionNumber+")"
    257 				msg = "required (...)+ loop did not match anything at input "+
    258 					getTokenErrorDisplay(e.token);
    259 			}
    260 			else if ( e is MismatchedSetException ) {
    261 				var mse:MismatchedSetException = MismatchedSetException(e);
    262 				msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    263 					" expecting set "+mse.expecting;
    264 			}
    265 			else if ( e is MismatchedNotSetException ) {
    266 				var mnse:MismatchedNotSetException = MismatchedNotSetException(e);
    267 				msg = "mismatched input "+getTokenErrorDisplay(e.token)+
    268 					" expecting set "+mnse.expecting;
    269 			}
    270 			else if ( e is FailedPredicateException ) {
    271 				var fpe:FailedPredicateException = FailedPredicateException(e);
    272 				msg = "rule "+fpe.ruleName+" failed predicate: {"+
    273 					fpe.predicateText+"}?";
    274 			}
    275 			return msg;
    276 		}
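		// Illustrative sketch: as the comment above suggests, a subclass
		// (hypothetical MyParser) can override getErrorMessage() to add context.
		// Only members defined in this file (grammarFileName) are used.
		//
		//     override public function getErrorMessage(e:RecognitionException,
		//                                               tokenNames:Array):String {
		//         var msg:String = super.getErrorMessage(e, tokenNames);
		//         return msg + " (while parsing " + grammarFileName + ")";
		//     }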
    277 
    278     	/** Get number of recognition errors (lexer, parser, tree parser).  Each
     279     	 *  recognizer tracks its own number, so the parser and lexer each have
     280     	 *  a separate count.  This does not count the spurious errors found
     281     	 *  between an error and the next valid token match.
    282     	 *
    283     	 *  See also reportError()
    284     	 */
    285     	public function get numberOfSyntaxErrors():int {
    286     		return state.syntaxErrors;
    287     	}
    288 	
    289 		/** What is the error header, normally line/character position information? */
    290 		public function getErrorHeader(e:RecognitionException):String {
    291 			return "line "+e.line+":"+e.charPositionInLine;
    292 		}
    293 	
    294 		/** How should a token be displayed in an error message? The default
    295 		 *  is to display just the text, but during development you might
    296 		 *  want to have a lot of information spit out.  Override in that case
    297 		 *  to use t.toString() (which, for CommonToken, dumps everything about
    298 		 *  the token). This is better than forcing you to override a method in
    299 		 *  your token objects because you don't have to go modify your lexer
     300 		 *  so that it creates a new token type.
    301 		 */
    302 		public function getTokenErrorDisplay(t:Token):String {
    303 			var s:String = t.text;
    304 			if ( s==null ) {
    305 				if ( t.type==TokenConstants.EOF ) {
    306 					s = "<EOF>";
    307 				}
    308 				else {
    309 					s = "<"+t.type+">";
    310 				}
    311 			}
     312 			s = s.replace(/\n/g, "\\n");  // regexp with /g escapes every occurrence, not just the first
     313 			s = s.replace(/\r/g, "\\r");
     314 			s = s.replace(/\t/g, "\\t");
    315 			return "'"+s+"'";
    316 		}
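		// Illustrative sketch: during grammar development you might override this
		// to dump the whole token rather than just its text, per the note above.
		//
		//     override public function getTokenErrorDisplay(t:Token):String {
		//         return t.toString();   // CommonToken.toString() dumps everything
		//     }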
    317 	
    318 		/** Override this method to change where error messages go */
    319 		public function emitErrorMessage(msg:String):void {
    320 			    trace(msg);
    321 		}
    322 	
    323     	/** Recover from an error found on the input stream.  This is
    324     	 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
    325     	 *  single token insertion and deletion, this will usually not
    326     	 *  handle mismatched symbol exceptions but there could be a mismatched
    327     	 *  token that the match() routine could not recover from.
    328     	 */
    329 		public function recoverStream(input:IntStream, re:RecognitionException):void {
    330 			if ( state.lastErrorIndex==input.index) {
    331 				// uh oh, another error at same token index; must be a case
    332 				// where LT(1) is in the recovery token set so nothing is
    333 				// consumed; consume a single token so at least to prevent
    334 				// an infinite loop; this is a failsafe.
    335 				input.consume();
    336 			}
    337 			state.lastErrorIndex = input.index;
    338 			var followSet:BitSet = computeErrorRecoverySet();
    339 			beginResync();
    340 			consumeUntil(input, followSet);
    341 			endResync();
    342 		}
    343 	
    344 		/** A hook to listen in on the token consumption during error recovery.
     345 		 *  The DebugParser subclasses this to fire events to the listener.
    346 		 */
    347 		public function beginResync():void {
    348 		}
    349 	
    350 		public function endResync():void {
    351 		}
    352 	
     353 		/** Compute the error recovery set for the current rule.  During
    354 		 *  rule invocation, the parser pushes the set of tokens that can
    355 		 *  follow that rule reference on the stack; this amounts to
    356 		 *  computing FIRST of what follows the rule reference in the
    357 		 *  enclosing rule. This local follow set only includes tokens
    358 		 *  from within the rule; i.e., the FIRST computation done by
    359 		 *  ANTLR stops at the end of a rule.
    360 		 *
    361 		 *  EXAMPLE
    362 		 *
    363 		 *  When you find a "no viable alt exception", the input is not
    364 		 *  consistent with any of the alternatives for rule r.  The best
    365 		 *  thing to do is to consume tokens until you see something that
    366 		 *  can legally follow a call to r *or* any rule that called r.
    367 		 *  You don't want the exact set of viable next tokens because the
    368 		 *  input might just be missing a token--you might consume the
    369 		 *  rest of the input looking for one of the missing tokens.
    370 		 *
    371 		 *  Consider grammar:
    372 		 *
    373 		 *  a : '[' b ']'
    374 		 *    | '(' b ')'
    375 		 *    ;
    376 		 *  b : c '^' INT ;
    377 		 *  c : ID
    378 		 *    | INT
    379 		 *    ;
    380 		 *
    381 		 *  At each rule invocation, the set of tokens that could follow
    382 		 *  that rule is pushed on a stack.  Here are the various "local"
    383 		 *  follow sets:
    384 		 *
    385 		 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
    386 		 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
    387 		 *  FOLLOW(c_in_b) = FIRST('^') = '^'
    388 		 *
    389 		 *  Upon erroneous input "[]", the call chain is
    390 		 *
    391 		 *  a -> b -> c
    392 		 *
    393 		 *  and, hence, the follow context stack is:
    394 		 *
    395 		 *  depth  local follow set     after call to rule
    396 		 *    0         <EOF>                    a (from main())
    397 		 *    1          ']'                     b
     398 		 *    2          '^'                     c
    399 		 *
    400 		 *  Notice that ')' is not included, because b would have to have
    401 		 *  been called from a different context in rule a for ')' to be
    402 		 *  included.
    403 		 *
    404 		 *  For error recovery, we cannot consider FOLLOW(c)
    405 		 *  (context-sensitive or otherwise).  We need the combined set of
    406 		 *  all context-sensitive FOLLOW sets--the set of all tokens that
    407 		 *  could follow any reference in the call chain.  We need to
    408 		 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    409 		 *  we resync'd to that token, we'd consume until EOF.  We need to
    410 		 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    411 		 *  In this case, for input "[]", LA(1) is in this set so we would
     412 		 *  not consume anything; after printing an error, rule c would
    413 		 *  return normally.  It would not find the required '^' though.
    414 		 *  At this point, it gets a mismatched token error and throws an
    415 		 *  exception (since LA(1) is not in the viable following token
    416 		 *  set).  The rule exception handler tries to recover, but finds
    417 		 *  the same recovery set and doesn't consume anything.  Rule b
    418 		 *  exits normally returning to rule a.  Now it finds the ']' (and
    419 		 *  with the successful match exits errorRecovery mode).
    420 		 *
     421 		 *  So, you can see that the parser walks up the call chain looking
    422 		 *  for the token that was a member of the recovery set.
    423 		 *
    424 		 *  Errors are not generated in errorRecovery mode.
    425 		 *
    426 		 *  ANTLR's error recovery mechanism is based upon original ideas:
    427 		 *
    428 		 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
    429 		 *
    430 		 *  and
    431 		 *
    432 		 *  "A note on error recovery in recursive descent parsers":
    433 		 *  http://portal.acm.org/citation.cfm?id=947902.947905
    434 		 *
    435 		 *  Later, Josef Grosch had some good ideas:
    436 		 *
    437 		 *  "Efficient and Comfortable Error Recovery in Recursive Descent
    438 		 *  Parsers":
    439 		 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    440 		 *
    441 		 *  Like Grosch I implemented local FOLLOW sets that are combined
    442 		 *  at run-time upon error to avoid overhead during parsing.
    443 		 */
    444 		protected function computeErrorRecoverySet():BitSet {
    445 			return combineFollows(false);
    446 		}
    447 	
    448 		/** Compute the context-sensitive FOLLOW set for current rule.
    449 		 *  This is set of token types that can follow a specific rule
    450 		 *  reference given a specific call chain.  You get the set of
    451 		 *  viable tokens that can possibly come next (lookahead depth 1)
    452 		 *  given the current call chain.  Contrast this with the
    453 		 *  definition of plain FOLLOW for rule r:
    454 		 *
    455 		 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
    456 		 *
     457 		 *  where x in T* and alpha, beta in V*; T is the set of terminals and
    458 		 *  V is the set of terminals and nonterminals.  In other words,
    459 		 *  FOLLOW(r) is the set of all tokens that can possibly follow
    460 		 *  references to r in *any* sentential form (context).  At
    461 		 *  runtime, however, we know precisely which context applies as
    462 		 *  we have the call chain.  We may compute the exact (rather
    463 		 *  than covering superset) set of following tokens.
    464 		 *
    465 		 *  For example, consider grammar:
    466 		 *
    467 		 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    468 		 *       | "return" expr '.'
    469 		 *       ;
    470 		 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    471 		 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    472 		 *       | '(' expr ')'
    473 		 *       ;
    474 		 *
    475 		 *  The FOLLOW sets are all inclusive whereas context-sensitive
    476 		 *  FOLLOW sets are precisely what could follow a rule reference.
     477 		 *  For input "i=(3);", here is the derivation:
    478 		 *
    479 		 *  stat => ID '=' expr ';'
    480 		 *       => ID '=' atom ('+' atom)* ';'
    481 		 *       => ID '=' '(' expr ')' ('+' atom)* ';'
    482 		 *       => ID '=' '(' atom ')' ('+' atom)* ';'
    483 		 *       => ID '=' '(' INT ')' ('+' atom)* ';'
    484 		 *       => ID '=' '(' INT ')' ';'
    485 		 *
    486 		 *  At the "3" token, you'd have a call chain of
    487 		 *
    488 		 *    stat -> expr -> atom -> expr -> atom
    489 		 *
    490 		 *  What can follow that specific nested ref to atom?  Exactly ')'
    491 		 *  as you can see by looking at the derivation of this specific
    492 		 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
    493 		 *
    494 		 *  You want the exact viable token set when recovering from a
    495 		 *  token mismatch.  Upon token mismatch, if LA(1) is member of
    496 		 *  the viable next token set, then you know there is most likely
    497 		 *  a missing token in the input stream.  "Insert" one by just not
    498 		 *  throwing an exception.
    499 		 */
    500 		protected function computeContextSensitiveRuleFOLLOW():BitSet {
    501 			return combineFollows(true);
    502 		}
    503 	
    504 		protected function combineFollows(exact:Boolean):BitSet {
    505 			var top:int = state._fsp;
    506 			var followSet:BitSet = new BitSet();
    507 			for (var i:int=top; i>=0; i--) {
    508 				var localFollowSet:BitSet = state.following[i];
    509 				followSet.orInPlace(localFollowSet);
    510 				if ( exact ) {
    511 					// can we see end of rule?
    512 					if ( localFollowSet.member(TokenConstants.EOR_TOKEN_TYPE) ) {
    513 						// Only leave EOR in set if at top (start rule); this lets
     514 						// us know if we have to include FOLLOW(start rule); i.e., EOF
    515 						if ( i>0 ) {
    516 							followSet.remove(TokenConstants.EOR_TOKEN_TYPE);
    517 						}
    518 					}
    519 					else { // can't see end of rule, quit
    520 						break;
    521 					}
    522 				}
    523 			}
    524 			return followSet;
    525 		}
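		// Worked example, using the grammar from the recovery-set comment above.
		// For erroneous input "[]" the call chain is a -> b -> c, so the follow
		// stack holds (bottom to top): <EOF> for a, ']' for b, '^' for c.
		// combineFollows(false) ORs every level, yielding the recovery set
		// {']', '^', <EOF>}.  combineFollows(true) walks down only while a level
		// can still see end-of-rule (EOR), yielding the exact context-sensitive
		// FOLLOW for the current rule reference.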
    526 	
    527 		/** Attempt to recover from a single missing or extra token.
    528 		 *
    529 		 *  EXTRA TOKEN
    530 		 *
    531 		 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
    532 		 *  however, then assume LA(1) is some extra spurious token.  Delete it
     533 		 *  and use LA(2) as if we were doing a normal match(), which advances
     534 		 *  the input.
    535 		 *
    536 		 *  MISSING TOKEN
    537 		 *
    538 		 *  If current token is consistent with what could come after
     539 		 *  ttype, then it is ok to "insert" the missing token, else throw an
     540 		 *  exception.  For example, input "i=(3;" is clearly missing the
    541 		 *  ')'.  When the parser returns from the nested call to expr, it
    542 		 *  will have call chain:
    543 		 *
    544 		 *    stat -> expr -> atom
    545 		 *
    546 		 *  and it will be trying to match the ')' at this point in the
    547 		 *  derivation:
    548 		 *
    549 		 *       => ID '=' '(' INT ')' ('+' atom)* ';'
    550 		 *                          ^
    551 		 *  match() will see that ';' doesn't match ')' and report a
    552 		 *  mismatched token error.  To recover, it sees that LA(1)==';'
    553 		 *  is in the set of tokens that can follow the ')' token
    554 		 *  reference in rule atom.  It can assume that you forgot the ')'.
    555 		 */
    556 		public function recoverFromMismatchedToken(input:IntStream,
    557 											   	   ttype:int,
    558 											       follow:BitSet):Object {	
    559     		var e:RecognitionException = null;
    560 			// if next token is what we are looking for then "delete" this token
    561 			if ( mismatchIsUnwantedToken(input, ttype) ) {
    562 				e = new UnwantedTokenException(ttype, input);
    563 				/*
    564 				System.err.println("recoverFromMismatchedToken deleting "+
    565 								   ((TokenStream)input).LT(1)+
    566 								   " since "+((TokenStream)input).LT(2)+" is what we want");
    567 				 */
    568 				beginResync();
    569 				input.consume(); // simply delete extra token
    570 				endResync();
    571 				reportError(e);  // report after consuming so AW sees the token in the exception
    572 				// we want to return the token we're actually matching
    573 				var matchedSymbol:Object = getCurrentInputSymbol(input);
    574 				input.consume(); // move past ttype token as if all were ok
    575 				return matchedSymbol;
    576 			}
    577 			// can't recover with single token deletion, try insertion
    578 			if ( mismatchIsMissingToken(input, follow) ) {
    579 				var inserted:Object = getMissingSymbol(input, e, ttype, follow);
    580 				e = new MissingTokenException(ttype, input, inserted);
    581 				reportError(e);  // report after inserting so AW sees the token in the exception
    582 				return inserted;
    583 			}
    584 			// even that didn't work; must throw the exception
    585 			e = new MismatchedTokenException(ttype, input);
    586 			throw e;
    587 		}
    588 	
    589 		/** Not currently used */
    590 		public function recoverFromMismatchedSet(input:IntStream,
    591 											     e:RecognitionException,
    592 											     follow:BitSet):Object
    593 		{
    594     		if ( mismatchIsMissingToken(input, follow) ) {
    595 				// System.out.println("missing token");
    596 				reportError(e);
    597 				// we don't know how to conjure up a token for sets yet
    598 				return getMissingSymbol(input, e, TokenConstants.INVALID_TOKEN_TYPE, follow);
    599 			}
    600 			// TODO do single token deletion like above for Token mismatch
    601 			throw e;
    602 		}
    603 	
    604 		/** Match needs to return the current input symbol, which gets put
    605 		 *  into the label for the associated token ref; e.g., x=ID.  Token
    606 		 *  and tree parsers need to return different objects. Rather than test
    607 		 *  for input stream type or change the IntStream interface, I use
    608 		 *  a simple method to ask the recognizer to tell me what the current
    609 		 *  input symbol is.
    610 		 * 
    611 		 *  This is ignored for lexers.
    612 		 */
    613 		protected function getCurrentInputSymbol(input:IntStream):Object { return null; }
    614 	
    615 		/** Conjure up a missing token during error recovery.
    616 		 *
    617 		 *  The recognizer attempts to recover from single missing
    618 		 *  symbols. But, actions might refer to that missing symbol.
    619 		 *  For example, x=ID {f($x);}. The action clearly assumes
    620 		 *  that there has been an identifier matched previously and that
    621 		 *  $x points at that token. If that token is missing, but
     622 		 *  the next token in the stream is what we want, we assume that
    623 		 *  this token is missing and we keep going. Because we
    624 		 *  have to return some token to replace the missing token,
    625 		 *  we have to conjure one up. This method gives the user control
    626 		 *  over the tokens returned for missing tokens. Mostly,
    627 		 *  you will want to create something special for identifier
    628 		 *  tokens. For literals such as '{' and ',', the default
    629 		 *  action in the parser or tree parser works. It simply creates
    630 		 *  a CommonToken of the appropriate type. The text will be the token.
    631 		 *  If you change what tokens must be created by the lexer,
    632 		 *  override this method to create the appropriate tokens.
    633 		 */
    634 		protected function getMissingSymbol(input:IntStream,
    635 										    e:RecognitionException,
    636 										    expectedTokenType:int,
    637 										    follow:BitSet):Object
    638 		{
    639 			return null;
    640 		}
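		// Illustrative sketch of an override in a concrete parser.  The
		// CommonToken(type, text) constructor used below is an assumption;
		// adapt it to whatever token class your lexer actually produces.
		//
		//     override protected function getMissingSymbol(input:IntStream,
		//                                                   e:RecognitionException,
		//                                                   expectedTokenType:int,
		//                                                   follow:BitSet):Object {
		//         var text:String = "<missing " + tokenNames[expectedTokenType] + ">";
		//         return new CommonToken(expectedTokenType, text);  // assumed ctor
		//     }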
    641 		
    642 		public function consumeUntilToken(input:IntStream, tokenType:int):void {
    643 			//System.out.println("consumeUntil "+tokenType);
    644 			var ttype:int = input.LA(1);
    645 			while (ttype != TokenConstants.EOF && ttype != tokenType) {
    646 				input.consume();
    647 				ttype = input.LA(1);
    648 			}
    649 		}
    650 	
    651 		/** Consume tokens until one matches the given token set */
    652 		public function consumeUntil(input:IntStream, bitSet:BitSet):void {
    653 			//trace("consumeUntil("+bitSet.toStringFromTokens(tokenNames)+")");
    654 			var ttype:int = input.LA(1);
    655 			while (ttype != TokenConstants.EOF && !bitSet.member(ttype) ) {
    656 				//trace("consume during recover LA(1)="+tokenNames[input.LA(1)]);
    657 				input.consume();
    658 				ttype = input.LA(1);
    659 			}
    660 		}
    661 	
    662 		/** Push a rule's follow set using our own hardcoded stack */
    663 		protected function pushFollow(fset:BitSet):void {
    664 			state.following[++state._fsp] = fset;
    665 		}
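		// Illustrative sketch of how a generated rule is expected to bracket a
		// subrule call with the follow stack (FOLLOW_expr_in_stat and expr()
		// are assumptions for the example):
		//
		//     pushFollow(FOLLOW_expr_in_stat);
		//     expr();
		//     state._fsp--;                 // pop the follow set after the call
		//     if ( state.failed ) return;   // propagate backtracking failure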
    666 	
    667 		public function get backtrackingLevel():int {
    668 			return state.backtracking;
    669 		}
    670 	
     671         public function set backtrackingLevel(n:int):void {
    672             state.backtracking = n;
    673         }
    674         
    675         /** Return whether or not a backtracking attempt failed. */
    676         public function get failed():Boolean {
    677             return state.failed;
    678         }
    679         
    680 		/** Used to print out token names like ID during debugging and
    681 		 *  error reporting.  The generated parsers implement a method
    682 		 *  that overrides this to point to their String[] tokenNames.
    683 		 */
    684 		public function get tokenNames():Array {
    685 			return null;
    686 		}
    687 	
    688 		/** For debugging and other purposes, might want the grammar name.
    689 		 *  Have ANTLR generate an implementation for this method.
    690 		 */
    691 		public function get grammarFileName():String {
    692 			return null;
    693 		}
    694 	
    695 		public function get sourceName():String {
    696 			return null;
    697 		}
    698 		
    699 		/** A convenience method for use most often with template rewrites.
     700 		 *  Convert an Array of Token objects to an Array of their text Strings.
    701 		 */
    702 		public function toStrings(tokens:Array):Array {
    703 			if ( tokens==null ) return null;
    704 			var strings:Array = new Array();
    705 			for (var i:int = 0; i<tokens.length; i++) {
    706 				strings.push(tokens[i].text);
    707 			}
    708 			return strings;
    709 		}
    710 	
    711 		/** Given a rule number and a start token index number, return
    712 		 *  MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    713 		 *  start index.  If this rule has parsed input starting from the
    714 		 *  start index before, then return where the rule stopped parsing.
    715 		 *  It returns the index of the last token matched by the rule.
    716 		 *
     717 		 *  For now we use a simple Array-of-Arrays map.  Later, we can make
     718 		 *  a special one for ints and also one that
    719 		 *  tosses out data after we commit past input position i.
    720 		 */
    721 		public function getRuleMemoization(ruleIndex:int, ruleStartIndex:int):int {
    722 			if ( state.ruleMemo[ruleIndex]==undefined ) {
    723 				state.ruleMemo[ruleIndex] = new Array();
    724 			}
    725 			var stopIndex:* = state.ruleMemo[ruleIndex][ruleStartIndex];
    726 			if ( stopIndex == undefined ) {
    727 				return MEMO_RULE_UNKNOWN;
    728 			}
    729 			return stopIndex as int;
    730 		}
    731 	
    732 		/** Has this rule already parsed input at the current index in the
    733 		 *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
    734 		 *  If we attempted but failed to parse properly before, return
    735 		 *  MEMO_RULE_FAILED.
    736 		 *
    737 		 *  This method has a side-effect: if we have seen this input for
    738 		 *  this rule and successfully parsed before, then seek ahead to
    739 		 *  1 past the stop token matched for this rule last time.
    740 		 */
    741 		public function alreadyParsedRule(input:IntStream, ruleIndex:int):Boolean {
    742 			var stopIndex:int = getRuleMemoization(ruleIndex, input.index);
    743 			if ( stopIndex==MEMO_RULE_UNKNOWN ) {
    744 				return false;
    745 			}
    746 			if ( stopIndex==MEMO_RULE_FAILED ) {
    747 				//System.out.println("rule "+ruleIndex+" will never succeed");
    748 				state.failed=true;
    749 			}
    750 			else {
    751 				//System.out.println("seen rule "+ruleIndex+" before; skipping ahead to @"+(stopIndex+1)+" failed="+failed);
    752 				input.seek(stopIndex+1); // jump to one past stop token
    753 			}
    754 			return true;
    755 		}
    756 	
    757 		/** Record whether or not this rule parsed the input at this position
     758 		 *  successfully.  Use a plain Array-based map for now.
    759 		 */
    760 		public function memoize(input:IntStream,
    761 							ruleIndex:int,
    762 							ruleStartIndex:int):void
    763 		{
    764 			var stopTokenIndex:int = state.failed ? MEMO_RULE_FAILED : input.index - 1;
    765 			if ( state.ruleMemo==null ) {
    766     			trace("!!!!!!!!! memo array is null for "+ grammarFileName);
    767     		}
    768     		if ( ruleIndex >= state.ruleMemo.length ) {
    769     			trace("!!!!!!!!! memo size is "+state.ruleMemo.length+", but rule index is "+ruleIndex);
    770     		}
    771 
    772 			if ( state.ruleMemo[ruleIndex]!=null ) {
    773 				state.ruleMemo[ruleIndex][ruleStartIndex] = stopTokenIndex;
    774 			}
    775 		}
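		// Illustrative sketch of the memoization protocol (RULE_INDEX and the
		// rule body are assumptions): a memoized rule consults the cache on
		// entry and records its outcome on exit, typically only while
		// backtracking.
		//
		//     var startIndex:int = input.index;
		//     if ( state.backtracking>0 && alreadyParsedRule(input, RULE_INDEX) ) {
		//         return;   // cache hit; input is already positioned past the rule
		//     }
		//     try {
		//         // ... rule body ...
		//     }
		//     finally {
		//         if ( state.backtracking>0 ) memoize(input, RULE_INDEX, startIndex);
		//     }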
    776 	
    777 		/** return how many rule/input-index pairs there are in total.
    778 		 *  TODO: this includes synpreds. :(
    779 		 */
    780 		public function getRuleMemoizationCacheSize():int {
    781 			var n:int = 0;
    782 			for (var i:int = 0; state.ruleMemo!=null && i < state.ruleMemo.length; i++) {
    783 				var ruleMap:Object = state.ruleMemo[i];
    784 				if ( ruleMap!=null ) {
    785 					n += ruleMap.length; // how many input indexes are recorded?
    786 				}
    787 			}
    788 			return n;
    789 		}
    790 	
    791 		public function traceInSymbol(ruleName:String, ruleIndex:int, inputSymbol:Object):void  {
    792 			trace("enter "+ruleName+" "+inputSymbol);
    793 			if ( state.backtracking>0 ) {
    794 				trace(" backtracking="+state.backtracking);
    795 			}
    796 			trace();
    797 		}
    798 	
    799 		public function traceOutSymbol(ruleName:String,
    800 							  ruleIndex:int,
    801 							  inputSymbol:Object):void
    802 		{
    803 			trace("exit "+ruleName+" "+inputSymbol);
    804 			if ( state.backtracking>0 ) {
    805 				trace(" backtracking="+state.backtracking);
    806 				if ( state.failed ) trace(" failed");
    807                 else trace(" succeeded");
    808 			}
    809 			trace();
    810 		}
    811 	
    812 	}
    813 }