Home | History | Annotate | Download | only in Framework
      1 //
      2 //  ANTLRBaseRecognizer.m
      3 //  ANTLR
      4 //
      5 //  Created by Alan Condit on 6/16/10.
      6 // [The "BSD licence"]
      7 // Copyright (c) 2010 Alan Condit
      8 // All rights reserved.
      9 //
     10 // Redistribution and use in source and binary forms, with or without
     11 // modification, are permitted provided that the following conditions
     12 // are met:
     13 // 1. Redistributions of source code must retain the above copyright
     14 //    notice, this list of conditions and the following disclaimer.
     15 // 2. Redistributions in binary form must reproduce the above copyright
     16 //    notice, this list of conditions and the following disclaimer in the
     17 //    documentation and/or other materials provided with the distribution.
     18 // 3. The name of the author may not be used to endorse or promote products
     19 //    derived from this software without specific prior written permission.
     20 //
     21 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 
     32 #import "ANTLRBaseRecognizer.h"
     33 #import "ANTLRHashRule.h"
     34 #import "ANTLRRuleMemo.h"
     35 #import "ANTLRCommonToken.h"
     36 #import "ANTLRMap.h"
     37 
     38 extern NSInteger debug;
     39 
     40 @implementation ANTLRBaseRecognizer
     41 
     42 static AMutableArray *_tokenNames;
     43 static NSString *_grammarFileName;
     44 static NSString *NEXT_TOKEN_RULE_NAME;
     45 
     46 @synthesize state;
     47 @synthesize grammarFileName;
     48 //@synthesize failed;
     49 @synthesize sourceName;
     50 //@synthesize numberOfSyntaxErrors;
     51 @synthesize tokenNames;
     52 
     53 + (void) initialize
     54 {
     55     NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"];
     56     [NEXT_TOKEN_RULE_NAME retain];
     57 }
     58 
     59 + (ANTLRBaseRecognizer *) newANTLRBaseRecognizer
     60 {
     61     return [[ANTLRBaseRecognizer alloc] init];
     62 }
     63 
     64 + (ANTLRBaseRecognizer *) newANTLRBaseRecognizerWithRuleLen:(NSInteger)aLen
     65 {
     66     return [[ANTLRBaseRecognizer alloc] initWithLen:aLen];
     67 }
     68 
     69 + (ANTLRBaseRecognizer *) newANTLRBaseRecognizer:(ANTLRRecognizerSharedState *)aState
     70 {
     71 	return [[ANTLRBaseRecognizer alloc] initWithState:aState];
     72 }
     73 
     74 + (AMutableArray *)getTokenNames
     75 {
     76     return _tokenNames;
     77 }
     78 
     79 + (void)setTokenNames:(AMutableArray *)theTokNams
     80 {
     81     if ( _tokenNames != theTokNams ) {
     82         if ( _tokenNames ) [_tokenNames release];
     83         [theTokNams retain];
     84     }
     85     _tokenNames = theTokNams;
     86 }
     87 
     88 + (void)setGrammarFileName:(NSString *)aFileName
     89 {
     90     if ( _grammarFileName != aFileName ) {
     91         if ( _grammarFileName ) [_grammarFileName release];
     92         [aFileName retain];
     93     }
     94     [_grammarFileName retain];
     95 }
     96 
     97 - (id) init
     98 {
     99 	if ((self = [super init]) != nil) {
    100         if (state == nil) {
    101             state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedState] retain];
    102         }
    103         tokenNames = _tokenNames;
    104         if ( tokenNames ) [tokenNames retain];
    105         grammarFileName = _grammarFileName;
    106         if ( grammarFileName ) [grammarFileName retain];
    107         state._fsp = -1;
    108         state.errorRecovery = NO;		// are we recovering?
    109         state.lastErrorIndex = -1;
    110         state.failed = NO;				// indicate that some match failed
    111         state.syntaxErrors = 0;
    112         state.backtracking = 0;			// the level of backtracking
    113         state.tokenStartCharIndex = -1;
    114 	}
    115 	return self;
    116 }
    117 
    118 - (id) initWithLen:(NSInteger)aLen
    119 {
    120 	if ((self = [super init]) != nil) {
    121         if (state == nil) {
    122             state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedStateWithRuleLen:aLen] retain];
    123         }
    124         tokenNames = _tokenNames;
    125         if ( tokenNames ) [tokenNames retain];
    126         grammarFileName = _grammarFileName;
    127         if ( grammarFileName ) [grammarFileName retain];
    128         state._fsp = -1;
    129         state.errorRecovery = NO;		// are we recovering?
    130         state.lastErrorIndex = -1;
    131         state.failed = NO;				// indicate that some match failed
    132         state.syntaxErrors = 0;
    133         state.backtracking = 0;			// the level of backtracking
    134         state.tokenStartCharIndex = -1;
    135 	}
    136 	return self;
    137 }
    138 
    139 - (id) initWithState:(ANTLRRecognizerSharedState *)aState
    140 {
    141 	if ((self = [super init]) != nil) {
    142 		state = aState;
    143         if (state == nil) {
    144             state = [ANTLRRecognizerSharedState newANTLRRecognizerSharedState];
    145         }
    146         [state retain];
    147         tokenNames = _tokenNames;
    148         if ( tokenNames ) [tokenNames retain];
    149         grammarFileName = _grammarFileName;
    150         if ( grammarFileName ) [grammarFileName retain];
    151         state._fsp = -1;
    152         state.errorRecovery = NO;		// are we recovering?
    153         state.lastErrorIndex = -1;
    154         state.failed = NO;				// indicate that some match failed
    155         state.syntaxErrors = 0;
    156         state.backtracking = 0;			// the level of backtracking
    157         state.tokenStartCharIndex = -1;
    158 	}
    159 	return self;
    160 }
    161 
    162 - (void)dealloc
    163 {
    164 #ifdef DEBUG_DEALLOC
    165     NSLog( @"called dealloc in ANTLRBaseRecognizer" );
    166 #endif
    167 	if ( grammarFileName ) [grammarFileName release];
    168 	if ( tokenNames ) [tokenNames release];
    169 	if ( state ) [state release];
    170 	[super dealloc];
    171 }
    172 
    173 // reset the recognizer to the initial state. does not touch the token source!
    174 // this can be extended by the grammar writer to reset custom ivars
    175 - (void) reset
    176 {
    177     if ( state == nil )
    178         return; 
    179     if ( state.following != nil ) {
    180         if ( [state.following count] )
    181             [state.following removeAllObjects];
    182     }
    183     state._fsp = -1;
    184     state.errorRecovery = NO;		// are we recovering?
    185     state.lastErrorIndex = -1;
    186     state.failed = NO;				// indicate that some match failed
    187     state.syntaxErrors = 0;
    188     state.backtracking = 0;			// the level of backtracking
    189     state.tokenStartCharIndex = -1;
    190     if ( state.ruleMemo != nil ) {
    191         if ( [state.ruleMemo count] )
    192             [state.ruleMemo removeAllObjects];
    193     }
    194 }
    195 
    196 - (BOOL) getFailed
    197 {
    198 	return [state getFailed];
    199 }
    200 
    201 - (void) setFailed:(BOOL)flag
    202 {
    203 	[state setFailed:flag];
    204 }
    205 
    206 - (ANTLRRecognizerSharedState *) getState
    207 {
    208 	return state;
    209 }
    210 
    211 - (void) setState:(ANTLRRecognizerSharedState *) theState
    212 {
    213 	if (state != theState) {
    214 		if ( state ) [state release];
    215 		state = theState;
    216 		[state retain];
    217 	}
    218 }
    219 
    220 - (id)input
    221 {
    222     return nil; // Must be overriden in inheriting class
    223 }
    224 
    225 - (void)skip // override in inheriting class
    226 {
    227     return;
    228 }
    229 
    230 -(id) match:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
    231 {
    232 	id matchedSymbol = [self getCurrentInputSymbol:anInput];
    233 	if ([anInput LA:1] == ttype) {
    234 		[anInput consume];
    235 		state.errorRecovery = NO;
    236 		state.failed = NO;
    237 		return matchedSymbol;
    238 	}
    239 	if (state.backtracking > 0) {
    240 		state.failed = YES;
    241 		return matchedSymbol;
    242 	}
    243 	matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    244 	return matchedSymbol;
    245 }
    246 
    247 -(void) matchAny:(id<ANTLRIntStream>)anInput
    248 {
    249     state.errorRecovery = NO;
    250     state.failed = NO;
    251     [anInput consume];
    252 }
    253 
    254 -(BOOL) mismatchIsUnwantedToken:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype
    255 {
    256     return [anInput LA:2] == ttype;
    257 }
    258 
    259 -(BOOL) mismatchIsMissingToken:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *) follow
    260 {
    261     if ( follow == nil ) {
    262         // we have no information about the follow; we can only consume
    263         // a single token and hope for the best
    264         return NO;
    265     }
    266     // compute what can follow this grammar element reference
    267     if ( [follow member:ANTLRTokenTypeEOR] ) {
    268         ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW];
    269         follow = [follow or:viableTokensFollowingThisRule];
    270         if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol
    271             [follow remove:(ANTLRTokenTypeEOR)];
    272         }
    273     }
    274     // if current token is consistent with what could come after set
    275     // then we know we're missing a token; error recovery is free to
    276     // "insert" the missing token
    277     
    278     //System.out.println("viable tokens="+follow.toString(getTokenNames()));
    279     //System.out.println("LT(1)="+((TokenStream)input).LT(1));
    280     
    281     // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
    282     // in follow set to indicate that the fall of the start symbol is
    283     // in the set (EOF can follow).
    284     if ( [follow member:[anInput LA:1]] || [follow member:ANTLRTokenTypeEOR] ) {
    285         //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
    286         return YES;
    287     }
    288     return NO;
    289 }
    290 
    291 /** Report a recognition problem.
    292  *
    293  *  This method sets errorRecovery to indicate the parser is recovering
    294  *  not parsing.  Once in recovery mode, no errors are generated.
    295  *  To get out of recovery mode, the parser must successfully match
    296  *  a token (after a resync).  So it will go:
    297  *
    298  * 		1. error occurs
    299  * 		2. enter recovery mode, report error
    300  * 		3. consume until token found in resynch set
    301  * 		4. try to resume parsing
    302  * 		5. next match() will reset errorRecovery mode
    303  *
    304  *  If you override, make sure to update syntaxErrors if you care about that.
    305  */
    306 -(void) reportError:(ANTLRRecognitionException *) e
    307 {
    308     // if we've already reported an error and have not matched a token
    309     // yet successfully, don't report any errors.
    310     if ( state.errorRecovery ) {
    311         //System.err.print("[SPURIOUS] ");
    312         return;
    313     }
    314     state.syntaxErrors++; // don't count spurious
    315     state.errorRecovery = YES;
    316     
    317     [self displayRecognitionError:[self getTokenNames] Exception:e];
    318 }
    319 
    320 -(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(ANTLRRecognitionException *)e
    321 {
    322     NSString *hdr = [self getErrorHeader:e];
    323     NSString *msg = [self getErrorMessage:e TokenNames:theTokNams];
    324     [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]];
    325 }
    326 
    327 /** What error message should be generated for the various
    328  *  exception types?
    329  *
    330  *  Not very object-oriented code, but I like having all error message
    331  *  generation within one method rather than spread among all of the
    332  *  exception classes. This also makes it much easier for the exception
    333  *  handling because the exception classes do not have to have pointers back
    334  *  to this object to access utility routines and so on. Also, changing
    335  *  the message for an exception type would be difficult because you
    336  *  would have to subclassing exception, but then somehow get ANTLR
    337  *  to make those kinds of exception objects instead of the default.
    338  *  This looks weird, but trust me--it makes the most sense in terms
    339  *  of flexibility.
    340  *
    341  *  For grammar debugging, you will want to override this to add
    342  *  more information such as the stack frame with
    343  *  getRuleInvocationStack(e, this.getClass().getName()) and,
    344  *  for no viable alts, the decision description and state etc...
    345  *
    346  *  Override this to change the message generated for one or more
    347  *  exception types.
    348  */
    349 - (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)theTokNams
    350 {
    351     // NSString *msg = [e getMessage];
    352     NSString *msg;
    353     if ( [e isKindOfClass:[ANTLRUnwantedTokenException class]] ) {
    354         ANTLRUnwantedTokenException *ute = (ANTLRUnwantedTokenException *)e;
    355         NSString *tokenName=@"<unknown>";
    356         if ( ute.expecting == ANTLRTokenTypeEOF ) {
    357             tokenName = @"EOF";
    358         }
    359         else {
    360             tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
    361         }
    362         msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
    363                tokenName];
    364     }
    365     else if ( [e isKindOfClass:[ANTLRMissingTokenException class] ] ) {
    366         ANTLRMissingTokenException *mte = (ANTLRMissingTokenException *)e;
    367         NSString *tokenName=@"<unknown>";
    368         if ( mte.expecting== ANTLRTokenTypeEOF ) {
    369             tokenName = @"EOF";
    370         }
    371         else {
    372             tokenName = [theTokNams objectAtIndex:mte.expecting];
    373         }
    374         msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
    375     }
    376     else if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
    377         ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
    378         NSString *tokenName=@"<unknown>";
    379         if ( mte.expecting== ANTLRTokenTypeEOF ) {
    380             tokenName = @"EOF";
    381         }
    382         else {
    383             tokenName = [theTokNams objectAtIndex:mte.expecting];
    384         }
    385         msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
    386     }
    387     else if ( [e isKindOfClass:[ANTLRMismatchedTreeNodeException class]] ) {
    388         ANTLRMismatchedTreeNodeException *mtne = (ANTLRMismatchedTreeNodeException *)e;
    389         NSString *tokenName=@"<unknown>";
    390         if ( mtne.expecting==ANTLRTokenTypeEOF ) {
    391             tokenName = @"EOF";
    392         }
    393         else {
    394             tokenName = [theTokNams objectAtIndex:mtne.expecting];
    395         }
    396         msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
    397     }
    398     else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
    399         //NoViableAltException *nvae = (NoViableAltException *)e;
    400         // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    401         // and "(decision="+nvae.decisionNumber+") and
    402         // "state "+nvae.stateNumber
    403         msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
    404     }
    405     else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
    406         //ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
    407         // for development, can add "(decision="+eee.decisionNumber+")"
    408         msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]];
    409     }
    410     else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
    411         ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
    412         msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
    413                [self getTokenErrorDisplay:(e.token)],
    414                mse.expecting];
    415     }
    416 #pragma warning NotSet not yet implemented.
    417     else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class] ] ) {
    418         ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
    419         msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
    420                [self getTokenErrorDisplay:(e.token)],
    421                mse.expecting];
    422     }
    423     else if ( [e isKindOfClass:[ANTLRFailedPredicateException class]] ) {
    424         ANTLRFailedPredicateException *fpe = (ANTLRFailedPredicateException *)e;
    425         msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
    426     }
    427     else {
    428         msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    429     }
    430     return msg;
    431 }
    432 
    433 /** Get number of recognition errors (lexer, parser, tree parser).  Each
    434  *  recognizer tracks its own number.  So parser and lexer each have
    435  *  separate count.  Does not count the spurious errors found between
    436  *  an error and next valid token match
    437  *
    438  *  See also reportError()
    439  */
    440 - (NSInteger) getNumberOfSyntaxErrors
    441 {
    442     return state.syntaxErrors;
    443 }
    444 
    445 /** What is the error header, normally line/character position information? */
    446 - (NSString *)getErrorHeader:(ANTLRRecognitionException *)e
    447 {
    448     return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine];
    449 }
    450 
    451 /** How should a token be displayed in an error message? The default
    452  *  is to display just the text, but during development you might
    453  *  want to have a lot of information spit out.  Override in that case
    454  *  to use t.toString() (which, for CommonToken, dumps everything about
    455  *  the token). This is better than forcing you to override a method in
    456  *  your token objects because you don't have to go modify your lexer
    457  *  so that it creates a new Java type.
    458  */
    459 - (NSString *)getTokenErrorDisplay:(id<ANTLRToken>)t
    460 {
    461     NSString *s = t.text;
    462     if ( s == nil ) {
    463         if ( t.type == ANTLRTokenTypeEOF ) {
    464             s = @"<EOF>";
    465         }
    466         else {
    467             s = [NSString stringWithFormat:@"<%@>", t.type];
    468         }
    469     }
    470     s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"];
    471     s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"];
    472     s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"];
    473     return [NSString stringWithFormat:@"\'%@\'", s];
    474 }
    475                                         
    476 /** Override this method to change where error messages go */
    477 - (void) emitErrorMessage:(NSString *) msg
    478 {
    479 //    System.err.println(msg);
    480     NSLog(@"%@", msg);
    481 }
    482 
    483 /** Recover from an error found on the input stream.  This is
    484  *  for NoViableAlt and mismatched symbol exceptions.  If you enable
    485  *  single token insertion and deletion, this will usually not
    486  *  handle mismatched symbol exceptions but there could be a mismatched
    487  *  token that the match() routine could not recover from.
    488  */
    489 - (void)recover:(id<ANTLRIntStream>)anInput Exception:(ANTLRRecognitionException *)re
    490 {
    491     if ( state.lastErrorIndex == anInput.index ) {
    492         // uh oh, another error at same token index; must be a case
    493         // where LT(1) is in the recovery token set so nothing is
    494         // consumed; consume a single token so at least to prevent
    495         // an infinite loop; this is a failsafe.
    496         [anInput consume];
    497     }
    498     state.lastErrorIndex = anInput.index;
    499     ANTLRBitSet *followSet = [self computeErrorRecoverySet];
    500     [self beginResync];
    501     [self consumeUntilFollow:anInput Follow:followSet];
    502     [self endResync];
    503 }
    504 
    505 - (void) beginResync
    506 {
    507     
    508 }
    509 
    510 - (void) endResync
    511 {
    512     
    513 }
    514                             
    515 /*  Compute the error recovery set for the current rule.  During
    516  *  rule invocation, the parser pushes the set of tokens that can
    517  *  follow that rule reference on the stack; this amounts to
    518  *  computing FIRST of what follows the rule reference in the
    519  *  enclosing rule. This local follow set only includes tokens
    520  *  from within the rule; i.e., the FIRST computation done by
    521  *  ANTLR stops at the end of a rule.
    522  *
    523  *  EXAMPLE
    524  *
    525  *  When you find a "no viable alt exception", the input is not
    526  *  consistent with any of the alternatives for rule r.  The best
    527  *  thing to do is to consume tokens until you see something that
    528  *  can legally follow a call to r *or* any rule that called r.
    529  *  You don't want the exact set of viable next tokens because the
    530  *  input might just be missing a token--you might consume the
    531  *  rest of the input looking for one of the missing tokens.
    532  *
    533  *  Consider grammar:
    534  *
    535  *  a : '[' b ']'
    536  *    | '(' b ')'
    537  *    ;
    538  *  b : c '^' INT ;
    539  *  c : ID
    540  *    | INT
    541  *    ;
    542  *
    543  *  At each rule invocation, the set of tokens that could follow
    544  *  that rule is pushed on a stack.  Here are the various "local"
    545  *  follow sets:
    546  *
    547  *  FOLLOW(b1_in_a) = FIRST(']') = ']'
    548  *  FOLLOW(b2_in_a) = FIRST(')') = ')'
    549  *  FOLLOW(c_in_b) = FIRST('^') = '^'
    550  *
    551  *  Upon erroneous input "[]", the call chain is
    552  *
    553  *  a -> b -> c
    554  *
    555  *  and, hence, the follow context stack is:
    556  *
    557  *  depth  local follow set     after call to rule
    558  *    0         <EOF>                    a (from main())
    559  *    1          ']'                     b
    560  *    3          '^'                     c
    561  *
    562  *  Notice that ')' is not included, because b would have to have
    563  *  been called from a different context in rule a for ')' to be
    564  *  included.
    565  *
    566  *  For error recovery, we cannot consider FOLLOW(c)
    567  *  (context-sensitive or otherwise).  We need the combined set of
    568  *  all context-sensitive FOLLOW sets--the set of all tokens that
    569  *  could follow any reference in the call chain.  We need to
    570  *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    571  *  we resync'd to that token, we'd consume until EOF.  We need to
    572  *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    573  *  In this case, for input "[]", LA(1) is in this set so we would
    574  *  not consume anything and after printing an error rule c would
    575  *  return normally.  It would not find the required '^' though.
    576  *  At this point, it gets a mismatched token error and throws an
    577  *  exception (since LA(1) is not in the viable following token
    578  *  set).  The rule exception handler tries to recover, but finds
    579  *  the same recovery set and doesn't consume anything.  Rule b
    580  *  exits normally returning to rule a.  Now it finds the ']' (and
    581  *  with the successful match exits errorRecovery mode).
    582  *
    583  *  So, you cna see that the parser walks up call chain looking
    584  *  for the token that was a member of the recovery set.
    585  *
    586  *  Errors are not generated in errorRecovery mode.
    587  *
    588  *  ANTLR's error recovery mechanism is based upon original ideas:
    589  *
    590  *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
    591  *
    592  *  and
    593  *
    594  *  "A note on error recovery in recursive descent parsers":
    595  *  http://portal.acm.org/citation.cfm?id=947902.947905
    596  *
    597  *  Later, Josef Grosch had some good ideas:
    598  *
    599  *  "Efficient and Comfortable Error Recovery in Recursive Descent
    600  *  Parsers":
    601  *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    602  *
    603  *  Like Grosch I implemented local FOLLOW sets that are combined
    604  *  at run-time upon error to avoid overhead during parsing.
    605  */
    606 - (ANTLRBitSet *) computeErrorRecoverySet
    607 {
    608     return [self combineFollows:NO];
    609 }
    610 
    611 /** Compute the context-sensitive FOLLOW set for current rule.
    612  *  This is set of token types that can follow a specific rule
    613  *  reference given a specific call chain.  You get the set of
    614  *  viable tokens that can possibly come next (lookahead depth 1)
    615  *  given the current call chain.  Contrast this with the
    616  *  definition of plain FOLLOW for rule r:
    617  *
    618  *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
    619  *
    620  *  where x in T* and alpha, beta in V*; T is set of terminals and
    621  *  V is the set of terminals and nonterminals.  In other words,
    622  *  FOLLOW(r) is the set of all tokens that can possibly follow
    623  *  references to r in *any* sentential form (context).  At
    624  *  runtime, however, we know precisely which context applies as
    625  *  we have the call chain.  We may compute the exact (rather
    626  *  than covering superset) set of following tokens.
    627  *
    628  *  For example, consider grammar:
    629  *
    630  *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    631  *       | "return" expr '.'
    632  *       ;
    633  *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    634  *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    635  *       | '(' expr ')'
    636  *       ;
    637  *
    638  *  The FOLLOW sets are all inclusive whereas context-sensitive
    639  *  FOLLOW sets are precisely what could follow a rule reference.
    640  *  For input input "i=(3);", here is the derivation:
    641  *
    642  *  stat => ID '=' expr ';'
    643  *       => ID '=' atom ('+' atom)* ';'
    644  *       => ID '=' '(' expr ')' ('+' atom)* ';'
    645  *       => ID '=' '(' atom ')' ('+' atom)* ';'
    646  *       => ID '=' '(' INT ')' ('+' atom)* ';'
    647  *       => ID '=' '(' INT ')' ';'
    648  *
    649  *  At the "3" token, you'd have a call chain of
    650  *
    651  *    stat -> expr -> atom -> expr -> atom
    652  *
    653  *  What can follow that specific nested ref to atom?  Exactly ')'
    654  *  as you can see by looking at the derivation of this specific
    655  *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
    656  *
    657  *  You want the exact viable token set when recovering from a
    658  *  token mismatch.  Upon token mismatch, if LA(1) is member of
    659  *  the viable next token set, then you know there is most likely
    660  *  a missing token in the input stream.  "Insert" one by just not
    661  *  throwing an exception.
    662  */
    663 - (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
    664 {
    665     return [self combineFollows:YES];
    666 }
    667 
    668 // what is exact? it seems to only add sets from above on stack
    669 // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
    670 // Why would we ever want them all?  Maybe no viable alt instead of
    671 // mismatched token?
    672 - (ANTLRBitSet *)combineFollows:(BOOL) exact
    673 {
    674     NSInteger top = state._fsp;
    675     ANTLRBitSet *followSet = [[ANTLRBitSet newANTLRBitSet] retain];
    676     for (int i = top; i >= 0; i--) {
    677         ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i];
    678         /*
    679          System.out.println("local follow depth "+i+"="+
    680          localFollowSet.toString(getTokenNames())+")");
    681          */
    682         [followSet orInPlace:localFollowSet];
    683         if ( exact ) {
    684             // can we see end of rule?
    685             if ( [localFollowSet member:ANTLRTokenTypeEOR] ) {
    686                 // Only leave EOR in set if at top (start rule); this lets
    687                 // us know if have to include follow(start rule); i.e., EOF
    688                 if ( i > 0 ) {
    689                     [followSet remove:ANTLRTokenTypeEOR];
    690                 }
    691             }
    692             else { // can't see end of rule, quit
    693                 break;
    694             }
    695         }
    696     }
    697     return followSet;
    698 }
    699 
    700 /** Attempt to recover from a single missing or extra token.
    701  *
    702  *  EXTRA TOKEN
    703  *
    704  *  LA(1) is not what we are looking for.  If LA(2) has the right token,
    705  *  however, then assume LA(1) is some extra spurious token.  Delete it
    706  *  and LA(2) as if we were doing a normal match(), which advances the
    707  *  input.
    708  *
    709  *  MISSING TOKEN
    710  *
    711  *  If current token is consistent with what could come after
    712  *  ttype then it is ok to "insert" the missing token, else throw
    713  *  exception For example, Input "i=(3;" is clearly missing the
    714  *  ')'.  When the parser returns from the nested call to expr, it
    715  *  will have call chain:
    716  *
    717  *    stat -> expr -> atom
    718  *
    719  *  and it will be trying to match the ')' at this point in the
    720  *  derivation:
    721  *
    722  *       => ID '=' '(' INT ')' ('+' atom)* ';'
    723  *                          ^
    724  *  match() will see that ';' doesn't match ')' and report a
    725  *  mismatched token error.  To recover, it sees that LA(1)==';'
    726  *  is in the set of tokens that can follow the ')' token
    727  *  reference in rule atom.  It can assume that you forgot the ')'.
    728  */
    729 - (id<ANTLRToken>)recoverFromMismatchedToken:(id<ANTLRIntStream>)anInput
    730                        TokenType:(NSInteger)ttype
    731                           Follow:(ANTLRBitSet *)follow
    732 {
    733     ANTLRRecognitionException *e = nil;
    734     // if next token is what we are looking for then "delete" this token
    735     if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
    736         e = [ANTLRUnwantedTokenException newException:ttype Stream:anInput];
    737         /*
    738          System.err.println("recoverFromMismatchedToken deleting "+
    739          ((TokenStream)input).LT(1)+
    740          " since "+((TokenStream)input).LT(2)+" is what we want");
    741          */
    742         [self beginResync];
    743         [anInput consume]; // simply delete extra token
    744         [self endResync];
    745         [self reportError:e];  // report after consuming so AW sees the token in the exception
    746                          // we want to return the token we're actually matching
    747         id matchedSymbol = [self getCurrentInputSymbol:anInput];
    748         [anInput consume]; // move past ttype token as if all were ok
    749         return matchedSymbol;
    750     }
    751     // can't recover with single token deletion, try insertion
    752     if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
    753         id<ANTLRToken> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow];
    754         e = [ANTLRMissingTokenException newException:ttype Stream:anInput With:inserted];
    755         [self reportError:e];  // report after inserting so AW sees the token in the exception
    756         return inserted;
    757     }
    758     // even that didn't work; must throw the exception
    759     e = [ANTLRMismatchedTokenException newException:ttype Stream:anInput];
    760     @throw e;
    761 }
    762 
    763 /** Not currently used */
    764 -(id) recoverFromMismatchedSet:(id<ANTLRIntStream>)anInput
    765                      Exception:(ANTLRRecognitionException *)e
    766                         Follow:(ANTLRBitSet *) follow
    767 {
    768     if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
    769         // System.out.println("missing token");
    770         [self reportError:e];
    771         // we don't know how to conjure up a token for sets yet
    772         return [self getMissingSymbol:anInput Exception:e TokenType:ANTLRTokenTypeInvalid Follow:follow];
    773     }
    774     // TODO do single token deletion like above for Token mismatch
    775     @throw e;
    776 }
    777 
    778 /** Match needs to return the current input symbol, which gets put
    779  *  into the label for the associated token ref; e.g., x=ID.  Token
    780  *  and tree parsers need to return different objects. Rather than test
    781  *  for input stream type or change the IntStream interface, I use
    782  *  a simple method to ask the recognizer to tell me what the current
    783  *  input symbol is.
    784  * 
    785  *  This is ignored for lexers.
    786  */
    787 - (id) getCurrentInputSymbol:(id<ANTLRIntStream>)anInput
    788 {
    789     return nil;
    790 }
    791 
    792 /** Conjure up a missing token during error recovery.
    793  *
    794  *  The recognizer attempts to recover from single missing
    795  *  symbols. But, actions might refer to that missing symbol.
    796  *  For example, x=ID {f($x);}. The action clearly assumes
    797  *  that there has been an identifier matched previously and that
    798  *  $x points at that token. If that token is missing, but
    799  *  the next token in the stream is what we want we assume that
    800  *  this token is missing and we keep going. Because we
    801  *  have to return some token to replace the missing token,
    802  *  we have to conjure one up. This method gives the user control
    803  *  over the tokens returned for missing tokens. Mostly,
    804  *  you will want to create something special for identifier
    805  *  tokens. For literals such as '{' and ',', the default
    806  *  action in the parser or tree parser works. It simply creates
    807  *  a CommonToken of the appropriate type. The text will be the token.
    808  *  If you change what tokens must be created by the lexer,
    809  *  override this method to create the appropriate tokens.
    810  */
    811 - (id)getMissingSymbol:(id<ANTLRIntStream>)anInput
    812              Exception:(ANTLRRecognitionException *)e
    813              TokenType:(NSInteger)expectedTokenType
    814                 Follow:(ANTLRBitSet *)follow
    815 {
    816     return nil;
    817 }
    818 
    819 
    820 -(void) consumeUntilTType:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)tokenType
    821 {
    822     //System.out.println("consumeUntil "+tokenType);
    823     int ttype = [anInput LA:1];
    824     while (ttype != ANTLRTokenTypeEOF && ttype != tokenType) {
    825         [anInput consume];
    826         ttype = [anInput LA:1];
    827     }
    828 }
    829 
    830 /** Consume tokens until one matches the given token set */
    831 -(void) consumeUntilFollow:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *)set
    832 {
    833     //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
    834     int ttype = [anInput LA:1];
    835     while (ttype != ANTLRTokenTypeEOF && ![set member:ttype] ) {
    836         //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
    837         [anInput consume];
    838         ttype = [anInput LA:1];
    839     }
    840 }
    841 
    842 /** Push a rule's follow set using our own hardcoded stack */
    843 - (void)pushFollow:(ANTLRBitSet *)fset
    844 {
    845     if ( (state._fsp +1) >= [state.following count] ) {
    846         //        AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2];
    847         //        System.arraycopy(state.following, 0, f, 0, state.following.length);
    848         //        state.following = f;
    849         [state.following addObject:fset];
    850         [fset retain];
    851         state._fsp++;
    852     }
    853     else {
    854         [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
    855     }
    856 }
    857 
    858 - (ANTLRBitSet *)popFollow
    859 {
    860     ANTLRBitSet *fset;
    861 
    862     if ( state._fsp >= 0 && [state.following count] > 0 ) {
    863         fset = [state.following objectAtIndex:state._fsp--];
    864         [state.following removeLastObject];
    865         return fset;
    866     }
    867     else {
    868         NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
    869     }
    870     return nil;
    871 }
    872 
    873 /** Return List<String> of the rules in your parser instance
    874  *  leading up to a call to this method.  You could override if
    875  *  you want more details such as the file/line info of where
    876  *  in the parser java code a rule is invoked.
    877  *
    878  *  This is very useful for error messages and for context-sensitive
    879  *  error recovery.
    880  */
    881 - (AMutableArray *)getRuleInvocationStack
    882 {
    883     NSString *parserClassName = [[self className] retain];
    884     return [self getRuleInvocationStack:[ANTLRRecognitionException newException] Recognizer:parserClassName];
    885 }
    886 
    887 /** A more general version of getRuleInvocationStack where you can
    888  *  pass in, for example, a RecognitionException to get it's rule
    889  *  stack trace.  This routine is shared with all recognizers, hence,
    890  *  static.
    891  *
    892  *  TODO: move to a utility class or something; weird having lexer call this
    893  */
    894 - (AMutableArray *)getRuleInvocationStack:(ANTLRRecognitionException *)e
    895                                 Recognizer:(NSString *)recognizerClassName
    896 {
    897     // char *name;
    898     AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
    899     NSArray *stack = [e callStackSymbols];
    900     int i = 0;
    901     for (i = [stack count]-1; i >= 0; i--) {
    902         NSString *t = [stack objectAtIndex:i];
    903         // NSLog(@"stack %d = %@\n", i, t);
    904         if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) {
    905             // id aClass = objc_getClass( [t UTF8String] );
    906             continue; // skip support code such as this method
    907         }
    908         if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
    909             // name = sel_getName(method_getName(method));
    910             // NSString *aMethod = [NSString stringWithFormat:@"%s", name];
    911             continue;
    912         }
    913         if ( ![t isEqualTo:recognizerClassName] ) {
    914             // name = class_getName( [t UTF8String] );
    915             continue; // must not be part of this parser
    916         }
    917         [rules addObject:t];
    918     }
    919 #ifdef DONTUSEYET
    920     StackTraceElement[] stack = e.getStackTrace();
    921     int i = 0;
    922     for (i=stack.length-1; i>=0; i--) {
    923         StackTraceElement t = stack[i];
    924         if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
    925             continue; // skip support code such as this method
    926         }
    927               if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
    928             continue;
    929         }
    930               if ( ![[t getClassName] equals:recognizerClassName] ) {
    931             continue; // must not be part of this parser
    932         }
    933               [rules addObject:[t getMethodName]];
    934     }
    935 #endif
    936     [stack release];
    937     return rules;
    938 }
    939 
    940 - (NSInteger) getBacktrackingLevel
    941 {
    942     return [state getBacktracking];
    943 }
    944       
    945 - (void) setBacktrackingLevel:(NSInteger)level
    946 {
    947     [state setBacktracking:level];
    948 }
    949       
    950         /** Used to print out token names like ID during debugging and
    951  *  error reporting.  The generated parsers implement a method
    952  *  that overrides this to point to their String[] tokenNames.
    953  */
    954 - (NSArray *)getTokenNames
    955 {
    956     return tokenNames;
    957 }
    958 
    959 /** For debugging and other purposes, might want the grammar name.
    960  *  Have ANTLR generate an implementation for this method.
    961  */
    962 - (NSString *)getGrammarFileName
    963 {
    964     return grammarFileName;
    965 }
    966 
    967 - (NSString *)getSourceName
    968 {
    969     return nil;
    970 }
    971 
    972 /** A convenience method for use most often with template rewrites.
    973  *  Convert a List<Token> to List<String>
    974  */
    975 - (AMutableArray *)toStrings:(AMutableArray *)tokens
    976 {
    977     if ( tokens == nil )
    978         return nil;
    979     AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
    980     id object;
    981     NSInteger i = 0;
    982     for (object in tokens) {
    983         [strings addObject:[object text]];
    984         i++;
    985     }
    986     return strings;
    987 }
    988 
    989 /** Given a rule number and a start token index number, return
    990  *  ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    991  *  start index.  If this rule has parsed input starting from the
    992  *  start index before, then return where the rule stopped parsing.
    993  *  It returns the index of the last token matched by the rule.
    994  *
    995  *  For now we use a hashtable and just the slow Object-based one.
    996  *  Later, we can make a special one for ints and also one that
    997  *  tosses out data after we commit past input position i.
    998  */
    999 - (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
   1000 {
   1001     NSNumber *stopIndexI;
   1002     ANTLRHashRule *aHashRule;
   1003     if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) {
   1004         aHashRule = [ANTLRHashRule newANTLRHashRuleWithLen:17];
   1005         [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex];
   1006     }
   1007     stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex];
   1008     if ( stopIndexI == nil ) {
   1009         return ANTLR_MEMO_RULE_UNKNOWN;
   1010     }
   1011     return [stopIndexI integerValue];
   1012 }
   1013 
   1014 /** Has this rule already parsed input at the current index in the
   1015  *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
   1016  *  If we attempted but failed to parse properly before, return
   1017  *  MEMO_RULE_FAILED.
   1018  *
   1019  *  This method has a side-effect: if we have seen this input for
   1020  *  this rule and successfully parsed before, then seek ahead to
   1021  *  1 past the stop token matched for this rule last time.
   1022  */
   1023 - (BOOL)alreadyParsedRule:(id<ANTLRIntStream>)anInput RuleIndex:(NSInteger)ruleIndex
   1024 {
   1025     NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
   1026     if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
   1027         // NSLog(@"rule %d not yet encountered\n", ruleIndex);
   1028         return NO;
   1029     }
   1030     if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
   1031         if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex);
   1032         state.failed = YES;
   1033     }
   1034     else {
   1035         if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO");
   1036         [anInput seek:(aStopIndex+1)]; // jump to one past stop token
   1037     }
   1038     return YES;
   1039 }
   1040       
   1041 /** Record whether or not this rule parsed the input at this position
   1042  *  successfully.  Use a standard java hashtable for now.
   1043  */
   1044 - (void)memoize:(id<ANTLRIntStream>)anInput
   1045       RuleIndex:(NSInteger)ruleIndex
   1046      StartIndex:(NSInteger)ruleStartIndex
   1047 {
   1048     ANTLRRuleStack *aRuleStack;
   1049     NSInteger stopTokenIndex;
   1050 
   1051     aRuleStack = state.ruleMemo;
   1052     stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
   1053     if ( aRuleStack == nil ) {
   1054         if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
   1055         return;
   1056     }
   1057     if ( ruleIndex >= [aRuleStack length] ) {
   1058         if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex);
   1059         return;
   1060     }
   1061     if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
   1062         [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
   1063     }
   1064     return;
   1065 }
   1066    
   1067 /** return how many rule/input-index pairs there are in total.
   1068  *  TODO: this includes synpreds. :(
   1069  */
   1070 - (NSInteger)getRuleMemoizationCacheSize
   1071 {
   1072     ANTLRRuleStack *aRuleStack;
   1073     ANTLRHashRule *aHashRule;
   1074 
   1075     int aCnt = 0;
   1076     aRuleStack = state.ruleMemo;
   1077     for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) {
   1078         aHashRule = [aRuleStack objectAtIndex:i];
   1079         if ( aHashRule != nil ) {
   1080             aCnt += [aHashRule count]; // how many input indexes are recorded?
   1081         }
   1082     }
   1083     return aCnt;
   1084 }
   1085 
   1086 #pragma warning Have to fix traceIn and traceOut.
   1087 - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
   1088 {
   1089     NSLog(@"enter %@ %@", ruleName, inputSymbol);
   1090     if ( state.backtracking > 0 ) {
   1091         NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO"));
   1092     }
   1093     NSLog(@"\n");
   1094 }
   1095 
   1096 - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
   1097 {
   1098     NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
   1099     if ( state.backtracking > 0 ) {
   1100         NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded");
   1101     }
   1102     NSLog(@"\n");
   1103 }
   1104 
   1105 
   1106 // call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds.
   1107 - (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<ANTLRIntStream>)input
   1108 {
   1109     id<ANTLRIntStream> input;
   1110 
   1111     state.backtracking++;
   1112     // input = state.token.input;
   1113     input = self.input;
   1114     int start = [input mark];
   1115     @try {
   1116         [self performSelector:synpredFragment];
   1117     }
   1118     @catch (ANTLRRecognitionException *re) {
   1119         NSLog(@"impossible synpred: %@", re.name);
   1120     }
   1121     BOOL success = (state.failed == NO);
   1122     [input rewind:start];
   1123     state.backtracking--;
   1124     state.failed = NO;
   1125     return success;
   1126 }
   1127               
   1128 @end
   1129                                
   1130