Home | History | Annotate | Download | only in Framework
      1 //
      2 //  BaseRecognizer.m
      3 //  ANTLR
      4 //
      5 //  Created by Alan Condit on 6/16/10.
      6 // [The "BSD licence"]
      7 // Copyright (c) 2010 Alan Condit
      8 // All rights reserved.
      9 //
     10 // Redistribution and use in source and binary forms, with or without
     11 // modification, are permitted provided that the following conditions
     12 // are met:
     13 // 1. Redistributions of source code must retain the above copyright
     14 //    notice, this list of conditions and the following disclaimer.
     15 // 2. Redistributions in binary form must reproduce the above copyright
     16 //    notice, this list of conditions and the following disclaimer in the
     17 //    documentation and/or other materials provided with the distribution.
     18 // 3. The name of the author may not be used to endorse or promote products
     19 //    derived from this software without specific prior written permission.
     20 //
     21 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     24 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     26 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     30 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 
     32 #import "ACNumber.h"
     33 #import "BaseRecognizer.h"
     34 #import "HashRule.h"
     35 #import "RuleMemo.h"
     36 #import "CommonToken.h"
     37 #import "Map.h"
     38 #import "NoViableAltException.h"
     39 
     40 extern NSInteger debug;
     41 
     42 @implementation BaseRecognizer
     43 
     44 static AMutableArray *_tokenNames;
     45 static NSString *_grammarFileName;
     46 static NSString *NEXT_TOKEN_RULE_NAME;
     47 
     48 @synthesize state;
     49 @synthesize grammarFileName;
     50 //@synthesize failed;
     51 @synthesize sourceName;
     52 //@synthesize numberOfSyntaxErrors;
     53 @synthesize tokenNames;
     54 
     55 + (void) initialize
     56 {
     57     NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"];
     58     [NEXT_TOKEN_RULE_NAME retain];
     59 }
     60 
     61 + (BaseRecognizer *) newBaseRecognizer
     62 {
     63     return [[BaseRecognizer alloc] init];
     64 }
     65 
     66 + (BaseRecognizer *) newBaseRecognizerWithRuleLen:(NSInteger)aLen
     67 {
     68     return [[BaseRecognizer alloc] initWithLen:aLen];
     69 }
     70 
     71 + (BaseRecognizer *) newBaseRecognizer:(RecognizerSharedState *)aState
     72 {
     73 	return [[BaseRecognizer alloc] initWithState:aState];
     74 }
     75 
     76 + (AMutableArray *)getTokenNames
     77 {
     78     return _tokenNames;
     79 }
     80 
     81 + (void)setTokenNames:(AMutableArray *)theTokNams
     82 {
     83     if ( _tokenNames != theTokNams ) {
     84         if ( _tokenNames ) [_tokenNames release];
     85         [theTokNams retain];
     86     }
     87     _tokenNames = theTokNams;
     88 }
     89 
     90 + (void)setGrammarFileName:(NSString *)aFileName
     91 {
     92     if ( _grammarFileName != aFileName ) {
     93         if ( _grammarFileName ) [_grammarFileName release];
     94         [aFileName retain];
     95     }
     96     [_grammarFileName retain];
     97 }
     98 
     99 - (id) init
    100 {
    101 	if ((self = [super init]) != nil) {
    102         if (state == nil) {
    103             state = [[RecognizerSharedState newRecognizerSharedState] retain];
    104         }
    105         tokenNames = _tokenNames;
    106         if ( tokenNames ) [tokenNames retain];
    107         grammarFileName = _grammarFileName;
    108         if ( grammarFileName ) [grammarFileName retain];
    109         state._fsp = -1;
    110         state.errorRecovery = NO;		// are we recovering?
    111         state.lastErrorIndex = -1;
    112         state.failed = NO;				// indicate that some match failed
    113         state.syntaxErrors = 0;
    114         state.backtracking = 0;			// the level of backtracking
    115         state.tokenStartCharIndex = -1;
    116 	}
    117 	return self;
    118 }
    119 
    120 - (id) initWithLen:(NSInteger)aLen
    121 {
    122 	if ((self = [super init]) != nil) {
    123         if (state == nil) {
    124             state = [[RecognizerSharedState newRecognizerSharedStateWithRuleLen:aLen] retain];
    125         }
    126         tokenNames = _tokenNames;
    127         if ( tokenNames ) [tokenNames retain];
    128         grammarFileName = _grammarFileName;
    129         if ( grammarFileName ) [grammarFileName retain];
    130         state._fsp = -1;
    131         state.errorRecovery = NO;		// are we recovering?
    132         state.lastErrorIndex = -1;
    133         state.failed = NO;				// indicate that some match failed
    134         state.syntaxErrors = 0;
    135         state.backtracking = 0;			// the level of backtracking
    136         state.tokenStartCharIndex = -1;
    137 	}
    138 	return self;
    139 }
    140 
    141 - (id) initWithState:(RecognizerSharedState *)aState
    142 {
    143 	if ((self = [super init]) != nil) {
    144 		state = aState;
    145         if (state == nil) {
    146             state = [RecognizerSharedState newRecognizerSharedState];
    147         }
    148         [state retain];
    149         tokenNames = _tokenNames;
    150         if ( tokenNames ) [tokenNames retain];
    151         grammarFileName = _grammarFileName;
    152         if ( grammarFileName ) [grammarFileName retain];
    153         state._fsp = -1;
    154         state.errorRecovery = NO;		// are we recovering?
    155         state.lastErrorIndex = -1;
    156         state.failed = NO;				// indicate that some match failed
    157         state.syntaxErrors = 0;
    158         state.backtracking = 0;			// the level of backtracking
    159         state.tokenStartCharIndex = -1;
    160 	}
    161 	return self;
    162 }
    163 
    164 - (void)dealloc
    165 {
    166 #ifdef DEBUG_DEALLOC
    167     NSLog( @"called dealloc in BaseRecognizer" );
    168 #endif
    169 	if ( grammarFileName ) [grammarFileName release];
    170 	if ( tokenNames ) [tokenNames release];
    171 	if ( state ) [state release];
    172 	[super dealloc];
    173 }
    174 
    175 // reset the recognizer to the initial state. does not touch the token source!
    176 // this can be extended by the grammar writer to reset custom ivars
    177 - (void) reset
    178 {
    179     if ( state == nil )
    180         return; 
    181     if ( state.following != nil ) {
    182         if ( [state.following count] )
    183             [state.following removeAllObjects];
    184     }
    185     state._fsp = -1;
    186     state.errorRecovery = NO;		// are we recovering?
    187     state.lastErrorIndex = -1;
    188     state.failed = NO;				// indicate that some match failed
    189     state.syntaxErrors = 0;
    190     state.backtracking = 0;			// the level of backtracking
    191     state.tokenStartCharIndex = -1;
    192     if ( state.ruleMemo != nil ) {
    193         if ( [state.ruleMemo count] )
    194             [state.ruleMemo removeAllObjects];
    195     }
    196 }
    197 
    198 - (BOOL) getFailed
    199 {
    200 	return [state getFailed];
    201 }
    202 
    203 - (void) setFailed:(BOOL)flag
    204 {
    205 	[state setFailed:flag];
    206 }
    207 
    208 - (RecognizerSharedState *) getState
    209 {
    210 	return state;
    211 }
    212 
    213 - (void) setState:(RecognizerSharedState *) theState
    214 {
    215 	if (state != theState) {
    216 		if ( state ) [state release];
    217 		state = theState;
    218 		[state retain];
    219 	}
    220 }
    221 
    222 - (id)input
    223 {
    224     return nil; // Must be overriden in inheriting class
    225 }
    226 
    227 - (void)skip // override in inheriting class
    228 {
    229     return;
    230 }
    231 
    232 -(id) match:(id<IntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
    233 {
    234 	id matchedSymbol = [self getCurrentInputSymbol:anInput];
    235 	if ([anInput LA:1] == ttype) {
    236 		[anInput consume];
    237 		state.errorRecovery = NO;
    238 		state.failed = NO;
    239 		return matchedSymbol;
    240 	}
    241 	if (state.backtracking > 0) {
    242 		state.failed = YES;
    243 		return matchedSymbol;
    244 	}
    245 	matchedSymbol = [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    246 	return matchedSymbol;
    247 }
    248 
    249 -(void) matchAny:(id<IntStream>)anInput
    250 {
    251     state.errorRecovery = NO;
    252     state.failed = NO;
    253     [anInput consume];
    254 }
    255 
    256 -(BOOL) mismatchIsUnwantedToken:(id<IntStream>)anInput TokenType:(NSInteger)ttype
    257 {
    258     return [anInput LA:2] == ttype;
    259 }
    260 
    261 -(BOOL) mismatchIsMissingToken:(id<IntStream>)anInput Follow:(ANTLRBitSet *) follow
    262 {
    263     if ( follow == nil ) {
    264         // we have no information about the follow; we can only consume
    265         // a single token and hope for the best
    266         return NO;
    267     }
    268     // compute what can follow this grammar element reference
    269     if ( [follow member:TokenTypeEOR] ) {
    270         ANTLRBitSet *viableTokensFollowingThisRule = [self computeContextSensitiveRuleFOLLOW];
    271         follow = [follow or:viableTokensFollowingThisRule];
    272         if ( state._fsp >= 0 ) { // remove EOR if we're not the start symbol
    273             [follow remove:(TokenTypeEOR)];
    274         }
    275     }
    276     // if current token is consistent with what could come after set
    277     // then we know we're missing a token; error recovery is free to
    278     // "insert" the missing token
    279     
    280     //System.out.println("viable tokens="+follow.toString(getTokenNames()));
    281     //System.out.println("LT(1)="+((TokenStream)input).LT(1));
    282     
    283     // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
    284     // in follow set to indicate that the fall of the start symbol is
    285     // in the set (EOF can follow).
    286     if ( [follow member:[anInput LA:1]] || [follow member:TokenTypeEOR] ) {
    287         //System.out.println("LT(1)=="+((TokenStream)input).LT(1)+" is consistent with what follows; inserting...");
    288         return YES;
    289     }
    290     return NO;
    291 }
    292 
    293 /** Report a recognition problem.
    294  *
    295  *  This method sets errorRecovery to indicate the parser is recovering
    296  *  not parsing.  Once in recovery mode, no errors are generated.
    297  *  To get out of recovery mode, the parser must successfully match
    298  *  a token (after a resync).  So it will go:
    299  *
    300  * 		1. error occurs
    301  * 		2. enter recovery mode, report error
    302  * 		3. consume until token found in resynch set
    303  * 		4. try to resume parsing
    304  * 		5. next match() will reset errorRecovery mode
    305  *
    306  *  If you override, make sure to update syntaxErrors if you care about that.
    307  */
    308 -(void) reportError:(RecognitionException *) e
    309 {
    310     // if we've already reported an error and have not matched a token
    311     // yet successfully, don't report any errors.
    312     if ( state.errorRecovery ) {
    313         //System.err.print("[SPURIOUS] ");
    314         return;
    315     }
    316     state.syntaxErrors++; // don't count spurious
    317     state.errorRecovery = YES;
    318     
    319     [self displayRecognitionError:[self getTokenNames] Exception:e];
    320 }
    321 
    322 -(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(RecognitionException *)e
    323 {
    324     NSString *hdr = [self getErrorHeader:e];
    325     NSString *msg = [self getErrorMessage:e TokenNames:theTokNams];
    326     [self emitErrorMessage:[NSString stringWithFormat:@" %@ %@", hdr, msg]];
    327 }
    328 
    329 /** What error message should be generated for the various
    330  *  exception types?
    331  *
    332  *  Not very object-oriented code, but I like having all error message
    333  *  generation within one method rather than spread among all of the
    334  *  exception classes. This also makes it much easier for the exception
    335  *  handling because the exception classes do not have to have pointers back
    336  *  to this object to access utility routines and so on. Also, changing
    337  *  the message for an exception type would be difficult because you
    338  *  would have to subclassing exception, but then somehow get ANTLR
    339  *  to make those kinds of exception objects instead of the default.
    340  *  This looks weird, but trust me--it makes the most sense in terms
    341  *  of flexibility.
    342  *
    343  *  For grammar debugging, you will want to override this to add
    344  *  more information such as the stack frame with
    345  *  getRuleInvocationStack(e, this.getClass().getName()) and,
    346  *  for no viable alts, the decision description and state etc...
    347  *
    348  *  Override this to change the message generated for one or more
    349  *  exception types.
    350  */
    351 - (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)theTokNams
    352 {
    353     // NSString *msg = [e getMessage];
    354     NSString *msg;
    355     if ( [e isKindOfClass:[UnwantedTokenException class]] ) {
    356         UnwantedTokenException *ute = (UnwantedTokenException *)e;
    357         NSString *tokenName=@"<unknown>";
    358         if ( ute.expecting == TokenTypeEOF ) {
    359             tokenName = @"EOF";
    360         }
    361         else {
    362             tokenName = (NSString *)[theTokNams objectAtIndex:ute.expecting];
    363         }
    364         msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
    365                tokenName];
    366     }
    367     else if ( [e isKindOfClass:[MissingTokenException class] ] ) {
    368         MissingTokenException *mte = (MissingTokenException *)e;
    369         NSString *tokenName=@"<unknown>";
    370         if ( mte.expecting== TokenTypeEOF ) {
    371             tokenName = @"EOF";
    372         }
    373         else {
    374             tokenName = [theTokNams objectAtIndex:mte.expecting];
    375         }
    376         msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
    377     }
    378     else if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
    379         MismatchedTokenException *mte = (MismatchedTokenException *)e;
    380         NSString *tokenName=@"<unknown>";
    381         if ( mte.expecting== TokenTypeEOF ) {
    382             tokenName = @"EOF";
    383         }
    384         else {
    385             tokenName = [theTokNams objectAtIndex:mte.expecting];
    386         }
    387         msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
    388     }
    389     else if ( [e isKindOfClass:[MismatchedTreeNodeException class]] ) {
    390         MismatchedTreeNodeException *mtne = (MismatchedTreeNodeException *)e;
    391         NSString *tokenName=@"<unknown>";
    392         if ( mtne.expecting==TokenTypeEOF ) {
    393             tokenName = @"EOF";
    394         }
    395         else {
    396             tokenName = [theTokNams objectAtIndex:mtne.expecting];
    397         }
    398         msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
    399     }
    400     else if ( [e isKindOfClass:[NoViableAltException class]] ) {
    401         //NoViableAltException *nvae = (NoViableAltException *)e;
    402         // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    403         // and "(decision="+nvae.decisionNumber+") and
    404         // "state "+nvae.stateNumber
    405         //        msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
    406         msg = [NSString stringWithFormat:@"no viable alternative decision:%d state:%d at input %@", ((NoViableAltException *)e).stateNumber, ((NoViableAltException *)e).decisionNumber, [self getTokenErrorDisplay:e.token]];
    407     }
    408     else if ( [e isKindOfClass:[EarlyExitException class]] ) {
    409         //EarlyExitException *eee = (EarlyExitException *)e;
    410         // for development, can add "(decision="+eee.decisionNumber+")"
    411         msg =[NSString stringWithFormat: @"required (...)+ loop did not match anything at input ", [self getTokenErrorDisplay:e.token]];
    412     }
    413     else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
    414         MismatchedSetException *mse = (MismatchedSetException *)e;
    415         msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
    416                [self getTokenErrorDisplay:(e.token)],
    417                mse.expecting];
    418     }
    419 #pragma warning NotSet not yet implemented.
    420     else if ( [e isKindOfClass:[MismatchedNotSetException class] ] ) {
    421         MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
    422         msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
    423                [self getTokenErrorDisplay:(e.token)],
    424                mse.expecting];
    425     }
    426     else if ( [e isKindOfClass:[FailedPredicateException class]] ) {
    427         FailedPredicateException *fpe = (FailedPredicateException *)e;
    428         msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
    429     }
    430     else {
    431         msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    432     }
    433     return msg;
    434 }
    435 
    436 /** Get number of recognition errors (lexer, parser, tree parser).  Each
    437  *  recognizer tracks its own number.  So parser and lexer each have
    438  *  separate count.  Does not count the spurious errors found between
    439  *  an error and next valid token match
    440  *
    441  *  See also reportError()
    442  */
    443 - (NSInteger) getNumberOfSyntaxErrors
    444 {
    445     return state.syntaxErrors;
    446 }
    447 
    448 /** What is the error header, normally line/character position information? */
    449 - (NSString *)getErrorHeader:(RecognitionException *)e
    450 {
    451     return [NSString stringWithFormat:@"line %d:%d", e.line, e.charPositionInLine];
    452 }
    453 
    454 /** How should a token be displayed in an error message? The default
    455  *  is to display just the text, but during development you might
    456  *  want to have a lot of information spit out.  Override in that case
    457  *  to use t.toString() (which, for CommonToken, dumps everything about
    458  *  the token). This is better than forcing you to override a method in
    459  *  your token objects because you don't have to go modify your lexer
    460  *  so that it creates a new Java type.
    461  */
    462 - (NSString *)getTokenErrorDisplay:(id<Token>)t
    463 {
    464     NSString *s = t.text;
    465     if ( s == nil ) {
    466         if ( t.type == TokenTypeEOF ) {
    467             s = @"<EOF>";
    468         }
    469         else {
    470             s = [NSString stringWithFormat:@"<%@>", t.type];
    471         }
    472     }
    473     s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\\\n"];
    474     s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\\\r"];
    475     s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\\\t"];
    476     return [NSString stringWithFormat:@"\'%@\'", s];
    477 }
    478                                         
    479 /** Override this method to change where error messages go */
    480 - (void) emitErrorMessage:(NSString *) msg
    481 {
    482 //    System.err.println(msg);
    483     NSLog(@"%@", msg);
    484 }
    485 
    486 /** Recover from an error found on the input stream.  This is
    487  *  for NoViableAlt and mismatched symbol exceptions.  If you enable
    488  *  single token insertion and deletion, this will usually not
    489  *  handle mismatched symbol exceptions but there could be a mismatched
    490  *  token that the match() routine could not recover from.
    491  */
    492 - (void)recover:(id<IntStream>)anInput Exception:(RecognitionException *)re
    493 {
    494     if ( state.lastErrorIndex == anInput.index ) {
    495         // uh oh, another error at same token index; must be a case
    496         // where LT(1) is in the recovery token set so nothing is
    497         // consumed; consume a single token so at least to prevent
    498         // an infinite loop; this is a failsafe.
    499         [anInput consume];
    500     }
    501     state.lastErrorIndex = anInput.index;
    502     ANTLRBitSet *followSet = [self computeErrorRecoverySet];
    503     [self beginResync];
    504     [self consumeUntilFollow:anInput Follow:followSet];
    505     [self endResync];
    506 }
    507 
    508 - (void) beginResync
    509 {
    510     
    511 }
    512 
    513 - (void) endResync
    514 {
    515     
    516 }
    517                             
    518 /*  Compute the error recovery set for the current rule.  During
    519  *  rule invocation, the parser pushes the set of tokens that can
    520  *  follow that rule reference on the stack; this amounts to
    521  *  computing FIRST of what follows the rule reference in the
    522  *  enclosing rule. This local follow set only includes tokens
    523  *  from within the rule; i.e., the FIRST computation done by
    524  *  ANTLR stops at the end of a rule.
    525  *
    526  *  EXAMPLE
    527  *
    528  *  When you find a "no viable alt exception", the input is not
    529  *  consistent with any of the alternatives for rule r.  The best
    530  *  thing to do is to consume tokens until you see something that
    531  *  can legally follow a call to r *or* any rule that called r.
    532  *  You don't want the exact set of viable next tokens because the
    533  *  input might just be missing a token--you might consume the
    534  *  rest of the input looking for one of the missing tokens.
    535  *
    536  *  Consider grammar:
    537  *
    538  *  a : '[' b ']'
    539  *    | '(' b ')'
    540  *    ;
    541  *  b : c '^' INT ;
    542  *  c : ID
    543  *    | INT
    544  *    ;
    545  *
    546  *  At each rule invocation, the set of tokens that could follow
    547  *  that rule is pushed on a stack.  Here are the various "local"
    548  *  follow sets:
    549  *
    550  *  FOLLOW(b1_in_a) = FIRST(']') = ']'
    551  *  FOLLOW(b2_in_a) = FIRST(')') = ')'
    552  *  FOLLOW(c_in_b) = FIRST('^') = '^'
    553  *
    554  *  Upon erroneous input "[]", the call chain is
    555  *
    556  *  a -> b -> c
    557  *
    558  *  and, hence, the follow context stack is:
    559  *
    560  *  depth  local follow set     after call to rule
    561  *    0         <EOF>                    a (from main())
    562  *    1          ']'                     b
    563  *    3          '^'                     c
    564  *
    565  *  Notice that ')' is not included, because b would have to have
    566  *  been called from a different context in rule a for ')' to be
    567  *  included.
    568  *
    569  *  For error recovery, we cannot consider FOLLOW(c)
    570  *  (context-sensitive or otherwise).  We need the combined set of
    571  *  all context-sensitive FOLLOW sets--the set of all tokens that
    572  *  could follow any reference in the call chain.  We need to
    573  *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
    574  *  we resync'd to that token, we'd consume until EOF.  We need to
    575  *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
    576  *  In this case, for input "[]", LA(1) is in this set so we would
    577  *  not consume anything and after printing an error rule c would
    578  *  return normally.  It would not find the required '^' though.
    579  *  At this point, it gets a mismatched token error and throws an
    580  *  exception (since LA(1) is not in the viable following token
    581  *  set).  The rule exception handler tries to recover, but finds
    582  *  the same recovery set and doesn't consume anything.  Rule b
    583  *  exits normally returning to rule a.  Now it finds the ']' (and
    584  *  with the successful match exits errorRecovery mode).
    585  *
    586  *  So, you cna see that the parser walks up call chain looking
    587  *  for the token that was a member of the recovery set.
    588  *
    589  *  Errors are not generated in errorRecovery mode.
    590  *
    591  *  ANTLR's error recovery mechanism is based upon original ideas:
    592  *
    593  *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
    594  *
    595  *  and
    596  *
    597  *  "A note on error recovery in recursive descent parsers":
    598  *  http://portal.acm.org/citation.cfm?id=947902.947905
    599  *
    600  *  Later, Josef Grosch had some good ideas:
    601  *
    602  *  "Efficient and Comfortable Error Recovery in Recursive Descent
    603  *  Parsers":
    604  *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
    605  *
    606  *  Like Grosch I implemented local FOLLOW sets that are combined
    607  *  at run-time upon error to avoid overhead during parsing.
    608  */
    609 - (ANTLRBitSet *) computeErrorRecoverySet
    610 {
    611     return [self combineFollows:NO];
    612 }
    613 
    614 /** Compute the context-sensitive FOLLOW set for current rule.
    615  *  This is set of token types that can follow a specific rule
    616  *  reference given a specific call chain.  You get the set of
    617  *  viable tokens that can possibly come next (lookahead depth 1)
    618  *  given the current call chain.  Contrast this with the
    619  *  definition of plain FOLLOW for rule r:
    620  *
    621  *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
    622  *
    623  *  where x in T* and alpha, beta in V*; T is set of terminals and
    624  *  V is the set of terminals and nonterminals.  In other words,
    625  *  FOLLOW(r) is the set of all tokens that can possibly follow
    626  *  references to r in *any* sentential form (context).  At
    627  *  runtime, however, we know precisely which context applies as
    628  *  we have the call chain.  We may compute the exact (rather
    629  *  than covering superset) set of following tokens.
    630  *
    631  *  For example, consider grammar:
    632  *
    633  *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
    634  *       | "return" expr '.'
    635  *       ;
    636  *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
    637  *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
    638  *       | '(' expr ')'
    639  *       ;
    640  *
    641  *  The FOLLOW sets are all inclusive whereas context-sensitive
    642  *  FOLLOW sets are precisely what could follow a rule reference.
    643  *  For input input "i=(3);", here is the derivation:
    644  *
    645  *  stat => ID '=' expr ';'
    646  *       => ID '=' atom ('+' atom)* ';'
    647  *       => ID '=' '(' expr ')' ('+' atom)* ';'
    648  *       => ID '=' '(' atom ')' ('+' atom)* ';'
    649  *       => ID '=' '(' INT ')' ('+' atom)* ';'
    650  *       => ID '=' '(' INT ')' ';'
    651  *
    652  *  At the "3" token, you'd have a call chain of
    653  *
    654  *    stat -> expr -> atom -> expr -> atom
    655  *
    656  *  What can follow that specific nested ref to atom?  Exactly ')'
    657  *  as you can see by looking at the derivation of this specific
    658  *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
    659  *
    660  *  You want the exact viable token set when recovering from a
    661  *  token mismatch.  Upon token mismatch, if LA(1) is member of
    662  *  the viable next token set, then you know there is most likely
    663  *  a missing token in the input stream.  "Insert" one by just not
    664  *  throwing an exception.
    665  */
    666 - (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
    667 {
    668     return [self combineFollows:YES];
    669 }
    670 
    671 // what is exact? it seems to only add sets from above on stack
    672 // if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
    673 // Why would we ever want them all?  Maybe no viable alt instead of
    674 // mismatched token?
    675 - (ANTLRBitSet *)combineFollows:(BOOL) exact
    676 {
    677     NSInteger top = state._fsp;
    678     ANTLRBitSet *followSet = [[ANTLRBitSet newBitSet] retain];
    679     for (int i = top; i >= 0; i--) {
    680         ANTLRBitSet *localFollowSet = (ANTLRBitSet *)[state.following objectAtIndex:i];
    681         /*
    682          System.out.println("local follow depth "+i+"="+
    683          localFollowSet.toString(getTokenNames())+")");
    684          */
    685         [followSet orInPlace:localFollowSet];
    686         if ( exact ) {
    687             // can we see end of rule?
    688             if ( [localFollowSet member:TokenTypeEOR] ) {
    689                 // Only leave EOR in set if at top (start rule); this lets
    690                 // us know if have to include follow(start rule); i.e., EOF
    691                 if ( i > 0 ) {
    692                     [followSet remove:TokenTypeEOR];
    693                 }
    694             }
    695             else { // can't see end of rule, quit
    696                 break;
    697             }
    698         }
    699     }
    700     return followSet;
    701 }
    702 
    703 /** Attempt to recover from a single missing or extra token.
    704  *
    705  *  EXTRA TOKEN
    706  *
    707  *  LA(1) is not what we are looking for.  If LA(2) has the right token,
    708  *  however, then assume LA(1) is some extra spurious token.  Delete it
    709  *  and LA(2) as if we were doing a normal match(), which advances the
    710  *  input.
    711  *
    712  *  MISSING TOKEN
    713  *
    714  *  If current token is consistent with what could come after
    715  *  ttype then it is ok to "insert" the missing token, else throw
    716  *  exception For example, Input "i=(3;" is clearly missing the
    717  *  ')'.  When the parser returns from the nested call to expr, it
    718  *  will have call chain:
    719  *
    720  *    stat -> expr -> atom
    721  *
    722  *  and it will be trying to match the ')' at this point in the
    723  *  derivation:
    724  *
    725  *       => ID '=' '(' INT ')' ('+' atom)* ';'
    726  *                          ^
    727  *  match() will see that ';' doesn't match ')' and report a
    728  *  mismatched token error.  To recover, it sees that LA(1)==';'
    729  *  is in the set of tokens that can follow the ')' token
    730  *  reference in rule atom.  It can assume that you forgot the ')'.
    731  */
    732 - (id<Token>)recoverFromMismatchedToken:(id<IntStream>)anInput
    733                        TokenType:(NSInteger)ttype
    734                           Follow:(ANTLRBitSet *)follow
    735 {
    736     RecognitionException *e = nil;
    737     // if next token is what we are looking for then "delete" this token
    738     if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
    739         e = [UnwantedTokenException newException:ttype Stream:anInput];
    740         /*
    741          System.err.println("recoverFromMismatchedToken deleting "+
    742          ((TokenStream)input).LT(1)+
    743          " since "+((TokenStream)input).LT(2)+" is what we want");
    744          */
    745         [self beginResync];
    746         [anInput consume]; // simply delete extra token
    747         [self endResync];
    748         [self reportError:e];  // report after consuming so AW sees the token in the exception
    749                          // we want to return the token we're actually matching
    750         id matchedSymbol = [self getCurrentInputSymbol:anInput];
    751         [anInput consume]; // move past ttype token as if all were ok
    752         return matchedSymbol;
    753     }
    754     // can't recover with single token deletion, try insertion
    755     if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
    756         id<Token> inserted = [self getMissingSymbol:anInput Exception:e TokenType:ttype Follow:follow];
    757         e = [MissingTokenException newException:ttype Stream:anInput With:inserted];
    758         [self reportError:e];  // report after inserting so AW sees the token in the exception
    759         return inserted;
    760     }
    761     // even that didn't work; must throw the exception
    762     e = [MismatchedTokenException newException:ttype Stream:anInput];
    763     @throw e;
    764 }
    765 
    766 /** Not currently used */
    767 -(id) recoverFromMismatchedSet:(id<IntStream>)anInput
    768                      Exception:(RecognitionException *)e
    769                         Follow:(ANTLRBitSet *) follow
    770 {
    771     if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
    772         // System.out.println("missing token");
    773         [self reportError:e];
    774         // we don't know how to conjure up a token for sets yet
    775         return [self getMissingSymbol:anInput Exception:e TokenType:TokenTypeInvalid Follow:follow];
    776     }
    777     // TODO do single token deletion like above for Token mismatch
    778     @throw e;
    779 }
    780 
    781 /** Match needs to return the current input symbol, which gets put
    782  *  into the label for the associated token ref; e.g., x=ID.  Token
    783  *  and tree parsers need to return different objects. Rather than test
    784  *  for input stream type or change the IntStream interface, I use
    785  *  a simple method to ask the recognizer to tell me what the current
    786  *  input symbol is.
    787  * 
    788  *  This is ignored for lexers.
    789  */
    790 - (id) getCurrentInputSymbol:(id<IntStream>)anInput
    791 {
    792     return nil;
    793 }
    794 
    795 /** Conjure up a missing token during error recovery.
    796  *
    797  *  The recognizer attempts to recover from single missing
    798  *  symbols. But, actions might refer to that missing symbol.
    799  *  For example, x=ID {f($x);}. The action clearly assumes
    800  *  that there has been an identifier matched previously and that
    801  *  $x points at that token. If that token is missing, but
    802  *  the next token in the stream is what we want we assume that
    803  *  this token is missing and we keep going. Because we
    804  *  have to return some token to replace the missing token,
    805  *  we have to conjure one up. This method gives the user control
    806  *  over the tokens returned for missing tokens. Mostly,
    807  *  you will want to create something special for identifier
    808  *  tokens. For literals such as '{' and ',', the default
    809  *  action in the parser or tree parser works. It simply creates
    810  *  a CommonToken of the appropriate type. The text will be the token.
    811  *  If you change what tokens must be created by the lexer,
    812  *  override this method to create the appropriate tokens.
    813  */
    814 - (id)getMissingSymbol:(id<IntStream>)anInput
    815              Exception:(RecognitionException *)e
    816              TokenType:(NSInteger)expectedTokenType
    817                 Follow:(ANTLRBitSet *)follow
    818 {
    819     return nil;
    820 }
    821 
    822 
    823 -(void) consumeUntilTType:(id<IntStream>)anInput TokenType:(NSInteger)tokenType
    824 {
    825     //System.out.println("consumeUntil "+tokenType);
    826     int ttype = [anInput LA:1];
    827     while (ttype != TokenTypeEOF && ttype != tokenType) {
    828         [anInput consume];
    829         ttype = [anInput LA:1];
    830     }
    831 }
    832 
    833 /** Consume tokens until one matches the given token set */
    834 -(void) consumeUntilFollow:(id<IntStream>)anInput Follow:(ANTLRBitSet *)set
    835 {
    836     //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
    837     int ttype = [anInput LA:1];
    838     while (ttype != TokenTypeEOF && ![set member:ttype] ) {
    839         //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
    840         [anInput consume];
    841         ttype = [anInput LA:1];
    842     }
    843 }
    844 
    845 /** Push a rule's follow set using our own hardcoded stack */
    846 - (void)pushFollow:(ANTLRBitSet *)fset
    847 {
    848     if ( (state._fsp +1) >= [state.following count] ) {
    849         //        AMutableArray *f = [AMutableArray arrayWithCapacity:[[state.following] count]*2];
    850         //        System.arraycopy(state.following, 0, f, 0, state.following.length);
    851         //        state.following = f;
    852         [state.following addObject:fset];
    853         [fset retain];
    854         state._fsp++;
    855     }
    856     else {
    857         [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
    858     }
    859 }
    860 
    861 - (ANTLRBitSet *)popFollow
    862 {
    863     ANTLRBitSet *fset;
    864 
    865     if ( state._fsp >= 0 && [state.following count] > 0 ) {
    866         fset = [state.following objectAtIndex:state._fsp--];
    867         [state.following removeLastObject];
    868         return fset;
    869     }
    870     else {
    871         NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
    872     }
    873     return nil;
    874 }
    875 
    876 /** Return List<String> of the rules in your parser instance
    877  *  leading up to a call to this method.  You could override if
    878  *  you want more details such as the file/line info of where
    879  *  in the parser java code a rule is invoked.
    880  *
    881  *  This is very useful for error messages and for context-sensitive
    882  *  error recovery.
    883  */
    884 - (AMutableArray *)getRuleInvocationStack
    885 {
    886     NSString *parserClassName = [[self className] retain];
    887     return [self getRuleInvocationStack:[RecognitionException newException] Recognizer:parserClassName];
    888 }
    889 
    890 /** A more general version of getRuleInvocationStack where you can
    891  *  pass in, for example, a RecognitionException to get it's rule
    892  *  stack trace.  This routine is shared with all recognizers, hence,
    893  *  static.
    894  *
    895  *  TODO: move to a utility class or something; weird having lexer call this
    896  */
    897 - (AMutableArray *)getRuleInvocationStack:(RecognitionException *)e
    898                                 Recognizer:(NSString *)recognizerClassName
    899 {
    900     // char *name;
    901     AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
    902     NSArray *stack = [e callStackSymbols];
    903     int i = 0;
    904     for (i = [stack count]-1; i >= 0; i--) {
    905         NSString *t = [stack objectAtIndex:i];
    906         // NSLog(@"stack %d = %@\n", i, t);
    907         if ( [t commonPrefixWithString:@"org.antlr.runtime." options:NSLiteralSearch] ) {
    908             // id aClass = objc_getClass( [t UTF8String] );
    909             continue; // skip support code such as this method
    910         }
    911         if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
    912             // name = sel_getName(method_getName(method));
    913             // NSString *aMethod = [NSString stringWithFormat:@"%s", name];
    914             continue;
    915         }
    916         if ( ![t isEqualTo:recognizerClassName] ) {
    917             // name = class_getName( [t UTF8String] );
    918             continue; // must not be part of this parser
    919         }
    920         [rules addObject:t];
    921     }
    922 #ifdef DONTUSEYET
    923     StackTraceElement[] stack = e.getStackTrace();
    924     int i = 0;
    925     for (i=stack.length-1; i>=0; i--) {
    926         StackTraceElement t = stack[i];
    927         if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
    928             continue; // skip support code such as this method
    929         }
    930               if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
    931             continue;
    932         }
    933               if ( ![[t getClassName] equals:recognizerClassName] ) {
    934             continue; // must not be part of this parser
    935         }
    936               [rules addObject:[t getMethodName]];
    937     }
    938 #endif
    939     [stack release];
    940     return rules;
    941 }
    942 
    943 - (NSInteger) getBacktrackingLevel
    944 {
    945     return [state getBacktracking];
    946 }
    947       
    948 - (void) setBacktrackingLevel:(NSInteger)level
    949 {
    950     [state setBacktracking:level];
    951 }
    952       
    953         /** Used to print out token names like ID during debugging and
    954  *  error reporting.  The generated parsers implement a method
    955  *  that overrides this to point to their String[] tokenNames.
    956  */
    957 - (NSArray *)getTokenNames
    958 {
    959     return tokenNames;
    960 }
    961 
    962 /** For debugging and other purposes, might want the grammar name.
    963  *  Have ANTLR generate an implementation for this method.
    964  */
    965 - (NSString *)getGrammarFileName
    966 {
    967     return grammarFileName;
    968 }
    969 
    970 - (NSString *)getSourceName
    971 {
    972     return nil;
    973 }
    974 
    975 /** A convenience method for use most often with template rewrites.
    976  *  Convert a List<Token> to List<String>
    977  */
    978 - (AMutableArray *)toStrings:(AMutableArray *)tokens
    979 {
    980     if ( tokens == nil )
    981         return nil;
    982     AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
    983     id object;
    984     NSInteger i = 0;
    985     for (object in tokens) {
    986         [strings addObject:[object text]];
    987         i++;
    988     }
    989     return strings;
    990 }
    991 
    992 /** Given a rule number and a start token index number, return
    993  *  ANTLR_MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    994  *  start index.  If this rule has parsed input starting from the
    995  *  start index before, then return where the rule stopped parsing.
    996  *  It returns the index of the last token matched by the rule.
    997  *
    998  *  For now we use a hashtable and just the slow Object-based one.
    999  *  Later, we can make a special one for ints and also one that
   1000  *  tosses out data after we commit past input position i.
   1001  */
   1002 - (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
   1003 {
   1004     ACNumber *stopIndexI;
   1005     HashRule *aHashRule;
   1006     if ( (aHashRule = [state.ruleMemo objectAtIndex:ruleIndex]) == nil ) {
   1007         aHashRule = [HashRule newHashRuleWithLen:17];
   1008         [state.ruleMemo insertObject:aHashRule atIndex:ruleIndex];
   1009     }
   1010     stopIndexI = [aHashRule getRuleMemoStopIndex:ruleStartIndex];
   1011     if ( stopIndexI == nil ) {
   1012         return ANTLR_MEMO_RULE_UNKNOWN;
   1013     }
   1014     return [stopIndexI integerValue];
   1015 }
   1016 
   1017 /** Has this rule already parsed input at the current index in the
   1018  *  input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
   1019  *  If we attempted but failed to parse properly before, return
   1020  *  MEMO_RULE_FAILED.
   1021  *
   1022  *  This method has a side-effect: if we have seen this input for
   1023  *  this rule and successfully parsed before, then seek ahead to
   1024  *  1 past the stop token matched for this rule last time.
   1025  */
   1026 - (BOOL)alreadyParsedRule:(id<IntStream>)anInput RuleIndex:(NSInteger)ruleIndex
   1027 {
   1028     NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
   1029     if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
   1030         // NSLog(@"rule %d not yet encountered\n", ruleIndex);
   1031         return NO;
   1032     }
   1033     if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
   1034         if (debug) NSLog(@"rule %d will never succeed\n", ruleIndex);
   1035         state.failed = YES;
   1036     }
   1037     else {
   1038         if (debug) NSLog(@"seen rule %d before; skipping ahead to %d failed = %@\n", ruleIndex, aStopIndex+1, state.failed?@"YES":@"NO");
   1039         [anInput seek:(aStopIndex+1)]; // jump to one past stop token
   1040     }
   1041     return YES;
   1042 }
   1043       
   1044 /** Record whether or not this rule parsed the input at this position
   1045  *  successfully.  Use a standard java hashtable for now.
   1046  */
   1047 - (void)memoize:(id<IntStream>)anInput
   1048       RuleIndex:(NSInteger)ruleIndex
   1049      StartIndex:(NSInteger)ruleStartIndex
   1050 {
   1051     RuleStack *aRuleStack;
   1052     NSInteger stopTokenIndex;
   1053 
   1054     aRuleStack = state.ruleMemo;
   1055     stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
   1056     if ( aRuleStack == nil ) {
   1057         if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
   1058         return;
   1059     }
   1060     if ( ruleIndex >= [aRuleStack length] ) {
   1061         if (debug) NSLog(@"!!!!!!!!! memo size is %d, but rule index is %d", [state.ruleMemo length], ruleIndex);
   1062         return;
   1063     }
   1064     if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
   1065         [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
   1066     }
   1067     return;
   1068 }
   1069    
   1070 /** return how many rule/input-index pairs there are in total.
   1071  *  TODO: this includes synpreds. :(
   1072  */
   1073 - (NSInteger)getRuleMemoizationCacheSize
   1074 {
   1075     RuleStack *aRuleStack;
   1076     HashRule *aHashRule;
   1077 
   1078     int aCnt = 0;
   1079     aRuleStack = state.ruleMemo;
   1080     for (NSUInteger i = 0; aRuleStack != nil && i < [aRuleStack length]; i++) {
   1081         aHashRule = [aRuleStack objectAtIndex:i];
   1082         if ( aHashRule != nil ) {
   1083             aCnt += [aHashRule count]; // how many input indexes are recorded?
   1084         }
   1085     }
   1086     return aCnt;
   1087 }
   1088 
   1089 #pragma warning Have to fix traceIn and traceOut.
   1090 - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
   1091 {
   1092     NSLog(@"enter %@ %@", ruleName, inputSymbol);
   1093     if ( state.backtracking > 0 ) {
   1094         NSLog(@" backtracking=%s", ((state.backtracking==YES)?"YES":"NO"));
   1095     }
   1096     NSLog(@"\n");
   1097 }
   1098 
   1099 - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
   1100 {
   1101     NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
   1102     if ( state.backtracking > 0 ) {
   1103         NSLog(@" backtracking=%s %s", state.backtracking?"YES":"NO", state.failed ? "failed":"succeeded");
   1104     }
   1105     NSLog(@"\n");
   1106 }
   1107 
   1108 
   1109 // call a syntactic predicate methods using its selector. this way we can support arbitrary synpreds.
   1110 - (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<IntStream>)input
   1111 {
   1112     id<IntStream> input;
   1113 
   1114     state.backtracking++;
   1115     // input = state.token.input;
   1116     input = self.input;
   1117     int start = [input mark];
   1118     @try {
   1119         [self performSelector:synpredFragment];
   1120     }
   1121     @catch (RecognitionException *re) {
   1122         NSLog(@"impossible synpred: %@", re.name);
   1123     }
   1124     BOOL success = (state.failed == NO);
   1125     [input rewind:start];
   1126     state.backtracking--;
   1127     state.failed = NO;
   1128     return success;
   1129 }
   1130               
   1131 @end
   1132                                
   1133