Home | History | Annotate | Download | only in Framework
      1 // [The "BSD licence"]
      2 // Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
      3 // All rights reserved.
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions
      7 // are met:
      8 // 1. Redistributions of source code must retain the above copyright
      9 //    notice, this list of conditions and the following disclaimer.
     10 // 2. Redistributions in binary form must reproduce the above copyright
     11 //    notice, this list of conditions and the following disclaimer in the
     12 //    documentation and/or other materials provided with the distribution.
     13 // 3. The name of the author may not be used to endorse or promote products
     14 //    derived from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #import <ANTLR/antlr.h>
     28 #import "ANTLRLexer.h"
     29 
     30 @implementation ANTLRLexer
     31 
     32 @synthesize input;
     33 @synthesize ruleNestingLevel;
     34 #pragma mark Initializer
     35 
     36 - (id) initWithCharStream:(id<ANTLRCharStream>)anInput
     37 {
     38 	self = [super initWithState:[[ANTLRRecognizerSharedState alloc] init]];
     39 	if ( self != nil ) {
     40         input = [anInput retain];
     41         if (state.token != nil)
     42             [((ANTLRCommonToken *)state.token) setInput:anInput];
     43 		ruleNestingLevel = 0;
     44 	}
     45 	return self;
     46 }
     47 
     48 - (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
     49 {
     50 	self = [super initWithState:aState];
     51 	if ( self != nil ) {
     52         input = [anInput retain];
     53         if (state.token != nil)
     54             [((ANTLRCommonToken *)state.token) setInput:anInput];
     55 		ruleNestingLevel = 0;
     56 	}
     57 	return self;
     58 }
     59 
     60 - (void) dealloc
     61 {
     62     if ( input ) [input release];
     63     [super dealloc];
     64 }
     65 
     66 - (id) copyWithZone:(NSZone *)aZone
     67 {
     68     ANTLRLexer *copy;
     69 	
     70     copy = [[[self class] allocWithZone:aZone] init];
     71     //    copy = [super copyWithZone:aZone]; // allocation occurs here
     72     if ( input != nil )
     73         copy.input = input;
     74     copy.ruleNestingLevel = ruleNestingLevel;
     75     return copy;
     76 }
     77 
     78 - (void) reset
     79 {
     80     [super reset]; // reset all recognizer state variables
     81                    // wack Lexer state variables
     82     if ( input != nil ) {
     83         [input seek:0]; // rewind the input
     84     }
     85     if ( state == nil ) {
     86         return; // no shared state work to do
     87     }
     88     state.token = nil;
     89     state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
     90     state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
     91     state.tokenStartCharIndex = -1;
     92     state.tokenStartCharPositionInLine = -1;
     93     state.tokenStartLine = -1;
     94     state.text = nil;
     95 }
     96 
     97 // token stuff
     98 #pragma mark Tokens
     99 
    100 - (id<ANTLRToken>)getToken
    101 {
    102     return [state getToken]; 
    103 }
    104 
    105 - (void) setToken: (id<ANTLRToken>) aToken
    106 {
    107     if (state.token != aToken) {
    108         [aToken retain];
    109         state.token = aToken;
    110     }
    111 }
    112 
    113 
    114 // this method may be overridden in the generated lexer if we generate a filtering lexer.
    115 - (id<ANTLRToken>) nextToken
    116 {
    117 	while (YES) {
    118         [self setToken:nil];
    119         state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
    120         state.tokenStartCharIndex = input.index;
    121         state.tokenStartCharPositionInLine = input.charPositionInLine;
    122         state.tokenStartLine = input.line;
    123         state.text = nil;
    124         
    125         // [self setText:[self text]];
    126 		if ([input LA:1] == ANTLRCharStreamEOF) {
    127             ANTLRCommonToken *eof = [ANTLRCommonToken newToken:input
    128                                                           Type:ANTLRTokenTypeEOF
    129                                                        Channel:ANTLRCommonToken.DEFAULT_CHANNEL
    130                                                          Start:input.index
    131                                                           Stop:input.index];
    132             [eof setLine:input.line];
    133             [eof setCharPositionInLine:input.charPositionInLine];
    134 			return eof;
    135 		}
    136 		@try {
    137 			[self mTokens];
    138             // SEL aMethod = @selector(mTokens);
    139             // [[self class] instancesRespondToSelector:aMethod];
    140             if ( state.token == nil)
    141                 [self emit];
    142             else if ( state.token == [ANTLRCommonToken skipToken] ) {
    143                 continue;
    144             }
    145 			return state.token;
    146 		}
    147 		@catch (ANTLRNoViableAltException *nva) {
    148 			[self reportError:nva];
    149 			[self recover:nva];
    150 		}
    151 		@catch (ANTLRRecognitionException *e) {
    152 			[self reportError:e];
    153 		}
    154 	}
    155 }
    156 
    157 - (void) mTokens
    158 {   // abstract, defined in generated source as a starting point for matching
    159     [self doesNotRecognizeSelector:_cmd];
    160 }
    161 
    162 - (void) skip
    163 {
    164     state.token = [ANTLRCommonToken skipToken];
    165 }
    166 
    167 - (id<ANTLRCharStream>) input
    168 {
    169     return input; 
    170 }
    171 
    172 - (void) setInput:(id<ANTLRCharStream>) anInput
    173 {
    174     if ( anInput != input ) {
    175         if ( input ) [input release];
    176     }
    177     input = nil;
    178     [self reset];
    179     input = anInput;
    180     [input retain];
    181 }
    182 
    183 /** Currently does not support multiple emits per nextToken invocation
    184  *  for efficiency reasons.  Subclass and override this method and
    185  *  nextToken (to push tokens into a list and pull from that list rather
    186  *  than a single variable as this implementation does).
    187  */
    188 - (void) emit:(id<ANTLRToken>)aToken
    189 {
    190 	state.token = aToken;
    191 }
    192 
    193 /** The standard method called to automatically emit a token at the
    194  *  outermost lexical rule.  The token object should point into the
    195  *  char buffer start..stop.  If there is a text override in 'text',
    196  *  use that to set the token's text.  Override this method to emit
    197  *  custom Token objects.
    198  *
    199  *  If you are building trees, then you should also override
    200  *  Parser or TreeParser.getMissingSymbol().
    201  */
    202 - (void) emit
    203 {
    204 	id<ANTLRToken> aToken = [ANTLRCommonToken newToken:input
    205                                                   Type:state.type
    206                                                Channel:state.channel
    207                                                  Start:state.tokenStartCharIndex
    208                                                   Stop:input.index-1];
    209 	[aToken setLine:state.tokenStartLine];
    210     aToken.text = [self text];
    211 	[aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    212     [aToken retain];
    213 	[self emit:aToken];
    214 	// [aToken release];
    215 }
    216 
    217 // matching
    218 #pragma mark Matching
    219 - (void) matchString:(NSString *)aString
    220 {
    221     unichar c;
    222 	unsigned int i = 0;
    223 	unsigned int stringLength = [aString length];
    224 	while ( i < stringLength ) {
    225 		c = [input LA:1];
    226         if ( c != [aString characterAtIndex:i] ) {
    227 			if ([state getBacktracking] > 0) {
    228 				state.failed = YES;
    229 				return;
    230 			}
    231 			ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:[aString characterAtIndex:i] Stream:input];
    232             mte.c = c;
    233 			[self recover:mte];
    234 			@throw mte;
    235 		}
    236 		i++;
    237 		[input consume];
    238 		state.failed = NO;
    239 	}
    240 }
    241 
    242 - (void) matchAny
    243 {
    244 	[input consume];
    245 }
    246 
    247 - (void) matchChar:(unichar) aChar
    248 {
    249 	// TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    250 	//		 try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    251     unichar charLA;
    252 	charLA = [input LA:1];
    253 	if ( charLA != aChar) {
    254 		if ([state getBacktracking] > 0) {
    255 			state.failed = YES;
    256 			return;
    257 		}
    258 		ANTLRMismatchedTokenException  *mte = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
    259         mte.c = charLA;
    260 		[self recover:mte];
    261 		@throw mte;
    262 	}
    263 	[input consume];
    264 	state.failed = NO;
    265 }
    266 
    267 - (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
    268 {
    269 	unichar charLA = (unichar)[input LA:1];
    270 	if ( charLA < fromChar || charLA > toChar ) {
    271 		if ([state getBacktracking] > 0) {
    272 			state.failed = YES;
    273 			return;
    274 		}
    275 		ANTLRMismatchedRangeException  *mre = [ANTLRMismatchedRangeException
    276 					newException:NSMakeRange((NSUInteger)fromChar,(NSUInteger)toChar)
    277 							   stream:input];
    278         mre.c = charLA;
    279 		[self recover:mre];
    280 		@throw mre;
    281 	}		
    282 	[input consume];
    283 	state.failed = NO;
    284 }
    285 
    286 	// info
    287 #pragma mark Informational
    288 
    289 - (NSUInteger) line
    290 {
    291 	return input.line;
    292 }
    293 
    294 - (NSUInteger) charPositionInLine
    295 {
    296 	return input.charPositionInLine;
    297 }
    298 
    299 - (NSInteger) index
    300 {
    301     return 0;
    302 }
    303 
    304 - (NSString *) text
    305 {
    306     if (state.text != nil) {
    307         return state.text;
    308     }
    309 	return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
    310 }
    311 
    312 - (void) setText:(NSString *) theText
    313 {
    314     state.text = theText;
    315 }
    316 
    317 	// error handling
    318 - (void) reportError:(ANTLRRecognitionException *)e
    319 {
    320     /** TODO: not thought about recovery in lexer yet.
    321      *
    322      // if we've already reported an error and have not matched a token
    323      // yet successfully, don't report any errors.
    324      if ( errorRecovery ) {
    325      //System.err.print("[SPURIOUS] ");
    326      return;
    327      }
    328      errorRecovery = true;
    329      */
    330     
    331     [self displayRecognitionError:[self getTokenNames] Exception:e];
    332 }
    333 
    334 - (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
    335 {
    336 /*    NSString *msg = [NSString stringWithFormat:@"Gotta fix getErrorMessage in ANTLRLexer.m--%@\n",
    337                      e.name];
    338  */
    339     NSString *msg = nil;
    340     if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
    341         ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
    342         msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
    343                [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expecting]];
    344     }
    345     else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
    346         ANTLRNoViableAltException *nvae = (ANTLRNoViableAltException *)e;
    347         // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    348         // and "(decision="+nvae.decisionNumber+") and
    349         // "state "+nvae.stateNumber
    350         msg = [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
    351                [self getCharErrorDisplay:(nvae.c)]];
    352     }
    353     else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
    354         ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
    355         // for development, can add "(decision="+eee.decisionNumber+")"
    356         msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
    357                [self getCharErrorDisplay:(eee.c)]];
    358     }
    359     else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
    360         ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
    361         msg = [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
    362                [self getCharErrorDisplay:(mse.c)], mse.expecting];
    363     }
    364     else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
    365         ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
    366         msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
    367                [self getCharErrorDisplay:(mse.c)], mse.expecting];
    368     }
    369     else if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
    370         ANTLRMismatchedRangeException *mre = (ANTLRMismatchedRangeException *)e;
    371         msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
    372                [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
    373                [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    374     }
    375     else {
    376         msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    377     }
    378     return msg;
    379 }
    380 
    381 - (NSString *)getCharErrorDisplay:(NSInteger)c
    382 {
    383     NSString *s;
    384     switch ( c ) {
    385         case ANTLRTokenTypeEOF :
    386             s = @"<EOF>";
    387             break;
    388         case '\n' :
    389             s = @"\\n";
    390             break;
    391         case '\t' :
    392             s = @"\\t";
    393             break;
    394         case '\r' :
    395             s = @"\\r";
    396             break;
    397         default:
    398             s = [NSString stringWithFormat:@"%c", (char)c];
    399             break;
    400     }
    401     return s;
    402 }
    403 
    404 /** Lexers can normally match any char in it's vocabulary after matching
    405  *  a token, so do the easy thing and just kill a character and hope
    406  *  it all works out.  You can instead use the rule invocation stack
    407  *  to do sophisticated error recovery if you are in a fragment rule.
    408  */
    409 - (void)recover:(ANTLRRecognitionException *)re
    410 {
    411     //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    412     //re.printStackTrace();
    413     [input consume];
    414 }
    415 
    416 - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
    417 {
    418     NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%d:%d\n", [input LT:1], input.line, input.charPositionInLine];
    419     [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
    420 }
    421 
    422 - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
    423 {
    424     NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%d:%d\n", [input LT:1], input.line, input.charPositionInLine];
    425     [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
    426 }
    427 
    428 @end
    429