1 // [The "BSD licence"] 2 // Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit 3 // All rights reserved. 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions 7 // are met: 8 // 1. Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // 2. Redistributions in binary form must reproduce the above copyright 11 // notice, this list of conditions and the following disclaimer in the 12 // documentation and/or other materials provided with the distribution. 13 // 3. The name of the author may not be used to endorse or promote products 14 // derived from this software without specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

#import <ANTLR/antlr.h>
#import "ANTLRLexer.h"

/**
 * Base lexer implementation. It pulls characters from an ANTLRCharStream,
 * delegates the actual matching to -mTokens (supplied by the generated
 * subclass) and keeps the token under construction in the shared recognizer
 * state.
 *
 * This file uses manual retain/release.
 */
@implementation ANTLRLexer

@synthesize input;
@synthesize ruleNestingLevel;

#pragma mark Initializer

/**
 * Initializes the lexer over anInput with a newly allocated shared state.
 *
 * @param anInput the character stream to read from; it is retained.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    // NOTE(review): the state allocated here is never released in this method.
    // If -initWithState: retains its argument this is a leak -- confirm.
    self = [super initWithState:[[ANTLRRecognizerSharedState alloc] init]];
    if ( self != nil ) {
        input = [anInput retain];
        // If the state already carries a token, point it at the new stream.
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/**
 * Initializes the lexer over anInput using the caller-supplied shared state.
 *
 * @param anInput the character stream to read from; it is retained.
 * @param aState  the recognizer state handed to the superclass initializer.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self != nil ) {
        input = [anInput retain];
        // If the state already carries a token, point it at the new stream.
        if (state.token != nil)
            [((ANTLRCommonToken *)state.token) setInput:anInput];
        ruleNestingLevel = 0;
    }
    return self;
}

/** Releases the retained input stream. */
- (void) dealloc
{
    [input release];    // messaging nil is a no-op, so no guard is needed
    [super dealloc];
}

/**
 * Creates a new instance of the receiver's class with plain -init, then
 * copies the input stream (when there is one) and the rule nesting level.
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *copy;

    copy = [[[self class] allocWithZone:aZone] init];
    //    copy = [super copyWithZone:aZone]; // allocation occurs here
    if ( input != nil )
        copy.input = input;
    copy.ruleNestingLevel = ruleNestingLevel;
    return copy;
}

/**
 * Resets the recognizer state, rewinds the input to index 0 (when an input
 * is set) and clears the lexer-specific fields of the shared state.
 */
- (void) reset
{
    [super reset]; // reset all recognizer state variables
    // wack Lexer state variables
    if ( input != nil ) {
        [input seek:0]; // rewind the input
    }
    if ( state == nil ) {
        return; // no shared state work to do
    }
    state.token = nil;
    state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
    state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
    state.tokenStartCharIndex = -1;
    state.tokenStartCharPositionInLine = -1;
    state.tokenStartLine = -1;
    state.text = nil;
}

// token stuff
#pragma mark Tokens

/** Returns the token currently held by the shared state. */
- (id<ANTLRToken>)getToken
{
    return [state getToken];
}

/**
 * Stores aToken in the shared state when it differs from the token already
 * held there; the new token is retained before it is stored.
 */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if (state.token != aToken) {
        // NOTE(review): if the state's token setter retains as well, this
        // extra retain is never balanced -- confirm against the state class.
        [aToken retain];
        state.token = aToken;
    }
}


// this method may be overridden in the generated lexer if we generate a filtering lexer.
/**
 * Returns the next token from the input.
 *
 * Each iteration clears the per-token state, records where the token starts
 * and runs -mTokens. At end of input an EOF token is returned. Tokens marked
 * as skipped are dropped and matching continues. Recognition errors are
 * reported and the loop tries again.
 */
- (id<ANTLRToken>) nextToken
{
    while (YES) {
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        // [self setText:[self text]];
        if ([input LA:1] == ANTLRCharStreamEOF) {
            ANTLRCommonToken *eof = [ANTLRCommonToken newToken:input
                                                          Type:ANTLRTokenTypeEOF
                                                       Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                         Start:input.index
                                                          Stop:input.index];
            [eof setLine:input.line];
            [eof setCharPositionInLine:input.charPositionInLine];
            return eof;
        }
        @try {
            [self mTokens];
            // SEL aMethod = @selector(mTokens);
            // [[self class] instancesRespondToSelector:aMethod];
            if ( state.token == nil)
                [self emit];
            else if ( state.token == [ANTLRCommonToken skipToken] ) {
                continue;
            }
            return state.token;
        }
        @catch (ANTLRMismatchedRangeException *mre) {
            // -matchRangeFromChar:to: has already called -recover: before throwing.
            [self reportError:mre];
        }
        @catch (ANTLRMismatchedTokenException *mte) {
            // -matchChar: / -matchString: have already called -recover: before throwing.
            [self reportError:mte];
        }
        @catch (ANTLRRecognitionException *e) {
            // Includes ANTLRNoViableAltException. Throw out the current char
            // and try again; without consuming here an exception raised at an
            // unchanged input position would repeat forever.
            [self reportError:e];
            [self recover:e];
        }
    }
}

/** Entry point for matching a token; must be overridden by the generated lexer. */
- (void) mTokens
{   // abstract, defined in generated source as a starting point for matching
    [self doesNotRecognizeSelector:_cmd];
}

/** Marks the current token as skipped so that -nextToken discards it. */
- (void) skip
{
    state.token = [ANTLRCommonToken skipToken];
}

/** Returns the character stream the lexer reads from. */
- (id<ANTLRCharStream>) input
{
    return input;
}

/**
 * Replaces the input stream and resets the lexer.
 *
 * The reset runs while no input is attached, so neither the old nor the new
 * stream is rewound by it. Assigning the stream that is already current
 * leaves its retain count unchanged.
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    // Retain the incoming stream before releasing the old one; this is also
    // correct when both are the same object.
    [anInput retain];
    id<ANTLRCharStream> previousInput = input;
    input = nil;
    [self reset];
    input = anInput;
    [previousInput release];
}

/** Currently does not support multiple emits per nextToken invocation
 *  for efficiency reasons.  Subclass and override this method and
 *  nextToken (to push tokens into a list and pull from that list rather
 *  than a single variable as this implementation does).
 */
- (void) emit:(id<ANTLRToken>)aToken
{
    state.token = aToken;
}

/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.  The token object should point into the
 *  char buffer start..stop.  If there is a text override in 'text',
 *  use that to set the token's text.  Override this method to emit
 *  custom Token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 */
- (void) emit
{
    id<ANTLRToken> aToken = [ANTLRCommonToken newToken:input
                                                  Type:state.type
                                               Channel:state.channel
                                                 Start:state.tokenStartCharIndex
                                                  Stop:input.index-1];
    [aToken setLine:state.tokenStartLine];
    aToken.text = [self text];
    [aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    // NOTE(review): this retain has no matching release in this method (see
    // the commented-out release below) -- confirm the intended ownership.
    [aToken retain];
    [self emit:aToken];
    // [aToken release];
}

// matching
#pragma mark Matching

/**
 * Matches every character of aString against the input, consuming as it goes.
 *
 * On a mismatch while backtracking, state.failed is set and the method
 * returns. Otherwise -recover: is called and an
 * ANTLRMismatchedTokenException carrying the offending character is thrown.
 */
- (void) matchString:(NSString *)aString
{
    unichar c;
    NSUInteger i = 0;
    NSUInteger stringLength = [aString length];
    while ( i < stringLength ) {
        c = [input LA:1];
        if ( c != [aString characterAtIndex:i] ) {
            if ([state getBacktracking] > 0) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:[aString characterAtIndex:i] Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        i++;
        [input consume];
        state.failed = NO;
    }
}

/** Consumes one character unconditionally. */
- (void) matchAny
{
    [input consume];
}

/**
 * Matches a single character.
 *
 * On a mismatch while backtracking, state.failed is set and the method
 * returns. Otherwise -recover: is called and an
 * ANTLRMismatchedTokenException carrying the offending character is thrown.
 */
- (void) matchChar:(unichar) aChar
{
    // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    unichar charLA;
    charLA = [input LA:1];
    if ( charLA != aChar) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
        mte.c = charLA;
        [self recover:mte];
        @throw mte;
    }
    [input consume];
    state.failed = NO;
}

/**
 * Matches one character in the inclusive range fromChar..toChar.
 *
 * On a mismatch while backtracking, state.failed is set and the method
 * returns. Otherwise -recover: is called and an
 * ANTLRMismatchedRangeException is thrown whose range covers
 * fromChar..toChar.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA < fromChar || charLA > toChar ) {
        if ([state getBacktracking] > 0) {
            state.failed = YES;
            return;
        }
        // NSRange is (location, length). -getErrorMessage:TokenNames: derives
        // the upper bound as location+length-1, so the length must be the
        // number of characters in the range, not the upper bound itself.
        NSUInteger rangeLength = (toChar >= fromChar) ? ((NSUInteger)toChar - (NSUInteger)fromChar + 1) : 0;
        ANTLRMismatchedRangeException  *mre = [ANTLRMismatchedRangeException
                    newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                          stream:input];
        mre.c = charLA;
        [self recover:mre];
        @throw mre;
    }
    [input consume];
    state.failed = NO;
}

// info
#pragma mark Informational

/** Returns the input stream's current line. */
- (NSUInteger) line
{
    return input.line;
}

/** Returns the input stream's current position within the line. */
- (NSUInteger) charPositionInLine
{
    return input.charPositionInLine;
}

/** Always returns 0. */
- (NSInteger) index
{
    // NOTE(review): this ignores input.index -- confirm whether the current
    // character index was intended here.
    return 0;
}

/**
 * Returns the text of the token being matched: the override stored in the
 * state when there is one, otherwise the input characters from the token
 * start up to the current index.
 */
- (NSString *) text
{
    if (state.text != nil) {
        return state.text;
    }
    return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
}

/** Overrides the text used for the token being matched. */
- (void) setText:(NSString *) theText
{
    state.text = theText;
}

// error handling
/** Reports e through -displayRecognitionError:Exception: using the receiver's token names. */
- (void) reportError:(ANTLRRecognitionException *)e
{
    /** TODO: not thought about recovery in lexer yet.
     *
     // if we've already reported an error and have not matched a token
     // yet successfully, don't report any errors.
     if ( errorRecovery ) {
     //System.err.print("[SPURIOUS] ");
     return;
     }
     errorRecovery = true;
     */

    [self displayRecognitionError:[self getTokenNames] Exception:e];
}

/**
 * Builds the error message for e.
 *
 * Character-level messages are produced for the exception classes tested
 * below; any other exception is handed to the superclass together with the
 * receiver's own token names.
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
/*    NSString *msg = [NSString stringWithFormat:@"Gotta fix getErrorMessage in ANTLRLexer.m--%@\n",
                     e.name];
 */
    NSString *msg = nil;
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
               [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expecting]];
    }
    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        ANTLRNoViableAltException *nvae = (ANTLRNoViableAltException *)e;
        // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
        // and "(decision="+nvae.decisionNumber+") and
        // "state "+nvae.stateNumber
        msg = [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
               [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
        // for development, can add "(decision="+eee.decisionNumber+")"
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
               [self getCharErrorDisplay:(eee.c)]];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        ANTLRMismatchedRangeException *mre = (ANTLRMismatchedRangeException *)e;
        // location is the lower bound; location+length-1 is the upper bound.
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
               [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
               [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else {
        msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    }
    return msg;
}

/**
 * Returns a printable form of character c for error messages: "<EOF>" for
 * the EOF token type, an escaped form for newline, tab and carriage return,
 * and the character itself otherwise.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    NSString *s;
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            s = @"<EOF>";
            break;
        case '\n' :
            s = @"\\n";
            break;
        case '\t' :
            s = @"\\t";
            break;
        case '\r' :
            s = @"\\r";
            break;
        default:
            // %C formats a 16-bit unichar; narrowing to char would garble
            // anything outside ASCII.
            s = [NSString stringWithFormat:@"%C", (unichar)c];
            break;
    }
    return s;
}

/** Lexers can normally match any char in its vocabulary after matching
 *  a token, so do the easy thing and just kill a character and hope
 *  it all works out.  You can instead use the rule invocation stack
 *  to do sophisticated error recovery if you are in a fragment rule.
 */
- (void)recover:(ANTLRRecognitionException *)re
{
    //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    //re.printStackTrace();
    [input consume];
}

/** Traces entry into a rule, passing the lookahead symbol and the input's line and column to the superclass. */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // line / charPositionInLine are NSUInteger in this class; cast so the
    // format specifiers match on every architecture.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n", [input LT:1],
                             (unsigned long)input.line, (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}

/** Traces exit from a rule, passing the lookahead symbol and the input's line and column to the superclass. */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    // line / charPositionInLine are NSUInteger in this class; cast so the
    // format specifiers match on every architecture.
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n", [input LT:1],
                             (unsigned long)input.line, (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}

@end