Home | History | Annotate | Download | only in Framework
      1 // [The "BSD licence"]
      2 // Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit
      3 // All rights reserved.
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions
      7 // are met:
      8 // 1. Redistributions of source code must retain the above copyright
      9 //    notice, this list of conditions and the following disclaimer.
     10 // 2. Redistributions in binary form must reproduce the above copyright
     11 //    notice, this list of conditions and the following disclaimer in the
     12 //    documentation and/or other materials provided with the distribution.
     13 // 3. The name of the author may not be used to endorse or promote products
     14 //    derived from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #import <ANTLR/antlr.h>
     28 #import "Lexer.h"
     29 
     30 @implementation Lexer
     31 
     32 @synthesize input;
     33 @synthesize ruleNestingLevel;
     34 #pragma mark Initializer
     35 
     36 - (id) initWithCharStream:(id<CharStream>)anInput
     37 {
     38     self = [super initWithState:[[RecognizerSharedState alloc] init]];
     39     if ( self != nil ) {
     40         input = [anInput retain];
     41         if (state.token != nil)
     42             [((CommonToken *)state.token) setInput:anInput];
     43         ruleNestingLevel = 0;
     44     }
     45     return self;
     46 }
     47 
     48 - (id) initWithCharStream:(id<CharStream>)anInput State:(RecognizerSharedState *)aState
     49 {
     50     self = [super initWithState:aState];
     51     if ( self != nil ) {
     52         input = [anInput retain];
     53         if (state.token != nil)
     54             [((CommonToken *)state.token) setInput:anInput];
     55         ruleNestingLevel = 0;
     56     }
     57     return self;
     58 }
     59 
     60 - (void) dealloc
     61 {
     62     if ( input ) [input release];
     63     [super dealloc];
     64 }
     65 
     66 - (id) copyWithZone:(NSZone *)aZone
     67 {
     68     Lexer *copy;
     69     
     70     copy = [[[self class] allocWithZone:aZone] init];
     71     //    copy = [super copyWithZone:aZone]; // allocation occurs here
     72     if ( input != nil )
     73         copy.input = input;
     74     copy.ruleNestingLevel = ruleNestingLevel;
     75     return copy;
     76 }
     77 
     78 - (void) reset
     79 {
     80     [super reset]; // reset all recognizer state variables
     81                    // wack Lexer state variables
     82     if ( input != nil ) {
     83         [input seek:0]; // rewind the input
     84     }
     85     if ( state == nil ) {
     86         return; // no shared state work to do
     87     }
     88     state.token = nil;
     89     state.type = CommonToken.INVALID_TOKEN_TYPE;
     90     state.channel = CommonToken.DEFAULT_CHANNEL;
     91     state.tokenStartCharIndex = -1;
     92     state.tokenStartCharPositionInLine = -1;
     93     state.tokenStartLine = -1;
     94     state.text = nil;
     95 }
     96 
     97 // token stuff
     98 #pragma mark Tokens
     99 
    100 - (id<Token>)getToken
    101 {
    102     return [state getToken]; 
    103 }
    104 
    105 - (void) setToken: (id<Token>) aToken
    106 {
    107     if (state.token != aToken) {
    108         [aToken retain];
    109         state.token = aToken;
    110     }
    111 }
    112 
    113 
    114 // this method may be overridden in the generated lexer if we generate a filtering lexer.
    115 - (id<Token>) nextToken
    116 {
    117     while (YES) {
    118         [self setToken:nil];
    119         state.channel = CommonToken.DEFAULT_CHANNEL;
    120         state.tokenStartCharIndex = input.index;
    121         state.tokenStartCharPositionInLine = input.getCharPositionInLine;
    122         state.tokenStartLine = input.getLine;
    123         state.text = nil;
    124         
    125         // [self setText:[self text]];
    126         if ([input LA:1] == CharStreamEOF) {
    127             CommonToken *eof = [CommonToken newToken:input
    128                                                           Type:TokenTypeEOF
    129                                                        Channel:CommonToken.DEFAULT_CHANNEL
    130                                                          Start:input.index
    131                                                           Stop:input.index];
    132             [eof setLine:input.getLine];
    133             [eof setCharPositionInLine:input.getCharPositionInLine];
    134             return eof;
    135         }
    136         @try {
    137             [self mTokens];
    138             // SEL aMethod = @selector(mTokens);
    139             // [[self class] instancesRespondToSelector:aMethod];
    140             if ( state.token == nil)
    141                 [self emit];
    142             else if ( state.token == [CommonToken skipToken] ) {
    143                 continue;
    144             }
    145             return state.token;
    146         }
    147         @catch (MismatchedRangeException *re) {
    148             [self reportError:re];
    149             // [self recover:re];
    150         }
    151         @catch (MismatchedTokenException *re) {
    152             [self reportError:re];
    153             // [self recover:re];
    154         }
    155         @catch (RecognitionException *re) {
    156             [self reportError:re];
    157             [self recover:re];
    158         }
    159     }
    160 }
    161 
    162 - (void) mTokens
    163 {   // abstract, defined in generated source as a starting point for matching
    164     [self doesNotRecognizeSelector:_cmd];
    165 }
    166 
    167 - (void) skip
    168 {
    169     state.token = [CommonToken skipToken];
    170 }
    171 
    172 - (id<CharStream>) input
    173 {
    174     return input; 
    175 }
    176 
    177 - (void) setInput:(id<CharStream>) anInput
    178 {
    179     if ( anInput != input ) {
    180         if ( input ) [input release];
    181     }
    182     input = nil;
    183     [self reset];
    184     input = anInput;
    185     [input retain];
    186 }
    187 
    188 /** Currently does not support multiple emits per nextToken invocation
    189  *  for efficiency reasons.  Subclass and override this method and
    190  *  nextToken (to push tokens into a list and pull from that list rather
    191  *  than a single variable as this implementation does).
    192  */
    193 - (void) emit:(id<Token>)aToken
    194 {
    195     state.token = aToken;
    196 }
    197 
    198 /** The standard method called to automatically emit a token at the
    199  *  outermost lexical rule.  The token object should point into the
    200  *  char buffer start..stop.  If there is a text override in 'text',
    201  *  use that to set the token's text.  Override this method to emit
    202  *  custom Token objects.
    203  *
    204  *  If you are building trees, then you should also override
    205  *  Parser or TreeParser.getMissingSymbol().
    206  */
    207 - (void) emit
    208 {
    209     id<Token> aToken = [CommonToken newToken:input
    210                                                   Type:state.type
    211                                                Channel:state.channel
    212                                                  Start:state.tokenStartCharIndex
    213                                                   Stop:input.index-1];
    214     aToken.text = [self text];
    215     [aToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    216     [aToken setLine:state.tokenStartLine];
    217     [aToken retain];
    218     [self emit:aToken];
    219     // [aToken release];
    220 }
    221 
    222 // matching
    223 #pragma mark Matching
    224 - (void) matchString:(NSString *)aString
    225 {
    226     unichar c;
    227     unsigned int i = 0;
    228     unsigned int stringLength = [aString length];
    229     while ( i < stringLength ) {
    230         c = [input LA:1];
    231         if ( c != [aString characterAtIndex:i] ) {
    232             if ([state getBacktracking] > 0) {
    233                 state.failed = YES;
    234                 return;
    235             }
    236             MismatchedTokenException *mte = [MismatchedTokenException newExceptionChar:[aString characterAtIndex:i] Stream:input];
    237             mte.c = c;
    238             [self recover:mte];
    239             @throw mte;
    240         }
    241         i++;
    242         [input consume];
    243         state.failed = NO;
    244     }
    245 }
    246 
    247 - (void) matchAny
    248 {
    249     [input consume];
    250 }
    251 
    252 - (void) matchChar:(unichar) aChar
    253 {
    254     // TODO: -LA: is returning an int because it sometimes is used in the generated parser to compare lookahead with a tokentype.
    255     //       try to change all those occurrences to -LT: if possible (i.e. if ANTLR can be made to generate LA only for lexer code)
    256     unichar charLA;
    257     charLA = [input LA:1];
    258     if ( charLA != aChar) {
    259         if ([state getBacktracking] > 0) {
    260             state.failed = YES;
    261             return;
    262         }
    263         MismatchedTokenException  *mte = [MismatchedTokenException newExceptionChar:aChar Stream:input];
    264         mte.c = charLA;
    265         [self recover:mte];
    266         @throw mte;
    267     }
    268     [input consume];
    269     state.failed = NO;
    270 }
    271 
    272 - (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
    273 {
    274     unichar charLA = (unichar)[input LA:1];
    275     if ( charLA < fromChar || charLA > toChar ) {
    276         if ([state getBacktracking] > 0) {
    277             state.failed = YES;
    278             return;
    279         }
    280         MismatchedRangeException  *mre = [MismatchedRangeException
    281                     newException:NSMakeRange((NSUInteger)fromChar,(NSUInteger)toChar)
    282                                stream:input];
    283         mre.c = charLA;
    284         [self recover:mre];
    285         @throw mre;
    286     }       
    287     [input consume];
    288     state.failed = NO;
    289 }
    290 
    291     // info
    292 #pragma mark Informational
    293 
    294 - (NSUInteger) line
    295 {
    296     return input.getLine;
    297 }
    298 
    299 - (NSUInteger) charPositionInLine
    300 {
    301     return input.getCharPositionInLine;
    302 }
    303 
    304 - (NSInteger) index
    305 {
    306     return 0;
    307 }
    308 
    309 - (NSString *) text
    310 {
    311     if (state.text != nil) {
    312         return state.text;
    313     }
    314     return [input substringWithRange:NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex)];
    315 }
    316 
    317 - (void) setText:(NSString *) theText
    318 {
    319     state.text = theText;
    320 }
    321 
    322     // error handling
    323 - (void) reportError:(RecognitionException *)e
    324 {
    325     /** TODO: not thought about recovery in lexer yet.
    326      *
    327      // if we've already reported an error and have not matched a token
    328      // yet successfully, don't report any errors.
    329      if ( errorRecovery ) {
    330      //System.err.print("[SPURIOUS] ");
    331      return;
    332      }
    333      errorRecovery = true;
    334      */
    335     
    336     [self displayRecognitionError:[self getTokenNames] Exception:e];
    337 }
    338 
    339 - (NSString *)getErrorMessage:(RecognitionException *)e TokenNames:(AMutableArray *)tokenNames
    340 {
    341 /*    NSString *msg = [NSString stringWithFormat:@"Gotta fix getErrorMessage in Lexer.m--%@\n",
    342                      e.name];
    343  */
    344     NSString *msg = nil;
    345     if ( [e isKindOfClass:[MismatchedTokenException class]] ) {
    346         MismatchedTokenException *mte = (MismatchedTokenException *)e;
    347         msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
    348             [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expectingChar]];
    349     }
    350     else if ( [e isKindOfClass:[NoViableAltException class]] ) {
    351         NoViableAltException *nvae = (NoViableAltException *)e;
    352         // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    353         // and "(decision="+nvae.decisionNumber+") and
    354         // "state "+nvae.stateNumber
    355         msg = [NSString stringWithFormat:@"no viable alternative decision:%d state:%d at character \"%@\"",
    356                nvae.decisionNumber, nvae.stateNumber, [self getCharErrorDisplay:(nvae.c)]];
    357     }
    358     else if ( [e isKindOfClass:[EarlyExitException class]] ) {
    359         EarlyExitException *eee = (EarlyExitException *)e;
    360         // for development, can add "(decision="+eee.decisionNumber+")"
    361         msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
    362                [self getCharErrorDisplay:(eee.c)]];
    363     }
    364     else if ( [e isKindOfClass:[MismatchedNotSetException class]] ) {
    365         MismatchedNotSetException *mse = (MismatchedNotSetException *)e;
    366         msg = [NSString stringWithFormat:@"mismatched character \"%@\"  expecting set \"%@\"",
    367                [self getCharErrorDisplay:(mse.c)], mse.expecting];
    368     }
    369     else if ( [e isKindOfClass:[MismatchedSetException class]] ) {
    370         MismatchedSetException *mse = (MismatchedSetException *)e;
    371         msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
    372                [self getCharErrorDisplay:(mse.c)], mse.expecting];
    373     }
    374     else if ( [e isKindOfClass:[MismatchedRangeException class]] ) {
    375         MismatchedRangeException *mre = (MismatchedRangeException *)e;
    376         msg = [NSString stringWithFormat:@"mismatched character \"%@\" \"%@..%@\"",
    377                [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
    378                [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    379     }
    380     else {
    381         msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    382     }
    383     return msg;
    384 }
    385 
    386 - (NSString *)getCharErrorDisplay:(NSInteger)c
    387 {
    388     NSString *s;
    389     switch ( c ) {
    390         case 0:
    391             s = @"char=<nil>";
    392             break;
    393         case TokenTypeEOF :
    394         case 65535:
    395             s = @"<EOF>";
    396             break;
    397         case '\n' :
    398             s = @"\\n";
    399             break;
    400         case '\t' :
    401             s = @"\\t";
    402             break;
    403         case '\r' :
    404             s = @"\\r";
    405             break;
    406         default:
    407             s = [NSString stringWithFormat:@"%c", (char)c];
    408             break;
    409     }
    410     return s;
    411 }
    412 
    413 /** Lexers can normally match any char in it's vocabulary after matching
    414  *  a token, so do the easy thing and just kill a character and hope
    415  *  it all works out.  You can instead use the rule invocation stack
    416  *  to do sophisticated error recovery if you are in a fragment rule.
    417  */
    418 - (void)recover:(RecognitionException *)re
    419 {
    420     //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    421     //re.printStackTrace();
    422     [input consume];
    423 }
    424 
    425 - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
    426 {
    427     NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%d:%d\n", [input LT:1], input.getLine, input.getCharPositionInLine];
    428     [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
    429 }
    430 
    431 - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
    432 {
    433     NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%d:%d\n", [input LT:1], input.getLine, input.getCharPositionInLine];
    434     [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
    435 }
    436 
    437 @end
    438