// Home | History | Annotate | Download | only in runtime
      1 /** A lexer is recognizer that draws input symbols from a character stream.
      2  *  lexer grammars result in a subclass of this object. A Lexer object
      3  *  uses simplified match() and error recovery mechanisms in the interest
      4  *  of speed.
      5  */
      6 org.antlr.runtime.Lexer = function(input, state) {
      7     if (state) {
      8         org.antlr.runtime.Lexer.superclass.constructor.call(this, state);
      9     }
     10     if (input) {
     11         this.input = input;
     12     }
     13 };
     14 
     15 org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
     16     reset: function() {
     17         // reset all recognizer state variables
     18         org.antlr.runtime.Lexer.superclass.reset.call(this);
     19         if ( org.antlr.lang.isValue(this.input) ) {
     20             this.input.seek(0); // rewind the input
     21         }
     22         if ( !org.antlr.lang.isValue(this.state) ) {
     23             return; // no shared state work to do
     24         }
     25         this.state.token = null;
     26         this.state.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
     27         this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
     28         this.state.tokenStartCharIndex = -1;
     29         this.state.tokenStartCharPositionInLine = -1;
     30         this.state.tokenStartLine = -1;
     31         this.state.text = null;
     32     },
     33 
     34     /** Return a token from this source; i.e., match a token on the char
     35      *  stream.
     36      */
     37     nextToken: function() {
     38         while (true) {
     39             this.state.token = null;
     40             this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
     41             this.state.tokenStartCharIndex = this.input.index();
     42             this.state.tokenStartCharPositionInLine = this.input.getCharPositionInLine();
     43             this.state.tokenStartLine = this.input.getLine();
     44             this.state.text = null;
     45             if ( this.input.LA(1)===org.antlr.runtime.CharStream.EOF ) {
     46                 return org.antlr.runtime.Token.EOF_TOKEN;
     47             }
     48             try {
     49                 this.mTokens();
     50                 if ( !org.antlr.lang.isValue(this.state.token) ) {
     51                     this.emit();
     52                 }
     53                 else if ( this.state.token==org.antlr.runtime.Token.SKIP_TOKEN ) {
     54                     continue;
     55                 }
     56                 return this.state.token;
     57             }
     58             catch (re) {
     59                 if (re instanceof org.antlr.runtime.NoViableAltException) {
     60                     this.reportError(re);
     61                     this.recover(re);
     62                 } else if ( re instanceof org.antlr.runtime.RecognitionException ) {
     63                     this.reportError(re);
     64                 } else {
     65                     throw re;
     66                 }
     67             }
     68         }
     69     },
     70 
     71     /** Instruct the lexer to skip creating a token for current lexer rule
     72      *  and look for another token.  nextToken() knows to keep looking when
     73      *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
     74      *  if token==null at end of any token rule, it creates one for you
     75      *  and emits it.
     76      */
     77     skip: function() {
     78         this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
     79     },
     80 
     81     /** Set the char stream and reset the lexer */
     82     setCharStream: function(input) {
     83         this.input = null;
     84         this.reset();
     85         this.input = input;
     86     },
     87 
     88     getCharStream: function() {
     89         return this.input;
     90     },
     91 
     92     getSourceName: function() {
     93         return this.input.getSourceName();
     94     },
     95 
     96     /** Currently does not support multiple emits per nextToken invocation
     97      *  for efficiency reasons.  Subclass and override this method and
     98      *  nextToken (to push tokens into a list and pull from that list rather
     99      *  than a single variable as this implementation does).
    100      *
    101      *  The standard method called to automatically emit a token at the
    102      *  outermost lexical rule.  The token object should point into the
    103      *  char buffer start..stop.  If there is a text override in 'text',
    104      *  use that to set the token's text.  Override this method to emit
    105      *  custom Token objects.
    106      *
    107      *  If you are building trees, then you should also override
    108      *  Parser or TreeParser.getMissingSymbol().
    109      */
    110     emit: function() {
    111         if (arguments.length===0) {
    112             var t = new org.antlr.runtime.CommonToken(this.input, this.state.type, this.state.channel, this.state.tokenStartCharIndex, this.getCharIndex()-1);
    113             t.setLine(this.state.tokenStartLine);
    114             t.setText(this.state.text);
    115             t.setCharPositionInLine(this.state.tokenStartCharPositionInLine);
    116             this.state.token = t;
    117             return t;
    118         } else {
    119             this.state.token = arguments[0];
    120         }
    121     },
    122 
    123     match: function(s) {
    124         var i = 0,
    125             mte;
    126 
    127         if (org.antlr.lang.isString(s)) {
    128             while ( i<s.length ) {
    129                 if ( this.input.LA(1)!=s.charAt(i) ) {
    130                     if ( this.state.backtracking>0 ) {
    131                         this.state.failed = true;
    132                         return;
    133                     }
    134                     mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(i), this.input);
    135                     this.recover(mte);
    136                     throw mte;
    137                 }
    138                 i++;
    139                 this.input.consume();
    140                 this.state.failed = false;
    141             }
    142         } else if (org.antlr.lang.isNumber(s)) {
    143             if ( this.input.LA(1)!=s ) {
    144                 if ( this.state.backtracking>0 ) {
    145                     this.state.failed = true;
    146                     return;
    147                 }
    148                 mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
    149                 this.recover(mte);
    150                 throw mte;
    151             }
    152             this.input.consume();
    153             this.state.failed = false;
    154         }
    155     },
    156 
    157     matchAny: function() {
    158         this.input.consume();
    159     },
    160 
    161     matchRange: function(a, b) {
    162         if ( this.input.LA(1)<a || this.input.LA(1)>b ) {
    163             if ( this.state.backtracking>0 ) {
    164                 this.state.failed = true;
    165                 return;
    166             }
    167             var mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
    168             this.recover(mre);
    169             throw mre;
    170         }
    171         this.input.consume();
    172         this.state.failed = false;
    173     },
    174 
    175     getLine: function() {
    176         return this.input.getLine();
    177     },
    178 
    179     getCharPositionInLine: function() {
    180         return this.input.getCharPositionInLine();
    181     },
    182 
    183     /** What is the index of the current character of lookahead? */
    184     getCharIndex: function() {
    185         return this.input.index();
    186     },
    187 
    188     /** Return the text matched so far for the current token or any
    189      *  text override.
    190      */
    191     getText: function() {
    192         if ( org.antlr.lang.isString(this.state.text) ) {
    193             return this.state.text;
    194         }
    195         return this.input.substring(this.state.tokenStartCharIndex,this.getCharIndex()-1);
    196     },
    197 
    198     /** Set the complete text of this token; it wipes any previous
    199      *  changes to the text.
    200      */
    201     setText: function(text) {
    202         this.state.text = text;
    203     },
    204 
    205     reportError: function(e) {
    206         /** TODO: not thought about recovery in lexer yet.
    207          *
    208         // if we've already reported an error and have not matched a token
    209         // yet successfully, don't report any errors.
    210         if ( errorRecovery ) {
    211             //System.err.print("[SPURIOUS] ");
    212             return;
    213         }
    214         errorRecovery = true;
    215          */
    216 
    217         this.displayRecognitionError(this.getTokenNames(), e);
    218     },
    219 
    220     getErrorMessage: function(e, tokenNames) {
    221         var msg = null;
    222         if ( e instanceof org.antlr.runtime.MismatchedTokenException ) {
    223             msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting "+this.getCharErrorDisplay(e.expecting);
    224         }
    225         else if ( e instanceof org.antlr.runtime.NoViableAltException ) {
    226             msg = "no viable alternative at character "+this.getCharErrorDisplay(e.c);
    227         }
    228         else if ( e instanceof org.antlr.runtime.EarlyExitException ) {
    229             msg = "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
    230         }
    231         else if ( e instanceof org.antlr.runtime.MismatchedNotSetException ) {
    232             msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
    233         }
    234         else if ( e instanceof org.antlr.runtime.MismatchedSetException ) {
    235             msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+e.expecting;
    236         }
    237         else if ( e instanceof org.antlr.runtime.MismatchedRangeException ) {
    238             msg = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set "+
    239                 this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
    240         }
    241         else {
    242             msg = org.antlr.runtime.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
    243         }
    244         return msg;
    245     },
    246 
    247     getCharErrorDisplay: function(c) {
    248         var s = c; //String.fromCharCode(c);
    249         switch ( s ) {
    250             case org.antlr.runtime.Token.EOF :
    251                 s = "<EOF>";
    252                 break;
    253             case "\n" :
    254                 s = "\\n";
    255                 break;
    256             case "\t" :
    257                 s = "\\t";
    258                 break;
    259             case "\r" :
    260                 s = "\\r";
    261                 break;
    262         }
    263         return "'"+s+"'";
    264     },
    265 
    266     /** Lexers can normally match any char in it's vocabulary after matching
    267      *  a token, so do the easy thing and just kill a character and hope
    268      *  it all works out.  You can instead use the rule invocation stack
    269      *  to do sophisticated error recovery if you are in a fragment rule.
    270      */
    271     recover: function(re) {
    272         this.input.consume();
    273     },
    274 
    275     traceIn: function(ruleName, ruleIndex)  {
    276         var inputSymbol = String.fromCharCode(this.input.LT(1))+" line="+this.getLine()+":"+this.getCharPositionInLine();
    277         org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
    278     },
    279 
    280     traceOut: function(ruleName, ruleIndex)  {
    281 		var inputSymbol = String.fromCharCode(this.input.LT(1))+" line="+this.getLine()+":"+this.getCharPositionInLine();
    282 		org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
    283 	}
    284 });
    285