Home | History | Annotate | Download | only in runtime
      1 /*
      2  [The "BSD licence"]
      3  Copyright (c) 2005-2006 Terence Parr
      4  All rights reserved.
      5 
      6  Redistribution and use in source and binary forms, with or without
      7  modification, are permitted provided that the following conditions
      8  are met:
      9  1. Redistributions of source code must retain the above copyright
     10     notice, this list of conditions and the following disclaimer.
     11  2. Redistributions in binary form must reproduce the above copyright
     12     notice, this list of conditions and the following disclaimer in the
     13     documentation and/or other materials provided with the distribution.
     14  3. The name of the author may not be used to endorse or promote products
     15     derived from this software without specific prior written permission.
     16 
     17  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     18  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     19  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     20  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     21  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     22  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 */
     28 package org.antlr.runtime {
     29 
	/** A lexer is a recognizer that draws input symbols from a character stream.
	 *  Lexer grammars result in a subclass of this object.  A Lexer object
	 *  uses simplified match() and error recovery mechanisms in the interest
	 *  of speed.
	 */
     35 	public class Lexer extends BaseRecognizer implements TokenSource {
     36 		/** Where is the lexer drawing characters from? */
     37 	    protected var input:CharStream;
     38 
     39 		public function Lexer(input:CharStream = null, state:RecognizerSharedState = null) {
     40 		    super(state);
     41 			this.input = input;
     42 		}
     43 
     44 		public override function reset():void {
     45 			super.reset(); // reset all recognizer state variables
     46     		// wack Lexer state variables
     47     		if ( input!=null ) {
     48     			input.seek(0); // rewind the input
     49     		}
     50     		if ( state==null ) {
     51     			return; // no shared state work to do
     52     		}
     53     		state.token = null;
     54     		state.type = TokenConstants.INVALID_TOKEN_TYPE;
     55     		state.channel = TokenConstants.DEFAULT_CHANNEL;
     56     		state.tokenStartCharIndex = -1;
     57     		state.tokenStartCharPositionInLine = -1;
     58     		state.tokenStartLine = -1;
     59     		state.text = null;
     60 		}
     61 
     62 		/** Return a token from this source; i.e., match a token on the char
     63 		 *  stream.
     64 		 */
     65 	    public function nextToken():Token {
     66 			while (true) {
     67 				state.token = null;
     68 				state.channel = TokenConstants.DEFAULT_CHANNEL;
     69 				state.tokenStartCharIndex = input.index;
     70 				state.tokenStartCharPositionInLine = input.charPositionInLine;
     71 				state.tokenStartLine = input.line;
     72 				state.text = null;
     73 				if ( input.LA(1)==CharStreamConstants.EOF ) {
     74 	                return TokenConstants.EOF_TOKEN;
     75 	            }
     76 	            try {
     77 	                mTokens();
     78 					if ( state.token==null ) {
     79 						emit();
     80 					}
     81 					else if ( state.token==TokenConstants.SKIP_TOKEN ) {
     82 						continue;
     83 					}
     84 					return state.token;
     85 				}
     86 	            catch (nva:NoViableAltException) {
     87     				reportError(nva);
     88     				recover(nva); // throw out current char and try again
     89     			}
     90     			catch (re:RecognitionException) {
     91     				reportError(re);
     92     				// match() routine has already called recover()
     93     			}
     94 	        }
     95 	        // Can't happen, but will quiet complier error
     96 	        return null;
     97 	    }
     98 
     99 		/** Instruct the lexer to skip creating a token for current lexer rule
    100 		 *  and look for another token.  nextToken() knows to keep looking when
    101 		 *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
    102 		 *  if token==null at end of any token rule, it creates one for you
    103 		 *  and emits it.
    104 		 */
    105 		public function skip():void {
    106 			state.token = TokenConstants.SKIP_TOKEN;
    107 		}
    108 
    109 		/** This is the lexer entry point that sets instance var 'token' */
    110 		public function mTokens():void {
    111 			// abstract function
    112 			throw new Error("Not implemented");
    113 		}
    114 
    115 		/** Set the char stream and reset the lexer */
    116 		public function set charStream(input:CharStream):void {
    117 			this.input = null;
    118 			reset();
    119 			this.input = input;
    120 		}
    121 
    122 		public function get charStream():CharStream {
    123 			return input;
    124 		}
    125 
    126 		public override function get sourceName():String {
    127 			return input.sourceName;
    128 		}
    129 
    130 		/** Currently does not support multiple emits per nextToken invocation
    131 		 *  for efficiency reasons.  Subclass and override this method and
    132 		 *  nextToken (to push tokens into a list and pull from that list rather
    133 		 *  than a single variable as this implementation does).
    134 		 */
    135 		public function emitToken(token:Token):void {
    136 			state.token = token;
    137 		}
    138 
    139 		/** The standard method called to automatically emit a token at the
    140 		 *  outermost lexical rule.  The token object should point into the
    141 		 *  char buffer start..stop.  If there is a text override in 'text',
    142 		 *  use that to set the token's text.  Override this method to emit
    143 		 *  custom Token objects.
    144 		 */
    145 		public function emit():Token {
    146 			var t:Token = CommonToken.createFromStream(input, state.type, state.channel, state.tokenStartCharIndex, charIndex - 1);
    147 			t.line = state.tokenStartLine;
    148 			t.text = state.text;
    149 			t.charPositionInLine = state.tokenStartCharPositionInLine;
    150 			emitToken(t);
    151 			return t;
    152 		}
    153 
    154 		public function matchString(s:String):void {
    155 	        var i:int = 0;
    156 	        while ( i<s.length ) {
    157 	            if ( input.LA(1) != s.charCodeAt(i) ) {
    158 					if ( state.backtracking>0 ) {
    159 						state.failed = true;
    160 						return;
    161 					}
    162 					var mte:MismatchedTokenException =
    163 						new MismatchedTokenException(s.charCodeAt(i), input);
    164 					recover(mte);
    165 					throw mte;
    166 	            }
    167 	            i++;
    168 	            input.consume();
    169 				state.failed = false;
    170 	        }
    171 	    }
    172 
    173 	    public function matchAny():void {
    174 	        input.consume();
    175 	    }
    176 
    177 	    public function match(c:int):void {
    178 	        if ( input.LA(1)!=c ) {
    179 				if ( state.backtracking>0 ) {
    180 					state.failed = true;
    181 					return;
    182 				}
    183 				var mte:MismatchedTokenException =
    184 					new MismatchedTokenException(c, input);
    185 				recover(mte);  // don't really recover; just consume in lexer
    186 				throw mte;
    187 	        }
    188 	        input.consume();
    189 			state.failed = false;
    190 	    }
    191 
    192 	    public function matchRange(a:int, b:int):void
    193 		{
    194 	        if ( input.LA(1)<a || input.LA(1)>b ) {
    195 				if ( state.backtracking>0 ) {
    196 					state.failed = true;
    197 					return;
    198 				}
    199 	            var mre:MismatchedRangeException =
    200 					new MismatchedRangeException(a,b,input);
    201 				recover(mre);
    202 				throw mre;
    203 	        }
    204 	        input.consume();
    205 			state.failed = false;
    206 	    }
    207 
    208 	    public function get line():int {
    209 	        return input.line;
    210 	    }
    211 
    212 	    public function get charPositionInLine():int {
    213 	        return input.charPositionInLine;
    214 	    }
    215 
    216 		/** What is the index of the current character of lookahead? */
    217 		public function get charIndex():int {
    218 			return input.index;
    219 		}
    220 
    221 		/** Return the text matched so far for the current token or any
    222 		 *  text override.
    223 		 */
    224 		public function get text():String {
    225 			if ( state.text!=null ) {
    226 				return state.text;
    227 			}
    228 			return input.substring(state.tokenStartCharIndex, charIndex-1);
    229 		}
    230 
    231 		/** Set the complete text of this token; it wipes any previous
    232 		 *  changes to the text.
    233 		 */
    234 		public function set text(text:String):void {
    235 			state.text = text;
    236 		}
    237 
    238 		public override function reportError(e:RecognitionException):void {
    239 			displayRecognitionError(this.tokenNames, e);
    240 		}
    241 
    242 		public override function getErrorMessage(e:RecognitionException, tokenNames:Array):String {
    243 			var msg:String = null;
    244 			if ( e is MismatchedTokenException ) {
    245 				var mte:MismatchedTokenException = MismatchedTokenException(e);
    246 				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting "+getCharErrorDisplay(mte.expecting);
    247 			}
    248 			else if ( e is NoViableAltException ) {
    249 				var nvae:NoViableAltException = NoViableAltException(e);
    250 				// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    251 				// and "(decision="+nvae.decisionNumber+") and
    252 				// "state "+nvae.stateNumber
    253 				msg = "no viable alternative at character "+getCharErrorDisplay(e.c);
    254 			}
    255 			else if ( e is EarlyExitException ) {
    256 				var eee:EarlyExitException = EarlyExitException(e);
    257 				// for development, can add "(decision="+eee.decisionNumber+")"
    258 				msg = "required (...)+ loop did not match anything at character "+getCharErrorDisplay(e.c);
    259 			}
    260 			else if ( e is MismatchedNotSetException ) {
    261 				var mnse:MismatchedNotSetException = MismatchedNotSetException(e);
    262 				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mnse.expecting;
    263 			}
    264 			else if ( e is MismatchedSetException ) {
    265 				var mse:MismatchedSetException = MismatchedSetException(e);
    266 				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+mse.expecting;
    267 			}
    268 			else if ( e is MismatchedRangeException ) {
    269 				var mre:MismatchedRangeException = MismatchedRangeException(e);
    270 				msg = "mismatched character "+getCharErrorDisplay(e.c)+" expecting set "+
    271 					getCharErrorDisplay(mre.a)+".."+getCharErrorDisplay(mre.b);
    272 			}
    273 			else {
    274 				msg = super.getErrorMessage(e, tokenNames);
    275 			}
    276 			return msg;
    277 		}
    278 
    279 		public function getCharErrorDisplay(c:int):String {
    280 			var s:String = String.fromCharCode(c);
    281 			switch ( c ) {
    282 				case TokenConstants.EOF :
    283 					s = "<EOF>";
    284 					break;
    285 				case '\n' :
    286 					s = "\\n";
    287 					break;
    288 				case '\t' :
    289 					s = "\\t";
    290 					break;
    291 				case '\r' :
    292 					s = "\\r";
    293 					break;
    294 			}
    295 			return "'"+s+"'";
    296 		}
    297 
    298 		/** Lexers can normally match any char in it's vocabulary after matching
    299 		 *  a token, so do the easy thing and just kill a character and hope
    300 		 *  it all works out.  You can instead use the rule invocation stack
    301 		 *  to do sophisticated error recovery if you are in a fragment rule.
    302 		 *
    303 		 *  @return This method should return the exception it was provided as an
    304 		 *  argument.  This differs from the Java runtime so that an exception variable
    305 		 *  does not need to be declared in the generated code, thus reducing a large
    306 		 *  number of compiler warnings in generated code.
    307 		 */
    308 		public function recover(re:RecognitionException):RecognitionException {
    309 			input.consume();
    310 			return re;
    311 		}
    312 
    313 		public function traceIn(ruleName:String, ruleIndex:int):void {
    314 			var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
    315 			super.traceInSymbol(ruleName, ruleIndex, inputSymbol);
    316 		}
    317 
    318 		public function traceOut(ruleName:String, ruleIndex:int):void {
    319 			var inputSymbol:String = String.fromCharCode(input.LT(1))+" line="+ line +":"+ charPositionInLine;
    320 			super.traceOutSymbol(ruleName, ruleIndex, inputSymbol);
    321 		}
    322 	}
    323 }