/*
 * [The "BSD license"]
 * Copyright (c) 2011 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace Antlr.Runtime
{
    /** <summary>
     *  Base class for lexers that draw characters from a SlimStringStream and
     *  produce SlimToken values. Concrete lexers supply mTokens().
     *  </summary>
     */
    public abstract class SlimLexer
        : BaseRecognizer
        , ITokenSource<SlimToken>
    {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected SlimStringStream input;

        /** <summary>The token built by the most recent call to Emit().</summary> */
        private SlimToken _token;

        /** <summary>
         *  Checked by NextToken() to decide whether it still has to call Emit().
         *  </summary>
         *
         *  <remarks>
         *  NOTE(review): nothing in this class ever sets this flag to true, so
         *  NextToken() always calls Emit() after a rule that was not skipped.
         *  Confirm whether Emit() was meant to set it.
         *  </remarks>
         */
        private bool _emitted;

        /** <summary>Set by Skip(); tells NextToken() to discard the current rule and keep looking.</summary> */
        private bool _skip;

        /** <summary>Create a lexer with no input; assign CharStream before lexing.</summary> */
        public SlimLexer()
        {
        }

        /** <summary>
         *  Create a lexer over the given stream. The stream is cast to
         *  SlimStringStream, so any other non-null ICharStream implementation
         *  throws InvalidCastException.
         *  </summary>
         */
        public SlimLexer( ICharStream input )
        {
            this.input = (SlimStringStream)input;
        }

        /** <summary>
         *  Create a lexer over the given stream using an existing shared state.
         *  The stream is cast to SlimStringStream, so any other non-null
         *  ICharStream implementation throws InvalidCastException.
         *  </summary>
         */
        public SlimLexer( ICharStream input, RecognizerSharedState state )
            : base( state )
        {
            this.input = (SlimStringStream)input;
        }

        #region Properties

        /** <summary>
         *  Get the text matched so far for the current token, or the text
         *  override if one has been set. Setting the value replaces the complete
         *  text of this token; it wipes any previous changes to the text.
         *  </summary>
         */
        public string Text
        {
            get
            {
                if ( state.text != null )
                {
                    return state.text;
                }

                // No override: slice the input from the token start up to the current position.
                int start = state.tokenStartCharIndex;
                return input.Substring( start, CharIndex - start );
            }
            set
            {
                state.text = value;
            }
        }

        /** <summary>Get or set the current line of the underlying input stream.</summary> */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** <summary>Get or set the current position within the line of the underlying input stream.</summary> */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }

        #endregion

        /** <summary>
         *  Reset the recognizer state, rewind the input (if there is one) and
         *  clear all per-token lexer state.
         *  </summary>
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables

            // whack Lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }

            if ( state == null )
            {
                return; // no shared state work to do
            }

            _token = default( SlimToken );
            _emitted = false;
            _skip = false;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
#if TRACK_POSITION
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
#endif
            state.text = null;
        }

        /** <summary>
         *  Return a token from this source; i.e., match a token on the char stream.
         *  </summary>
         *
         *  <remarks>
         *  Loops until a rule produces a token: rules that call Skip() and rules
         *  that fail with a RecognitionException (after the error is reported)
         *  cause another attempt. Returns an end-of-file token when the next
         *  character is CharStreamConstants.EndOfFile.
         *  </remarks>
         */
        public virtual SlimToken NextToken()
        {
            for ( ; ; )
            {
                // Start every attempt from a clean per-token state.
                _token = default( SlimToken );
                _emitted = false;
                _skip = false;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
#if TRACK_POSITION
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
#endif
                state.text = null;

                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    return new SlimToken( TokenTypes.EndOfFile );
                }

                try
                {
                    mTokens();

                    if ( _skip )
                    {
                        continue;
                    }
                    else if ( !_emitted )
                    {
                        Emit();
                    }

                    return _token;
                }
                catch ( NoViableAltException nva )
                {
                    ReportError( nva );
                    Recover( nva ); // throw out current char and try again
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    // match() routine has already called recover()
                }
            }
        }

        /** <summary>Non-generic token source entry point; forwards to NextToken().</summary> */
        IToken ITokenSource.NextToken()
        {
            return NextToken();
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for the current lexer rule
         *  and look for another token. NextToken() keeps looking when a lexer
         *  rule finishes with the skip flag set; otherwise it creates a token
         *  and returns it.
         *  </summary>
         */
        public virtual void Skip()
        {
            _skip = true;
        }

        /** <summary>
         *  This is the lexer entry point; NextToken() calls it to match one
         *  token on the char stream.
         *  </summary>
         */
        public abstract void mTokens();

        /** <summary>
         *  Get the char stream, or set the char stream and reset the lexer.
         *  The assigned stream is cast to SlimStringStream; any other non-null
         *  ICharStream implementation throws InvalidCastException.
         *  </summary>
         */
        public ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // Cast before touching any state: an incompatible stream then
                // throws without leaving the lexer reset and without an input.
                SlimStringStream stream = (SlimStringStream)value;

                // Clear the field first so Reset() does not rewind the old stream.
                input = null;
                Reset();
                input = stream;
            }
        }

        /** <summary>The source name reported by the underlying input stream.</summary> */
        public override string SourceName
        {
            get
            {
                return input.SourceName;
            }
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule. Builds the token from state.type and
         *  state.channel and stores it as the token NextToken() returns.
         *  </summary>
         *
         *  <remarks>
         *  Only the type and channel are recorded on the token; text and
         *  position are not copied here. This method is not virtual, so it
         *  cannot be overridden to emit a different token.
         *  </remarks>
         */
        public void Emit()
        {
            _token = new SlimToken()
            {
                Type = state.type,
                Channel = state.channel,
            };
        }

        /** <summary>
         *  Match the characters of <paramref name="s"/> one at a time against the input.
         *  </summary>
         *
         *  <remarks>
         *  On a mismatch while backtracking, sets state.failed and returns
         *  without consuming the offending character. Otherwise calls Recover()
         *  and throws a MismatchedTokenException.
         *  </remarks>
         */
        public void Match( string s )
        {
            for ( int i = 0; i < s.Length; i++ )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }

                    MismatchedTokenException mte = new MismatchedTokenException( s[i], input, TokenNames );
                    Recover( mte );
                    throw mte;
                }

                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consume the next character, whatever it is.</summary> */
        public void MatchAny()
        {
            input.Consume();
        }

        /** <summary>
         *  Match the single character <paramref name="c"/> against the input.
         *  </summary>
         *
         *  <remarks>
         *  On a mismatch while backtracking, sets state.failed and returns.
         *  Otherwise calls Recover() and throws a MismatchedTokenException.
         *  </remarks>
         */
        public void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedTokenException mte = new MismatchedTokenException( c, input, TokenNames );
                Recover( mte ); // don't really recover; just consume in lexer
                throw mte;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Match one character in the inclusive range
         *  <paramref name="a"/>..<paramref name="b"/> against the input.
         *  </summary>
         *
         *  <remarks>
         *  On a mismatch while backtracking, sets state.failed and returns.
         *  Otherwise calls Recover() and throws a MismatchedRangeException.
         *  </remarks>
         */
        public void MatchRange( int a, int b )
        {
            if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedRangeException mre = new MismatchedRangeException( a, b, input );
                Recover( mre );
                throw mre;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** <summary>Report a recognition error by displaying it with this lexer's token names.</summary> */
        public override void ReportError( RecognitionException e )
        {
            // TODO: not thought about recovery in lexer yet. A parser-style
            // guard that suppresses errors reported before another token has
            // been matched successfully (errorRecovery flag) is intentionally
            // not applied here; every error is displayed.
            DisplayRecognitionError( this.TokenNames, e );
        }

        /** <summary>
         *  Build the message for a lexer error, describing the offending
         *  character and, where the exception type carries it, what was
         *  expected. Exception types not handled here are delegated to the
         *  base implementation.
         *  </summary>
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            string msg = null;
            if ( e is MismatchedTokenException )
            {
                MismatchedTokenException mte = (MismatchedTokenException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }
            else if ( e is NoViableAltException )
            {
                // for development, the decision description, decision number and
                // state number of the exception can be added to the message
                msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is EarlyExitException )
            {
                // for development, the decision number of the exception can be added
                msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is MismatchedNotSetException )
            {
                MismatchedNotSetException mnse = (MismatchedNotSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mnse.Expecting;
            }
            else if ( e is MismatchedSetException )
            {
                MismatchedSetException mse = (MismatchedSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }
            else if ( e is MismatchedRangeException )
            {
                MismatchedRangeException mre = (MismatchedRangeException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                    GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
            }
            else
            {
                msg = base.GetErrorMessage( e, tokenNames );
            }

            return msg;
        }

        /** <summary>
         *  Return a single-quoted, printable form of the character
         *  <paramref name="c"/> for error messages: end of file becomes
         *  &lt;EOF&gt; and newline, tab and carriage return are shown as escapes.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
                case TokenTypes.EndOfFile:
                    s = "<EOF>";
                    break;
                case '\n':
                    s = "\\n";
                    break;
                case '\t':
                    s = "\\t";
                    break;
                case '\r':
                    s = "\\r";
                    break;
                default:
                    // Convert only when no special case applies, and do it unchecked so
                    // a value outside the char range truncates instead of throwing in a
                    // build compiled with overflow checking.
                    s = unchecked( (char)c ).ToString();
                    break;
            }

            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out. You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover( RecognitionException re )
        {
            input.Consume();
        }

        /** <summary>Trace entry into a rule, reporting the lookahead character and current position.</summary> */
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            base.TraceIn( ruleName, ruleIndex, DescribeLookahead() );
        }

        /** <summary>Trace exit from a rule, reporting the lookahead character and current position.</summary> */
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            base.TraceOut( ruleName, ruleIndex, DescribeLookahead() );
        }

        /** <summary>Format the next lookahead character with the current line and column for tracing.</summary> */
        private string DescribeLookahead()
        {
            // unchecked: a lookahead value outside the char range must not throw
            // when the assembly is compiled with overflow checking.
            return unchecked( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
        }
    }
}