/*
 * [The "BSD license"]
 * Copyright (c) 2011 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

     33 namespace Antlr.Runtime
     34 {
     35     public abstract class SlimLexer
     36         : BaseRecognizer
     37         , ITokenSource<SlimToken>
     38     {
     39         /** <summary>Where is the lexer drawing characters from?</summary> */
     40         protected SlimStringStream input;
     41         SlimToken _token;
     42         bool _emitted;
     43         bool _skip;
     44 
     45         public SlimLexer()
     46         {
     47         }
     48 
     49         public SlimLexer( ICharStream input )
     50         {
     51             this.input = (SlimStringStream)input;
     52         }
     53 
     54         public SlimLexer( ICharStream input, RecognizerSharedState state )
     55             : base( state )
     56         {
     57             this.input = (SlimStringStream)input;
     58         }
     59 
     60         #region Properties
     61         public string Text
     62         {
     63             /** <summary>Return the text matched so far for the current token or any text override.</summary> */
     64             get
     65             {
     66                 if ( state.text != null )
     67                 {
     68                     return state.text;
     69                 }
     70                 return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
     71             }
     72             /** <summary>Set the complete text of this token; it wipes any previous changes to the text.</summary> */
     73             set
     74             {
     75                 state.text = value;
     76             }
     77         }
     78         public int Line
     79         {
     80             get
     81             {
     82                 return input.Line;
     83             }
     84             set
     85             {
     86                 input.Line = value;
     87             }
     88         }
     89         public int CharPositionInLine
     90         {
     91             get
     92             {
     93                 return input.CharPositionInLine;
     94             }
     95             set
     96             {
     97                 input.CharPositionInLine = value;
     98             }
     99         }
    100         #endregion
    101 
    102         public override void Reset()
    103         {
    104             base.Reset(); // reset all recognizer state variables
    105             // wack Lexer state variables
    106             if ( input != null )
    107             {
    108                 input.Seek( 0 ); // rewind the input
    109             }
    110             if ( state == null )
    111             {
    112                 return; // no shared state work to do
    113             }
    114             _token = default( SlimToken );
    115             _emitted = false;
    116             _skip = false;
    117             //state.token = null;
    118             state.type = TokenTypes.Invalid;
    119             state.channel = TokenChannels.Default;
    120             state.tokenStartCharIndex = -1;
    121 #if TRACK_POSITION
    122             state.tokenStartCharPositionInLine = -1;
    123             state.tokenStartLine = -1;
    124 #endif
    125             state.text = null;
    126         }
    127 
    128         /** <summary>Return a token from this source; i.e., match a token on the char stream.</summary> */
    129         public virtual SlimToken NextToken()
    130         {
    131             for ( ; ; )
    132             {
    133                 _token = default( SlimToken );
    134                 _emitted = false;
    135                 _skip = false;
    136                 //state.token = null;
    137                 state.channel = TokenChannels.Default;
    138                 state.tokenStartCharIndex = input.Index;
    139 #if TRACK_POSITION
    140                 state.tokenStartCharPositionInLine = input.CharPositionInLine;
    141                 state.tokenStartLine = input.Line;
    142 #endif
    143                 state.text = null;
    144                 if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
    145                 {
    146                     return new SlimToken(TokenTypes.EndOfFile);
    147                 }
    148                 try
    149                 {
    150                     mTokens();
    151                     if ( _skip )
    152                     {
    153                         continue;
    154                     }
    155                     else if ( !_emitted )
    156                     {
    157                         Emit();
    158                     }
    159 
    160                     return _token;
    161                 }
    162                 catch ( NoViableAltException nva )
    163                 {
    164                     ReportError( nva );
    165                     Recover( nva ); // throw out current char and try again
    166                 }
    167                 catch ( RecognitionException re )
    168                 {
    169                     ReportError( re );
    170                     // match() routine has already called recover()
    171                 }
    172             }
    173         }
    174         IToken ITokenSource.NextToken()
    175         {
    176             return NextToken();
    177         }
    178 
    179         /** <summary>
    180          *  Instruct the lexer to skip creating a token for current lexer rule
    181          *  and look for another token.  nextToken() knows to keep looking when
    182          *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
    183          *  if token==null at end of any token rule, it creates one for you
    184          *  and emits it.
    185          *  </summary>
    186          */
    187         public virtual void Skip()
    188         {
    189             _skip = true;
    190             //state.token = Tokens.Skip;
    191         }
    192 
    193         /** <summary>This is the lexer entry point that sets instance var 'token'</summary> */
    194         public abstract void mTokens();
    195 
    196         public ICharStream CharStream
    197         {
    198             get
    199             {
    200                 return input;
    201             }
    202             /** <summary>Set the char stream and reset the lexer</summary> */
    203             set
    204             {
    205                 input = null;
    206                 Reset();
    207                 input = (SlimStringStream)value;
    208             }
    209         }
    210 
    211         public override string SourceName
    212         {
    213             get
    214             {
    215                 return input.SourceName;
    216             }
    217         }
    218 
    219         ///** <summary>
    220         // *  Currently does not support multiple emits per nextToken invocation
    221         // *  for efficiency reasons.  Subclass and override this method and
    222         // *  nextToken (to push tokens into a list and pull from that list rather
    223         // *  than a single variable as this implementation does).
    224         // *  </summary>
    225         // */
    226         //public void Emit( T token )
    227         //{
    228         //    _token = token;
    229         //}
    230 
    231         /** <summary>
    232          *  The standard method called to automatically emit a token at the
    233          *  outermost lexical rule.  The token object should point into the
    234          *  char buffer start..stop.  If there is a text override in 'text',
    235          *  use that to set the token's text.  Override this method to emit
    236          *  custom Token objects.
    237          *  </summary>
    238          *
    239          *  <remarks>
    240          *  If you are building trees, then you should also override
    241          *  Parser or TreeParser.getMissingSymbol().
    242          *  </remarks>
    243          */
    244         public void Emit()
    245         {
    246             _token = new SlimToken()
    247             {
    248                 //InputStream = input,
    249                 Type = state.type,
    250                 Channel = state.channel,
    251                 //CharPositionInLine = state.tokenStartCharPositionInLine,
    252                 //Line = state.tokenStartLine,
    253                 //Text = state.text
    254             };
    255             //Emit( t );
    256             //return t;
    257 
    258             //IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
    259             //t.Line = state.tokenStartLine;
    260             //t.Text = state.text;
    261             //t.CharPositionInLine = state.tokenStartCharPositionInLine;
    262             //Emit( t );
    263             //return t;
    264         }
    265 
    266         public void Match( string s )
    267         {
    268             int i = 0;
    269             while ( i < s.Length )
    270             {
    271                 if ( input.LA( 1 ) != s[i] )
    272                 {
    273                     if ( state.backtracking > 0 )
    274                     {
    275                         state.failed = true;
    276                         return;
    277                     }
    278                     MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
    279                     Recover( mte );
    280                     throw mte;
    281                 }
    282                 i++;
    283                 input.Consume();
    284                 state.failed = false;
    285             }
    286         }
    287 
    288         public void MatchAny()
    289         {
    290             input.Consume();
    291         }
    292 
    293         public void Match( int c )
    294         {
    295             if ( input.LA( 1 ) != c )
    296             {
    297                 if ( state.backtracking > 0 )
    298                 {
    299                     state.failed = true;
    300                     return;
    301                 }
    302                 MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
    303                 Recover( mte );  // don't really recover; just consume in lexer
    304                 throw mte;
    305             }
    306             input.Consume();
    307             state.failed = false;
    308         }
    309 
    310         public void MatchRange( int a, int b )
    311         {
    312             if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
    313             {
    314                 if ( state.backtracking > 0 )
    315                 {
    316                     state.failed = true;
    317                     return;
    318                 }
    319                 MismatchedRangeException mre =
    320                     new MismatchedRangeException( a, b, input );
    321                 Recover( mre );
    322                 throw mre;
    323             }
    324             input.Consume();
    325             state.failed = false;
    326         }
    327 
    328         /** <summary>What is the index of the current character of lookahead?</summary> */
    329         public int CharIndex
    330         {
    331             get
    332             {
    333                 return input.Index;
    334             }
    335         }
    336 
    337         public override void ReportError( RecognitionException e )
    338         {
    339             /** TODO: not thought about recovery in lexer yet.
    340              *
    341             // if we've already reported an error and have not matched a token
    342             // yet successfully, don't report any errors.
    343             if ( errorRecovery ) {
    344                 //System.err.print("[SPURIOUS] ");
    345                 return;
    346             }
    347             errorRecovery = true;
    348              */
    349 
    350             DisplayRecognitionError( this.TokenNames, e );
    351         }
    352 
    353         public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
    354         {
    355             string msg = null;
    356             if ( e is MismatchedTokenException )
    357             {
    358                 MismatchedTokenException mte = (MismatchedTokenException)e;
    359                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
    360             }
    361             else if ( e is NoViableAltException )
    362             {
    363                 NoViableAltException nvae = (NoViableAltException)e;
    364                 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    365                 // and "(decision="+nvae.decisionNumber+") and
    366                 // "state "+nvae.stateNumber
    367                 msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
    368             }
    369             else if ( e is EarlyExitException )
    370             {
    371                 EarlyExitException eee = (EarlyExitException)e;
    372                 // for development, can add "(decision="+eee.decisionNumber+")"
    373                 msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
    374             }
    375             else if ( e is MismatchedNotSetException )
    376             {
    377                 MismatchedNotSetException mse = (MismatchedNotSetException)e;
    378                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
    379             }
    380             else if ( e is MismatchedSetException )
    381             {
    382                 MismatchedSetException mse = (MismatchedSetException)e;
    383                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
    384             }
    385             else if ( e is MismatchedRangeException )
    386             {
    387                 MismatchedRangeException mre = (MismatchedRangeException)e;
    388                 msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
    389                       GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
    390             }
    391             else
    392             {
    393                 msg = base.GetErrorMessage( e, tokenNames );
    394             }
    395             return msg;
    396         }
    397 
    398         public virtual string GetCharErrorDisplay( int c )
    399         {
    400             string s = ( (char)c ).ToString();
    401             switch ( c )
    402             {
    403             case TokenTypes.EndOfFile:
    404                 s = "<EOF>";
    405                 break;
    406             case '\n':
    407                 s = "\\n";
    408                 break;
    409             case '\t':
    410                 s = "\\t";
    411                 break;
    412             case '\r':
    413                 s = "\\r";
    414                 break;
    415             }
    416             return "'" + s + "'";
    417         }
    418 
    419         /** <summary>
    420          *  Lexers can normally match any char in it's vocabulary after matching
    421          *  a token, so do the easy thing and just kill a character and hope
    422          *  it all works out.  You can instead use the rule invocation stack
    423          *  to do sophisticated error recovery if you are in a fragment rule.
    424          *  </summary>
    425          */
    426         public virtual void Recover( RecognitionException re )
    427         {
    428             //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
    429             //re.printStackTrace();
    430             input.Consume();
    431         }
    432 
    433         public virtual void TraceIn( string ruleName, int ruleIndex )
    434         {
    435             string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
    436             base.TraceIn( ruleName, ruleIndex, inputSymbol );
    437         }
    438 
    439         public virtual void TraceOut( string ruleName, int ruleIndex )
    440         {
    441             string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
    442             base.TraceOut( ruleName, ruleIndex, inputSymbol );
    443         }
    444     }
    445 }
    446