Home | History | Annotate | Download | only in src
/** \file
 *
 * Base implementation of an ANTLR 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are lexer specific; the
 * rest keep the usual default implementation of the base recognizer.
 */
     10 
     11 // [The "BSD licence"]
     12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
     13 // http://www.temporal-wave.com
     14 // http://www.linkedin.com/in/jimidle
     15 //
     16 // All rights reserved.
     17 //
     18 // Redistribution and use in source and binary forms, with or without
     19 // modification, are permitted provided that the following conditions
     20 // are met:
     21 // 1. Redistributions of source code must retain the above copyright
     22 //    notice, this list of conditions and the following disclaimer.
     23 // 2. Redistributions in binary form must reproduce the above copyright
     24 //    notice, this list of conditions and the following disclaimer in the
     25 //    documentation and/or other materials provided with the distribution.
     26 // 3. The name of the author may not be used to endorse or promote products
     27 //    derived from this software without specific prior written permission.
     28 //
     29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     39 
     40 #include    <antlr3lexer.h>
     41 
     42 static void					mTokens						(pANTLR3_LEXER lexer);
     43 static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
     44 static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
     45 static void					popCharStream				(pANTLR3_LEXER lexer);
     46 
     47 static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
     48 static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
     49 static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
     50 static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
     51 static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
     52 static void					matchAny					(pANTLR3_LEXER lexer);
     53 static void					recover						(pANTLR3_LEXER lexer);
     54 static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
     55 static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
     56 static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
     57 static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
     58 static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);
     59 
     60 static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
     61 static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
     62 static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
     63 static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
     64 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
     65 
     66 static void					reset						(pANTLR3_BASE_RECOGNIZER rec);
     67 
     68 static void					freeLexer					(pANTLR3_LEXER lexer);
     69 
     70 
     71 ANTLR3_API pANTLR3_LEXER
     72 antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
     73 {
     74     pANTLR3_LEXER   lexer;
     75     pANTLR3_COMMON_TOKEN	specialT;
     76 
     77 	/* Allocate memory
     78 	*/
     79 	lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
     80 
     81 	if	(lexer == NULL)
     82 	{
     83 		return	NULL;
     84 	}
     85 
     86 	/* Now we need to create the base recognizer
     87 	*/
     88 	lexer->rec	    =  antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
     89 
     90 	if	(lexer->rec == NULL)
     91 	{
     92 		lexer->free(lexer);
     93 		return	NULL;
     94 	}
     95 	lexer->rec->super  =  lexer;
     96 
     97 	lexer->rec->displayRecognitionError	    = displayRecognitionError;
     98 	lexer->rec->reportError					= reportError;
     99 	lexer->rec->reset						= reset;
    100 	lexer->rec->getCurrentInputSymbol		= getCurrentInputSymbol;
    101 	lexer->rec->getMissingSymbol			= getMissingSymbol;
    102 
    103 	/* Now install the token source interface
    104 	*/
    105 	if	(lexer->rec->state->tokSource == NULL)
    106 	{
    107 		lexer->rec->state->tokSource	= (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
    108 
    109 		if	(lexer->rec->state->tokSource == NULL)
    110 		{
    111 			lexer->rec->free(lexer->rec);
    112 			lexer->free(lexer);
    113 
    114 			return	NULL;
    115 		}
    116 		lexer->rec->state->tokSource->super    =  lexer;
    117 
    118 		/* Install the default nextToken() method, which may be overridden
    119 		 * by generated code, or by anything else in fact.
    120 		 */
    121 		lexer->rec->state->tokSource->nextToken	    =  nextToken;
    122 		lexer->rec->state->tokSource->strFactory    = NULL;
    123 
    124 		lexer->rec->state->tokFactory				= NULL;
    125 	}
    126 
    127     /* Install the lexer API
    128      */
    129     lexer->setCharStream			=  setCharStream;
    130     lexer->mTokens					= (void (*)(void *))(mTokens);
    131     lexer->setCharStream			=  setCharStream;
    132     lexer->pushCharStream			=  pushCharStream;
    133     lexer->popCharStream			=  popCharStream;
    134     lexer->emit						=  emit;
    135     lexer->emitNew					=  emitNew;
    136     lexer->matchs					=  matchs;
    137     lexer->matchc					=  matchc;
    138     lexer->matchRange				=  matchRange;
    139     lexer->matchAny					=  matchAny;
    140     lexer->recover					=  recover;
    141     lexer->getLine					=  getLine;
    142     lexer->getCharIndex				=  getCharIndex;
    143     lexer->getCharPositionInLine    =  getCharPositionInLine;
    144     lexer->getText					=  getText;
    145     lexer->free						=  freeLexer;
    146 
    147     /* Initialise the eof token
    148      */
    149     specialT					= &(lexer->rec->state->tokSource->eofToken);
    150     antlr3SetTokenAPI	  (specialT);
    151     specialT->setType	  (specialT, ANTLR3_TOKEN_EOF);
    152     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
    153     specialT->strFactory        = NULL;
    154 	specialT->textState			= ANTLR3_TEXT_NONE;
    155 	specialT->custom			= NULL;
    156 	specialT->user1				= 0;
    157 	specialT->user2				= 0;
    158 	specialT->user3				= 0;
    159 
    160 	// Initialize the skip token.
    161 	//
    162     specialT					= &(lexer->rec->state->tokSource->skipToken);
    163     antlr3SetTokenAPI	  (specialT);
    164     specialT->setType	  (specialT, ANTLR3_TOKEN_INVALID);
    165     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
    166     specialT->strFactory        = NULL;
    167 	specialT->custom			= NULL;
    168 	specialT->user1				= 0;
    169 	specialT->user2				= 0;
    170 	specialT->user3				= 0;
    171     return  lexer;
    172 }
    173 
    174 static void
    175 reset	(pANTLR3_BASE_RECOGNIZER rec)
    176 {
    177     pANTLR3_LEXER   lexer;
    178 
    179     lexer   = rec->super;
    180 
    181     lexer->rec->state->token			    = NULL;
    182     lexer->rec->state->type			    = ANTLR3_TOKEN_INVALID;
    183     lexer->rec->state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
    184     lexer->rec->state->tokenStartCharIndex	    = -1;
    185     lexer->rec->state->tokenStartCharPositionInLine = -1;
    186     lexer->rec->state->tokenStartLine		    = -1;
    187 
    188     lexer->rec->state->text	                    = NULL;
    189 
    190     // OK - that's all hunky dory, but we may well have had
    191     // a token factory that needs a reset. Do that here
    192     //
    193     if  (lexer->rec->state->tokFactory != NULL)
    194     {
    195         lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
    196     }
    197 }
    198 
    199 ///
    200 /// \brief
    201 /// Returns the next available token from the current input stream.
    202 ///
    203 /// \param toksource
    204 /// Points to the implementation of a token source. The lexer is
    205 /// addressed by the super structure pointer.
    206 ///
    207 /// \returns
    208 /// The next token in the current input stream or the EOF token
    209 /// if there are no more tokens.
    210 ///
    211 /// \remarks
    212 /// Write remarks for nextToken here.
    213 ///
    214 /// \see nextToken
    215 ///
    216 ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
    217 nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
    218 {
    219     pANTLR3_LEXER                   lexer;
    220     pANTLR3_RECOGNIZER_SHARED_STATE state;
    221     pANTLR3_INPUT_STREAM            input;
    222     pANTLR3_INT_STREAM              istream;
    223 
    224     lexer   = (pANTLR3_LEXER)(toksource->super);
    225     state   = lexer->rec->state;
    226     input   = lexer->input;
    227     istream = input->istream;
    228 
    229     /// Loop until we get a non skipped token or EOF
    230     ///
    231     for	(;;)
    232     {
    233         // Get rid of any previous token (token factory takes care of
    234         // any de-allocation when this token is finally used up.
    235         //
    236         state->token		    = NULL;
    237         state->error		    = ANTLR3_FALSE;	    // Start out without an exception
    238         state->failed		    = ANTLR3_FALSE;
    239 
    240         // Now call the matching rules and see if we can generate a new token
    241         //
    242         for	(;;)
    243         {
    244             // Record the start of the token in our input stream.
    245             //
    246             state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
    247             state->tokenStartCharIndex	            = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
    248             state->tokenStartCharPositionInLine     = input->charPositionInLine;
    249             state->tokenStartLine		    = input->line;
    250             state->text			            = NULL;
    251             state->custom                           = NULL;
    252             state->user1                            = 0;
    253             state->user2                            = 0;
    254             state->user3                            = 0;
    255 
    256             if  (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
    257             {
    258                 // Reached the end of the current stream, nothing more to do if this is
    259                 // the last in the stack.
    260                 //
    261                 pANTLR3_COMMON_TOKEN    teof = &(toksource->eofToken);
    262 
    263                 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
    264                 teof->setStopIndex  (teof, lexer->getCharIndex(lexer));
    265                 teof->setLine	    (teof, lexer->getLine(lexer));
    266                 teof->factoryMade = ANTLR3_TRUE;	// This isn't really manufactured but it stops things from trying to free it
    267                 return  teof;
    268             }
    269 
    270             state->token		= NULL;
    271             state->error		= ANTLR3_FALSE;	    // Start out without an exception
    272             state->failed		= ANTLR3_FALSE;
    273 
    274             // Call the generated lexer, see if it can get a new token together.
    275             //
    276             lexer->mTokens(lexer->ctx);
    277 
    278             if  (state->error  == ANTLR3_TRUE)
    279             {
    280                 // Recognition exception, report it and try to recover.
    281                 //
    282                 state->failed	    = ANTLR3_TRUE;
    283                 lexer->rec->reportError(lexer->rec);
    284                 lexer->recover(lexer);
    285             }
    286             else
    287             {
    288                 if (state->token == NULL)
    289                 {
    290                     // Emit the real token, which adds it in to the token stream basically
    291                     //
    292                     emit(lexer);
    293                 }
    294                 else if	(state->token ==  &(toksource->skipToken))
    295                 {
    296                     // A real token could have been generated, but "Computer say's naaaaah" and it
    297                     // it is just something we need to skip altogether.
    298                     //
    299                     continue;
    300                 }
    301 
    302                 // Good token, not skipped, not EOF token
    303                 //
    304                 return  state->token;
    305             }
    306         }
    307     }
    308 }
    309 
    310 /**
    311  * \brief
    312  * Default implementation of the nextToken() call for a lexer.
    313  *
    314  * \param toksource
    315  * Points to the implementation of a token source. The lexer is
    316  * addressed by the super structure pointer.
    317  *
    318  * \returns
    319  * The next token in the current input stream or the EOF token
    320  * if there are no more tokens in any input stream in the stack.
    321  *
    322  * Write detailed description for nextToken here.
    323  *
    324  * \remarks
    325  * Write remarks for nextToken here.
    326  *
    327  * \see nextTokenStr
    328  */
    329 static pANTLR3_COMMON_TOKEN
    330 nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
    331 {
    332 	pANTLR3_COMMON_TOKEN tok;
    333 
    334 	// Find the next token in the current stream
    335 	//
    336 	tok = nextTokenStr(toksource);
    337 
    338 	// If we got to the EOF token then switch to the previous
    339 	// input stream if there were any and just return the
    340 	// EOF if there are none. We must check the next token
    341 	// in any outstanding input stream we pop into the active
    342 	// role to see if it was sitting at EOF after PUSHing the
    343 	// stream we just consumed, otherwise we will return EOF
    344 	// on the reinstalled input stream, when in actual fact
    345 	// there might be more input streams to POP before the
    346 	// real EOF of the whole logical inptu stream. Hence we
    347 	// use a while loop here until we find somethign in the stream
    348 	// that isn't EOF or we reach the actual end of the last input
    349 	// stream on the stack.
    350 	//
    351 	while	(tok->type == ANTLR3_TOKEN_EOF)
    352 	{
    353 		pANTLR3_LEXER   lexer;
    354 
    355 		lexer   = (pANTLR3_LEXER)(toksource->super);
    356 
    357 		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
    358 		{
    359 			// We have another input stream in the stack so we
    360 			// need to revert to it, then resume the loop to check
    361 			// it wasn't sitting at EOF itself.
    362 			//
    363 			lexer->popCharStream(lexer);
    364 			tok = nextTokenStr(toksource);
    365 		}
    366 		else
    367 		{
    368 			// There were no more streams on the input stack
    369 			// so this EOF is the 'real' logical EOF for
    370 			// the input stream. So we just exit the loop and
    371 			// return the EOF we have found.
    372 			//
    373 			break;
    374 		}
    375 
    376 	}
    377 
    378 	// return whatever token we have, which may be EOF
    379 	//
    380 	return  tok;
    381 }
    382 
    383 ANTLR3_API pANTLR3_LEXER
    384 antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
    385 {
    386     pANTLR3_LEXER   lexer;
    387 
    388     // Create a basic lexer first
    389     //
    390     lexer   = antlr3LexerNew(sizeHint, state);
    391 
    392     if	(lexer != NULL)
    393     {
    394 		// Install the input stream and reset the lexer
    395 		//
    396 		setCharStream(lexer, input);
    397     }
    398 
    399     return  lexer;
    400 }
    401 
    402 static void mTokens	    (pANTLR3_LEXER lexer)
    403 {
    404     if	(lexer)	    // Fool compiler, avoid pragmas
    405     {
    406 		ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
    407     }
    408 }
    409 
    410 static void
    411 reportError		    (pANTLR3_BASE_RECOGNIZER rec)
    412 {
    413     // Indicate this recognizer had an error while processing.
    414 	//
    415 	rec->state->errorCount++;
    416 
    417     rec->displayRecognitionError(rec, rec->state->tokenNames);
    418 }
    419 
    420 #ifdef	ANTLR3_WINDOWS
    421 #pragma warning( disable : 4100 )
    422 #endif
    423 
    424 /** Default lexer error handler (works for 8 bit streams only!!!)
    425  */
    426 static void
    427 displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
    428 {
    429     pANTLR3_LEXER			lexer;
    430 	pANTLR3_EXCEPTION	    ex;
    431 	pANTLR3_STRING			ftext;
    432 
    433     lexer   = (pANTLR3_LEXER)(recognizer->super);
    434 	ex		= lexer->rec->state->exception;
    435 
    436 	// See if there is a 'filename' we can use
    437     //
    438     if	(ex->name == NULL)
    439     {
    440 		ANTLR3_FPRINTF(stderr, "-unknown source-(");
    441     }
    442     else
    443     {
    444 		ftext = ex->streamName->to8(ex->streamName);
    445 		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
    446     }
    447 
    448     ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
    449     ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
    450 						ex->type,
    451 						(pANTLR3_UINT8)	   (ex->message),
    452 					    ex->charPositionInLine+1
    453 		    );
    454 	{
    455 		ANTLR3_INT32	width;
    456 
    457 		width	= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
    458 
    459 		if	(width >= 1)
    460 		{
    461 			if	(isprint(ex->c))
    462 			{
    463 				ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
    464 			}
    465 			else
    466 			{
    467 				ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
    468 			}
    469 			ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
    470 		}
    471 		else
    472 		{
    473 			ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
    474 			ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
    475 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
    476 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
    477 								);
    478 			width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
    479 
    480 			if	(width >= 1)
    481 			{
    482 				ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
    483 			}
    484 			else
    485 			{
    486 				ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
    487 			}
    488 		}
    489 	}
    490 }
    491 
    492 static void setCharStream   (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
    493 {
    494     /* Install the input interface
    495      */
    496     lexer->input	= input;
    497 
    498     /* We may need a token factory for the lexer; we don't destroy any existing factory
    499      * until the lexer is destroyed, as people may still be using the tokens it produced.
    500      * TODO: Later I will provide a dup() method for a token so that it can extract itself
    501      * out of the factory.
    502      */
    503     if	(lexer->rec->state->tokFactory == NULL)
    504     {
    505 	lexer->rec->state->tokFactory	= antlr3TokenFactoryNew(input);
    506     }
    507     else
    508     {
    509 	/* When the input stream is being changed on the fly, rather than
    510 	 * at the start of a new lexer, then we must tell the tokenFactory
    511 	 * which input stream to adorn the tokens with so that when they
    512 	 * are asked to provide their original input strings they can
    513 	 * do so from the correct text stream.
    514 	 */
    515 	lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
    516     }
    517 
    518     /* Propagate the string factory so that we preserve the encoding form from
    519      * the input stream.
    520      */
    521     if	(lexer->rec->state->tokSource->strFactory == NULL)
    522     {
    523         lexer->rec->state->tokSource->strFactory	= input->strFactory;
    524 
    525         // Set the newly acquired string factory up for our pre-made tokens
    526         // for EOF.
    527         //
    528         if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
    529         {
    530             lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
    531         }
    532     }
    533 
    534     /* This is a lexer, install the appropriate exception creator
    535      */
    536     lexer->rec->exConstruct = antlr3RecognitionExceptionNew;
    537 
    538     /* Set the current token to nothing
    539      */
    540     lexer->rec->state->token		= NULL;
    541     lexer->rec->state->text			= NULL;
    542     lexer->rec->state->tokenStartCharIndex	= -1;
    543 
    544     /* Copy the name of the char stream to the token source
    545      */
    546     lexer->rec->state->tokSource->fileName = input->fileName;
    547 }
    548 
    549 /*!
    550  * \brief
    551  * Change to a new input stream, remembering the old one.
    552  *
    553  * \param lexer
    554  * Pointer to the lexer instance to switch input streams for.
    555  *
    556  * \param input
    557  * New input stream to install as the current one.
    558  *
    559  * Switches the current character input stream to
    560  * a new one, saving the old one, which we will revert to at the end of this
    561  * new one.
    562  */
    563 static void
    564 pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
    565 {
    566 	// Do we need a new input stream stack?
    567 	//
    568 	if	(lexer->rec->state->streams == NULL)
    569 	{
    570 		// This is the first call to stack a new
    571 		// stream and so we must create the stack first.
    572 		//
    573 		lexer->rec->state->streams = antlr3StackNew(0);
    574 
    575 		if  (lexer->rec->state->streams == NULL)
    576 		{
    577 			// Could not do this, we just fail to push it.
    578 			// TODO: Consider if this is what we want to do, but then
    579 			//       any programmer can override this method to do something else.
    580 			return;
    581 		}
    582 	}
    583 
    584 	// We have a stack, so we can save the current input stream
    585 	// into it.
    586 	//
    587 	lexer->input->istream->mark(lexer->input->istream);
    588 	lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
    589 
    590 	// And now we can install this new one
    591 	//
    592 	lexer->setCharStream(lexer, input);
    593 }
    594 
    595 /*!
    596  * \brief
    597  * Stops using the current input stream and reverts to any prior
    598  * input stream on the stack.
    599  *
    600  * \param lexer
    601  * Description of parameter lexer.
    602  *
    603  * Pointer to a function that abandons the current input stream, whether it
    604  * is empty or not and reverts to the previous stacked input stream.
    605  *
    606  * \remark
    607  * The function fails silently if there are no prior input streams.
    608  */
    609 static void
    610 popCharStream   (pANTLR3_LEXER lexer)
    611 {
    612     pANTLR3_INPUT_STREAM input;
    613 
    614     // If we do not have a stream stack or we are already at the
    615     // stack bottom, then do nothing.
    616     //
    617     if	(lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
    618     {
    619 	// We just leave the current stream to its fate, we do not close
    620 	// it or anything as we do not know what the programmer intended
    621 	// for it. This method can always be overridden of course.
    622 	// So just find out what was currently saved on the stack and use
    623 	// that now, then pop it from the stack.
    624 	//
    625 	input	= (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
    626 	lexer->rec->state->streams->pop(lexer->rec->state->streams);
    627 
    628 	// Now install the stream as the current one.
    629 	//
    630 	lexer->setCharStream(lexer, input);
    631 	lexer->input->istream->rewindLast(lexer->input->istream);
    632     }
    633     return;
    634 }
    635 
    636 static void emitNew	    (pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token)
    637 {
    638     lexer->rec->state->token    = token;	/* Voila!   */
    639 }
    640 
    641 static pANTLR3_COMMON_TOKEN
    642 emit	    (pANTLR3_LEXER lexer)
    643 {
    644     pANTLR3_COMMON_TOKEN	token;
    645 
    646     /* We could check pointers to token factories and so on, but
    647     * we are in code that we want to run as fast as possible
    648     * so we are not checking any errors. So make sure you have installed an input stream before
    649     * trying to emit a new token.
    650     */
    651     token   = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
    652 
    653     /* Install the supplied information, and some other bits we already know
    654     * get added automatically, such as the input stream it is associated with
    655     * (though it can all be overridden of course)
    656     */
    657     token->type		    = lexer->rec->state->type;
    658     token->channel	    = lexer->rec->state->channel;
    659     token->start	    = lexer->rec->state->tokenStartCharIndex;
    660     token->stop		    = lexer->getCharIndex(lexer) - 1;
    661     token->line		    = lexer->rec->state->tokenStartLine;
    662     token->charPosition	= lexer->rec->state->tokenStartCharPositionInLine;
    663 
    664     if	(lexer->rec->state->text != NULL)
    665     {
    666         token->textState	    = ANTLR3_TEXT_STRING;
    667         token->tokText.text	    = lexer->rec->state->text;
    668     }
    669     else
    670     {
    671         token->textState	= ANTLR3_TEXT_NONE;
    672     }
    673     token->lineStart	= lexer->input->currentLine;
    674     token->user1	= lexer->rec->state->user1;
    675     token->user2	= lexer->rec->state->user2;
    676     token->user3	= lexer->rec->state->user3;
    677     token->custom	= lexer->rec->state->custom;
    678 
    679     lexer->rec->state->token	    = token;
    680 
    681     return  token;
    682 }
    683 
    684 /**
    685  * Free the resources allocated by a lexer
    686  */
    687 static void
    688 freeLexer    (pANTLR3_LEXER lexer)
    689 {
    690 	// This may have ben a delegate or delegator lexer, in which case the
    691 	// state may already have been freed (and set to NULL therefore)
    692 	// so we ignore the state if we don't have it.
    693 	//
    694 	if	(lexer->rec->state != NULL)
    695 	{
    696 		if	(lexer->rec->state->streams != NULL)
    697 		{
    698 			lexer->rec->state->streams->free(lexer->rec->state->streams);
    699 		}
    700 		if	(lexer->rec->state->tokFactory != NULL)
    701 		{
    702 			lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
    703 			lexer->rec->state->tokFactory = NULL;
    704 		}
    705 		if	(lexer->rec->state->tokSource != NULL)
    706 		{
    707 			ANTLR3_FREE(lexer->rec->state->tokSource);
    708 			lexer->rec->state->tokSource = NULL;
    709 		}
    710 	}
    711 	if	(lexer->rec != NULL)
    712 	{
    713 		lexer->rec->free(lexer->rec);
    714 		lexer->rec = NULL;
    715 	}
    716 	ANTLR3_FREE(lexer);
    717 }
    718 
    719 /** Implementation of matchs for the lexer, overrides any
    720  *  base implementation in the base recognizer.
    721  *
    722  *  \remark
    723  *  Note that the generated code lays down arrays of ints for constant
    724  *  strings so that they are int UTF32 form!
    725  */
    726 static ANTLR3_BOOLEAN
    727 matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
    728 {
    729 	while   (*string != ANTLR3_STRING_TERMINATOR)
    730 	{
    731 		if  (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
    732 		{
    733 			if	(lexer->rec->state->backtracking > 0)
    734 			{
    735 				lexer->rec->state->failed = ANTLR3_TRUE;
    736 				return ANTLR3_FALSE;
    737 			}
    738 
    739 			lexer->rec->exConstruct(lexer->rec);
    740 			lexer->rec->state->failed	 = ANTLR3_TRUE;
    741 
    742 			/* TODO: Implement exception creation more fully perhaps
    743 			 */
    744 			lexer->recover(lexer);
    745 			return  ANTLR3_FALSE;
    746 		}
    747 
    748 		/* Matched correctly, do consume it
    749 		 */
    750 		lexer->input->istream->consume(lexer->input->istream);
    751 		string++;
    752 
    753 		/* Reset any failed indicator
    754 		 */
    755 		lexer->rec->state->failed = ANTLR3_FALSE;
    756 	}
    757 
    758 
    759 	return  ANTLR3_TRUE;
    760 }
    761 
    762 /** Implementation of matchc for the lexer, overrides any
    763  *  base implementation in the base recognizer.
    764  *
    765  *  \remark
    766  *  Note that the generated code lays down arrays of ints for constant
    767  *  strings so that they are int UTF32 form!
    768  */
    769 static ANTLR3_BOOLEAN
    770 matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
    771 {
    772 	if	(lexer->input->istream->_LA(lexer->input->istream, 1) == c)
    773 	{
    774 		/* Matched correctly, do consume it
    775 		 */
    776 		lexer->input->istream->consume(lexer->input->istream);
    777 
    778 		/* Reset any failed indicator
    779 		 */
    780 		lexer->rec->state->failed = ANTLR3_FALSE;
    781 
    782 		return	ANTLR3_TRUE;
    783 	}
    784 
    785 	/* Failed to match, exception and recovery time.
    786 	 */
    787 	if	(lexer->rec->state->backtracking > 0)
    788 	{
    789 		lexer->rec->state->failed  = ANTLR3_TRUE;
    790 		return	ANTLR3_FALSE;
    791 	}
    792 
    793 	lexer->rec->exConstruct(lexer->rec);
    794 
    795 	/* TODO: Implement exception creation more fully perhaps
    796 	 */
    797 	lexer->recover(lexer);
    798 
    799 	return  ANTLR3_FALSE;
    800 }
    801 
    802 /** Implementation of match range for the lexer, overrides any
    803  *  base implementation in the base recognizer.
    804  *
    805  *  \remark
    806  *  Note that the generated code lays down arrays of ints for constant
    807  *  strings so that they are int UTF32 form!
    808  */
    809 static ANTLR3_BOOLEAN
    810 matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
    811 {
    812     ANTLR3_UCHAR    c;
    813 
    814     /* What is in the stream at the moment?
    815      */
    816     c	= lexer->input->istream->_LA(lexer->input->istream, 1);
    817     if	( c >= low && c <= high)
    818     {
    819 	/* Matched correctly, consume it
    820 	 */
    821 	lexer->input->istream->consume(lexer->input->istream);
    822 
    823 	/* Reset any failed indicator
    824 	 */
    825 	lexer->rec->state->failed = ANTLR3_FALSE;
    826 
    827 	return	ANTLR3_TRUE;
    828     }
    829 
    830     /* Failed to match, execption and recovery time.
    831      */
    832 
    833     if	(lexer->rec->state->backtracking > 0)
    834     {
    835 	lexer->rec->state->failed  = ANTLR3_TRUE;
    836 	return	ANTLR3_FALSE;
    837     }
    838 
    839     lexer->rec->exConstruct(lexer->rec);
    840 
    841     /* TODO: Implement exception creation more fully
    842      */
    843     lexer->recover(lexer);
    844 
    845     return  ANTLR3_FALSE;
    846 }
    847 
    848 static void
    849 matchAny	    (pANTLR3_LEXER lexer)
    850 {
    851     lexer->input->istream->consume(lexer->input->istream);
    852 }
    853 
    854 static void
    855 recover	    (pANTLR3_LEXER lexer)
    856 {
    857     lexer->input->istream->consume(lexer->input->istream);
    858 }
    859 
    860 static ANTLR3_UINT32
    861 getLine	    (pANTLR3_LEXER lexer)
    862 {
    863     return  lexer->input->getLine(lexer->input);
    864 }
    865 
    866 static ANTLR3_UINT32
    867 getCharPositionInLine	(pANTLR3_LEXER lexer)
    868 {
    869     return  lexer->input->charPositionInLine;
    870 }
    871 
    872 static ANTLR3_MARKER	getCharIndex	    (pANTLR3_LEXER lexer)
    873 {
    874     return lexer->input->istream->index(lexer->input->istream);
    875 }
    876 
    877 static pANTLR3_STRING
    878 getText	    (pANTLR3_LEXER lexer)
    879 {
    880 	if (lexer->rec->state->text)
    881 	{
    882 		return	lexer->rec->state->text;
    883 
    884 	}
    885 	return  lexer->input->substr(
    886 									lexer->input,
    887 									lexer->rec->state->tokenStartCharIndex,
    888 									lexer->getCharIndex(lexer) - lexer->input->charByteSize
    889 							);
    890 
    891 }
    892 
    893 static void *
    894 getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
    895 {
    896 	return NULL;
    897 }
    898 
    899 static void *
    900 getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
    901 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
    902 {
    903 	return NULL;
    904 }
    905