Home | History | Annotate | Download | only in include
      1 ANTLR_BEGIN_NAMESPACE()
      2 
      3 template<class ImplTraits, class SuperType>
      4 ANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream()
      5 {
      6 	m_lastMarker = 0;
      7 	m_upper_case = false;
      8 }
      9 
     10 template<class ImplTraits, class SuperType>
     11 ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType	IntStream<ImplTraits, SuperType>::getSourceName()
     12 {
     13 	return m_streamName;
     14 }
     15 
     16 template<class ImplTraits, class SuperType>
     17 ANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName()
     18 {
     19 	return m_streamName;
     20 }
     21 
     22 template<class ImplTraits, class SuperType>
     23 ANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName() const
     24 {
     25 	return m_streamName;
     26 }
     27 
     28 template<class ImplTraits, class SuperType>
     29 ANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const
     30 {
     31 	return m_lastMarker;
     32 }
     33 
     34 template<class ImplTraits, class SuperType>
     35 ANTLR_INLINE void	IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag)
     36 {
     37 	m_upper_case = flag;
     38 }
     39 
     40 template<class ImplTraits, class SuperType>
     41 ANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super()
     42 {
     43 	return static_cast<SuperType*>(this);
     44 }
     45 
     46 template<class ImplTraits, class SuperType>
     47 void	IntStream<ImplTraits, SuperType>::consume()
     48 {
     49 	SuperType* input = this->get_super();
     50 
     51 	const ANTLR_UINT8* nextChar = input->get_nextChar();
     52 	const ANTLR_UINT8* data = input->get_data();
     53 	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
     54 
     55     if	( nextChar < ( data + sizeBuf ) )
     56     {	
     57 		/* Indicate one more character in this line
     58 		 */
     59 		input->inc_charPositionInLine();
     60 	
     61 		if  ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() )
     62 		{
     63 			/* Reset for start of a new line of input
     64 			 */
     65 			input->inc_line();
     66 			input->set_charPositionInLine(0);
     67 			input->set_currentLine(nextChar + 1);
     68 		}
     69 
     70 		/* Increment to next character position
     71 		 */
     72 		input->set_nextChar( nextChar + 1 );
     73     }
     74 }
     75 
     76 template<class ImplTraits, class SuperType>
     77 ANTLR_UINT32	IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la )
     78 {
     79 	SuperType* input = this->get_super();
     80 	const ANTLR_UINT8* nextChar = input->get_nextChar();
     81 	const ANTLR_UINT8* data = input->get_data();
     82 	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
     83 
     84     if	(( nextChar + la - 1) >= (data + sizeBuf))
     85     {
     86 		return	ANTLR_CHARSTREAM_EOF;
     87     }
     88     else
     89     {
     90 		if( !m_upper_case )
     91 			return	(ANTLR_UCHAR)(*(nextChar + la - 1));
     92 		else
     93 			return	(ANTLR_UCHAR)toupper(*(nextChar + la - 1));
     94     }
     95 }
     96 
     97 template<class ImplTraits, class SuperType>
     98 ANTLR_MARKER IntStream<ImplTraits, SuperType>::mark()
     99 {
    100 	LexState<ImplTraits>*	    state;
    101     SuperType* input = this->get_super();
    102 
    103     /* New mark point 
    104      */
    105     input->inc_markDepth();
    106 
    107     /* See if we are revisiting a mark as we can just reuse the vector
    108      * entry if we are, otherwise, we need a new one
    109      */
    110     if	(input->get_markDepth() > input->get_markers().size() )
    111     {	
    112 		input->get_markers().push_back( LexState<ImplTraits>() );
    113 		LexState<ImplTraits>& state_r = input->get_markers().back();
    114 		state = &state_r;
    115     }
    116     else
    117     {
    118 		LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 );
    119 		state	= &state_r;
    120 
    121 		/* Assume no errors for speed, it will just blow up if the table failed
    122 		 * for some reasons, hence lots of unit tests on the tables ;-)
    123 		 */
    124     }
    125 
    126     /* We have created or retrieved the state, so update it with the current
    127      * elements of the lexer state.
    128      */
    129     state->set_charPositionInLine( input->get_charPositionInLine() );
    130     state->set_currentLine( input->get_currentLine() );
    131     state->set_line( input->get_line() );
    132     state->set_nextChar( input->get_nextChar() );
    133 
    134     m_lastMarker = input->get_markDepth();
    135 
    136     /* And that's it
    137      */
    138     return  input->get_markDepth();
    139 }
    140 
    141 template<class ImplTraits, class SuperType>
    142 ANTLR_MARKER	IntStream<ImplTraits, SuperType>::index()
    143 {
    144 	SuperType* input = this->get_super();
    145 	return input->index_impl();
    146 }
    147 
    148 template<class ImplTraits, class SuperType>
    149 void	IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark)
    150 {
    151     SuperType* input = this->get_super();
    152 
    153     /* Perform any clean up of the marks
    154      */
    155     this->release(mark);
    156 
    157     /* Find the supplied mark state 
    158      */
    159 	ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 );
    160     typename ImplTraits::LexStateType&   state = input->get_markers().at( idx );
    161 
    162     /* Seek input pointer to the requested point (note we supply the void *pointer
    163      * to whatever is implementing the int stream to seek).
    164      */
    165 	this->seek( (ANTLR_MARKER)state.get_nextChar() );
    166     
    167     /* Reset to the reset of the information in the mark
    168      */
    169     input->set_charPositionInLine( state.get_charPositionInLine() );
    170     input->set_currentLine( state.get_currentLine() );
    171     input->set_line( state.get_line() );
    172     input->set_nextChar( state.get_nextChar() );
    173 
    174     /* And we are done
    175      */
    176 }
    177 
    178 template<class ImplTraits, class SuperType>
    179 void	IntStream<ImplTraits, SuperType>::rewindLast()
    180 {
    181 	this->rewind(m_lastMarker);
    182 }
    183 
    184 template<class ImplTraits, class SuperType>
    185 void	IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark)
    186 {
    187 	SuperType* input = this->get_super();
    188 
    189 	/* We don't do much here in fact as we never free any higher marks in
    190      * the hashtable as we just resuse any memory allocated for them.
    191      */
    192     input->set_markDepth( (ANTLR_UINT32)(mark - 1) );
    193 
    194 }
    195 
    196 template<class ImplTraits, class SuperType>
    197 void IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
    198 {
    199 }
    200 
    201 template<class ImplTraits, class SuperType>
    202 void	IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
    203 {
    204 	ANTLR_INT32   count;
    205 	SuperType* input = this->get_super();
    206 
    207 	ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar();
    208 	/* If the requested seek point is less than the current
    209 	* input point, then we assume that we are resetting from a mark
    210 	* and do not need to scan, but can just set to there.
    211 	*/
    212 	if	(seekPoint <= nextChar)
    213 	{
    214 		input->set_nextChar((ANTLR_UINT8*) seekPoint);
    215 	}
    216 	else
    217 	{
    218 		count	= (ANTLR_UINT32)(seekPoint - nextChar);
    219 
    220 		while (count--)
    221 		{
    222 			this->consume();
    223 		}
    224 	}
    225 }
    226 
    227 template<class ImplTraits, class SuperType>
    228 IntStream<ImplTraits, SuperType>::~IntStream()
    229 {
    230 }
    231 
    232 template<class ImplTraits, class SuperType>
    233 ANTLR_UINT32	EBCDIC_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la)
    234 {
    235 	// EBCDIC to ASCII conversion table
    236 	//
    237 	// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
    238 	// translation and the character tables are published all over the interweb.
    239 	// 
    240 	const ANTLR_UCHAR e2a[256] =
    241 	{
    242 		0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
    243 		0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    244 		0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
    245 		0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
    246 		0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
    247 		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 
    248 		0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
    249 		0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
    250 		0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
    251 		0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    252 		0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
    253 		0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
    254 		0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
    255 		0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    256 		0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
    257 		0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
    258 		0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    259 		0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
    260 		0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
    261 		0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
    262 		0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    263 		0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
    264 		0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
    265 		0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
    266 		0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    267 		0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
    268 		0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
    269 		0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
    270 		0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    271 		0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
    272 		0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    273 		0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
    274 	};
    275 
    276 	SuperType* input = this->get_super();
    277 
    278     if	(( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() ))
    279     {
    280         return	ANTLR_CHARSTREAM_EOF;
    281     }
    282     else
    283     {
    284         // Translate the required character via the constant conversion table
    285         //
    286         return	e2a[(*(input->get_nextChar() + la - 1))];
    287     }
    288 }
    289 
    290 template<class ImplTraits, class SuperType>
    291 void EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream()
    292 {
    293 	SuperType* super = this->get_super();
    294 	super->set_charByteSize(1);
    295 }
    296 
    297 template<class ImplTraits, class SuperType>
    298 ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
    299 {
    300 	return this->_LA(i, ClassForwarder< typename ImplTraits::Endianness >() );
    301 }
    302 
    303 template<class ImplTraits, class SuperType>
    304 void UTF16_IntStream<ImplTraits, SuperType>::consume()
    305 {
    306 	this->consume( ClassForwarder< typename ImplTraits::Endianness >() );
    307 }
    308 
    309 template<class ImplTraits, class SuperType>
    310 ANTLR_MARKER	UTF16_IntStream<ImplTraits, SuperType>::index()
    311 {
    312 	SuperType* input = this->get_super();
    313     return  (ANTLR_MARKER)(input->get_nextChar());
    314 }
    315 
    316 template<class ImplTraits, class SuperType>
    317 void UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
    318 {
    319 	SuperType* input = this->get_super();
    320 
    321 	// If the requested seek point is less than the current
    322 	// input point, then we assume that we are resetting from a mark
    323 	// and do not need to scan, but can just set to there as rewind will
    324     // reset line numbers and so on.
    325 	//
    326 	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
    327 	{
    328 		input->set_nextChar( seekPoint );
    329 	}
    330 	else
    331 	{
    332         // Call consume until we reach the asked for seek point or EOF
    333         //
    334         while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) )
    335 	    {
    336 			this->consume();
    337 	    }
    338 	}
    339 }
    340 
    341 template<class ImplTraits, class SuperType>
    342 void IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian)
    343 {
    344 	// We must install different UTF16 routines according to whether the input
    345 	// is the same endianess as the machine we are executing upon or not. If it is not
    346 	// then we must install methods that can convert the endianess on the fly as they go
    347 	//
    348 
    349 	if(machineBigEndian == true)
    350 	{
    351 		// Machine is Big Endian, if the input is also then install the 
    352 		// methods that do not access input by bytes and reverse them.
    353 		// Otherwise install endian aware methods.
    354 		//
    355 		if  (inputBigEndian == true) 
    356 		{
    357 			// Input is machine compatible
    358 			//
    359 			m_endian_spec = 1;
    360 		}
    361 		else
    362 		{
    363 			// Need to use methods that know that the input is little endian
    364 			//
    365 			m_endian_spec = 2;
    366 		}
    367 	}
    368 	else
    369 	{
    370 		// Machine is Little Endian, if the input is also then install the 
    371 		// methods that do not access input by bytes and reverse them.
    372 		// Otherwise install endian aware methods.
    373 		//
    374 		if  (inputBigEndian == false) 
    375 		{
    376 			// Input is machine compatible
    377 			//
    378 			m_endian_spec =  1;
    379 		}
    380 		else
    381 		{
    382 			// Need to use methods that know that the input is Big Endian
    383 			//
    384 			m_endian_spec	= 3;
    385 		}
    386 	}
    387 }
    388 
    389 template<class ImplTraits, class SuperType>
    390 void UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
    391 {
    392 	SuperType* super = this->get_super();
    393 	super->set_charByteSize(2);
    394 
    395 	this->findout_endian_spec( machineBigEndian, inputBigEndian );
    396 }
    397 
    398 template<class ImplTraits, class SuperType>
    399 ANTLR_UINT32 IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
    400 {
    401 	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
    402 	switch(m_endian_spec)
    403 	{
    404 	case 1:
    405 		return this->_LA(i, ClassForwarder<BYTE_AGNOSTIC>() );
    406 		break;
    407 	case 2:
    408 		return this->_LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
    409 		break;
    410 	case 3:
    411 		return this->_LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() );
    412 		break;
    413 	default:
    414 		break;
    415 	}
    416 	return 0;
    417 }
    418 
    419 template<class ImplTraits, class SuperType>
    420 void	IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
    421 {
    422 	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
    423 	switch(m_endian_spec)
    424 	{
    425 	case 1:
    426 		this->consume( ClassForwarder<BYTE_AGNOSTIC>() );
    427 		break;
    428 	case 2:
    429 		this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
    430 		break;
    431 	case 3:
    432 		this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() );
    433 		break;
    434 	default:
    435 		break;
    436 	}
    437 }
    438 
    439 template<class ImplTraits, class SuperType>
    440 ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
    441 {
    442 	SuperType* input;
    443     UTF32   ch;
    444     UTF32   ch2;
    445     UTF16*	nextChar;
    446 
    447     // Find the input interface and where we are currently pointing to
    448     // in the input stream
    449     //
    450 	input   = this->get_super;
    451 	nextChar    = input->get_nextChar();
    452 
    453     // If a positive offset then advance forward, else retreat
    454     //
    455     if  (la >= 0)
    456     {
    457         while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
    458         {
    459             // Advance our copy of the input pointer
    460             //
    461             // Next char in natural machine byte order
    462             //
    463             ch  = *nextChar++;
    464 
    465             // If we have a surrogate pair then we need to consume
    466             // a following valid LO surrogate.
    467             //
    468             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    469             {
    470                 // If the 16 bits following the high surrogate are in the source buffer...
    471                 //
    472                 if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
    473                 {
    474                     // Next character is in natural machine byte order
    475                     //
    476                     ch2 = *nextChar;
    477 
    478                     // If it's a valid low surrogate, consume it
    479                     //
    480                     if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    481                     {
    482                         // We consumed one 16 bit character
    483                         //
    484 						nextChar++;
    485                     }
    486                     // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    487                     // it.
    488                     //
    489                 } 
    490                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    491                 // it because the buffer ended
    492                 //
    493             }
    494             // Note that we did not check for an invalid low surrogate here, or that fact that the
    495             // lo surrogate was missing. We just picked out one 16 bit character unless the character
    496             // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
    497             //
    498         }
    499     }
    500     else
    501     {
    502         // We need to go backwards from our input point
    503         //
    504         while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
    505         {
    506             // Get the previous 16 bit character
    507             //
    508             ch = *--nextChar;
    509 
    510             // If we found a low surrogate then go back one more character if
    511             // the hi surrogate is there
    512             //
    513             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 
    514             {
    515                 ch2 = *(nextChar-1);
    516                 if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 
    517                 {
    518                     // Yes, there is a high surrogate to match it so decrement one more and point to that
    519                     //
    520                     nextChar--;
    521                 }
    522             }
    523         }
    524     }
    525 
    526     // Our local copy of nextChar is now pointing to either the correct character or end of file
    527     //
    528     // Input buffer size is always in bytes
    529     //
    530 	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
    531 	{
    532 		return	ANTLR_CHARSTREAM_EOF;
    533 	}
    534 	else
    535 	{
    536         // Pick up the next 16 character (native machine byte order)
    537         //
    538         ch = *nextChar++;
    539 
    540         // If we have a surrogate pair then we need to consume
    541         // a following valid LO surrogate.
    542         //
    543         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    544         {
    545             // If the 16 bits following the high surrogate are in the source buffer...
    546             //
    547             if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    548             {
    549                 // Next character is in natural machine byte order
    550                 //
    551                 ch2 = *nextChar;
    552 
    553                 // If it's a valid low surrogate, consume it
    554                 //
    555                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    556                 {
    557                     // Construct the UTF32 code point
    558                     //
    559                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
    560 								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
    561                 }
    562                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    563                 // it.
    564                 //
    565             } 
    566             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    567             // it because the buffer ended
    568             //
    569         }
    570     }
    571     return ch;
    572 }
    573 
    574 template<class ImplTraits, class SuperType>
    575 ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
    576 {
    577 	SuperType* input;
    578     UTF32           ch;
    579     UTF32           ch2;
    580     ANTLR_UCHAR*   nextChar;
    581 
    582     // Find the input interface and where we are currently pointing to
    583     // in the input stream
    584     //
    585 	input       = this->get_super();
    586     nextChar    = input->get_nextChar();
    587 
    588     // If a positive offset then advance forward, else retreat
    589     //
    590     if  (la >= 0)
    591     {
    592         while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
    593         {
    594             // Advance our copy of the input pointer
    595             //
    596             // Next char in Little Endian byte order
    597             //
    598             ch  = (*nextChar) + (*(nextChar+1) << 8);
    599             nextChar += 2;
    600 
    601             // If we have a surrogate pair then we need to consume
    602             // a following valid LO surrogate.
    603             //
    604             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    605             {
    606                 // If the 16 bits following the high surrogate are in the source buffer...
    607                 //
    608                 if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
    609                 {
    610                     // Next character is in little endian byte order
    611                     //
    612                     ch2 = (*nextChar) + (*(nextChar+1) << 8);
    613 
    614                     // If it's a valid low surrogate, consume it
    615                     //
    616                     if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    617                     {
    618                         // We consumed one 16 bit character
    619                         //
    620 						nextChar += 2;
    621                     }
    622                     // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    623                     // it.
    624                     //
    625                 } 
    626                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    627                 // it because the buffer ended
    628                 //
    629             }
    630             // Note that we did not check for an invalid low surrogate here, or that fact that the
    631             // lo surrogate was missing. We just picked out one 16 bit character unless the character
    632             // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
    633             //
    634         }
    635     }
    636     else
    637     {
    638         // We need to go backwards from our input point
    639         //
    640         while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
    641         {
    642             // Get the previous 16 bit character
    643             //
    644             ch = (*nextChar - 2) + ((*nextChar -1) << 8);
    645             nextChar -= 2;
    646 
    647             // If we found a low surrogate then go back one more character if
    648             // the hi surrogate is there
    649             //
    650             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 
    651             {
    652                 ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
    653                 if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 
    654                 {
    655                     // Yes, there is a high surrogate to match it so decrement one more and point to that
    656                     //
    657                     nextChar -=2;
    658                 }
    659             }
    660         }
    661     }
    662 
    663     // Our local copy of nextChar is now pointing to either the correct character or end of file
    664     //
    665     // Input buffer size is always in bytes
    666     //
    667 	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    668 	{
    669 		return	ANTLR_CHARSTREAM_EOF;
    670 	}
    671 	else
    672 	{
    673         // Pick up the next 16 character (little endian byte order)
    674         //
    675         ch = (*nextChar) + (*(nextChar+1) << 8);
    676         nextChar += 2;
    677 
    678         // If we have a surrogate pair then we need to consume
    679         // a following valid LO surrogate.
    680         //
    681         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    682         {
    683             // If the 16 bits following the high surrogate are in the source buffer...
    684             //
    685             if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    686             {
    687                 // Next character is in little endian byte order
    688                 //
    689                 ch2 = (*nextChar) + (*(nextChar+1) << 8);
    690 
    691                 // If it's a valid low surrogate, consume it
    692                 //
    693                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    694                 {
    695                     // Construct the UTF32 code point
    696                     //
    697                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
    698 								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
    699                 }
    700                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    701                 // it.
    702                 //
    703             } 
    704             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    705             // it because the buffer ended
    706             //
    707         }
    708     }
    709     return ch;
    710 }
    711 
    712 template<class ImplTraits, class SuperType>
    713 ANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
    714 {
    715 	SuperType* input;
    716     UTF32           ch;
    717     UTF32           ch2;
    718     ANTLR_UCHAR*   nextChar;
    719 
    720     // Find the input interface and where we are currently pointing to
    721     // in the input stream
    722     //
    723 	input       = this->get_super();
    724     nextChar    = input->get_nextChar();
    725 
    726     // If a positive offset then advance forward, else retreat
    727     //
    728     if  (la >= 0)
    729     {
    730         while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
    731         {
    732             // Advance our copy of the input pointer
    733             //
    734             // Next char in Big Endian byte order
    735             //
    736             ch  = ((*nextChar) << 8) + *(nextChar+1);
    737             nextChar += 2;
    738 
    739             // If we have a surrogate pair then we need to consume
    740             // a following valid LO surrogate.
    741             //
    742             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    743             {
    744                 // If the 16 bits following the high surrogate are in the source buffer...
    745                 //
    746                 if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    747                 {
    748                     // Next character is in big endian byte order
    749                     //
    750                     ch2 = ((*nextChar) << 8) + *(nextChar+1);
    751 
    752                     // If it's a valid low surrogate, consume it
    753                     //
    754                     if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    755                     {
    756                         // We consumed one 16 bit character
    757                         //
    758 						nextChar += 2;
    759                     }
    760                     // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    761                     // it.
    762                     //
    763                 } 
    764                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    765                 // it because the buffer ended
    766                 //
    767             }
    768             // Note that we did not check for an invalid low surrogate here, or that fact that the
    769             // lo surrogate was missing. We just picked out one 16 bit character unless the character
    770             // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
    771             //
    772         }
    773     }
    774     else
    775     {
    776         // We need to go backwards from our input point
    777         //
    778         while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
    779         {
    780             // Get the previous 16 bit character
    781             //
    782             ch = ((*nextChar - 2) << 8) + (*nextChar -1);
    783             nextChar -= 2;
    784 
    785             // If we found a low surrogate then go back one more character if
    786             // the hi surrogate is there
    787             //
    788             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 
    789             {
    790                 ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
    791                 if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 
    792                 {
    793                     // Yes, there is a high surrogate to match it so decrement one more and point to that
    794                     //
    795                     nextChar -=2;
    796                 }
    797             }
    798         }
    799     }
    800 
    801     // Our local copy of nextChar is now pointing to either the correct character or end of file
    802     //
    803     // Input buffer size is always in bytes
    804     //
    805 	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    806 	{
    807 		return	ANTLR_CHARSTREAM_EOF;
    808 	}
    809 	else
    810 	{
    811         // Pick up the next 16 character (big endian byte order)
    812         //
    813         ch = ((*nextChar) << 8) + *(nextChar+1);
    814         nextChar += 2;
    815 
    816         // If we have a surrogate pair then we need to consume
    817         // a following valid LO surrogate.
    818         //
    819         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    820         {
    821             // If the 16 bits following the high surrogate are in the source buffer...
    822             //
    823             if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
    824             {
    825                 // Next character is in big endian byte order
    826                 //
    827                 ch2 = ((*nextChar) << 8) + *(nextChar+1);
    828 
    829                 // If it's a valid low surrogate, consume it
    830                 //
    831                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    832                 {
    833                     // Construct the UTF32 code point
    834                     //
    835                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
    836 								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
    837                 }
    838                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    839                 // it.
    840                 //
    841             } 
    842             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    843             // it because the buffer ended
    844             //
    845         }
    846     }
    847     return ch;
    848 }
    849 
    850 template<class ImplTraits, class SuperType>
    851 void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> )
    852 {
    853 	SuperType* input;
    854     UTF32   ch;
    855     UTF32   ch2;
    856 
    857 	input   = this->get_super();
    858 
    859     // Buffer size is always in bytes
    860     //
    861 	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
    862 	{	
    863 		// Indicate one more character in this line
    864 		//
    865 		input->inc_charPositionInLine();
    866 
    867 		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
    868 		{
    869 			// Reset for start of a new line of input
    870 			//
    871 			input->inc_line();
    872 			input->set_charPositionInLine(0);
    873 			input->set_currentLine( input->get_nextChar() + 1 );
    874 		}
    875 
    876 		// Increment to next character position, accounting for any surrogates
    877 		//
    878         // Next char in natural machine byte order
    879         //
    880         ch  = *(input->get_nextChar());
    881 
    882         // We consumed one 16 bit character
    883         //
    884 		input->set_nextChar( input->get_nextChar() + 1 );
    885 
    886         // If we have a surrogate pair then we need to consume
    887         // a following valid LO surrogate.
    888         //
    889         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
    890 
    891             // If the 16 bits following the high surrogate are in the source buffer...
    892             //
    893             if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
    894             {
    895                 // Next character is in natural machine byte order
    896                 //
    897                 ch2 = *(input->get_nextChar());
    898 
    899                 // If it's a valid low surrogate, consume it
    900                 //
    901                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    902                 {
    903                     // We consumed one 16 bit character
    904                     //
    905 					input->set_nextChar( input->get_nextChar() + 1 );
    906                 }
    907                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    908                 // it.
    909                 //
    910             } 
    911             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    912             // it because the buffer ended
    913             //
    914         } 
    915         // Note that we did not check for an invalid low surrogate here, or that fact that the
    916         // lo surrogate was missing. We just picked out one 16 bit character unless the character
    917         // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
    918         //
    919 	}
    920 
    921 }
    922 
    923 template<class ImplTraits, class SuperType>
    924 void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> )
    925 {
    926 	SuperType* input;
    927     UTF32   ch;
    928     UTF32   ch2;
    929 
    930 	input   = this->get_super();
    931 
    932     // Buffer size is always in bytes
    933     //
    934 	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
    935 	{	
    936 		// Indicate one more character in this line
    937 		//
    938 		input->inc_charPositionInLine();
    939 
    940 		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
    941 		{
    942 			// Reset for start of a new line of input
    943 			//
    944 			input->inc_line();
    945 			input->set_charPositionInLine(0);
    946 			input->set_currentLine(input->get_nextChar() + 1);
    947 		}
    948 
    949 		// Increment to next character position, accounting for any surrogates
    950 		//
    951         // Next char in litle endian form
    952         //
    953         ch  = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
    954 
    955         // We consumed one 16 bit character
    956         //
    957 		input->set_nextChar( input->get_nextChar() + 1);
    958 
    959         // If we have a surrogate pair then we need to consume
    960         // a following valid LO surrogate.
    961         //
    962         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
    963 		{
    964             // If the 16 bits following the high surrogate are in the source buffer...
    965             //
    966             if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
    967             {
    968                 ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
    969 
    970                 // If it's a valid low surrogate, consume it
    971                 //
    972                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
    973                 {
    974                     // We consumed one 16 bit character
    975                     //
    976 					input->set_nextChar( input->get_nextChar() + 1);
    977                 }
    978                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    979                 // it.
    980                 //
    981             } 
    982             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
    983             // it because the buffer ended
    984             //
    985         } 
    986         // Note that we did not check for an invalid low surrogate here, or that fact that the
    987         // lo surrogate was missing. We just picked out one 16 bit character unless the character
    988         // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
    989         //
    990 	}
    991 }
    992 
    993 template<class ImplTraits, class SuperType>
    994 void	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> )
    995 {
    996 	SuperType* input;
    997     UTF32   ch;
    998     UTF32   ch2;
    999 
   1000 	input   = this->get_super();
   1001 
   1002     // Buffer size is always in bytes
   1003     //
   1004 	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
   1005 	{	
   1006 		// Indicate one more character in this line
   1007 		//
   1008 		input->inc_charPositionInLine();
   1009 
   1010 		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
   1011 		{
   1012 			// Reset for start of a new line of input
   1013 			//
   1014 			input->inc_line();
   1015 			input->set_charPositionInLine(0);
   1016 			input->set_currentLine(input->get_nextChar() + 1);
   1017 		}
   1018 
   1019 		// Increment to next character position, accounting for any surrogates
   1020 		//
   1021         // Next char in big endian form
   1022         //
   1023         ch  = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
   1024 
   1025         // We consumed one 16 bit character
   1026         //
   1027 		input->set_nextChar( input->get_nextChar() + 1);
   1028 
   1029         // If we have a surrogate pair then we need to consume
   1030         // a following valid LO surrogate.
   1031         //
   1032         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 
   1033 		{
   1034             // If the 16 bits following the high surrogate are in the source buffer...
   1035             //
   1036             if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
   1037             {
   1038                 // Big endian
   1039                 //
   1040                 ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
   1041 
   1042                 // If it's a valid low surrogate, consume it
   1043                 //
   1044                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 
   1045                 {
   1046                     // We consumed one 16 bit character
   1047                     //
   1048 					input->set_nextChar( input->get_nextChar() + 1);
   1049                 }
   1050                 // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
   1051                 // it.
   1052                 //
   1053             } 
   1054             // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
   1055             // it because the buffer ended
   1056             //
   1057         } 
   1058         // Note that we did not check for an invalid low surrogate here, or that fact that the
   1059         // lo surrogate was missing. We just picked out one 16 bit character unless the character
   1060         // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
   1061         //
   1062 	}
   1063 }
   1064 
   1065 template<class ImplTraits, class SuperType>
   1066 ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
   1067 {
   1068 	return this->_LA( i, ClassForwarder<typename ImplTraits::Endianness>() );
   1069 }
   1070 
   1071 template<class ImplTraits, class SuperType>
   1072 ANTLR_MARKER	UTF32_IntStream<ImplTraits, SuperType>::index()
   1073 {
   1074 	SuperType* input = this->get_super();
   1075     return  (ANTLR_MARKER)(input->get_nextChar());
   1076 }
   1077 
   1078 template<class ImplTraits, class SuperType>
   1079 void UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
   1080 {
   1081 	SuperType* input;
   1082 
   1083 	input   = this->get_super();
   1084 
   1085 	// If the requested seek point is less than the current
   1086 	// input point, then we assume that we are resetting from a mark
   1087 	// and do not need to scan, but can just set to there as rewind will
   1088         // reset line numbers and so on.
   1089 	//
   1090 	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
   1091 	{
   1092 		input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) );
   1093 	}
   1094 	else
   1095 	{
   1096         // Call consume until we reach the asked for seek point or EOF
   1097         //
   1098         while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) )
   1099 	    {
   1100 			this->consume();
   1101 	    }
   1102 	}
   1103 
   1104 }
   1105 
   1106 template<class ImplTraits, class SuperType>
   1107 void UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
   1108 {
   1109 	SuperType* super = this->get_super();
   1110 	super->set_charByteSize(4);
   1111 
   1112 	this->findout_endian_spec(machineBigEndian, inputBigEndian);
   1113 }
   1114 
   1115 template<class ImplTraits, class SuperType>
   1116 ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
   1117 {
   1118     SuperType* input = this->get_super();
   1119 
   1120     if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
   1121     {
   1122 		return	ANTLR_CHARSTREAM_EOF;
   1123     }
   1124     else
   1125     {
   1126 		return	(ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
   1127     }
   1128 }
   1129 
   1130 template<class ImplTraits, class SuperType>
   1131 ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
   1132 {
   1133 	SuperType* input = this->get_super();
   1134 
   1135     if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
   1136     {
   1137 		return	ANTLR_CHARSTREAM_EOF;
   1138     }
   1139     else
   1140     {
   1141         ANTLR_UCHAR   c;
   1142 
   1143         c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
   1144 
   1145         // Swap Endianess to Big Endian
   1146         //
   1147         return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
   1148     }
   1149 }
   1150 
   1151 template<class ImplTraits, class SuperType>
   1152 ANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
   1153 {
   1154 	SuperType* input = this->get_super();
   1155 
   1156     if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
   1157     {
   1158 		return	ANTLR_CHARSTREAM_EOF;
   1159     }
   1160     else
   1161     {
   1162         ANTLR_UCHAR   c;
   1163 
   1164         c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
   1165 
   1166         // Swap Endianess to Little Endian
   1167         //
   1168         return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
   1169     }
   1170 }
   1171 
   1172 template<class ImplTraits, class SuperType>
   1173 void	UTF32_IntStream<ImplTraits, SuperType>::consume()
   1174 {
   1175 	SuperType* input = this->get_super();
   1176 
   1177     // SizeBuf is always in bytes
   1178     //
   1179 	if	( input->get_nextChar()  < (input->get_data() + input->get_sizeBuf()/4 ))
   1180     {	
   1181 		/* Indicate one more character in this line
   1182 		 */
   1183 		input->inc_charPositionInLine();
   1184 	
   1185 		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
   1186 		{
   1187 			/* Reset for start of a new line of input
   1188 			 */
   1189 			input->inc_line();
   1190 			input->set_charPositionInLine(0);
   1191 			input->set_currentLine(	input->get_nextChar() + 1 );
   1192 		}
   1193 
   1194 		/* Increment to next character position
   1195 		 */
   1196 		input->set_nextChar( input->get_nextChar() + 1 );
   1197     }
   1198 }
   1199 
   1200 template<class ImplTraits, class SuperType>
   1201 void UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
   1202 {
   1203 	SuperType* super = this->get_super();
   1204 	super->set_charByteSize(0);
   1205 }
   1206 
   1207 // ------------------------------------------------------
   1208 // Following is from Unicode.org (see antlr3convertutf.c)
   1209 //
   1210 
   1211 /// Index into the table below with the first byte of a UTF-8 sequence to
   1212 /// get the number of trailing bytes that are supposed to follow it.
   1213 /// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
   1214 /// left as-is for anyone who may want to do such conversion, which was
   1215 /// allowed in earlier algorithms.
   1216 ///
   1217 template<class ImplTraits, class SuperType>
   1218 const ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8()
   1219 {
   1220 	static const ANTLR_UINT32 trailingBytesForUTF8[256] = {
   1221 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1222 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1223 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1224 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1225 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1226 		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1227 		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   1228 		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
   1229 	};
   1230 
   1231 	return trailingBytesForUTF8;
   1232 }
   1233 
   1234 /// Magic values subtracted from a buffer value during UTF8 conversion.
   1235 /// This table contains as many values as there might be trailing bytes
   1236 /// in a UTF-8 sequence.
   1237 ///
   1238 template<class ImplTraits, class SuperType>
   1239 const UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8()
   1240 {
   1241 	static const UTF32 offsetsFromUTF8[6] = 
   1242 		{   0x00000000UL, 0x00003080UL, 0x000E2080UL, 
   1243 			0x03C82080UL, 0xFA082080UL, 0x82082080UL 
   1244 		};
   1245 	return 	offsetsFromUTF8;
   1246 }
   1247 
   1248 // End of Unicode.org tables
   1249 // -------------------------
   1250 
   1251 
   1252 /** \brief Consume the next character in a UTF8 input stream
   1253  *
   1254  * \param input Input stream context pointer
   1255  */
   1256 template<class ImplTraits, class SuperType>
   1257 void UTF8_IntStream<ImplTraits, SuperType>::consume()
   1258 {
   1259     SuperType* input = this->get_super();
   1260 	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
   1261 	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
   1262 
   1263     ANTLR_UINT32           extraBytesToRead;
   1264     ANTLR_UCHAR            ch;
   1265     ANTLR_UINT8*           nextChar;
   1266 
   1267     nextChar = input->get_nextChar();
   1268 
   1269     if	(nextChar < (input->get_data() + input->get_sizeBuf()))
   1270     {	
   1271 		// Indicate one more character in this line
   1272 		//
   1273 		input->inc_charPositionInLine();
   1274 	
   1275         // Are there more bytes needed to make up the whole thing?
   1276         //
   1277         extraBytesToRead = trailingBytesForUTF8[*nextChar];
   1278 
   1279         if	((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf()))
   1280         {
   1281             input->set_nextChar( input->get_data() + input->get_sizeBuf() );
   1282             return;
   1283         }
   1284 
   1285         // Cases deliberately fall through (see note A in antlrconvertutf.c)
   1286         // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so
   1287         // we allow it.
   1288         //
   1289         ch  = 0;
   1290        	switch (extraBytesToRead) 
   1291 		{
   1292 			case 5: ch += *nextChar++; ch <<= 6;
   1293 			case 4: ch += *nextChar++; ch <<= 6;
   1294 			case 3: ch += *nextChar++; ch <<= 6;
   1295 			case 2: ch += *nextChar++; ch <<= 6;
   1296 			case 1: ch += *nextChar++; ch <<= 6;
   1297 			case 0: ch += *nextChar++;
   1298 		}
   1299 
   1300         // Magically correct the input value
   1301         //
   1302 		ch -= offsetsFromUTF8[extraBytesToRead];
   1303 		if  (ch == input->get_newlineChar())
   1304 		{
   1305 			/* Reset for start of a new line of input
   1306 			 */
   1307 			input->inc_line();
   1308 			input->set_charPositionInLine(0);
   1309 			input->set_currentLine(nextChar);
   1310 		}
   1311 
   1312         // Update input pointer
   1313         //
   1314         input->set_nextChar(nextChar);
   1315     }
   1316 }
   1317 
   1318 /** \brief Return the input element assuming a UTF8 input
   1319  *
   1320  * \param[in] input Input stream context pointer
   1321  * \param[in] la 1 based offset of next input stream element
   1322  *
   1323  * \return Next input character in internal ANTLR3 encoding (UTF32)
   1324  */
   1325 template<class ImplTraits, class SuperType>
   1326 ANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::_LA(ANTLR_INT32 la)
   1327 {
   1328     SuperType* input = this->get_super();
   1329 	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
   1330 	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
   1331     ANTLR_UINT32           extraBytesToRead;
   1332     ANTLR_UCHAR            ch;
   1333     ANTLR_UINT8*           nextChar;
   1334 
   1335     nextChar = input->get_nextChar();
   1336 
   1337     // Do we need to traverse forwards or backwards?
   1338     // - LA(0) is treated as LA(1) and we assume that the nextChar is
   1339     //   already positioned.
   1340     // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
   1341     // - LA(-n) means we must traverse backwards n chracters
   1342     //
   1343     if (la > 1) {
   1344 
   1345         // Make sure that we have at least one character left before trying to
   1346         // loop through the buffer.
   1347         //
   1348         if	(nextChar < (input->get_data() + input->get_sizeBuf()))
   1349         {	
   1350             // Now traverse n-1 characters forward
   1351             //
   1352             while (--la > 0)
   1353             {
   1354                 // Does the next character require trailing bytes?
   1355                 // If so advance the pointer by that many bytes as well as advancing
   1356                 // one position for what will be at least a single byte character.
   1357                 //
   1358                 nextChar += trailingBytesForUTF8[*nextChar] + 1;
   1359 
   1360                 // Does that calculation take us past the byte length of the buffer?
   1361                 //
   1362                 if	(nextChar >= (input->get_data() + input->get_sizeBuf()))
   1363                 {
   1364                     return ANTLR_CHARSTREAM_EOF;
   1365                 }
   1366             }
   1367         }
   1368         else
   1369         {
   1370             return ANTLR_CHARSTREAM_EOF;
   1371         }
   1372     }
   1373     else
   1374     {
   1375         // LA is negative so we decrease the pointer by n character positions
   1376         //
   1377         while   (nextChar > input->get_data() && la++ < 0)
   1378         {
   1379             // Traversing backwards in UTF8 means decermenting by one
   1380             // then continuing to decrement while ever a character pattern
   1381             // is flagged as being a trailing byte of an encoded code point.
   1382             // Trailing UTF8 bytes always start with 10 in binary. We assumne that
   1383             // the UTF8 is well formed and do not check boundary conditions
   1384             //
   1385             nextChar--;
   1386             while ((*nextChar & 0xC0) == 0x80)
   1387             {
   1388                 nextChar--;
   1389             }
   1390         }
   1391     }
   1392 
   1393     // nextChar is now pointing at the UTF8 encoded character that we need to
   1394     // decode and return.
   1395     //
   1396     // Are there more bytes needed to make up the whole thing?
   1397     //
   1398     extraBytesToRead = trailingBytesForUTF8[*nextChar];
   1399     if	(nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf()))
   1400     {
   1401         return ANTLR_CHARSTREAM_EOF;
   1402     }
   1403 
   1404     // Cases deliberately fall through (see note A in antlrconvertutf.c)
   1405     // 
   1406     ch  = 0;
   1407     switch (extraBytesToRead) 
   1408 	{
   1409         case 5: ch += *nextChar++; ch <<= 6;
   1410         case 4: ch += *nextChar++; ch <<= 6;
   1411         case 3: ch += *nextChar++; ch <<= 6;
   1412         case 2: ch += *nextChar++; ch <<= 6;
   1413         case 1: ch += *nextChar++; ch <<= 6;
   1414         case 0: ch += *nextChar++;
   1415     }
   1416 
   1417     // Magically correct the input value
   1418     //
   1419     ch -= offsetsFromUTF8[extraBytesToRead];
   1420 
   1421     return ch;
   1422 }
   1423 
   1424 template<class ImplTraits>
   1425 TokenIntStream<ImplTraits>::TokenIntStream()
   1426 {
   1427 	m_cachedSize = 0;
   1428 }
   1429 
   1430 template<class ImplTraits>
   1431 ANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const
   1432 {
   1433 	return m_cachedSize;
   1434 }
   1435 
   1436 template<class ImplTraits>
   1437 void TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize )
   1438 {
   1439 	m_cachedSize = cachedSize;
   1440 }
   1441 
   1442 /** Move the input pointer to the next incoming token.  The stream
   1443  *  must become active with LT(1) available.  consume() simply
   1444  *  moves the input pointer so that LT(1) points at the next
   1445  *  input symbol. Consume at least one token.
   1446  *
   1447  *  Walk past any token not on the channel the parser is listening to.
   1448  */
   1449 template<class ImplTraits>
   1450 void TokenIntStream<ImplTraits>::consume()
   1451 {
   1452 	TokenStreamType* cts = static_cast<TokenStreamType*>(this);
   1453 
   1454     if((ANTLR_UINT32)cts->get_p() < m_cachedSize )
   1455 	{
   1456 		cts->inc_p();
   1457 		cts->set_p( cts->skipOffTokenChannels(cts->get_p()) );
   1458 	}
   1459 }
   1460 template<class ImplTraits>
   1461 void  TokenIntStream<ImplTraits>::consumeInitialHiddenTokens()
   1462 {
   1463 	ANTLR_MARKER	first;
   1464 	ANTLR_INT32	i;
   1465 	TokenStreamType*	ts;
   1466 
   1467 	ts	    = this->get_super();
   1468 	first	= this->index();
   1469 
   1470 	for	(i=0; i<first; i++)
   1471 	{
   1472 		ts->get_debugger()->consumeHiddenToken(ts->get(i));
   1473 	}
   1474 
   1475 	ts->set_initialStreamState(false);
   1476 }
   1477 
   1478 
   1479 template<class ImplTraits>
   1480 ANTLR_UINT32	TokenIntStream<ImplTraits>::_LA( ANTLR_INT32 i )
   1481 {
   1482 	const CommonTokenType*    tok;
   1483 	TokenStreamType*    ts	    = static_cast<TokenStreamType*>(this);
   1484 
   1485 	tok	    =  ts->_LT(i);
   1486 
   1487 	if	(tok != NULL)
   1488 	{
   1489 		return	tok->get_type();
   1490 	}
   1491 	else
   1492 	{
   1493 		return	CommonTokenType::TOKEN_INVALID;
   1494 	}
   1495 
   1496 }
   1497 
   1498 template<class ImplTraits>
   1499 ANTLR_MARKER	TokenIntStream<ImplTraits>::mark()
   1500 {
   1501     BaseType::m_lastMarker = this->index();
   1502     return  BaseType::m_lastMarker;
   1503 }
   1504 
   1505 template<class ImplTraits>
   1506 ANTLR_UINT32 TokenIntStream<ImplTraits>::size()
   1507 {
   1508     if (this->get_cachedSize() > 0)
   1509     {
   1510 		return  this->get_cachedSize();
   1511     }
   1512     TokenStreamType* cts   = this->get_super();
   1513 
   1514     this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) );
   1515     return  this->get_cachedSize();
   1516 }
   1517 
   1518 template<class ImplTraits>
   1519 void	TokenIntStream<ImplTraits>::release()
   1520 {
   1521     return;
   1522 }
   1523 
   1524 template<class ImplTraits>
   1525 ANTLR_MARKER   TokenIntStream<ImplTraits>::tindex()
   1526 {
   1527 	return this->get_super()->get_p();
   1528 }
   1529 
   1530 template<class ImplTraits>
   1531 void	TokenIntStream<ImplTraits>::rewindLast()
   1532 {
   1533     this->rewind( this->get_lastMarker() );
   1534 }
   1535 
   1536 template<class ImplTraits>
   1537 void	TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker)
   1538 {
   1539 	return this->seek(marker);
   1540 }
   1541 
   1542 template<class ImplTraits>
   1543 void	TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index)
   1544 {
   1545     TokenStreamType* cts = static_cast<TokenStreamType*>(this);
   1546 
   1547     cts->set_p( static_cast<ANTLR_INT32>(index) );
   1548 }
   1549 
   1550 
   1551 /// Return a string that represents the name assoicated with the input source
   1552 ///
   1553 /// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream.
   1554 ///
   1555 /// /returns 
   1556 /// /implements ANTLR3_INT_STREAM_struct::getSourceName()
   1557 ///
   1558 template<class ImplTraits>
   1559 typename TokenIntStream<ImplTraits>::StringType
   1560 TokenIntStream<ImplTraits>::getSourceName()
   1561 {
   1562 	// Slightly convoluted as we must trace back to the lexer's input source
   1563 	// via the token source. The streamName that is here is not initialized
   1564 	// because this is a token stream, not a file or string stream, which are the
   1565 	// only things that have a context for a source name.
   1566 	//
   1567 	return this->get_super()->get_tokenSource()->get_fileName();
   1568 }
   1569 
   1570 template<class ImplTraits>
   1571 void  TreeNodeIntStream<ImplTraits>::consume()
   1572 {
   1573 	CommonTreeNodeStreamType* ctns = this->get_super();
   1574 	if( ctns->get_p() == -1 )
   1575 		ctns->fillBufferRoot();
   1576 	ctns->inc_p();
   1577 }
   1578 template<class ImplTraits>
   1579 ANTLR_MARKER		TreeNodeIntStream<ImplTraits>::tindex()
   1580 {
   1581 	CommonTreeNodeStreamType* ctns = this->get_super();
   1582 	return (ANTLR_MARKER)(ctns->get_p());
   1583 }
   1584 
   1585 template<class ImplTraits>
   1586 ANTLR_UINT32		TreeNodeIntStream<ImplTraits>::_LA(ANTLR_INT32 i)
   1587 {
   1588 	CommonTreeNodeStreamType* tns	    = this->get_super();
   1589 
   1590 	// Ask LT for the 'token' at that position
   1591 	//
   1592 	TreeType* t = tns->_LT(i);
   1593 
   1594 	if	(t == NULL)
   1595 	{
   1596 		return	CommonTokenType::TOKEN_INVALID;
   1597 	}
   1598 
   1599 	// Token node was there so return the type of it
   1600 	//
   1601 	return  t->get_type();
   1602 }
   1603 
   1604 template<class ImplTraits>
   1605 ANTLR_MARKER	TreeNodeIntStream<ImplTraits>::mark()
   1606 {
   1607 	CommonTreeNodeStreamType* ctns	    = this->get_super();
   1608 	
   1609 	if	(ctns->get_p() == -1)
   1610 	{
   1611 		ctns->fillBufferRoot();
   1612 	}
   1613 
   1614 	// Return the current mark point
   1615 	//
   1616 	this->set_lastMarker( this->index() );
   1617 
   1618 	return this->get_lastMarker();
   1619 
   1620 }
   1621 
   1622 template<class ImplTraits>
   1623 void  TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER marker)
   1624 {
   1625 
   1626 }
   1627 
   1628 template<class ImplTraits>
   1629 void TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker)
   1630 {
   1631 	this->seek(marker);
   1632 }
   1633 
   1634 template<class ImplTraits>
   1635 void TreeNodeIntStream<ImplTraits>::rewindLast()
   1636 {
   1637 	this->seek( this->get_lastMarker() );
   1638 }
   1639 
   1640 template<class ImplTraits>
   1641 void	TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index)
   1642 {
   1643 	CommonTreeNodeStreamType* ctns	    = this->get_super();
   1644 	ctns->set_p( ANTLR_UINT32_CAST(index) );
   1645 }
   1646 
   1647 template<class ImplTraits>
   1648 ANTLR_UINT32	TreeNodeIntStream<ImplTraits>::size()
   1649 {
   1650 	CommonTreeNodeStreamType* ctns	    = this->get_super();
   1651 	
   1652 	if	(ctns->get_p() == -1)
   1653 	{
   1654 		ctns->fillBufferRoot();
   1655 	}
   1656 
   1657 	return ctns->get_nodes().size();
   1658 }
   1659 
   1660 
   1661 ANTLR_END_NAMESPACE()
   1662