Home | History | Annotate | Download | only in src
      1 /**
      2  * Contains the default implementation of the common token used within
      3  * java. Custom tokens should create this structure and then append to it using the
      4  * custom pointer to install their own structure and API.
      5  */
      6 
      7 // [The "BSD licence"]
      8 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
      9 // http://www.temporal-wave.com
     10 // http://www.linkedin.com/in/jimidle
     11 //
     12 // All rights reserved.
     13 //
     14 // Redistribution and use in source and binary forms, with or without
     15 // modification, are permitted provided that the following conditions
     16 // are met:
     17 // 1. Redistributions of source code must retain the above copyright
     18 //    notice, this list of conditions and the following disclaimer.
     19 // 2. Redistributions in binary form must reproduce the above copyright
     20 //    notice, this list of conditions and the following disclaimer in the
     21 //    documentation and/or other materials provided with the distribution.
     22 // 3. The name of the author may not be used to endorse or promote products
     23 //    derived from this software without specific prior written permission.
     24 //
     25 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     26 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     27 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     28 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     29 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     30 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     31 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     32 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     33 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     34 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     35 
     36 #include    <antlr3.h>
     37 
     38 /* Token API
     39  */
     40 static  pANTLR3_STRING	getText					(pANTLR3_COMMON_TOKEN token);
     41 static  void			setText					(pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text);
     42 static  void			setText8				(pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text);
     43 static	ANTLR3_UINT32   getType					(pANTLR3_COMMON_TOKEN token);
     44 static  void			setType					(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type);
     45 static  ANTLR3_UINT32   getLine					(pANTLR3_COMMON_TOKEN token);
     46 static  void			setLine					(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line);
     47 static  ANTLR3_INT32    getCharPositionInLine	(pANTLR3_COMMON_TOKEN token);
     48 static  void			setCharPositionInLine	(pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos);
     49 static  ANTLR3_UINT32   getChannel				(pANTLR3_COMMON_TOKEN token);
     50 static  void			setChannel				(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel);
     51 static  ANTLR3_MARKER   getTokenIndex			(pANTLR3_COMMON_TOKEN token);
     52 static  void			setTokenIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER);
     53 static  ANTLR3_MARKER   getStartIndex			(pANTLR3_COMMON_TOKEN token);
     54 static  void			setStartIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index);
     55 static  ANTLR3_MARKER   getStopIndex			(pANTLR3_COMMON_TOKEN token);
     56 static  void			setStopIndex			(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index);
     57 static  pANTLR3_STRING  toString				(pANTLR3_COMMON_TOKEN token);
     58 
     59 /* Factory API
     60  */
     61 static	void			factoryClose	(pANTLR3_TOKEN_FACTORY factory);
     62 static	pANTLR3_COMMON_TOKEN	newToken	(void);
     63 static  void			setInputStream	(pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input);
     64 static	void                    factoryReset    (pANTLR3_TOKEN_FACTORY factory);
     65 
     66 /* Internal management functions
     67  */
     68 static	void			newPool		(pANTLR3_TOKEN_FACTORY factory);
     69 static	pANTLR3_COMMON_TOKEN    newPoolToken	(pANTLR3_TOKEN_FACTORY factory);
     70 
     71 
     72 ANTLR3_API pANTLR3_COMMON_TOKEN
     73 antlr3CommonTokenNew(ANTLR3_UINT32 ttype)
     74 {
     75 	pANTLR3_COMMON_TOKEN    token;
     76 
     77 	// Create a raw token with the interface installed
     78 	//
     79 	token   = newToken();
     80 
     81 	if	(token != NULL)
     82 	{
     83 		token->setType(token, ttype);
     84 	}
     85 
     86 	// All good
     87 	//
     88 	return  token;
     89 }
     90 
     91 ANTLR3_API pANTLR3_TOKEN_FACTORY
     92 antlr3TokenFactoryNew(pANTLR3_INPUT_STREAM input)
     93 {
     94     pANTLR3_TOKEN_FACTORY   factory;
     95 
     96     /* allocate memory
     97      */
     98     factory	= (pANTLR3_TOKEN_FACTORY) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_TOKEN_FACTORY));
     99 
    100     if	(factory == NULL)
    101     {
    102 	return	NULL;
    103     }
    104 
    105     /* Install factory API
    106      */
    107     factory->newToken	    = newPoolToken;
    108     factory->close	    = factoryClose;
    109     factory->setInputStream = setInputStream;
    110     factory->reset          = factoryReset;
    111 
    112     /* Allocate the initial pool
    113      */
    114     factory->thisPool	= -1;
    115     factory->pools      = NULL;
    116     factory->maxPool    = -1;
    117     newPool(factory);
    118 
    119     /* Factory space is good, we now want to initialize our cheating token
    120      * which one it is initialized is the model for all tokens we manufacture
    121      */
    122     antlr3SetTokenAPI(&factory->unTruc);
    123 
    124     /* Set some initial variables for future copying
    125      */
    126     factory->unTruc.factoryMade	= ANTLR3_TRUE;
    127 
    128     // Input stream
    129     //
    130     setInputStream(factory, input);
    131 
    132     return  factory;
    133 
    134 }
    135 
    136 static void
    137 setInputStream	(pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input)
    138 {
    139     factory->input          =  input;
    140     factory->unTruc.input   =  input;
    141 	if	(input != NULL)
    142 	{
    143 		factory->unTruc.strFactory	= input->strFactory;
    144 	}
    145 	else
    146 	{
    147 		factory->unTruc.strFactory = NULL;
    148     }
    149 }
    150 
    151 static void
    152 newPool(pANTLR3_TOKEN_FACTORY factory)
    153 {
    154     /* Increment factory count
    155      */
    156     factory->thisPool++;
    157 
    158     // If we were reusing this token factory then we may already have a pool
    159     // allocated. If we exceeded the max avaible then we must allocate a new
    160     // one.
    161     if  (factory->thisPool > factory->maxPool)
    162     {
    163         /* Ensure we have enough pointers allocated
    164          */
    165         factory->pools = (pANTLR3_COMMON_TOKEN *)
    166 		         ANTLR3_REALLOC(	(void *)factory->pools,	    /* Current pools pointer (starts at NULL)	*/
    167 					    (ANTLR3_UINT32)((factory->thisPool + 1) * sizeof(pANTLR3_COMMON_TOKEN *))	/* Memory for new pool pointers */
    168 					    );
    169 
    170         /* Allocate a new pool for the factory
    171          */
    172         factory->pools[factory->thisPool]	=
    173 			        (pANTLR3_COMMON_TOKEN)
    174 				    ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN) * ANTLR3_FACTORY_POOL_SIZE));
    175 
    176         // We now have a new pool and can track it as the maximum we have created so far
    177         //
    178         factory->maxPool = factory->thisPool;
    179     }
    180 
    181     /* Reset the counters
    182      */
    183     factory->nextToken	= 0;
    184 
    185     /* Done
    186      */
    187     return;
    188 }
    189 
    190 static pANTLR3_COMMON_TOKEN
    191 newPoolToken(pANTLR3_TOKEN_FACTORY factory)
    192 {
    193     pANTLR3_COMMON_TOKEN token;
    194 
    195     /* See if we need a new token pool before allocating a new
    196      * one
    197      */
    198     if (factory->nextToken >= ANTLR3_FACTORY_POOL_SIZE)
    199     {
    200         /* We ran out of tokens in the current pool, so we need a new pool
    201          */
    202         newPool(factory);
    203     }
    204 
    205     /* Assuming everything went well (we are trying for performance here so doing minimal
    206      * error checking. Then we can work out what the pointer is to the next token.
    207      */
    208     token = factory->pools[factory->thisPool] + factory->nextToken;
    209     factory->nextToken++;
    210 
    211     /* We have our token pointer now, so we can initialize it to the predefined model.
    212      * We only need do this though if the token is not already initialized, we just check
    213      * an api function pointer for this as they are allocated via calloc.
    214      */
    215     if  (token->setStartIndex == NULL)
    216     {
    217         antlr3SetTokenAPI(token);
    218 
    219         // It is factory made, and we need to copy the string factory pointer
    220         //
    221         token->factoryMade  = ANTLR3_TRUE;
    222         token->strFactory   = factory->input == NULL ? NULL : factory->input->strFactory;
    223         token->input        = factory->input;
    224     }
    225 
    226     /* And we are done
    227      */
    228     return token;
    229 }
    230 
    231 static	void
    232 factoryReset	    (pANTLR3_TOKEN_FACTORY factory)
    233 {
    234     // Just start again with pool #0 when we are
    235     // called.
    236     //
    237     factory->thisPool   = -1;
    238     newPool(factory);
    239 }
    240 
    241 static	void
    242 factoryClose	    (pANTLR3_TOKEN_FACTORY factory)
    243 {
    244     pANTLR3_COMMON_TOKEN    pool;
    245     ANTLR3_INT32	    poolCount;
    246     ANTLR3_UINT32	    limit;
    247     ANTLR3_UINT32	    token;
    248     pANTLR3_COMMON_TOKEN    check;
    249 
    250     /* We iterate the token pools one at a time
    251      */
    252     for	(poolCount = 0; poolCount <= factory->thisPool; poolCount++)
    253     {
    254 	/* Pointer to current pool
    255 	 */
    256 	pool	= factory->pools[poolCount];
    257 
    258 	/* Work out how many tokens we need to check in this pool.
    259 	 */
    260 	limit	= (poolCount == factory->thisPool ? factory->nextToken : ANTLR3_FACTORY_POOL_SIZE);
    261 
    262 	/* Marginal condition, we might be at the start of a brand new pool
    263 	 * where the nextToken is 0 and nothing has been allocated.
    264 	 */
    265 	if  (limit > 0)
    266 	{
    267 	    /* We have some tokens allocated from this pool
    268 	     */
    269 	    for (token = 0; token < limit; token++)
    270 	    {
    271 		/* Next one in the chain
    272 		 */
    273 		check	= pool + token;
    274 
    275 		/* If the programmer made this a custom token, then
    276 		 * see if we need to call their free routine.
    277 		 */
    278 		if  (check->custom != NULL && check->freeCustom != NULL)
    279 		{
    280 		    check->freeCustom(check->custom);
    281 		    check->custom = NULL;
    282 		}
    283 	    }
    284 	}
    285 
    286 	/* We can now free this pool allocation
    287 	 */
    288 	ANTLR3_FREE(factory->pools[poolCount]);
    289 	factory->pools[poolCount] = NULL;
    290     }
    291 
    292     /* All the pools are deallocated we can free the pointers to the pools
    293      * now.
    294      */
    295     ANTLR3_FREE(factory->pools);
    296 
    297     /* Finally, we can free the space for the factory itself
    298      */
    299     ANTLR3_FREE(factory);
    300 }
    301 
    302 
    303 static	pANTLR3_COMMON_TOKEN
    304 newToken(void)
    305 {
    306     pANTLR3_COMMON_TOKEN    token;
    307 
    308     /* Allocate memory for this
    309      */
    310     token   = (pANTLR3_COMMON_TOKEN) ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN)));
    311 
    312     if	(token == NULL)
    313     {
    314 	return	NULL;
    315     }
    316 
    317     // Install the API
    318     //
    319     antlr3SetTokenAPI(token);
    320     token->factoryMade = ANTLR3_FALSE;
    321 
    322     return  token;
    323 }
    324 
    325 ANTLR3_API void
    326 antlr3SetTokenAPI(pANTLR3_COMMON_TOKEN token)
    327 {
    328     token->getText		    = getText;
    329     token->setText		    = setText;
    330     token->setText8		    = setText8;
    331     token->getType		    = getType;
    332     token->setType		    = setType;
    333     token->getLine		    = getLine;
    334     token->setLine		    = setLine;
    335     token->setLine		    = setLine;
    336     token->getCharPositionInLine    = getCharPositionInLine;
    337     token->setCharPositionInLine    = setCharPositionInLine;
    338     token->getChannel		    = getChannel;
    339     token->setChannel		    = setChannel;
    340     token->getTokenIndex	    = getTokenIndex;
    341     token->setTokenIndex	    = setTokenIndex;
    342     token->getStartIndex	    = getStartIndex;
    343     token->setStartIndex	    = setStartIndex;
    344     token->getStopIndex		    = getStopIndex;
    345     token->setStopIndex		    = setStopIndex;
    346     token->toString		    = toString;
    347 
    348     return;
    349 }
    350 
    351 static  pANTLR3_STRING  getText			(pANTLR3_COMMON_TOKEN token)
    352 {
    353 	switch (token->textState)
    354 	{
    355 		case ANTLR3_TEXT_STRING:
    356 
    357 			// Someone already created a string for this token, so we just
    358 			// use it.
    359 			//
    360 			return	token->tokText.text;
    361 			break;
    362 
    363 		case ANTLR3_TEXT_CHARP:
    364 
    365 			// We had a straight text pointer installed, now we
    366 			// must convert it to a string. Note we have to do this here
    367 			// or otherwise setText8() will just install the same char*
    368 			//
    369 			if	(token->strFactory != NULL)
    370 			{
    371 				token->tokText.text	= token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)token->tokText.chars);
    372 				token->textState	= ANTLR3_TEXT_STRING;
    373 				return token->tokText.text;
    374 			}
    375 			else
    376 			{
    377 				// We cannot do anything here
    378 				//
    379 				return NULL;
    380 			}
    381 			break;
    382 
    383 		default:
    384 
    385 			// EOF is a special case
    386 			//
    387 			if (token->type == ANTLR3_TOKEN_EOF)
    388 			{
    389 				token->tokText.text				= token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)"<EOF>");
    390 				token->textState				= ANTLR3_TEXT_STRING;
    391 				token->tokText.text->factory	= token->strFactory;
    392 				return token->tokText.text;
    393 			}
    394 
    395 
    396 			// We had nothing installed in the token, create a new string
    397 			// from the input stream
    398 			//
    399 
    400 			if	(token->input != NULL)
    401 			{
    402 
    403 				return	token->input->substr(	token->input,
    404 												token->getStartIndex(token),
    405  												token->getStopIndex(token)
    406 											);
    407 			}
    408 
    409 			// Nothing to return, there is no input stream
    410 			//
    411 			return NULL;
    412 			break;
    413 	}
    414 }
    415 static  void		setText8		(pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text)
    416 {
    417 	// No text to set, so ignore
    418 	//
    419 	if	(text == NULL) return;
    420 
    421 	switch	(token->textState)
    422 	{
    423 		case	ANTLR3_TEXT_NONE:
    424 		case	ANTLR3_TEXT_CHARP:	// Caller must free before setting again, if it needs to be freed
    425 
    426 			// Nothing in there yet, or just a char *, so just set the
    427 			// text as a pointer
    428 			//
    429 			token->textState		= ANTLR3_TEXT_CHARP;
    430 			token->tokText.chars	= (pANTLR3_UCHAR)text;
    431 			break;
    432 
    433 		default:
    434 
    435 			// It was already a pANTLR3_STRING, so just override it
    436 			//
    437 			token->tokText.text->set8(token->tokText.text, (const char *)text);
    438 			break;
    439 	}
    440 
    441 	// We are done
    442 	//
    443 	return;
    444 }
    445 
    446 /** \brief Install the supplied text string as teh text for the token.
    447  * The method assumes that the existing text (if any) was created by a factory
    448  * and so does not attempt to release any memory it is using.Text not created
    449  * by a string fctory (not advised) should be released prior to this call.
    450  */
    451 static  void		setText			(pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text)
    452 {
    453 	// Merely replaces and existing pre-defined text with the supplied
    454 	// string
    455 	//
    456 	token->textState	= ANTLR3_TEXT_STRING;
    457 	token->tokText.text	= text;
    458 
    459 	/* We are done
    460 	*/
    461 	return;
    462 }
    463 
    464 static	ANTLR3_UINT32   getType			(pANTLR3_COMMON_TOKEN token)
    465 {
    466     return  token->type;
    467 }
    468 
    469 static  void		setType			(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type)
    470 {
    471     token->type = type;
    472 }
    473 
    474 static  ANTLR3_UINT32   getLine			(pANTLR3_COMMON_TOKEN token)
    475 {
    476     return  token->line;
    477 }
    478 
    479 static  void		setLine			(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line)
    480 {
    481     token->line = line;
    482 }
    483 
    484 static  ANTLR3_INT32    getCharPositionInLine	(pANTLR3_COMMON_TOKEN token)
    485 {
    486     return  token->charPosition;
    487 }
    488 
    489 static  void		setCharPositionInLine	(pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos)
    490 {
    491     token->charPosition = pos;
    492 }
    493 
    494 static  ANTLR3_UINT32   getChannel		(pANTLR3_COMMON_TOKEN token)
    495 {
    496     return  token->channel;
    497 }
    498 
    499 static  void		setChannel		(pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel)
    500 {
    501     token->channel  = channel;
    502 }
    503 
    504 static  ANTLR3_MARKER   getTokenIndex		(pANTLR3_COMMON_TOKEN token)
    505 {
    506     return  token->index;
    507 }
    508 
    509 static  void		setTokenIndex		(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index)
    510 {
    511     token->index    = index;
    512 }
    513 
    514 static  ANTLR3_MARKER   getStartIndex		(pANTLR3_COMMON_TOKEN token)
    515 {
    516 	return  token->start == -1 ? (ANTLR3_MARKER)(token->input->data) : token->start;
    517 }
    518 
    519 static  void		setStartIndex		(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER start)
    520 {
    521     token->start    = start;
    522 }
    523 
    524 static  ANTLR3_MARKER   getStopIndex		(pANTLR3_COMMON_TOKEN token)
    525 {
    526     return  token->stop;
    527 }
    528 
    529 static  void		setStopIndex		(pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER stop)
    530 {
    531     token->stop	= stop;
    532 }
    533 
    534 static  pANTLR3_STRING    toString		(pANTLR3_COMMON_TOKEN token)
    535 {
    536     pANTLR3_STRING  text;
    537     pANTLR3_STRING  outtext;
    538 
    539     text    =	token->getText(token);
    540 
    541     if	(text == NULL)
    542     {
    543 		return NULL;
    544     }
    545 
    546 	if	(text->factory == NULL)
    547 	{
    548 		return text;		// This usally means it is the EOF token
    549 	}
    550 
    551     /* A new empty string to assemble all the stuff in
    552      */
    553     outtext = text->factory->newRaw(text->factory);
    554 
    555     /* Now we use our handy dandy string utility to assemble the
    556      * the reporting string
    557      * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]";
    558      */
    559     outtext->append8(outtext, "[Index: ");
    560     outtext->addi   (outtext, (ANTLR3_INT32)token->getTokenIndex(token));
    561     outtext->append8(outtext, " (Start: ");
    562     outtext->addi   (outtext, (ANTLR3_INT32)token->getStartIndex(token));
    563     outtext->append8(outtext, "-Stop: ");
    564     outtext->addi   (outtext, (ANTLR3_INT32)token->getStopIndex(token));
    565     outtext->append8(outtext, ") ='");
    566     outtext->appendS(outtext, text);
    567     outtext->append8(outtext, "', type<");
    568     outtext->addi   (outtext, token->type);
    569     outtext->append8(outtext, "> ");
    570 
    571     if	(token->getChannel(token) > ANTLR3_TOKEN_DEFAULT_CHANNEL)
    572     {
    573 		outtext->append8(outtext, "(channel = ");
    574 		outtext->addi	(outtext, (ANTLR3_INT32)token->getChannel(token));
    575 		outtext->append8(outtext, ") ");
    576     }
    577 
    578     outtext->append8(outtext, "Line: ");
    579     outtext->addi   (outtext, (ANTLR3_INT32)token->getLine(token));
    580     outtext->append8(outtext, " LinePos:");
    581     outtext->addi   (outtext, token->getCharPositionInLine(token));
    582     outtext->addc   (outtext, ']');
    583 
    584     return  outtext;
    585 }
    586 
    587