Home | History | Annotate | Download | only in src
      1 /** \file
      2  * Implementation of the ANTLR3 string and string factory classes
      3  */
      4 
      5 // [The "BSD licence"]
      6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
      7 // http://www.temporal-wave.com
      8 // http://www.linkedin.com/in/jimidle
      9 //
     10 // All rights reserved.
     11 //
     12 // Redistribution and use in source and binary forms, with or without
     13 // modification, are permitted provided that the following conditions
     14 // are met:
     15 // 1. Redistributions of source code must retain the above copyright
     16 //    notice, this list of conditions and the following disclaimer.
     17 // 2. Redistributions in binary form must reproduce the above copyright
     18 //    notice, this list of conditions and the following disclaimer in the
     19 //    documentation and/or other materials provided with the distribution.
     20 // 3. The name of the author may not be used to endorse or promote products
     21 //    derived from this software without specific prior written permission.
     22 //
     23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33 
     34 #include    <antlr3string.h>
     35 
     36 /* Factory API
     37  */
     38 static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
     39 static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
     40 static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
     41 static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
     42 static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
     43 static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
     44 static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
     45 static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
     46 static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
     47 static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
     48 static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
     49 static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
     50 static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
     51 static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);
     52 
     53 /* String API
     54  */
     55 static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
     56 static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
     57 static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
     58 static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
     59 static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
     60 static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
     61 static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
     62 static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
     63 static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
     64 
     65 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
     66 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
     67 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
     68 
     69 static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
     70 static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
     71 static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
     72 static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
     73 static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
     74 static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
     75 
     76 static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
     77 static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
     78 static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
     79 static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
     80 static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
     81 static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
     82 static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
     83 static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
     84 static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
     85 static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
     86 static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
     87 static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
     88 static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
     89 static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);
     90 
     91 /* Local helpers
     92  */
     93 static	void			stringInit8	(pANTLR3_STRING string);
     94 static	void			stringInitUTF16	(pANTLR3_STRING string);
     95 static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);
     96 
     97 ANTLR3_API pANTLR3_STRING_FACTORY
     98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
     99 {
    100 	pANTLR3_STRING_FACTORY  factory;
    101 
    102 	/* Allocate memory
    103 	*/
    104 	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
    105 
    106 	if	(factory == NULL)
    107 	{
    108 		return	NULL;
    109 	}
    110 
    111 	/* Now we make a new list to track the strings.
    112 	*/
    113 	factory->strings	= antlr3VectorNew(0);
    114 	factory->index	= 0;
    115 
    116 	if	(factory->strings == NULL)
    117 	{
    118 		ANTLR3_FREE(factory);
    119 		return	NULL;
    120 	}
    121 
    122     // Install the API
    123     //
    124     // TODO: These encodings need equivalent functions to
    125     // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
    126 	// The STRING stuff was intended as a quick and dirty hack for people that did not
    127 	// want to worry about memory and performance very much, but nobody ever reads the
    128 	// notes or comments or uses the email list search. I want to discourage using these
    129 	// interfaces as it is much more efficient to use the pointers within the tokens
    130 	// directly, so I am not implementing the string stuff for the newer encodings.
    131     // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
    132 	// will not be useful beyond returning the text.
    133 	//
    134     switch(encoding)
    135     {
    136 		case    ANTLR3_ENC_UTF32:
    137 			break;
    138 
    139 		case    ANTLR3_ENC_UTF32BE:
    140 			break;
    141 
    142 		case    ANTLR3_ENC_UTF32LE:
    143 			break;
    144 
    145 		case    ANTLR3_ENC_UTF16BE:
    146 		case    ANTLR3_ENC_UTF16LE:
    147 		case    ANTLR3_ENC_UTF16:
    148 
    149 			factory->newRaw	    =  newRawUTF16;
    150 			factory->newSize	=  newSizeUTF16;
    151 			factory->newPtr	    =  newPtrUTF16_UTF16;
    152 			factory->newPtr8	=  newPtrUTF16_8;
    153 			factory->newStr	    =  newStrUTF16_UTF16;
    154 			factory->newStr8	=  newStrUTF16_8;
    155 			factory->printable	=  printableUTF16;
    156 			factory->destroy	=  destroy;
    157 			factory->close	    =  closeFactory;
    158 			break;
    159 
    160 		case    ANTLR3_ENC_UTF8:
    161 		case    ANTLR3_ENC_EBCDIC:
    162 		case    ANTLR3_ENC_8BIT:
    163 		default:
    164 
    165 			factory->newRaw	    =  newRaw8;
    166 			factory->newSize	=  newSize8;
    167 			factory->newPtr	    =  newPtr8;
    168 			factory->newPtr8	=  newPtr8;
    169 			factory->newStr	    =  newStr8;
    170 			factory->newStr8	=  newStr8;
    171 			factory->printable	=  printable8;
    172 			factory->destroy	=  destroy;
    173 			factory->close	    =  closeFactory;
    174 			break;
    175     }
    176 	return  factory;
    177 }
    178 
    179 
    180 /**
    181  *
    182  * \param factory
    183  * \return
    184  */
    185 static    pANTLR3_STRING
    186 newRaw8	(pANTLR3_STRING_FACTORY factory)
    187 {
    188     pANTLR3_STRING  string;
    189 
    190     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
    191 
    192     if	(string == NULL)
    193     {
    194 		return	NULL;
    195     }
    196 
    197     /* Structure is allocated, now fill in the API etc.
    198      */
    199     stringInit8(string);
    200     string->factory = factory;
    201 
    202     /* Add the string into the allocated list
    203      */
    204     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
    205     string->index   = factory->index++;
    206 
    207     return string;
    208 }
    209 /**
    210  *
    211  * \param factory
    212  * \return
    213  */
    214 static    pANTLR3_STRING
    215 newRawUTF16	(pANTLR3_STRING_FACTORY factory)
    216 {
    217     pANTLR3_STRING  string;
    218 
    219     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
    220 
    221     if	(string == NULL)
    222     {
    223 		return	NULL;
    224     }
    225 
    226     /* Structure is allocated, now fill in the API etc.
    227      */
    228     stringInitUTF16(string);
    229     string->factory = factory;
    230 
    231     /* Add the string into the allocated list
    232      */
    233     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
    234     string->index   = factory->index++;
    235 
    236     return string;
    237 }
    238 static
    239 void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
    240 {
    241     /* First free the string itself if there was anything in it
    242      */
    243     if	(string->chars)
    244     {
    245 	ANTLR3_FREE(string->chars);
    246     }
    247 
    248     /* Now free the space for this string
    249      */
    250     ANTLR3_FREE(string);
    251 
    252     return;
    253 }
    254 /**
    255  *
    256  * \param string
    257  * \return
    258  */
    259 static	void
    260 stringInit8  (pANTLR3_STRING string)
    261 {
    262     string->len			= 0;
    263     string->size		= 0;
    264     string->chars		= NULL;
    265     string->encoding	= ANTLR3_ENC_8BIT ;
    266 
    267     /* API for 8 bit strings*/
    268 
    269     string->set		= set8;
    270     string->set8	= set8;
    271     string->append	= append8;
    272     string->append8	= append8;
    273     string->insert	= insert8;
    274     string->insert8	= insert8;
    275     string->addi	= addi8;
    276     string->inserti	= inserti8;
    277     string->addc	= addc8;
    278     string->charAt	= charAt8;
    279     string->compare	= compare8;
    280     string->compare8	= compare8;
    281     string->subString	= subString8;
    282     string->toInt32	= toInt32_8;
    283     string->to8		= to8_8;
    284     string->toUTF8	= toUTF8_8;
    285     string->compareS	= compareS;
    286     string->setS	= setS;
    287     string->appendS	= appendS;
    288     string->insertS	= insertS;
    289 
    290 }
    291 /**
    292  *
    293  * \param string
    294  * \return
    295  */
    296 static	void
    297 stringInitUTF16  (pANTLR3_STRING string)
    298 {
    299     string->len		= 0;
    300     string->size	= 0;
    301     string->chars	= NULL;
    302     string->encoding	= ANTLR3_ENC_8BIT;
    303 
    304     /* API for UTF16 strings */
    305 
    306     string->set		= setUTF16_UTF16;
    307     string->set8	= setUTF16_8;
    308     string->append	= appendUTF16_UTF16;
    309     string->append8	= appendUTF16_8;
    310     string->insert	= insertUTF16_UTF16;
    311     string->insert8	= insertUTF16_8;
    312     string->addi	= addiUTF16;
    313     string->inserti	= insertiUTF16;
    314     string->addc	= addcUTF16;
    315     string->charAt	= charAtUTF16;
    316     string->compare	= compareUTF16_UTF16;
    317     string->compare8	= compareUTF16_8;
    318     string->subString	= subStringUTF16;
    319     string->toInt32	= toInt32_UTF16;
    320     string->to8		= to8_UTF16;
    321     string->toUTF8	= toUTF8_UTF16;
    322 
    323     string->compareS	= compareS;
    324     string->setS	= setS;
    325     string->appendS	= appendS;
    326     string->insertS	= insertS;
    327 }
    328 /**
    329  *
    330  * \param string
    331  * \return
    332  * TODO: Implement UTF-8
    333  */
    334 static	void
    335 stringInitUTF8  (pANTLR3_STRING string)
    336 {
    337     string->len	    = 0;
    338     string->size    = 0;
    339     string->chars   = NULL;
    340 
    341     /* API */
    342 
    343 }
    344 
    345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
    346 // a memcpy as we make no assumptions about the 8 bit encoding.
    347 //
    348 static	pANTLR3_STRING
    349 toUTF8_8	(pANTLR3_STRING string)
    350 {
    351 	return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
    352 }
    353 
    354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org
    355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
    356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
    357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
    358 //
    359 static	pANTLR3_STRING
    360 toUTF8_UTF16	(pANTLR3_STRING string)
    361 {
    362 
    363     UTF8	      * outputEnd;
    364     UTF16	      * inputEnd;
    365     pANTLR3_STRING	utf8String;
    366 
    367     ConversionResult	cResult;
    368 
    369     // Allocate the output buffer, which needs to accommodate potentially
    370     // 3X (in bytes) the input size (in chars).
    371     //
    372     utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
    373 
    374     if	(utf8String != NULL)
    375     {
    376         // Free existing allocation
    377         //
    378         ANTLR3_FREE(utf8String->chars);
    379 
    380         // Reallocate according to maximum expected size
    381         //
    382         utf8String->size	= string->len *3;
    383         utf8String->chars	= (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
    384 
    385         if	(utf8String->chars != NULL)
    386         {
    387             inputEnd  = (UTF16 *)	(string->chars);
    388             outputEnd = (UTF8 *)	(utf8String->chars);
    389 
    390             // Call the Unicode converter
    391             //
    392             cResult =  ConvertUTF16toUTF8
    393                 (
    394                 (const UTF16**)&inputEnd,
    395                 ((const UTF16 *)(string->chars)) + string->len,
    396                 &outputEnd,
    397                 outputEnd + utf8String->size - 1,
    398                 lenientConversion
    399                 );
    400 
    401             // We don't really care if things failed or not here, we just converted
    402             // everything that was vaguely possible and stopped when it wasn't. It is
    403             // up to the grammar programmer to verify that the input is sensible.
    404             //
    405             utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
    406 
    407             *(outputEnd+1) = '\0';		// Always null terminate
    408         }
    409     }
    410     return utf8String;
    411 }
    412 
    413 /**
    414  * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
    415  *
    416  * \param[in] factory - Pointer to the string factory that owns strings
    417  * \param[in] size - In characters
    418  * \return pointer to the new string.
    419  */
    420 static    pANTLR3_STRING
    421 newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
    422 {
    423     pANTLR3_STRING  string;
    424 
    425     string  = factory->newRaw(factory);
    426 
    427     if	(string == NULL)
    428     {
    429         return	string;
    430     }
    431 
    432     /* Always add one more byte for a terminator ;-)
    433     */
    434     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
    435     *(string->chars)	= '\0';
    436     string->size	= size + 1;
    437 
    438 
    439     return string;
    440 }
    441 /**
    442  * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
    443  *
    444  * \param[in] factory - Pointer to the string factory that owns strings
    445  * \param[in] size - In characters (count double for surrogate pairs!!!)
    446  * \return pointer to the new string.
    447  */
    448 static    pANTLR3_STRING
    449 newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
    450 {
    451     pANTLR3_STRING  string;
    452 
    453     string  = factory->newRaw(factory);
    454 
    455     if	(string == NULL)
    456     {
    457         return	string;
    458     }
    459 
    460     /* Always add one more byte for a terminator ;-)
    461     */
    462     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
    463     *(string->chars)	= '\0';
    464     string->size	= size+1;	/* Size is always in characters, as is len */
    465 
    466     return string;
    467 }
    468 
    469 /** Creates a new 8 bit string initialized with the 8 bit characters at the
    470  *  supplied ptr, of pre-determined size.
    471  * \param[in] factory - Pointer to the string factory that owns the strings
    472  * \param[in] ptr - Pointer to 8 bit encoded characters
    473  * \return pointer to the new string
    474  */
    475 static    pANTLR3_STRING
    476 newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
    477 {
    478 	pANTLR3_STRING  string;
    479 
    480 	string  = factory->newSize(factory, size);
    481 
    482 	if	(string == NULL)
    483 	{
    484 		return	NULL;
    485 	}
    486 
    487 	if	(size <= 0)
    488 	{
    489 		return	string;
    490 	}
    491 
    492 	if	(ptr != NULL)
    493 	{
    494 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
    495 		*(string->chars + size) = '\0';	    /* Terminate, these strings are usually used for Token streams and printing etc.	*/
    496 		string->len = size;
    497 	}
    498 
    499 	return  string;
    500 }
    501 
    502 /** Creates a new UTF16 string initialized with the 8 bit characters at the
    503  *  supplied 8 bit character ptr, of pre-determined size.
    504  * \param[in] factory - Pointer to the string factory that owns the strings
    505  * \param[in] ptr - Pointer to 8 bit encoded characters
    506  * \return pointer to the new string
    507  */
    508 static    pANTLR3_STRING
    509 newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
    510 {
    511 	pANTLR3_STRING  string;
    512 
    513 	/* newSize accepts size in characters, not bytes
    514 	*/
    515 	string  = factory->newSize(factory, size);
    516 
    517 	if	(string == NULL)
    518 	{
    519 		return	NULL;
    520 	}
    521 
    522 	if	(size <= 0)
    523 	{
    524 		return	string;
    525 	}
    526 
    527 	if	(ptr != NULL)
    528 	{
    529 		pANTLR3_UINT16	out;
    530 		ANTLR3_INT32    inSize;
    531 
    532 		out = (pANTLR3_UINT16)(string->chars);
    533 		inSize	= size;
    534 
    535 		while	(inSize-- > 0)
    536 		{
    537 			*out++ = (ANTLR3_UINT16)(*ptr++);
    538 		}
    539 
    540 		/* Terminate, these strings are usually used for Token streams and printing etc.
    541 		*/
    542 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
    543 
    544 		string->len = size;
    545 	}
    546 
    547 	return  string;
    548 }
    549 
    550 /** Creates a new UTF16 string initialized with the UTF16 characters at the
    551  *  supplied ptr, of pre-determined size.
    552  * \param[in] factory - Pointer to the string factory that owns the strings
    553  * \param[in] ptr - Pointer to UTF16 encoded characters
    554  * \return pointer to the new string
    555  */
    556 static    pANTLR3_STRING
    557 newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
    558 {
    559 	pANTLR3_STRING  string;
    560 
    561 	string  = factory->newSize(factory, size);
    562 
    563 	if	(string == NULL)
    564 	{
    565 		return	NULL;
    566 	}
    567 
    568 	if	(size <= 0)
    569 	{
    570 		return	string;
    571 	}
    572 
    573 	if	(ptr != NULL)
    574 	{
    575 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
    576 
    577 		/* Terminate, these strings are usually used for Token streams and printing etc.
    578 		*/
    579 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
    580 		string->len = size;
    581 	}
    582 
    583 	return  string;
    584 }
    585 
    586 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
    587  * \param[in] factory - Pointer to the string factory that owns strings.
    588  * \param[in] ptr - Pointer to the 8 bit encoded string
    589  * \return Pointer to the newly initialized string
    590  */
    591 static    pANTLR3_STRING
    592 newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
    593 {
    594     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
    595 }
    596 
    597 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
    598  * \param[in] factory - Pointer to the string factory that owns strings.
    599  * \param[in] ptr - Pointer to the 8 bit encoded string
    600  * \return POinter to the newly initialized string
    601  */
    602 static    pANTLR3_STRING
    603 newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
    604 {
    605     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
    606 }
    607 
    608 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
    609  * \param[in] factory - Pointer to the string factory that owns strings.
    610  * \param[in] ptr - Pointer to the UTF16 encoded string
    611  * \return Pointer to the newly initialized string
    612  */
    613 static    pANTLR3_STRING
    614 newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
    615 {
    616     pANTLR3_UINT16  in;
    617     ANTLR3_UINT32   count;
    618 
    619     /** First, determine the length of the input string
    620      */
    621     in	    = (pANTLR3_UINT16)ptr;
    622     count   = 0;
    623 
    624     while   (*in++ != '\0')
    625     {
    626 		count++;
    627     }
    628     return factory->newPtr(factory, ptr, count);
    629 }
    630 
    631 static    void
    632 destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
    633 {
    634     // Record which string we are deleting
    635     //
    636     ANTLR3_UINT32 strIndex = string->index;
    637 
    638     // Ensure that the string was not factory made, or we would try
    639     // to delete memory that wasn't allocated outside the factory
    640     // block.
    641     // Remove the specific indexed string from the vector
    642     //
    643     factory->strings->del(factory->strings, strIndex);
    644 
    645     // One less string in the vector, so decrement the factory index
    646     // so that the next string allocated is indexed correctly with
    647     // respect to the vector.
    648     //
    649     factory->index--;
    650 
    651     // Now we have to reindex the strings in the vector that followed
    652     // the one we just deleted. We only do this if the one we just deleted
    653     // was not the last one.
    654     //
    655     if  (strIndex< factory->index)
    656     {
    657         // We must reindex the strings after the one we just deleted.
    658         // The one that follows the one we just deleted is also out
    659         // of whack, so we start there.
    660         //
    661         ANTLR3_UINT32 i;
    662 
    663         for (i = strIndex; i < factory->index; i++)
    664         {
    665             // Renumber the entry
    666             //
    667             ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
    668         }
    669     }
    670 
    671     // The string has been destroyed and the elements of the factory are reindexed.
    672     //
    673 
    674 }
    675 
    676 static    pANTLR3_STRING
    677 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
    678 {
    679     pANTLR3_STRING  string;
    680 
    681     /* We don't need to be too efficient here, this is mostly for error messages and so on.
    682      */
    683     pANTLR3_UINT8   scannedText;
    684     ANTLR3_UINT32   i;
    685 
    686     /* Assume we need as much as twice as much space to parse out the control characters
    687      */
    688     string  = factory->newSize(factory, instr->len *2 + 1);
    689 
    690     /* Scan through and replace unprintable (in terms of this routine)
    691      * characters
    692      */
    693     scannedText = string->chars;
    694 
    695     for	(i = 0; i < instr->len; i++)
    696     {
    697 		if (*(instr->chars + i) == '\n')
    698 		{
    699 			*scannedText++ = '\\';
    700 			*scannedText++ = 'n';
    701 		}
    702 		else if (*(instr->chars + i) == '\r')
    703 		{
    704 			*scannedText++ = '\\';
    705 			*scannedText++ = 'r';
    706 		}
    707 		else if	(!isprint(*(instr->chars +i)))
    708 		{
    709 			*scannedText++ = '?';
    710 		}
    711 		else
    712 		{
    713 			*scannedText++ = *(instr->chars + i);
    714 		}
    715     }
    716     *scannedText  = '\0';
    717 
    718     string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
    719 
    720     return  string;
    721 }
    722 
    723 static    pANTLR3_STRING
    724 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
    725 {
    726     pANTLR3_STRING  string;
    727 
    728     /* We don't need to be too efficient here, this is mostly for error messages and so on.
    729      */
    730     pANTLR3_UINT16  scannedText;
    731     pANTLR3_UINT16  inText;
    732     ANTLR3_UINT32   i;
    733     ANTLR3_UINT32   outLen;
    734 
    735     /* Assume we need as much as twice as much space to parse out the control characters
    736      */
    737     string  = factory->newSize(factory, instr->len *2 + 1);
    738 
    739     /* Scan through and replace unprintable (in terms of this routine)
    740      * characters
    741      */
    742     scannedText = (pANTLR3_UINT16)(string->chars);
    743     inText	= (pANTLR3_UINT16)(instr->chars);
    744     outLen	= 0;
    745 
    746     for	(i = 0; i < instr->len; i++)
    747     {
    748 		if (*(inText + i) == '\n')
    749 		{
    750 			*scannedText++   = '\\';
    751 			*scannedText++   = 'n';
    752 			outLen	    += 2;
    753 		}
    754 		else if (*(inText + i) == '\r')
    755 		{
    756 			*scannedText++   = '\\';
    757 			*scannedText++   = 'r';
    758 			outLen	    += 2;
    759 		}
    760 		else if	(!isprint(*(inText +i)))
    761 		{
    762 			*scannedText++ = '?';
    763 			outLen++;
    764 		}
    765 		else
    766 		{
    767 			*scannedText++ = *(inText + i);
    768 			outLen++;
    769 		}
    770     }
    771     *scannedText  = '\0';
    772 
    773     string->len	= outLen;
    774 
    775     return  string;
    776 }
    777 
    778 /** Fascist Capitalist Pig function created
    779  *  to oppress the workers comrade.
    780  */
    781 static    void
    782 closeFactory	(pANTLR3_STRING_FACTORY factory)
    783 {
    784     /* Delete the vector we were tracking the strings with, this will
    785      * causes all the allocated strings to be deallocated too
    786      */
    787     factory->strings->free(factory->strings);
    788 
    789     /* Delete the space for the factory itself
    790      */
    791     ANTLR3_FREE((void *)factory);
    792 }
    793 
    794 static    pANTLR3_UINT8
    795 append8	(pANTLR3_STRING string, const char * newbit)
    796 {
    797     ANTLR3_UINT32 len;
    798 
    799     len	= (ANTLR3_UINT32)strlen(newbit);
    800 
    801     if	(string->size < (string->len + len + 1))
    802     {
    803 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
    804 		string->size	= string->len + len + 1;
    805     }
    806 
    807     /* Note we copy one more byte than the strlen in order to get the trailing
    808      */
    809     ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
    810     string->len	+= len;
    811 
    812     return string->chars;
    813 }
    814 
    815 static    pANTLR3_UINT8
    816 appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
    817 {
    818     ANTLR3_UINT32   len;
    819     pANTLR3_UINT16  apPoint;
    820     ANTLR3_UINT32   count;
    821 
    822     len	= (ANTLR3_UINT32)strlen(newbit);
    823 
    824     if	(string->size < (string->len + len + 1))
    825     {
    826 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
    827 		string->size	= string->len + len + 1;
    828     }
    829 
    830     apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
    831     string->len	+= len;
    832 
    833     for	(count = 0; count < len; count++)
    834     {
    835 		*apPoint++   = *(newbit + count);
    836     }
    837     *apPoint = '\0';
    838 
    839     return string->chars;
    840 }
    841 
    842 static    pANTLR3_UINT8
    843 appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
    844 {
    845     ANTLR3_UINT32 len;
    846     pANTLR3_UINT16  in;
    847 
    848     /** First, determine the length of the input string
    849      */
    850     in	    = (pANTLR3_UINT16)newbit;
    851     len   = 0;
    852 
    853     while   (*in++ != '\0')
    854     {
    855 		len++;
    856     }
    857 
    858     if	(string->size < (string->len + len + 1))
    859     {
    860 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
    861 		string->size	= string->len + len + 1;
    862     }
    863 
    864     /* Note we copy one more byte than the strlen in order to get the trailing delimiter
    865      */
    866     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
    867     string->len	+= len;
    868 
    869     return string->chars;
    870 }
    871 
    872 static    pANTLR3_UINT8
    873 set8	(pANTLR3_STRING string, const char * chars)
    874 {
    875     ANTLR3_UINT32	len;
    876 
    877     len = (ANTLR3_UINT32)strlen(chars);
    878     if	(string->size < len + 1)
    879     {
    880 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
    881 		string->size	= len + 1;
    882     }
    883 
    884     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
    885      */
    886     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
    887     string->len	    = len;
    888 
    889     return  string->chars;
    890 
    891 }
    892 
    893 static    pANTLR3_UINT8
    894 setUTF16_8	(pANTLR3_STRING string, const char * chars)
    895 {
    896     ANTLR3_UINT32	len;
    897     ANTLR3_UINT32	count;
    898     pANTLR3_UINT16	apPoint;
    899 
    900     len = (ANTLR3_UINT32)strlen(chars);
    901     if	(string->size < len + 1)
    902 	{
    903 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
    904 		string->size	= len + 1;
    905     }
    906     apPoint = ((pANTLR3_UINT16)string->chars);
    907     string->len	= len;
    908 
    909     for	(count = 0; count < string->len; count++)
    910     {
    911 		*apPoint++   = *(chars + count);
    912     }
    913     *apPoint = '\0';
    914 
    915     return  string->chars;
    916 }
    917 
    918 static    pANTLR3_UINT8
    919 setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
    920 {
    921     ANTLR3_UINT32   len;
    922     pANTLR3_UINT16  in;
    923 
    924     /** First, determine the length of the input string
    925      */
    926     in	    = (pANTLR3_UINT16)chars;
    927     len   = 0;
    928 
    929     while   (*in++ != '\0')
    930     {
    931 		len++;
    932     }
    933 
    934     if	(string->size < len + 1)
    935     {
    936 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
    937 		string->size	= len + 1;
    938     }
    939 
    940     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
    941      */
    942     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
    943     string->len	    = len;
    944 
    945     return  string->chars;
    946 
    947 }
    948 
    949 static    pANTLR3_UINT8
    950 addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
    951 {
    952     if	(string->size < string->len + 2)
    953     {
    954 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
    955 		string->size	= string->len + 2;
    956     }
    957     *(string->chars + string->len)	= (ANTLR3_UINT8)c;
    958     *(string->chars + string->len + 1)	= '\0';
    959     string->len++;
    960 
    961     return  string->chars;
    962 }
    963 
    964 static    pANTLR3_UINT8
    965 addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
    966 {
    967     pANTLR3_UINT16  ptr;
    968 
    969     if	(string->size < string->len + 2)
    970     {
    971 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
    972 		string->size	= string->len + 2;
    973     }
    974     ptr	= (pANTLR3_UINT16)(string->chars);
    975 
    976     *(ptr + string->len)	= (ANTLR3_UINT16)c;
    977     *(ptr + string->len + 1)	= '\0';
    978     string->len++;
    979 
    980     return  string->chars;
    981 }
    982 
    983 static    pANTLR3_UINT8
    984 addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
    985 {
    986     ANTLR3_UINT8	    newbit[32];
    987 
    988     sprintf((char *)newbit, "%d", i);
    989 
    990     return  string->append8(string, (const char *)newbit);
    991 }
    992 static    pANTLR3_UINT8
    993 addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
    994 {
    995     ANTLR3_UINT8	    newbit[32];
    996 
    997     sprintf((char *)newbit, "%d", i);
    998 
    999     return  string->append8(string, (const char *)newbit);
   1000 }
   1001 
   1002 static	  pANTLR3_UINT8
   1003 inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
   1004 {
   1005     ANTLR3_UINT8	    newbit[32];
   1006 
   1007     sprintf((char *)newbit, "%d", i);
   1008     return  string->insert8(string, point, (const char *)newbit);
   1009 }
   1010 static	  pANTLR3_UINT8
   1011 insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
   1012 {
   1013     ANTLR3_UINT8	    newbit[32];
   1014 
   1015     sprintf((char *)newbit, "%d", i);
   1016     return  string->insert8(string, point, (const char *)newbit);
   1017 }
   1018 
   1019 static	pANTLR3_UINT8
   1020 insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
   1021 {
   1022     ANTLR3_UINT32	len;
   1023 
   1024     if	(point >= string->len)
   1025     {
   1026 		return	string->append(string, newbit);
   1027     }
   1028 
   1029     len	= (ANTLR3_UINT32)strlen(newbit);
   1030 
   1031     if	(len == 0)
   1032     {
   1033 		return	string->chars;
   1034     }
   1035 
   1036     if	(string->size < (string->len + len + 1))
   1037     {
   1038 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
   1039 		string->size	= string->len + len + 1;
   1040     }
   1041 
   1042     /* Move the characters we are inserting before, including the delimiter
   1043      */
   1044     ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
   1045 
   1046     /* Note we copy the exact number of bytes
   1047      */
   1048     ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
   1049 
   1050     string->len += len;
   1051 
   1052     return  string->chars;
   1053 }
   1054 
   1055 static	pANTLR3_UINT8
   1056 insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
   1057 {
   1058     ANTLR3_UINT32	len;
   1059     ANTLR3_UINT32	count;
   1060     pANTLR3_UINT16	inPoint;
   1061 
   1062     if	(point >= string->len)
   1063     {
   1064 		return	string->append8(string, newbit);
   1065     }
   1066 
   1067     len	= (ANTLR3_UINT32)strlen(newbit);
   1068 
   1069     if	(len == 0)
   1070     {
   1071 		return	string->chars;
   1072     }
   1073 
   1074     if	(string->size < (string->len + len + 1))
   1075     {
   1076 	string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
   1077 	string->size	= string->len + len + 1;
   1078     }
   1079 
   1080     /* Move the characters we are inserting before, including the delimiter
   1081      */
   1082     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
   1083 
   1084     string->len += len;
   1085 
   1086     inPoint = ((pANTLR3_UINT16)(string->chars))+point;
   1087     for	(count = 0; count<len; count++)
   1088     {
   1089 		*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
   1090     }
   1091 
   1092     return  string->chars;
   1093 }
   1094 
   1095 static	pANTLR3_UINT8
   1096 insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
   1097 {
   1098     ANTLR3_UINT32	len;
   1099     pANTLR3_UINT16	in;
   1100 
   1101     if	(point >= string->len)
   1102     {
   1103 		return	string->append(string, newbit);
   1104     }
   1105 
   1106     /** First, determine the length of the input string
   1107      */
   1108     in	    = (pANTLR3_UINT16)newbit;
   1109     len	    = 0;
   1110 
   1111     while   (*in++ != '\0')
   1112     {
   1113 		len++;
   1114     }
   1115 
   1116     if	(len == 0)
   1117     {
   1118 		return	string->chars;
   1119     }
   1120 
   1121     if	(string->size < (string->len + len + 1))
   1122     {
   1123 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
   1124 		string->size	= string->len + len + 1;
   1125     }
   1126 
   1127     /* Move the characters we are inserting before, including the delimiter
   1128      */
   1129     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
   1130 
   1131 
   1132     /* Note we copy the exact number of characters
   1133      */
   1134     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
   1135 
   1136     string->len += len;
   1137 
   1138     return  string->chars;
   1139 }
   1140 
   1141 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
   1142 {
   1143     return  string->set(string, (const char *)(chars->chars));
   1144 }
   1145 
   1146 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
   1147 {
   1148     /* We may be passed an empty string, in which case we just return the current pointer
   1149      */
   1150     if	(newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
   1151     {
   1152 		return	string->chars;
   1153     }
   1154     else
   1155     {
   1156 		return  string->append(string, (const char *)(newbit->chars));
   1157     }
   1158 }
   1159 
   1160 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
   1161 {
   1162     return  string->insert(string, point, (const char *)(newbit->chars));
   1163 }
   1164 
   1165 /* Function that compares the text of a string to the supplied
   1166  * 8 bit character string and returns a result a la strcmp()
   1167  */
   1168 static ANTLR3_UINT32
   1169 compare8	(pANTLR3_STRING string, const char * compStr)
   1170 {
   1171     return  strcmp((const char *)(string->chars), compStr);
   1172 }
   1173 
   1174 /* Function that compares the text of a string with the supplied character string
   1175  * (which is assumed to be in the same encoding as the string itself) and returns a result
   1176  * a la strcmp()
   1177  */
   1178 static ANTLR3_UINT32
   1179 compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
   1180 {
   1181     pANTLR3_UINT16  ourString;
   1182     ANTLR3_UINT32   charDiff;
   1183 
   1184     ourString	= (pANTLR3_UINT16)(string->chars);
   1185 
   1186     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
   1187     {
   1188 		charDiff = *ourString - *compStr;
   1189 		if  (charDiff != 0)
   1190 		{
   1191 			return charDiff;
   1192 		}
   1193 		ourString++;
   1194 		compStr++;
   1195     }
   1196 
   1197     /* At this point, one of the strings was terminated
   1198      */
   1199     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
   1200 
   1201 }
   1202 
   1203 /* Function that compares the text of a string with the supplied character string
   1204  * (which is assumed to be in the same encoding as the string itself) and returns a result
   1205  * a la strcmp()
   1206  */
   1207 static ANTLR3_UINT32
   1208 compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
   1209 {
   1210     pANTLR3_UINT16  ourString;
   1211     pANTLR3_UINT16  compStr;
   1212     ANTLR3_UINT32   charDiff;
   1213 
   1214     ourString	= (pANTLR3_UINT16)(string->chars);
   1215     compStr	= (pANTLR3_UINT16)(compStr8);
   1216 
   1217     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
   1218     {
   1219 		charDiff = *ourString - *compStr;
   1220 		if  (charDiff != 0)
   1221 		{
   1222 			return charDiff;
   1223 		}
   1224 		ourString++;
   1225 		compStr++;
   1226     }
   1227 
   1228     /* At this point, one of the strings was terminated
   1229      */
   1230     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
   1231 }
   1232 
   1233 /* Function that compares the text of a string with the supplied string
   1234  * (which is assumed to be in the same encoding as the string itself) and returns a result
   1235  * a la strcmp()
   1236  */
   1237 static ANTLR3_UINT32
   1238 compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
   1239 {
   1240     return  string->compare(string, (const char *)compStr->chars);
   1241 }
   1242 
   1243 
   1244 /* Function that returns the character indexed at the supplied
   1245  * offset as a 32 bit character.
   1246  */
   1247 static ANTLR3_UCHAR
   1248 charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
   1249 {
   1250     if	(offset > string->len)
   1251     {
   1252 		return (ANTLR3_UCHAR)'\0';
   1253     }
   1254     else
   1255     {
   1256 		return  (ANTLR3_UCHAR)(*(string->chars + offset));
   1257     }
   1258 }
   1259 
   1260 /* Function that returns the character indexed at the supplied
   1261  * offset as a 32 bit character.
   1262  */
   1263 static ANTLR3_UCHAR
   1264 charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
   1265 {
   1266     if	(offset > string->len)
   1267     {
   1268 		return (ANTLR3_UCHAR)'\0';
   1269     }
   1270     else
   1271     {
   1272 		return  (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
   1273     }
   1274 }
   1275 
   1276 /* Function that returns a substring of the supplied string a la .subString(s,e)
   1277  * in java runtimes.
   1278  */
   1279 static pANTLR3_STRING
   1280 subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
   1281 {
   1282     pANTLR3_STRING newStr;
   1283 
   1284     if	(endIndex > string->len)
   1285     {
   1286 		endIndex = string->len + 1;
   1287     }
   1288     newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
   1289 
   1290     return newStr;
   1291 }
   1292 
   1293 /* Returns a substring of the supplied string a la .subString(s,e)
   1294  * in java runtimes.
   1295  */
   1296 static pANTLR3_STRING
   1297 subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
   1298 {
   1299     pANTLR3_STRING newStr;
   1300 
   1301     if	(endIndex > string->len)
   1302     {
   1303 		endIndex = string->len + 1;
   1304     }
   1305     newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
   1306 
   1307     return newStr;
   1308 }
   1309 
   1310 /* Function that can convert the characters in the string to an integer
   1311  */
   1312 static ANTLR3_INT32
   1313 toInt32_8	    (struct ANTLR3_STRING_struct * string)
   1314 {
   1315     return  atoi((const char *)(string->chars));
   1316 }
   1317 
   1318 /* Function that can convert the characters in the string to an integer
   1319  */
   1320 static ANTLR3_INT32
   1321 toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
   1322 {
   1323     pANTLR3_UINT16  input;
   1324     ANTLR3_INT32   value;
   1325     ANTLR3_BOOLEAN  negate;
   1326 
   1327     value   = 0;
   1328     input   = (pANTLR3_UINT16)(string->chars);
   1329     negate  = ANTLR3_FALSE;
   1330 
   1331     if	(*input == (ANTLR3_UCHAR)'-')
   1332     {
   1333 		negate = ANTLR3_TRUE;
   1334 		input++;
   1335     }
   1336     else if (*input == (ANTLR3_UCHAR)'+')
   1337     {
   1338 		input++;
   1339     }
   1340 
   1341     while   (*input != '\0' && isdigit(*input))
   1342     {
   1343 		value	 = value * 10;
   1344 		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
   1345 		input++;
   1346     }
   1347 
   1348     return negate ? -value : value;
   1349 }
   1350 
   1351 /* Function that returns a pointer to an 8 bit version of the string,
   1352  * which in this case is just the string as this is
   1353  * 8 bit encodiing anyway.
   1354  */
   1355 static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
   1356 {
   1357     return  string;
   1358 }
   1359 
   1360 /* Function that returns an 8 bit version of the string,
   1361  * which in this case is returning all the UTF16 characters
   1362  * narrowed back into 8 bits, with characters that are too large
   1363  * replaced with '_'
   1364  */
   1365 static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
   1366 {
   1367 	pANTLR3_STRING  newStr;
   1368 	ANTLR3_UINT32   i;
   1369 
   1370 	/* Create a new 8 bit string
   1371 	*/
   1372 	newStr  = newRaw8(string->factory);
   1373 
   1374 	if	(newStr == NULL)
   1375 	{
   1376 		return	NULL;
   1377 	}
   1378 
   1379 	/* Always add one more byte for a terminator
   1380 	*/
   1381 	newStr->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
   1382 	newStr->size    = string->len + 1;
   1383 	newStr->len	    = string->len;
   1384 
   1385 	/* Now copy each UTF16 charActer , making it an 8 bit character of
   1386 	* some sort.
   1387 	*/
   1388 	for	(i=0; i<string->len; i++)
   1389 	{
   1390 		ANTLR3_UCHAR	c;
   1391 
   1392 		c = *(((pANTLR3_UINT16)(string->chars)) + i);
   1393 
   1394 		*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
   1395 	}
   1396 
   1397 	/* Terminate
   1398 	*/
   1399 	*(newStr->chars + newStr->len) = '\0';
   1400 
   1401 	return newStr;
   1402 }
   1403