1 /** \file 2 * Implementation of the ANTLR3 string and string factory classes 3 */ 4 5 // [The "BSD licence"] 6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 7 // http://www.temporal-wave.com 8 // http://www.linkedin.com/in/jimidle 9 // 10 // All rights reserved. 11 // 12 // Redistribution and use in source and binary forms, with or without 13 // modification, are permitted provided that the following conditions 14 // are met: 15 // 1. Redistributions of source code must retain the above copyright 16 // notice, this list of conditions and the following disclaimer. 17 // 2. Redistributions in binary form must reproduce the above copyright 18 // notice, this list of conditions and the following disclaimer in the 19 // documentation and/or other materials provided with the distribution. 20 // 3. The name of the author may not be used to endorse or promote products 21 // derived from this software without specific prior written permission. 22 // 23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34 #include <antlr3string.h> 35 36 /* Factory API 37 */ 38 static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory); 39 static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory); 40 static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); 41 static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); 42 static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); 43 static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); 44 static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); 45 static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); 46 static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); 47 static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); 48 static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); 49 static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); 50 static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); 51 static void closeFactory(pANTLR3_STRING_FACTORY factory); 52 53 /* String API 54 */ 55 static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars); 56 static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars); 57 static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars); 58 static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit); 59 static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit); 60 static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit); 61 static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); 62 static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); 63 static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); 64 65 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars); 66 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit); 67 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit); 68 69 static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c); 70 static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c); 71 static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i); 72 static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i); 73 static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); 74 static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); 75 76 static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr); 77 static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr); 78 static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr); 79 static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr); 80 static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset); 81 static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset); 82 static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); 83 static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); 84 static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string); 85 static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string); 86 static pANTLR3_STRING to8_8 (pANTLR3_STRING string); 87 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string); 88 static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string); 89 static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string); 90 91 /* Local helpers 92 */ 93 static void stringInit8 (pANTLR3_STRING string); 94 static void stringInitUTF16 (pANTLR3_STRING string); 95 static void ANTLR3_CDECL stringFree (pANTLR3_STRING string); 96 97 ANTLR3_API pANTLR3_STRING_FACTORY 98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding) 99 { 100 pANTLR3_STRING_FACTORY factory; 101 102 /* Allocate memory 103 */ 104 factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY)); 105 106 if (factory == NULL) 107 { 108 return NULL; 109 } 110 111 /* Now we make a new list to track the strings. 112 */ 113 factory->strings = antlr3VectorNew(0); 114 factory->index = 0; 115 116 if (factory->strings == NULL) 117 { 118 ANTLR3_FREE(factory); 119 return NULL; 120 } 121 122 // Install the API 123 // 124 // TODO: These encodings need equivalent functions to 125 // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff. 126 // The STRING stuff was intended as a quick and dirty hack for people that did not 127 // want to worry about memory and performance very much, but nobody ever reads the 128 // notes or comments or uses the email list search. I want to discourage using these 129 // interfaces as it is much more efficient to use the pointers within the tokens 130 // directly, so I am not implementing the string stuff for the newer encodings. 131 // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they 132 // will not be useful beyond returning the text. 133 // 134 switch(encoding) 135 { 136 case ANTLR3_ENC_UTF32: 137 break; 138 139 case ANTLR3_ENC_UTF32BE: 140 break; 141 142 case ANTLR3_ENC_UTF32LE: 143 break; 144 145 case ANTLR3_ENC_UTF16BE: 146 case ANTLR3_ENC_UTF16LE: 147 case ANTLR3_ENC_UTF16: 148 149 factory->newRaw = newRawUTF16; 150 factory->newSize = newSizeUTF16; 151 factory->newPtr = newPtrUTF16_UTF16; 152 factory->newPtr8 = newPtrUTF16_8; 153 factory->newStr = newStrUTF16_UTF16; 154 factory->newStr8 = newStrUTF16_8; 155 factory->printable = printableUTF16; 156 factory->destroy = destroy; 157 factory->close = closeFactory; 158 break; 159 160 case ANTLR3_ENC_UTF8: 161 case ANTLR3_ENC_EBCDIC: 162 case ANTLR3_ENC_8BIT: 163 default: 164 165 factory->newRaw = newRaw8; 166 factory->newSize = newSize8; 167 factory->newPtr = newPtr8; 168 factory->newPtr8 = newPtr8; 169 factory->newStr = newStr8; 170 factory->newStr8 = newStr8; 171 factory->printable = printable8; 172 factory->destroy = destroy; 173 factory->close = closeFactory; 174 break; 175 } 176 return factory; 177 } 178 179 180 /** 181 * 182 * \param factory 183 * \return 184 */ 185 static pANTLR3_STRING 186 newRaw8 (pANTLR3_STRING_FACTORY factory) 187 { 188 pANTLR3_STRING string; 189 190 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); 191 192 if (string == NULL) 193 { 194 return NULL; 195 } 196 197 /* Structure is allocated, now fill in the API etc. 198 */ 199 stringInit8(string); 200 string->factory = factory; 201 202 /* Add the string into the allocated list 203 */ 204 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); 205 string->index = factory->index++; 206 207 return string; 208 } 209 /** 210 * 211 * \param factory 212 * \return 213 */ 214 static pANTLR3_STRING 215 newRawUTF16 (pANTLR3_STRING_FACTORY factory) 216 { 217 pANTLR3_STRING string; 218 219 string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); 220 221 if (string == NULL) 222 { 223 return NULL; 224 } 225 226 /* Structure is allocated, now fill in the API etc. 227 */ 228 stringInitUTF16(string); 229 string->factory = factory; 230 231 /* Add the string into the allocated list 232 */ 233 factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); 234 string->index = factory->index++; 235 236 return string; 237 } 238 static 239 void ANTLR3_CDECL stringFree (pANTLR3_STRING string) 240 { 241 /* First free the string itself if there was anything in it 242 */ 243 if (string->chars) 244 { 245 ANTLR3_FREE(string->chars); 246 } 247 248 /* Now free the space for this string 249 */ 250 ANTLR3_FREE(string); 251 252 return; 253 } 254 /** 255 * 256 * \param string 257 * \return 258 */ 259 static void 260 stringInit8 (pANTLR3_STRING string) 261 { 262 string->len = 0; 263 string->size = 0; 264 string->chars = NULL; 265 string->encoding = ANTLR3_ENC_8BIT ; 266 267 /* API for 8 bit strings*/ 268 269 string->set = set8; 270 string->set8 = set8; 271 string->append = append8; 272 string->append8 = append8; 273 string->insert = insert8; 274 string->insert8 = insert8; 275 string->addi = addi8; 276 string->inserti = inserti8; 277 string->addc = addc8; 278 string->charAt = charAt8; 279 string->compare = compare8; 280 string->compare8 = compare8; 281 string->subString = subString8; 282 string->toInt32 = toInt32_8; 283 string->to8 = to8_8; 284 string->toUTF8 = toUTF8_8; 285 string->compareS = compareS; 286 string->setS = setS; 287 string->appendS = appendS; 288 string->insertS = insertS; 289 290 } 291 /** 292 * 293 * \param string 294 * \return 295 */ 296 static void 297 stringInitUTF16 (pANTLR3_STRING string) 298 { 299 string->len = 0; 300 string->size = 0; 301 string->chars = NULL; 302 string->encoding = ANTLR3_ENC_8BIT; 303 304 /* API for UTF16 strings */ 305 306 string->set = setUTF16_UTF16; 307 string->set8 = setUTF16_8; 308 string->append = appendUTF16_UTF16; 309 string->append8 = appendUTF16_8; 310 string->insert = insertUTF16_UTF16; 311 string->insert8 = insertUTF16_8; 312 string->addi = addiUTF16; 313 string->inserti = insertiUTF16; 314 string->addc = addcUTF16; 315 string->charAt = charAtUTF16; 316 string->compare = compareUTF16_UTF16; 317 string->compare8 = compareUTF16_8; 318 string->subString = subStringUTF16; 319 string->toInt32 = toInt32_UTF16; 320 string->to8 = to8_UTF16; 321 string->toUTF8 = toUTF8_UTF16; 322 323 string->compareS = compareS; 324 string->setS = setS; 325 string->appendS = appendS; 326 string->insertS = insertS; 327 } 328 /** 329 * 330 * \param string 331 * \return 332 * TODO: Implement UTF-8 333 */ 334 static void 335 stringInitUTF8 (pANTLR3_STRING string) 336 { 337 string->len = 0; 338 string->size = 0; 339 string->chars = NULL; 340 341 /* API */ 342 343 } 344 345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself 346 // a memcpy as we make no assumptions about the 8 bit encoding. 347 // 348 static pANTLR3_STRING 349 toUTF8_8 (pANTLR3_STRING string) 350 { 351 return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len); 352 } 353 354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org 355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime 356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h 357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter. 358 // 359 static pANTLR3_STRING 360 toUTF8_UTF16 (pANTLR3_STRING string) 361 { 362 363 UTF8 * outputEnd; 364 UTF16 * inputEnd; 365 pANTLR3_STRING utf8String; 366 367 ConversionResult cResult; 368 369 // Allocate the output buffer, which needs to accommodate potentially 370 // 3X (in bytes) the input size (in chars). 371 // 372 utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)""); 373 374 if (utf8String != NULL) 375 { 376 // Free existing allocation 377 // 378 ANTLR3_FREE(utf8String->chars); 379 380 // Reallocate according to maximum expected size 381 // 382 utf8String->size = string->len *3; 383 utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1); 384 385 if (utf8String->chars != NULL) 386 { 387 inputEnd = (UTF16 *) (string->chars); 388 outputEnd = (UTF8 *) (utf8String->chars); 389 390 // Call the Unicode converter 391 // 392 cResult = ConvertUTF16toUTF8 393 ( 394 (const UTF16**)&inputEnd, 395 ((const UTF16 *)(string->chars)) + string->len, 396 &outputEnd, 397 outputEnd + utf8String->size - 1, 398 lenientConversion 399 ); 400 401 // We don't really care if things failed or not here, we just converted 402 // everything that was vaguely possible and stopped when it wasn't. It is 403 // up to the grammar programmer to verify that the input is sensible. 404 // 405 utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars); 406 407 *(outputEnd+1) = '\0'; // Always null terminate 408 } 409 } 410 return utf8String; 411 } 412 413 /** 414 * Creates a new string with enough capacity for size 8 bit characters plus a terminator. 415 * 416 * \param[in] factory - Pointer to the string factory that owns strings 417 * \param[in] size - In characters 418 * \return pointer to the new string. 419 */ 420 static pANTLR3_STRING 421 newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) 422 { 423 pANTLR3_STRING string; 424 425 string = factory->newRaw(factory); 426 427 if (string == NULL) 428 { 429 return string; 430 } 431 432 /* Always add one more byte for a terminator ;-) 433 */ 434 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1))); 435 *(string->chars) = '\0'; 436 string->size = size + 1; 437 438 439 return string; 440 } 441 /** 442 * Creates a new string with enough capacity for size UTF16 characters plus a terminator. 443 * 444 * \param[in] factory - Pointer to the string factory that owns strings 445 * \param[in] size - In characters (count double for surrogate pairs!!!) 446 * \return pointer to the new string. 447 */ 448 static pANTLR3_STRING 449 newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) 450 { 451 pANTLR3_STRING string; 452 453 string = factory->newRaw(factory); 454 455 if (string == NULL) 456 { 457 return string; 458 } 459 460 /* Always add one more byte for a terminator ;-) 461 */ 462 string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1))); 463 *(string->chars) = '\0'; 464 string->size = size+1; /* Size is always in characters, as is len */ 465 466 return string; 467 } 468 469 /** Creates a new 8 bit string initialized with the 8 bit characters at the 470 * supplied ptr, of pre-determined size. 471 * \param[in] factory - Pointer to the string factory that owns the strings 472 * \param[in] ptr - Pointer to 8 bit encoded characters 473 * \return pointer to the new string 474 */ 475 static pANTLR3_STRING 476 newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) 477 { 478 pANTLR3_STRING string; 479 480 string = factory->newSize(factory, size); 481 482 if (string == NULL) 483 { 484 return NULL; 485 } 486 487 if (size <= 0) 488 { 489 return string; 490 } 491 492 if (ptr != NULL) 493 { 494 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size); 495 *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */ 496 string->len = size; 497 } 498 499 return string; 500 } 501 502 /** Creates a new UTF16 string initialized with the 8 bit characters at the 503 * supplied 8 bit character ptr, of pre-determined size. 504 * \param[in] factory - Pointer to the string factory that owns the strings 505 * \param[in] ptr - Pointer to 8 bit encoded characters 506 * \return pointer to the new string 507 */ 508 static pANTLR3_STRING 509 newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) 510 { 511 pANTLR3_STRING string; 512 513 /* newSize accepts size in characters, not bytes 514 */ 515 string = factory->newSize(factory, size); 516 517 if (string == NULL) 518 { 519 return NULL; 520 } 521 522 if (size <= 0) 523 { 524 return string; 525 } 526 527 if (ptr != NULL) 528 { 529 pANTLR3_UINT16 out; 530 ANTLR3_INT32 inSize; 531 532 out = (pANTLR3_UINT16)(string->chars); 533 inSize = size; 534 535 while (inSize-- > 0) 536 { 537 *out++ = (ANTLR3_UINT16)(*ptr++); 538 } 539 540 /* Terminate, these strings are usually used for Token streams and printing etc. 541 */ 542 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; 543 544 string->len = size; 545 } 546 547 return string; 548 } 549 550 /** Creates a new UTF16 string initialized with the UTF16 characters at the 551 * supplied ptr, of pre-determined size. 552 * \param[in] factory - Pointer to the string factory that owns the strings 553 * \param[in] ptr - Pointer to UTF16 encoded characters 554 * \return pointer to the new string 555 */ 556 static pANTLR3_STRING 557 newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) 558 { 559 pANTLR3_STRING string; 560 561 string = factory->newSize(factory, size); 562 563 if (string == NULL) 564 { 565 return NULL; 566 } 567 568 if (size <= 0) 569 { 570 return string; 571 } 572 573 if (ptr != NULL) 574 { 575 ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16))); 576 577 /* Terminate, these strings are usually used for Token streams and printing etc. 578 */ 579 *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; 580 string->len = size; 581 } 582 583 return string; 584 } 585 586 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer. 587 * \param[in] factory - Pointer to the string factory that owns strings. 588 * \param[in] ptr - Pointer to the 8 bit encoded string 589 * \return Pointer to the newly initialized string 590 */ 591 static pANTLR3_STRING 592 newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) 593 { 594 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); 595 } 596 597 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer. 598 * \param[in] factory - Pointer to the string factory that owns strings. 599 * \param[in] ptr - Pointer to the 8 bit encoded string 600 * \return POinter to the newly initialized string 601 */ 602 static pANTLR3_STRING 603 newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) 604 { 605 return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); 606 } 607 608 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer. 609 * \param[in] factory - Pointer to the string factory that owns strings. 610 * \param[in] ptr - Pointer to the UTF16 encoded string 611 * \return Pointer to the newly initialized string 612 */ 613 static pANTLR3_STRING 614 newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) 615 { 616 pANTLR3_UINT16 in; 617 ANTLR3_UINT32 count; 618 619 /** First, determine the length of the input string 620 */ 621 in = (pANTLR3_UINT16)ptr; 622 count = 0; 623 624 while (*in++ != '\0') 625 { 626 count++; 627 } 628 return factory->newPtr(factory, ptr, count); 629 } 630 631 static void 632 destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string) 633 { 634 // Record which string we are deleting 635 // 636 ANTLR3_UINT32 strIndex = string->index; 637 638 // Ensure that the string was not factory made, or we would try 639 // to delete memory that wasn't allocated outside the factory 640 // block. 641 // Remove the specific indexed string from the vector 642 // 643 factory->strings->del(factory->strings, strIndex); 644 645 // One less string in the vector, so decrement the factory index 646 // so that the next string allocated is indexed correctly with 647 // respect to the vector. 648 // 649 factory->index--; 650 651 // Now we have to reindex the strings in the vector that followed 652 // the one we just deleted. We only do this if the one we just deleted 653 // was not the last one. 654 // 655 if (strIndex< factory->index) 656 { 657 // We must reindex the strings after the one we just deleted. 658 // The one that follows the one we just deleted is also out 659 // of whack, so we start there. 660 // 661 ANTLR3_UINT32 i; 662 663 for (i = strIndex; i < factory->index; i++) 664 { 665 // Renumber the entry 666 // 667 ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i; 668 } 669 } 670 671 // The string has been destroyed and the elements of the factory are reindexed. 672 // 673 674 } 675 676 static pANTLR3_STRING 677 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) 678 { 679 pANTLR3_STRING string; 680 681 /* We don't need to be too efficient here, this is mostly for error messages and so on. 682 */ 683 pANTLR3_UINT8 scannedText; 684 ANTLR3_UINT32 i; 685 686 /* Assume we need as much as twice as much space to parse out the control characters 687 */ 688 string = factory->newSize(factory, instr->len *2 + 1); 689 690 /* Scan through and replace unprintable (in terms of this routine) 691 * characters 692 */ 693 scannedText = string->chars; 694 695 for (i = 0; i < instr->len; i++) 696 { 697 if (*(instr->chars + i) == '\n') 698 { 699 *scannedText++ = '\\'; 700 *scannedText++ = 'n'; 701 } 702 else if (*(instr->chars + i) == '\r') 703 { 704 *scannedText++ = '\\'; 705 *scannedText++ = 'r'; 706 } 707 else if (!isprint(*(instr->chars +i))) 708 { 709 *scannedText++ = '?'; 710 } 711 else 712 { 713 *scannedText++ = *(instr->chars + i); 714 } 715 } 716 *scannedText = '\0'; 717 718 string->len = (ANTLR3_UINT32)(scannedText - string->chars); 719 720 return string; 721 } 722 723 static pANTLR3_STRING 724 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) 725 { 726 pANTLR3_STRING string; 727 728 /* We don't need to be too efficient here, this is mostly for error messages and so on. 729 */ 730 pANTLR3_UINT16 scannedText; 731 pANTLR3_UINT16 inText; 732 ANTLR3_UINT32 i; 733 ANTLR3_UINT32 outLen; 734 735 /* Assume we need as much as twice as much space to parse out the control characters 736 */ 737 string = factory->newSize(factory, instr->len *2 + 1); 738 739 /* Scan through and replace unprintable (in terms of this routine) 740 * characters 741 */ 742 scannedText = (pANTLR3_UINT16)(string->chars); 743 inText = (pANTLR3_UINT16)(instr->chars); 744 outLen = 0; 745 746 for (i = 0; i < instr->len; i++) 747 { 748 if (*(inText + i) == '\n') 749 { 750 *scannedText++ = '\\'; 751 *scannedText++ = 'n'; 752 outLen += 2; 753 } 754 else if (*(inText + i) == '\r') 755 { 756 *scannedText++ = '\\'; 757 *scannedText++ = 'r'; 758 outLen += 2; 759 } 760 else if (!isprint(*(inText +i))) 761 { 762 *scannedText++ = '?'; 763 outLen++; 764 } 765 else 766 { 767 *scannedText++ = *(inText + i); 768 outLen++; 769 } 770 } 771 *scannedText = '\0'; 772 773 string->len = outLen; 774 775 return string; 776 } 777 778 /** Fascist Capitalist Pig function created 779 * to oppress the workers comrade. 780 */ 781 static void 782 closeFactory (pANTLR3_STRING_FACTORY factory) 783 { 784 /* Delete the vector we were tracking the strings with, this will 785 * causes all the allocated strings to be deallocated too 786 */ 787 factory->strings->free(factory->strings); 788 789 /* Delete the space for the factory itself 790 */ 791 ANTLR3_FREE((void *)factory); 792 } 793 794 static pANTLR3_UINT8 795 append8 (pANTLR3_STRING string, const char * newbit) 796 { 797 ANTLR3_UINT32 len; 798 799 len = (ANTLR3_UINT32)strlen(newbit); 800 801 if (string->size < (string->len + len + 1)) 802 { 803 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); 804 string->size = string->len + len + 1; 805 } 806 807 /* Note we copy one more byte than the strlen in order to get the trailing 808 */ 809 ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1)); 810 string->len += len; 811 812 return string->chars; 813 } 814 815 static pANTLR3_UINT8 816 appendUTF16_8 (pANTLR3_STRING string, const char * newbit) 817 { 818 ANTLR3_UINT32 len; 819 pANTLR3_UINT16 apPoint; 820 ANTLR3_UINT32 count; 821 822 len = (ANTLR3_UINT32)strlen(newbit); 823 824 if (string->size < (string->len + len + 1)) 825 { 826 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1)))); 827 string->size = string->len + len + 1; 828 } 829 830 apPoint = ((pANTLR3_UINT16)string->chars) + string->len; 831 string->len += len; 832 833 for (count = 0; count < len; count++) 834 { 835 *apPoint++ = *(newbit + count); 836 } 837 *apPoint = '\0'; 838 839 return string->chars; 840 } 841 842 static pANTLR3_UINT8 843 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit) 844 { 845 ANTLR3_UINT32 len; 846 pANTLR3_UINT16 in; 847 848 /** First, determine the length of the input string 849 */ 850 in = (pANTLR3_UINT16)newbit; 851 len = 0; 852 853 while (*in++ != '\0') 854 { 855 len++; 856 } 857 858 if (string->size < (string->len + len + 1)) 859 { 860 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) )); 861 string->size = string->len + len + 1; 862 } 863 864 /* Note we copy one more byte than the strlen in order to get the trailing delimiter 865 */ 866 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1))); 867 string->len += len; 868 869 return string->chars; 870 } 871 872 static pANTLR3_UINT8 873 set8 (pANTLR3_STRING string, const char * chars) 874 { 875 ANTLR3_UINT32 len; 876 877 len = (ANTLR3_UINT32)strlen(chars); 878 if (string->size < len + 1) 879 { 880 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1)); 881 string->size = len + 1; 882 } 883 884 /* Note we copy one more byte than the strlen in order to get the trailing '\0' 885 */ 886 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1)); 887 string->len = len; 888 889 return string->chars; 890 891 } 892 893 static pANTLR3_UINT8 894 setUTF16_8 (pANTLR3_STRING string, const char * chars) 895 { 896 ANTLR3_UINT32 len; 897 ANTLR3_UINT32 count; 898 pANTLR3_UINT16 apPoint; 899 900 len = (ANTLR3_UINT32)strlen(chars); 901 if (string->size < len + 1) 902 { 903 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); 904 string->size = len + 1; 905 } 906 apPoint = ((pANTLR3_UINT16)string->chars); 907 string->len = len; 908 909 for (count = 0; count < string->len; count++) 910 { 911 *apPoint++ = *(chars + count); 912 } 913 *apPoint = '\0'; 914 915 return string->chars; 916 } 917 918 static pANTLR3_UINT8 919 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars) 920 { 921 ANTLR3_UINT32 len; 922 pANTLR3_UINT16 in; 923 924 /** First, determine the length of the input string 925 */ 926 in = (pANTLR3_UINT16)chars; 927 len = 0; 928 929 while (*in++ != '\0') 930 { 931 len++; 932 } 933 934 if (string->size < len + 1) 935 { 936 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); 937 string->size = len + 1; 938 } 939 940 /* Note we copy one more byte than the strlen in order to get the trailing '\0' 941 */ 942 ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16))); 943 string->len = len; 944 945 return string->chars; 946 947 } 948 949 static pANTLR3_UINT8 950 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c) 951 { 952 if (string->size < string->len + 2) 953 { 954 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2)); 955 string->size = string->len + 2; 956 } 957 *(string->chars + string->len) = (ANTLR3_UINT8)c; 958 *(string->chars + string->len + 1) = '\0'; 959 string->len++; 960 961 return string->chars; 962 } 963 964 static pANTLR3_UINT8 965 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c) 966 { 967 pANTLR3_UINT16 ptr; 968 969 if (string->size < string->len + 2) 970 { 971 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2))); 972 string->size = string->len + 2; 973 } 974 ptr = (pANTLR3_UINT16)(string->chars); 975 976 *(ptr + string->len) = (ANTLR3_UINT16)c; 977 *(ptr + string->len + 1) = '\0'; 978 string->len++; 979 980 return string->chars; 981 } 982 983 static pANTLR3_UINT8 984 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i) 985 { 986 ANTLR3_UINT8 newbit[32]; 987 988 sprintf((char *)newbit, "%d", i); 989 990 return string->append8(string, (const char *)newbit); 991 } 992 static pANTLR3_UINT8 993 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i) 994 { 995 ANTLR3_UINT8 newbit[32]; 996 997 sprintf((char *)newbit, "%d", i); 998 999 return string->append8(string, (const char *)newbit); 1000 } 1001 1002 static pANTLR3_UINT8 1003 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) 1004 { 1005 ANTLR3_UINT8 newbit[32]; 1006 1007 sprintf((char *)newbit, "%d", i); 1008 return string->insert8(string, point, (const char *)newbit); 1009 } 1010 static pANTLR3_UINT8 1011 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) 1012 { 1013 ANTLR3_UINT8 newbit[32]; 1014 1015 sprintf((char *)newbit, "%d", i); 1016 return string->insert8(string, point, (const char *)newbit); 1017 } 1018 1019 static pANTLR3_UINT8 1020 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) 1021 { 1022 ANTLR3_UINT32 len; 1023 1024 if (point >= string->len) 1025 { 1026 return string->append(string, newbit); 1027 } 1028 1029 len = (ANTLR3_UINT32)strlen(newbit); 1030 1031 if (len == 0) 1032 { 1033 return string->chars; 1034 } 1035 1036 if (string->size < (string->len + len + 1)) 1037 { 1038 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); 1039 string->size = string->len + len + 1; 1040 } 1041 1042 /* Move the characters we are inserting before, including the delimiter 1043 */ 1044 ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1)); 1045 1046 /* Note we copy the exact number of bytes 1047 */ 1048 ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len)); 1049 1050 string->len += len; 1051 1052 return string->chars; 1053 } 1054 1055 static pANTLR3_UINT8 1056 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) 1057 { 1058 ANTLR3_UINT32 len; 1059 ANTLR3_UINT32 count; 1060 pANTLR3_UINT16 inPoint; 1061 1062 if (point >= string->len) 1063 { 1064 return string->append8(string, newbit); 1065 } 1066 1067 len = (ANTLR3_UINT32)strlen(newbit); 1068 1069 if (len == 0) 1070 { 1071 return string->chars; 1072 } 1073 1074 if (string->size < (string->len + len + 1)) 1075 { 1076 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); 1077 string->size = string->len + len + 1; 1078 } 1079 1080 /* Move the characters we are inserting before, including the delimiter 1081 */ 1082 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); 1083 1084 string->len += len; 1085 1086 inPoint = ((pANTLR3_UINT16)(string->chars))+point; 1087 for (count = 0; count<len; count++) 1088 { 1089 *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count)); 1090 } 1091 1092 return string->chars; 1093 } 1094 1095 static pANTLR3_UINT8 1096 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) 1097 { 1098 ANTLR3_UINT32 len; 1099 pANTLR3_UINT16 in; 1100 1101 if (point >= string->len) 1102 { 1103 return string->append(string, newbit); 1104 } 1105 1106 /** First, determine the length of the input string 1107 */ 1108 in = (pANTLR3_UINT16)newbit; 1109 len = 0; 1110 1111 while (*in++ != '\0') 1112 { 1113 len++; 1114 } 1115 1116 if (len == 0) 1117 { 1118 return string->chars; 1119 } 1120 1121 if (string->size < (string->len + len + 1)) 1122 { 1123 string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); 1124 string->size = string->len + len + 1; 1125 } 1126 1127 /* Move the characters we are inserting before, including the delimiter 1128 */ 1129 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); 1130 1131 1132 /* Note we copy the exact number of characters 1133 */ 1134 ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len))); 1135 1136 string->len += len; 1137 1138 return string->chars; 1139 } 1140 1141 static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars) 1142 { 1143 return string->set(string, (const char *)(chars->chars)); 1144 } 1145 1146 static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit) 1147 { 1148 /* We may be passed an empty string, in which case we just return the current pointer 1149 */ 1150 if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL) 1151 { 1152 return string->chars; 1153 } 1154 else 1155 { 1156 return string->append(string, (const char *)(newbit->chars)); 1157 } 1158 } 1159 1160 static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit) 1161 { 1162 return string->insert(string, point, (const char *)(newbit->chars)); 1163 } 1164 1165 /* Function that compares the text of a string to the supplied 1166 * 8 bit character string and returns a result a la strcmp() 1167 */ 1168 static ANTLR3_UINT32 1169 compare8 (pANTLR3_STRING string, const char * compStr) 1170 { 1171 return strcmp((const char *)(string->chars), compStr); 1172 } 1173 1174 /* Function that compares the text of a string with the supplied character string 1175 * (which is assumed to be in the same encoding as the string itself) and returns a result 1176 * a la strcmp() 1177 */ 1178 static ANTLR3_UINT32 1179 compareUTF16_8 (pANTLR3_STRING string, const char * compStr) 1180 { 1181 pANTLR3_UINT16 ourString; 1182 ANTLR3_UINT32 charDiff; 1183 1184 ourString = (pANTLR3_UINT16)(string->chars); 1185 1186 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0')) 1187 { 1188 charDiff = *ourString - *compStr; 1189 if (charDiff != 0) 1190 { 1191 return charDiff; 1192 } 1193 ourString++; 1194 compStr++; 1195 } 1196 1197 /* At this point, one of the strings was terminated 1198 */ 1199 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); 1200 1201 } 1202 1203 /* Function that compares the text of a string with the supplied character string 1204 * (which is assumed to be in the same encoding as the string itself) and returns a result 1205 * a la strcmp() 1206 */ 1207 static ANTLR3_UINT32 1208 compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8) 1209 { 1210 pANTLR3_UINT16 ourString; 1211 pANTLR3_UINT16 compStr; 1212 ANTLR3_UINT32 charDiff; 1213 1214 ourString = (pANTLR3_UINT16)(string->chars); 1215 compStr = (pANTLR3_UINT16)(compStr8); 1216 1217 while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0')) 1218 { 1219 charDiff = *ourString - *compStr; 1220 if (charDiff != 0) 1221 { 1222 return charDiff; 1223 } 1224 ourString++; 1225 compStr++; 1226 } 1227 1228 /* At this point, one of the strings was terminated 1229 */ 1230 return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); 1231 } 1232 1233 /* Function that compares the text of a string with the supplied string 1234 * (which is assumed to be in the same encoding as the string itself) and returns a result 1235 * a la strcmp() 1236 */ 1237 static ANTLR3_UINT32 1238 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr) 1239 { 1240 return string->compare(string, (const char *)compStr->chars); 1241 } 1242 1243 1244 /* Function that returns the character indexed at the supplied 1245 * offset as a 32 bit character. 1246 */ 1247 static ANTLR3_UCHAR 1248 charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset) 1249 { 1250 if (offset > string->len) 1251 { 1252 return (ANTLR3_UCHAR)'\0'; 1253 } 1254 else 1255 { 1256 return (ANTLR3_UCHAR)(*(string->chars + offset)); 1257 } 1258 } 1259 1260 /* Function that returns the character indexed at the supplied 1261 * offset as a 32 bit character. 1262 */ 1263 static ANTLR3_UCHAR 1264 charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset) 1265 { 1266 if (offset > string->len) 1267 { 1268 return (ANTLR3_UCHAR)'\0'; 1269 } 1270 else 1271 { 1272 return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset)); 1273 } 1274 } 1275 1276 /* Function that returns a substring of the supplied string a la .subString(s,e) 1277 * in java runtimes. 1278 */ 1279 static pANTLR3_STRING 1280 subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) 1281 { 1282 pANTLR3_STRING newStr; 1283 1284 if (endIndex > string->len) 1285 { 1286 endIndex = string->len + 1; 1287 } 1288 newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex); 1289 1290 return newStr; 1291 } 1292 1293 /* Returns a substring of the supplied string a la .subString(s,e) 1294 * in java runtimes. 1295 */ 1296 static pANTLR3_STRING 1297 subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) 1298 { 1299 pANTLR3_STRING newStr; 1300 1301 if (endIndex > string->len) 1302 { 1303 endIndex = string->len + 1; 1304 } 1305 newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex); 1306 1307 return newStr; 1308 } 1309 1310 /* Function that can convert the characters in the string to an integer 1311 */ 1312 static ANTLR3_INT32 1313 toInt32_8 (struct ANTLR3_STRING_struct * string) 1314 { 1315 return atoi((const char *)(string->chars)); 1316 } 1317 1318 /* Function that can convert the characters in the string to an integer 1319 */ 1320 static ANTLR3_INT32 1321 toInt32_UTF16 (struct ANTLR3_STRING_struct * string) 1322 { 1323 pANTLR3_UINT16 input; 1324 ANTLR3_INT32 value; 1325 ANTLR3_BOOLEAN negate; 1326 1327 value = 0; 1328 input = (pANTLR3_UINT16)(string->chars); 1329 negate = ANTLR3_FALSE; 1330 1331 if (*input == (ANTLR3_UCHAR)'-') 1332 { 1333 negate = ANTLR3_TRUE; 1334 input++; 1335 } 1336 else if (*input == (ANTLR3_UCHAR)'+') 1337 { 1338 input++; 1339 } 1340 1341 while (*input != '\0' && isdigit(*input)) 1342 { 1343 value = value * 10; 1344 value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0'); 1345 input++; 1346 } 1347 1348 return negate ? -value : value; 1349 } 1350 1351 /* Function that returns a pointer to an 8 bit version of the string, 1352 * which in this case is just the string as this is 1353 * 8 bit encodiing anyway. 1354 */ 1355 static pANTLR3_STRING to8_8 (pANTLR3_STRING string) 1356 { 1357 return string; 1358 } 1359 1360 /* Function that returns an 8 bit version of the string, 1361 * which in this case is returning all the UTF16 characters 1362 * narrowed back into 8 bits, with characters that are too large 1363 * replaced with '_' 1364 */ 1365 static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string) 1366 { 1367 pANTLR3_STRING newStr; 1368 ANTLR3_UINT32 i; 1369 1370 /* Create a new 8 bit string 1371 */ 1372 newStr = newRaw8(string->factory); 1373 1374 if (newStr == NULL) 1375 { 1376 return NULL; 1377 } 1378 1379 /* Always add one more byte for a terminator 1380 */ 1381 newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1)); 1382 newStr->size = string->len + 1; 1383 newStr->len = string->len; 1384 1385 /* Now copy each UTF16 charActer , making it an 8 bit character of 1386 * some sort. 1387 */ 1388 for (i=0; i<string->len; i++) 1389 { 1390 ANTLR3_UCHAR c; 1391 1392 c = *(((pANTLR3_UINT16)(string->chars)) + i); 1393 1394 *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c); 1395 } 1396 1397 /* Terminate 1398 */ 1399 *(newStr->chars + newStr->len) = '\0'; 1400 1401 return newStr; 1402 } 1403