1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2008, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * ucnv.c: 10 * Implements APIs for the ICU's codeset conversion library; 11 * mostly calls through internal functions; 12 * created by Bertrand A. Damiba 13 * 14 * Modification History: 15 * 16 * Date Name Description 17 * 04/04/99 helena Fixed internal header inclusion. 18 * 05/09/00 helena Added implementation to handle fallback mappings. 19 * 06/20/2000 helena OS/400 port changes; mostly typecast. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/ustring.h" 27 #include "unicode/ucnv.h" 28 #include "unicode/ucnv_err.h" 29 #include "unicode/uset.h" 30 #include "putilimp.h" 31 #include "cmemory.h" 32 #include "cstring.h" 33 #include "uassert.h" 34 #include "utracimp.h" 35 #include "ustr_imp.h" 36 #include "ucnv_imp.h" 37 #include "ucnv_cnv.h" 38 #include "ucnv_bld.h" 39 40 /* size of intermediate and preflighting buffers in ucnv_convert() */ 41 #define CHUNK_SIZE 1024 42 43 typedef struct UAmbiguousConverter { 44 const char *name; 45 const UChar variant5c; 46 } UAmbiguousConverter; 47 48 static const UAmbiguousConverter ambiguousConverters[]={ 49 { "ibm-897_P100-1995", 0xa5 }, 50 { "ibm-942_P120-1999", 0xa5 }, 51 { "ibm-943_P130-1999", 0xa5 }, 52 { "ibm-946_P100-1995", 0xa5 }, 53 { "ibm-33722_P120-1999", 0xa5 }, 54 /*{ "ibm-54191_P100-2006", 0xa5 },*/ 55 /*{ "ibm-62383_P100-2007", 0xa5 },*/ 56 /*{ "ibm-891_P100-1995", 0x20a9 },*/ 57 { "ibm-944_P100-1995", 0x20a9 }, 58 { "ibm-949_P110-1999", 0x20a9 }, 59 { "ibm-1363_P110-1997", 0x20a9 }, 60 { "ISO_2022,locale=ko,version=0", 0x20a9 } 61 }; 62 63 /*Calls through createConverter */ 64 U_CAPI UConverter* U_EXPORT2 65 ucnv_open (const char *name, 66 UErrorCode * err) 67 { 68 UConverter *r; 
69 70 if (err == NULL || U_FAILURE (*err)) { 71 return NULL; 72 } 73 74 r = ucnv_createConverter(NULL, name, err); 75 return r; 76 } 77 78 U_CAPI UConverter* U_EXPORT2 79 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) 80 { 81 return ucnv_createConverterFromPackage(packageName, converterName, err); 82 } 83 84 /*Extracts the UChar* to a char* and calls through createConverter */ 85 U_CAPI UConverter* U_EXPORT2 86 ucnv_openU (const UChar * name, 87 UErrorCode * err) 88 { 89 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 90 91 if (err == NULL || U_FAILURE(*err)) 92 return NULL; 93 if (name == NULL) 94 return ucnv_open (NULL, err); 95 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) 96 { 97 *err = U_ILLEGAL_ARGUMENT_ERROR; 98 return NULL; 99 } 100 return ucnv_open(u_austrcpy(asciiName, name), err); 101 } 102 103 /* Copy the string that is represented by the UConverterPlatform enum 104 * @param platformString An output buffer 105 * @param platform An enum representing a platform 106 * @return the length of the copied string. 
107 */ 108 static int32_t 109 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) 110 { 111 switch (pltfrm) 112 { 113 case UCNV_IBM: 114 uprv_strcpy(platformString, "ibm-"); 115 return 4; 116 case UCNV_UNKNOWN: 117 break; 118 } 119 120 /* default to empty string */ 121 *platformString = 0; 122 return 0; 123 } 124 125 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls 126 *through createConverter*/ 127 U_CAPI UConverter* U_EXPORT2 128 ucnv_openCCSID (int32_t codepage, 129 UConverterPlatform platform, 130 UErrorCode * err) 131 { 132 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 133 int32_t myNameLen; 134 135 if (err == NULL || U_FAILURE (*err)) 136 return NULL; 137 138 /* ucnv_copyPlatformString could return "ibm-" or "cp" */ 139 myNameLen = ucnv_copyPlatformString(myName, platform); 140 T_CString_integerToString(myName + myNameLen, codepage, 10); 141 142 return ucnv_createConverter(NULL, myName, err); 143 } 144 145 /* Creating a temporary stack-based object that can be used in one thread, 146 and created from a converter that is shared across threads. 147 */ 148 149 U_CAPI UConverter* U_EXPORT2 150 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) 151 { 152 UConverter *localConverter, *allocatedConverter; 153 int32_t bufferSizeNeeded; 154 char *stackBufferChars = (char *)stackBuffer; 155 UErrorCode cbErr; 156 UConverterToUnicodeArgs toUArgs = { 157 sizeof(UConverterToUnicodeArgs), 158 TRUE, 159 NULL, 160 NULL, 161 NULL, 162 NULL, 163 NULL, 164 NULL 165 }; 166 UConverterFromUnicodeArgs fromUArgs = { 167 sizeof(UConverterFromUnicodeArgs), 168 TRUE, 169 NULL, 170 NULL, 171 NULL, 172 NULL, 173 NULL, 174 NULL 175 }; 176 177 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); 178 179 if (status == NULL || U_FAILURE(*status)){ 180 UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); 181 return 0; 182 } 183 184 if (!pBufferSize || !cnv){ 185 *status = U_ILLEGAL_ARGUMENT_ERROR; 186 UTRACE_EXIT_STATUS(*status); 187 return 0; 188 } 189 190 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", 191 ucnv_getName(cnv, status), cnv, stackBuffer); 192 193 if (cnv->sharedData->impl->safeClone != NULL) { 194 /* call the custom safeClone function for sizing */ 195 bufferSizeNeeded = 0; 196 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); 197 } 198 else 199 { 200 /* inherent sizing */ 201 bufferSizeNeeded = sizeof(UConverter); 202 } 203 204 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 205 *pBufferSize = bufferSizeNeeded; 206 UTRACE_EXIT_VALUE(bufferSizeNeeded); 207 return 0; 208 } 209 210 211 /* Pointers on 64-bit platforms need to be aligned 212 * on a 64-bit boundary in memory. 213 */ 214 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 215 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 216 if(*pBufferSize > offsetUp) { 217 *pBufferSize -= offsetUp; 218 stackBufferChars += offsetUp; 219 } else { 220 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 221 *pBufferSize = 1; 222 } 223 } 224 225 stackBuffer = (void *)stackBufferChars; 226 227 /* Now, see if we must allocate any memory */ 228 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL) 229 { 230 /* allocate one here...*/ 231 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); 232 233 if(localConverter == NULL) { 234 *status = U_MEMORY_ALLOCATION_ERROR; 235 UTRACE_EXIT_STATUS(*status); 236 return NULL; 237 } 238 239 if (U_SUCCESS(*status)) { 240 *status = U_SAFECLONE_ALLOCATED_WARNING; 241 } 242 243 /* record the fact that memory was allocated */ 244 *pBufferSize = bufferSizeNeeded; 245 } else { 246 /* just use the stack buffer */ 247 localConverter = (UConverter*) stackBuffer; 
248 allocatedConverter = NULL; 249 } 250 251 uprv_memset(localConverter, 0, bufferSizeNeeded); 252 253 /* Copy initial state */ 254 uprv_memcpy(localConverter, cnv, sizeof(UConverter)); 255 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; 256 257 /* copy the substitution string */ 258 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 259 localConverter->subChars = (uint8_t *)localConverter->subUChars; 260 } else { 261 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 262 if (localConverter->subChars == NULL) { 263 uprv_free(allocatedConverter); 264 UTRACE_EXIT_STATUS(*status); 265 return NULL; 266 } 267 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 268 } 269 270 /* now either call the safeclone fcn or not */ 271 if (cnv->sharedData->impl->safeClone != NULL) { 272 /* call the custom safeClone function */ 273 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); 274 } 275 276 if(localConverter==NULL || U_FAILURE(*status)) { 277 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 278 uprv_free(allocatedConverter->subChars); 279 } 280 uprv_free(allocatedConverter); 281 UTRACE_EXIT_STATUS(*status); 282 return NULL; 283 } 284 285 /* increment refcount of shared data if needed */ 286 /* 287 Checking whether it's an algorithic converter is okay 288 in multithreaded applications because the value never changes. 289 Don't check referenceCounter for any other value. 
290 */ 291 if (cnv->sharedData->referenceCounter != ~0) { 292 ucnv_incrementRefCount(cnv->sharedData); 293 } 294 295 if(localConverter == (UConverter*)stackBuffer) { 296 /* we're using user provided data - set to not destroy */ 297 localConverter->isCopyLocal = TRUE; 298 } 299 300 /* allow callback functions to handle any memory allocation */ 301 toUArgs.converter = fromUArgs.converter = localConverter; 302 cbErr = U_ZERO_ERROR; 303 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); 304 cbErr = U_ZERO_ERROR; 305 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); 306 307 UTRACE_EXIT_PTR_STATUS(localConverter, *status); 308 return localConverter; 309 } 310 311 312 313 /*Decreases the reference counter in the shared immutable section of the object 314 *and frees the mutable part*/ 315 316 U_CAPI void U_EXPORT2 317 ucnv_close (UConverter * converter) 318 { 319 UErrorCode errorCode = U_ZERO_ERROR; 320 321 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); 322 323 if (converter == NULL) 324 { 325 UTRACE_EXIT(); 326 return; 327 } 328 329 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", 330 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); 331 332 /* In order to speed up the close, only call the callbacks when they have been changed. 333 This performance check will only work when the callbacks are set within a shared library 334 or from user code that statically links this code. 
*/ 335 /* first, notify the callback functions that the converter is closed */ 336 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 337 UConverterToUnicodeArgs toUArgs = { 338 sizeof(UConverterToUnicodeArgs), 339 TRUE, 340 NULL, 341 NULL, 342 NULL, 343 NULL, 344 NULL, 345 NULL 346 }; 347 348 toUArgs.converter = converter; 349 errorCode = U_ZERO_ERROR; 350 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); 351 } 352 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 353 UConverterFromUnicodeArgs fromUArgs = { 354 sizeof(UConverterFromUnicodeArgs), 355 TRUE, 356 NULL, 357 NULL, 358 NULL, 359 NULL, 360 NULL, 361 NULL 362 }; 363 fromUArgs.converter = converter; 364 errorCode = U_ZERO_ERROR; 365 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); 366 } 367 368 if (converter->sharedData->impl->close != NULL) { 369 converter->sharedData->impl->close(converter); 370 } 371 372 if (converter->subChars != (uint8_t *)converter->subUChars) { 373 uprv_free(converter->subChars); 374 } 375 376 /* 377 Checking whether it's an algorithic converter is okay 378 in multithreaded applications because the value never changes. 379 Don't check referenceCounter for any other value. 
380 */ 381 if (converter->sharedData->referenceCounter != ~0) { 382 ucnv_unloadSharedDataIfReady(converter->sharedData); 383 } 384 385 if(!converter->isCopyLocal){ 386 uprv_free(converter); 387 } 388 389 UTRACE_EXIT(); 390 } 391 392 /*returns a single Name from the list, will return NULL if out of bounds 393 */ 394 U_CAPI const char* U_EXPORT2 395 ucnv_getAvailableName (int32_t n) 396 { 397 if (0 <= n && n <= 0xffff) { 398 UErrorCode err = U_ZERO_ERROR; 399 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); 400 if (U_SUCCESS(err)) { 401 return name; 402 } 403 } 404 return NULL; 405 } 406 407 U_CAPI int32_t U_EXPORT2 408 ucnv_countAvailable () 409 { 410 UErrorCode err = U_ZERO_ERROR; 411 return ucnv_bld_countAvailableConverters(&err); 412 } 413 414 U_CAPI void U_EXPORT2 415 ucnv_getSubstChars (const UConverter * converter, 416 char *mySubChar, 417 int8_t * len, 418 UErrorCode * err) 419 { 420 if (U_FAILURE (*err)) 421 return; 422 423 if (converter->subCharLen <= 0) { 424 /* Unicode string or empty string from ucnv_setSubstString(). 
*/ 425 *len = 0; 426 return; 427 } 428 429 if (*len < converter->subCharLen) /*not enough space in subChars */ 430 { 431 *err = U_INDEX_OUTOFBOUNDS_ERROR; 432 return; 433 } 434 435 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ 436 *len = converter->subCharLen; /*store # of bytes copied to buffer */ 437 } 438 439 U_CAPI void U_EXPORT2 440 ucnv_setSubstChars (UConverter * converter, 441 const char *mySubChar, 442 int8_t len, 443 UErrorCode * err) 444 { 445 if (U_FAILURE (*err)) 446 return; 447 448 /*Makes sure that the subChar is within the codepages char length boundaries */ 449 if ((len > converter->sharedData->staticData->maxBytesPerChar) 450 || (len < converter->sharedData->staticData->minBytesPerChar)) 451 { 452 *err = U_ILLEGAL_ARGUMENT_ERROR; 453 return; 454 } 455 456 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ 457 converter->subCharLen = len; /*sets the new len */ 458 459 /* 460 * There is currently (2001Feb) no separate API to set/get subChar1. 461 * In order to always have subChar written after it is explicitly set, 462 * we set subChar1 to 0. 463 */ 464 converter->subChar1 = 0; 465 466 return; 467 } 468 469 U_CAPI void U_EXPORT2 470 ucnv_setSubstString(UConverter *cnv, 471 const UChar *s, 472 int32_t length, 473 UErrorCode *err) { 474 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; 475 char chars[UCNV_ERROR_BUFFER_LENGTH]; 476 477 UConverter *clone; 478 uint8_t *subChars; 479 int32_t cloneSize, length8; 480 481 /* Let the following functions check all arguments. 
*/ 482 cloneSize = sizeof(cloneBuffer); 483 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); 484 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); 485 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); 486 ucnv_close(clone); 487 if (U_FAILURE(*err)) { 488 return; 489 } 490 491 if (cnv->sharedData->impl->writeSub == NULL 492 #if !UCONFIG_NO_LEGACY_CONVERSION 493 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && 494 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) 495 #endif 496 ) { 497 /* The converter is not stateful. Store the charset bytes as a fixed string. */ 498 subChars = (uint8_t *)chars; 499 } else { 500 /* 501 * The converter has a non-default writeSub() function, indicating 502 * that it is stateful. 503 * Store the Unicode string for on-the-fly conversion for correct 504 * state handling. 505 */ 506 if (length > UCNV_ERROR_BUFFER_LENGTH) { 507 /* 508 * Should not occur. The converter should output at least one byte 509 * per UChar, which means that ucnv_fromUChars() should catch all 510 * overflows. 511 */ 512 *err = U_BUFFER_OVERFLOW_ERROR; 513 return; 514 } 515 subChars = (uint8_t *)s; 516 if (length < 0) { 517 length = u_strlen(s); 518 } 519 length8 = length * U_SIZEOF_UCHAR; 520 } 521 522 /* 523 * For storing the substitution string, select either the small buffer inside 524 * UConverter or allocate a subChars buffer. 525 */ 526 if (length8 > UCNV_MAX_SUBCHAR_LEN) { 527 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ 528 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 529 /* Allocate a new buffer for the string. 
*/ 530 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 531 if (cnv->subChars == NULL) { 532 cnv->subChars = (uint8_t *)cnv->subUChars; 533 *err = U_MEMORY_ALLOCATION_ERROR; 534 return; 535 } 536 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 537 } 538 } 539 540 /* Copy the substitution string into the UConverter or its subChars buffer. */ 541 if (length8 == 0) { 542 cnv->subCharLen = 0; 543 } else { 544 uprv_memcpy(cnv->subChars, subChars, length8); 545 if (subChars == (uint8_t *)chars) { 546 cnv->subCharLen = (int8_t)length8; 547 } else /* subChars == s */ { 548 cnv->subCharLen = (int8_t)-length; 549 } 550 } 551 552 /* See comment in ucnv_setSubstChars(). */ 553 cnv->subChar1 = 0; 554 } 555 556 /*resets the internal states of a converter 557 *goal : have the same behaviour than a freshly created converter 558 */ 559 static void _reset(UConverter *converter, UConverterResetChoice choice, 560 UBool callCallback) { 561 if(converter == NULL) { 562 return; 563 } 564 565 if(callCallback) { 566 /* first, notify the callback functions that the converter is reset */ 567 UErrorCode errorCode; 568 569 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 570 UConverterToUnicodeArgs toUArgs = { 571 sizeof(UConverterToUnicodeArgs), 572 TRUE, 573 NULL, 574 NULL, 575 NULL, 576 NULL, 577 NULL, 578 NULL 579 }; 580 toUArgs.converter = converter; 581 errorCode = U_ZERO_ERROR; 582 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); 583 } 584 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 585 UConverterFromUnicodeArgs fromUArgs = { 586 sizeof(UConverterFromUnicodeArgs), 587 TRUE, 588 NULL, 589 NULL, 590 NULL, 591 NULL, 592 NULL, 593 NULL 594 }; 595 fromUArgs.converter = converter; 596 errorCode = U_ZERO_ERROR; 597 
converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); 598 } 599 } 600 601 /* now reset the converter itself */ 602 if(choice<=UCNV_RESET_TO_UNICODE) { 603 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; 604 converter->mode = 0; 605 converter->toULength = 0; 606 converter->invalidCharLength = converter->UCharErrorBufferLength = 0; 607 converter->preToULength = 0; 608 } 609 if(choice!=UCNV_RESET_TO_UNICODE) { 610 converter->fromUnicodeStatus = 0; 611 converter->fromUChar32 = 0; 612 converter->invalidUCharLength = converter->charErrorBufferLength = 0; 613 converter->preFromUFirstCP = U_SENTINEL; 614 converter->preFromULength = 0; 615 } 616 617 if (converter->sharedData->impl->reset != NULL) { 618 /* call the custom reset function */ 619 converter->sharedData->impl->reset(converter, choice); 620 } 621 } 622 623 U_CAPI void U_EXPORT2 624 ucnv_reset(UConverter *converter) 625 { 626 _reset(converter, UCNV_RESET_BOTH, TRUE); 627 } 628 629 U_CAPI void U_EXPORT2 630 ucnv_resetToUnicode(UConverter *converter) 631 { 632 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); 633 } 634 635 U_CAPI void U_EXPORT2 636 ucnv_resetFromUnicode(UConverter *converter) 637 { 638 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); 639 } 640 641 U_CAPI int8_t U_EXPORT2 642 ucnv_getMaxCharSize (const UConverter * converter) 643 { 644 return converter->maxBytesPerUChar; 645 } 646 647 648 U_CAPI int8_t U_EXPORT2 649 ucnv_getMinCharSize (const UConverter * converter) 650 { 651 return converter->sharedData->staticData->minBytesPerChar; 652 } 653 654 U_CAPI const char* U_EXPORT2 655 ucnv_getName (const UConverter * converter, UErrorCode * err) 656 657 { 658 if (U_FAILURE (*err)) 659 return NULL; 660 if(converter->sharedData->impl->getName){ 661 const char* temp= converter->sharedData->impl->getName(converter); 662 if(temp) 663 return temp; 664 } 665 return converter->sharedData->staticData->name; 666 } 667 668 U_CAPI int32_t 
U_EXPORT2 669 ucnv_getCCSID(const UConverter * converter, 670 UErrorCode * err) 671 { 672 int32_t ccsid; 673 if (U_FAILURE (*err)) 674 return -1; 675 676 ccsid = converter->sharedData->staticData->codepage; 677 if (ccsid == 0) { 678 /* Rare case. This is for cases like gb18030, 679 which doesn't have an IBM cannonical name, but does have an IBM alias. */ 680 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); 681 if (U_SUCCESS(*err) && standardName) { 682 const char *ccsidStr = uprv_strchr(standardName, '-'); 683 if (ccsidStr) { 684 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ 685 } 686 } 687 } 688 return ccsid; 689 } 690 691 692 U_CAPI UConverterPlatform U_EXPORT2 693 ucnv_getPlatform (const UConverter * converter, 694 UErrorCode * err) 695 { 696 if (U_FAILURE (*err)) 697 return UCNV_UNKNOWN; 698 699 return (UConverterPlatform)converter->sharedData->staticData->platform; 700 } 701 702 U_CAPI void U_EXPORT2 703 ucnv_getToUCallBack (const UConverter * converter, 704 UConverterToUCallback *action, 705 const void **context) 706 { 707 *action = converter->fromCharErrorBehaviour; 708 *context = converter->toUContext; 709 } 710 711 U_CAPI void U_EXPORT2 712 ucnv_getFromUCallBack (const UConverter * converter, 713 UConverterFromUCallback *action, 714 const void **context) 715 { 716 *action = converter->fromUCharErrorBehaviour; 717 *context = converter->fromUContext; 718 } 719 720 U_CAPI void U_EXPORT2 721 ucnv_setToUCallBack (UConverter * converter, 722 UConverterToUCallback newAction, 723 const void* newContext, 724 UConverterToUCallback *oldAction, 725 const void** oldContext, 726 UErrorCode * err) 727 { 728 if (U_FAILURE (*err)) 729 return; 730 if (oldAction) *oldAction = converter->fromCharErrorBehaviour; 731 converter->fromCharErrorBehaviour = newAction; 732 if (oldContext) *oldContext = converter->toUContext; 733 converter->toUContext = newContext; 734 } 735 736 U_CAPI void U_EXPORT2 737 ucnv_setFromUCallBack 
(UConverter * converter, 738 UConverterFromUCallback newAction, 739 const void* newContext, 740 UConverterFromUCallback *oldAction, 741 const void** oldContext, 742 UErrorCode * err) 743 { 744 if (U_FAILURE (*err)) 745 return; 746 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; 747 converter->fromUCharErrorBehaviour = newAction; 748 if (oldContext) *oldContext = converter->fromUContext; 749 converter->fromUContext = newContext; 750 } 751 752 static void 753 _updateOffsets(int32_t *offsets, int32_t length, 754 int32_t sourceIndex, int32_t errorInputLength) { 755 int32_t *limit; 756 int32_t delta, offset; 757 758 if(sourceIndex>=0) { 759 /* 760 * adjust each offset by adding the previous sourceIndex 761 * minus the length of the input sequence that caused an 762 * error, if any 763 */ 764 delta=sourceIndex-errorInputLength; 765 } else { 766 /* 767 * set each offset to -1 because this conversion function 768 * does not handle offsets 769 */ 770 delta=-1; 771 } 772 773 limit=offsets+length; 774 if(delta==0) { 775 /* most common case, nothing to do */ 776 } else if(delta>0) { 777 /* add the delta to each offset (but not if the offset is <0) */ 778 while(offsets<limit) { 779 offset=*offsets; 780 if(offset>=0) { 781 *offsets=offset+delta; 782 } 783 ++offsets; 784 } 785 } else /* delta<0 */ { 786 /* 787 * set each offset to -1 because this conversion function 788 * does not handle offsets 789 * or the error input sequence started in a previous buffer 790 */ 791 while(offsets<limit) { 792 *offsets++=-1; 793 } 794 } 795 } 796 797 /* ucnv_fromUnicode --------------------------------------------------------- */ 798 799 /* 800 * Implementation note for m:n conversions 801 * 802 * While collecting source units to find the longest match for m:n conversion, 803 * some source units may need to be stored for a partial match. 
 * When a second buffer does not yield a match on all of the previously stored
 * source units, then they must be "replayed", i.e., fed back into the converter.
 *
 * The code relies on the fact that replaying will not nest -
 * converting a replay buffer will not result in a replay.
 * This is because a replay is necessary only after the _continuation_ of a
 * partial match failed, but a replay buffer is converted as a whole.
 * It may result in some of its units being stored again for a partial match,
 * but there will not be a continuation _during_ the replay which could fail.
 *
 * It is conceivable that a callback function could call the converter
 * recursively in a way that causes another replay to be stored, but that
 * would be an error in the callback function.
 * Such violations will cause assertion failures in a debug build,
 * and wrong output, but they will not cause a crash.
 */

/*
 * Drives the converter implementation's fromUnicode function over pArgs and
 * handles everything around it: offset fix-ups, replaying stored m:n source
 * units, end-of-input/flush handling, and invoking the from-Unicode error
 * callback for invalid/illegal/truncated input.
 *
 * @param pArgs conversion arguments; source/target/offsets are advanced, and
 *              source/sourceLimit/flush are temporarily redirected to the
 *              local replay buffer while replaying
 * @param err   in/out error code; a failure on entry skips the conversion
 *              call and goes straight to the error handling
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    /* a negative preFromULength marks stored units that must be replayed */
    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preFromULength is negative here, so its negation is the unit count */
        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                /* number of target units written since t was last synchronized */
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    /* back the index up by the replayed units; clamp to "unknown" (-1) */
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    /* realSource!=NULL in this branch, so this assertion fails on purpose */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                /*
                 * e is assigned inside the condition; it is only read in the
                 * later operands, which are evaluated after that assignment
                 */
                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        /* unconsumed replay units go back into preFromU with a negative length */
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                /* only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; the rest as UCNV_ILLEGAL */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the fromUnicode overflow buffer.
 * Call this function if(cnv->charErrorBufferLength>0).
1110 * @return TRUE if overflow 1111 */ 1112 static UBool 1113 ucnv_outputOverflowFromUnicode(UConverter *cnv, 1114 char **target, const char *targetLimit, 1115 int32_t **pOffsets, 1116 UErrorCode *err) { 1117 int32_t *offsets; 1118 char *overflow, *t; 1119 int32_t i, length; 1120 1121 t=*target; 1122 if(pOffsets!=NULL) { 1123 offsets=*pOffsets; 1124 } else { 1125 offsets=NULL; 1126 } 1127 1128 overflow=(char *)cnv->charErrorBuffer; 1129 length=cnv->charErrorBufferLength; 1130 i=0; 1131 while(i<length) { 1132 if(t==targetLimit) { 1133 /* the overflow buffer contains too much, keep the rest */ 1134 int32_t j=0; 1135 1136 do { 1137 overflow[j++]=overflow[i++]; 1138 } while(i<length); 1139 1140 cnv->charErrorBufferLength=(int8_t)j; 1141 *target=t; 1142 if(offsets!=NULL) { 1143 *pOffsets=offsets; 1144 } 1145 *err=U_BUFFER_OVERFLOW_ERROR; 1146 return TRUE; 1147 } 1148 1149 /* copy the overflow contents to the target */ 1150 *t++=overflow[i++]; 1151 if(offsets!=NULL) { 1152 *offsets++=-1; /* no source index available for old output */ 1153 } 1154 } 1155 1156 /* the overflow buffer is completely copied to the target */ 1157 cnv->charErrorBufferLength=0; 1158 *target=t; 1159 if(offsets!=NULL) { 1160 *pOffsets=offsets; 1161 } 1162 return FALSE; 1163 } 1164 1165 U_CAPI void U_EXPORT2 1166 ucnv_fromUnicode(UConverter *cnv, 1167 char **target, const char *targetLimit, 1168 const UChar **source, const UChar *sourceLimit, 1169 int32_t *offsets, 1170 UBool flush, 1171 UErrorCode *err) { 1172 UConverterFromUnicodeArgs args; 1173 const UChar *s; 1174 char *t; 1175 1176 /* check parameters */ 1177 if(err==NULL || U_FAILURE(*err)) { 1178 return; 1179 } 1180 1181 if(cnv==NULL || target==NULL || source==NULL) { 1182 *err=U_ILLEGAL_ARGUMENT_ERROR; 1183 return; 1184 } 1185 1186 s=*source; 1187 t=*target; 1188 1189 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { 1190 /* 1191 Prevent code from going into an infinite loop in case we do hit this 1192 limit. 
The limit pointer is expected to be on a UChar * boundary. 1193 This also prevents the next argument check from failing. 1194 */ 1195 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); 1196 } 1197 1198 /* 1199 * All these conditions should never happen. 1200 * 1201 * 1) Make sure that the limits are >= to the address source or target 1202 * 1203 * 2) Make sure that the buffer sizes do not exceed the number range for 1204 * int32_t because some functions use the size (in units or bytes) 1205 * rather than comparing pointers, and because offsets are int32_t values. 1206 * 1207 * size_t is guaranteed to be unsigned and large enough for the job. 1208 * 1209 * Return with an error instead of adjusting the limits because we would 1210 * not be able to maintain the semantics that either the source must be 1211 * consumed or the target filled (unless an error occurs). 1212 * An adjustment would be targetLimit=t+0x7fffffff; for example. 1213 * 1214 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1215 * to a char * pointer and provide an incomplete UChar code unit. 
1216 */ 1217 if (sourceLimit<s || targetLimit<t || 1218 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || 1219 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || 1220 (((const char *)sourceLimit-(const char *)s) & 1) != 0) 1221 { 1222 *err=U_ILLEGAL_ARGUMENT_ERROR; 1223 return; 1224 } 1225 1226 /* output the target overflow buffer */ 1227 if( cnv->charErrorBufferLength>0 && 1228 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) 1229 ) { 1230 /* U_BUFFER_OVERFLOW_ERROR */ 1231 return; 1232 } 1233 /* *target may have moved, therefore stop using t */ 1234 1235 if(!flush && s==sourceLimit && cnv->preFromULength>=0) { 1236 /* the overflow buffer is emptied and there is no new input: we are done */ 1237 return; 1238 } 1239 1240 /* 1241 * Do not simply return with a buffer overflow error if 1242 * !flush && t==targetLimit 1243 * because it is possible that the source will not generate any output. 1244 * For example, the skip callback may be called; 1245 * it does not output anything. 
1246 */ 1247 1248 /* prepare the converter arguments */ 1249 args.converter=cnv; 1250 args.flush=flush; 1251 args.offsets=offsets; 1252 args.source=s; 1253 args.sourceLimit=sourceLimit; 1254 args.target=*target; 1255 args.targetLimit=targetLimit; 1256 args.size=sizeof(args); 1257 1258 _fromUnicodeWithCallback(&args, err); 1259 1260 *source=args.source; 1261 *target=args.target; 1262 } 1263 1264 /* ucnv_toUnicode() --------------------------------------------------------- */ 1265 1266 static void 1267 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 1268 UConverterToUnicode toUnicode; 1269 UConverter *cnv; 1270 const char *s; 1271 UChar *t; 1272 int32_t *offsets; 1273 int32_t sourceIndex; 1274 int32_t errorInputLength; 1275 UBool converterSawEndOfInput, calledCallback; 1276 1277 /* variables for m:n conversion */ 1278 char replay[UCNV_EXT_MAX_BYTES]; 1279 const char *realSource, *realSourceLimit; 1280 int32_t realSourceIndex; 1281 UBool realFlush; 1282 1283 cnv=pArgs->converter; 1284 s=pArgs->source; 1285 t=pArgs->target; 1286 offsets=pArgs->offsets; 1287 1288 /* get the converter implementation function */ 1289 sourceIndex=0; 1290 if(offsets==NULL) { 1291 toUnicode=cnv->sharedData->impl->toUnicode; 1292 } else { 1293 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; 1294 if(toUnicode==NULL) { 1295 /* there is no WithOffsets implementation */ 1296 toUnicode=cnv->sharedData->impl->toUnicode; 1297 /* we will write -1 for each offset */ 1298 sourceIndex=-1; 1299 } 1300 } 1301 1302 if(cnv->preToULength>=0) { 1303 /* normal mode */ 1304 realSource=NULL; 1305 1306 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ 1307 realSourceLimit=NULL; 1308 realFlush=FALSE; 1309 realSourceIndex=0; 1310 } else { 1311 /* 1312 * Previous m:n conversion stored source units from a partial match 1313 * and failed to consume all of them. 1314 * We need to "replay" them from a temporary buffer and convert them first. 
1315 */ 1316 realSource=pArgs->source; 1317 realSourceLimit=pArgs->sourceLimit; 1318 realFlush=pArgs->flush; 1319 realSourceIndex=sourceIndex; 1320 1321 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1322 pArgs->source=replay; 1323 pArgs->sourceLimit=replay-cnv->preToULength; 1324 pArgs->flush=FALSE; 1325 sourceIndex=-1; 1326 1327 cnv->preToULength=0; 1328 } 1329 1330 /* 1331 * loop for conversion and error handling 1332 * 1333 * loop { 1334 * convert 1335 * loop { 1336 * update offsets 1337 * handle end of input 1338 * handle errors/call callback 1339 * } 1340 * } 1341 */ 1342 for(;;) { 1343 if(U_SUCCESS(*err)) { 1344 /* convert */ 1345 toUnicode(pArgs, err); 1346 1347 /* 1348 * set a flag for whether the converter 1349 * successfully processed the end of the input 1350 * 1351 * need not check cnv->preToULength==0 because a replay (<0) will cause 1352 * s<sourceLimit before converterSawEndOfInput is checked 1353 */ 1354 converterSawEndOfInput= 1355 (UBool)(U_SUCCESS(*err) && 1356 pArgs->flush && pArgs->source==pArgs->sourceLimit && 1357 cnv->toULength==0); 1358 } else { 1359 /* handle error from getNextUChar() or ucnv_convertEx() */ 1360 converterSawEndOfInput=FALSE; 1361 } 1362 1363 /* no callback called yet for this iteration */ 1364 calledCallback=FALSE; 1365 1366 /* no sourceIndex adjustment for conversion, only for callback output */ 1367 errorInputLength=0; 1368 1369 /* 1370 * loop for offsets and error handling 1371 * 1372 * iterates at most 3 times: 1373 * 1. to clean up after the conversion function 1374 * 2. after the callback 1375 * 3. 
after the callback again if there was truncated input 1376 */ 1377 for(;;) { 1378 /* update offsets if we write any */ 1379 if(offsets!=NULL) { 1380 int32_t length=(int32_t)(pArgs->target-t); 1381 if(length>0) { 1382 _updateOffsets(offsets, length, sourceIndex, errorInputLength); 1383 1384 /* 1385 * if a converter handles offsets and updates the offsets 1386 * pointer at the end, then pArgs->offset should not change 1387 * here; 1388 * however, some converters do not handle offsets at all 1389 * (sourceIndex<0) or may not update the offsets pointer 1390 */ 1391 pArgs->offsets=offsets+=length; 1392 } 1393 1394 if(sourceIndex>=0) { 1395 sourceIndex+=(int32_t)(pArgs->source-s); 1396 } 1397 } 1398 1399 if(cnv->preToULength<0) { 1400 /* 1401 * switch the source to new replay units (cannot occur while replaying) 1402 * after offset handling and before end-of-input and callback handling 1403 */ 1404 if(realSource==NULL) { 1405 realSource=pArgs->source; 1406 realSourceLimit=pArgs->sourceLimit; 1407 realFlush=pArgs->flush; 1408 realSourceIndex=sourceIndex; 1409 1410 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1411 pArgs->source=replay; 1412 pArgs->sourceLimit=replay-cnv->preToULength; 1413 pArgs->flush=FALSE; 1414 if((sourceIndex+=cnv->preToULength)<0) { 1415 sourceIndex=-1; 1416 } 1417 1418 cnv->preToULength=0; 1419 } else { 1420 /* see implementation note before _fromUnicodeWithCallback() */ 1421 U_ASSERT(realSource==NULL); 1422 *err=U_INTERNAL_PROGRAM_ERROR; 1423 } 1424 } 1425 1426 /* update pointers */ 1427 s=pArgs->source; 1428 t=pArgs->target; 1429 1430 if(U_SUCCESS(*err)) { 1431 if(s<pArgs->sourceLimit) { 1432 /* 1433 * continue with the conversion loop while there is still input left 1434 * (continue converting by breaking out of only the inner loop) 1435 */ 1436 break; 1437 } else if(realSource!=NULL) { 1438 /* switch back from replaying to the real source and continue */ 1439 pArgs->source=realSource; 1440 pArgs->sourceLimit=realSourceLimit; 1441 
pArgs->flush=realFlush; 1442 sourceIndex=realSourceIndex; 1443 1444 realSource=NULL; 1445 break; 1446 } else if(pArgs->flush && cnv->toULength>0) { 1447 /* 1448 * the entire input stream is consumed 1449 * and there is a partial, truncated input sequence left 1450 */ 1451 1452 /* inject an error and continue with callback handling */ 1453 *err=U_TRUNCATED_CHAR_FOUND; 1454 calledCallback=FALSE; /* new error condition */ 1455 } else { 1456 /* input consumed */ 1457 if(pArgs->flush) { 1458 /* 1459 * return to the conversion loop once more if the flush 1460 * flag is set and the conversion function has not 1461 * successfully processed the end of the input yet 1462 * 1463 * (continue converting by breaking out of only the inner loop) 1464 */ 1465 if(!converterSawEndOfInput) { 1466 break; 1467 } 1468 1469 /* reset the converter without calling the callback function */ 1470 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); 1471 } 1472 1473 /* done successfully */ 1474 return; 1475 } 1476 } 1477 1478 /* U_FAILURE(*err) */ 1479 { 1480 UErrorCode e; 1481 1482 if( calledCallback || 1483 (e=*err)==U_BUFFER_OVERFLOW_ERROR || 1484 (e!=U_INVALID_CHAR_FOUND && 1485 e!=U_ILLEGAL_CHAR_FOUND && 1486 e!=U_TRUNCATED_CHAR_FOUND && 1487 e!=U_ILLEGAL_ESCAPE_SEQUENCE && 1488 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) 1489 ) { 1490 /* 1491 * the callback did not or cannot resolve the error: 1492 * set output pointers and return 1493 * 1494 * the check for buffer overflow is redundant but it is 1495 * a high-runner case and hopefully documents the intent 1496 * well 1497 * 1498 * if we were replaying, then the replay buffer must be 1499 * copied back into the UConverter 1500 * and the real arguments must be restored 1501 */ 1502 if(realSource!=NULL) { 1503 int32_t length; 1504 1505 U_ASSERT(cnv->preToULength==0); 1506 1507 length=(int32_t)(pArgs->sourceLimit-pArgs->source); 1508 if(length>0) { 1509 uprv_memcpy(cnv->preToU, pArgs->source, length); 1510 cnv->preToULength=(int8_t)-length; 1511 } 1512 1513 
pArgs->source=realSource; 1514 pArgs->sourceLimit=realSourceLimit; 1515 pArgs->flush=realFlush; 1516 } 1517 1518 return; 1519 } 1520 } 1521 1522 /* copy toUBytes[] to invalidCharBuffer[] */ 1523 errorInputLength=cnv->invalidCharLength=cnv->toULength; 1524 if(errorInputLength>0) { 1525 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); 1526 } 1527 1528 /* set the converter state to deal with the next character */ 1529 cnv->toULength=0; 1530 1531 /* call the callback function */ 1532 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { 1533 cnv->toUCallbackReason = UCNV_UNASSIGNED; 1534 } 1535 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, 1536 cnv->invalidCharBuffer, errorInputLength, 1537 cnv->toUCallbackReason, 1538 err); 1539 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ 1540 1541 /* 1542 * loop back to the offset handling 1543 * 1544 * this flag will indicate after offset handling 1545 * that a callback was called; 1546 * if the callback did not resolve the error, then we return 1547 */ 1548 calledCallback=TRUE; 1549 } 1550 } 1551 } 1552 1553 /* 1554 * Output the toUnicode overflow buffer. 1555 * Call this function if(cnv->UCharErrorBufferLength>0). 
1556 * @return TRUE if overflow 1557 */ 1558 static UBool 1559 ucnv_outputOverflowToUnicode(UConverter *cnv, 1560 UChar **target, const UChar *targetLimit, 1561 int32_t **pOffsets, 1562 UErrorCode *err) { 1563 int32_t *offsets; 1564 UChar *overflow, *t; 1565 int32_t i, length; 1566 1567 t=*target; 1568 if(pOffsets!=NULL) { 1569 offsets=*pOffsets; 1570 } else { 1571 offsets=NULL; 1572 } 1573 1574 overflow=cnv->UCharErrorBuffer; 1575 length=cnv->UCharErrorBufferLength; 1576 i=0; 1577 while(i<length) { 1578 if(t==targetLimit) { 1579 /* the overflow buffer contains too much, keep the rest */ 1580 int32_t j=0; 1581 1582 do { 1583 overflow[j++]=overflow[i++]; 1584 } while(i<length); 1585 1586 cnv->UCharErrorBufferLength=(int8_t)j; 1587 *target=t; 1588 if(offsets!=NULL) { 1589 *pOffsets=offsets; 1590 } 1591 *err=U_BUFFER_OVERFLOW_ERROR; 1592 return TRUE; 1593 } 1594 1595 /* copy the overflow contents to the target */ 1596 *t++=overflow[i++]; 1597 if(offsets!=NULL) { 1598 *offsets++=-1; /* no source index available for old output */ 1599 } 1600 } 1601 1602 /* the overflow buffer is completely copied to the target */ 1603 cnv->UCharErrorBufferLength=0; 1604 *target=t; 1605 if(offsets!=NULL) { 1606 *pOffsets=offsets; 1607 } 1608 return FALSE; 1609 } 1610 1611 U_CAPI void U_EXPORT2 1612 ucnv_toUnicode(UConverter *cnv, 1613 UChar **target, const UChar *targetLimit, 1614 const char **source, const char *sourceLimit, 1615 int32_t *offsets, 1616 UBool flush, 1617 UErrorCode *err) { 1618 UConverterToUnicodeArgs args; 1619 const char *s; 1620 UChar *t; 1621 1622 /* check parameters */ 1623 if(err==NULL || U_FAILURE(*err)) { 1624 return; 1625 } 1626 1627 if(cnv==NULL || target==NULL || source==NULL) { 1628 *err=U_ILLEGAL_ARGUMENT_ERROR; 1629 return; 1630 } 1631 1632 s=*source; 1633 t=*target; 1634 1635 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { 1636 /* 1637 Prevent code from going into an infinite loop in case we do hit this 1638 limit. 
The limit pointer is expected to be on a UChar * boundary. 1639 This also prevents the next argument check from failing. 1640 */ 1641 targetLimit = (const UChar *)(((const char *)targetLimit) - 1); 1642 } 1643 1644 /* 1645 * All these conditions should never happen. 1646 * 1647 * 1) Make sure that the limits are >= to the address source or target 1648 * 1649 * 2) Make sure that the buffer sizes do not exceed the number range for 1650 * int32_t because some functions use the size (in units or bytes) 1651 * rather than comparing pointers, and because offsets are int32_t values. 1652 * 1653 * size_t is guaranteed to be unsigned and large enough for the job. 1654 * 1655 * Return with an error instead of adjusting the limits because we would 1656 * not be able to maintain the semantics that either the source must be 1657 * consumed or the target filled (unless an error occurs). 1658 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1659 * 1660 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1661 * to a char * pointer and provide an incomplete UChar code unit. 
1662 */ 1663 if (sourceLimit<s || targetLimit<t || 1664 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || 1665 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || 1666 (((const char *)targetLimit-(const char *)t) & 1) != 0 1667 ) { 1668 *err=U_ILLEGAL_ARGUMENT_ERROR; 1669 return; 1670 } 1671 1672 /* output the target overflow buffer */ 1673 if( cnv->UCharErrorBufferLength>0 && 1674 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) 1675 ) { 1676 /* U_BUFFER_OVERFLOW_ERROR */ 1677 return; 1678 } 1679 /* *target may have moved, therefore stop using t */ 1680 1681 if(!flush && s==sourceLimit && cnv->preToULength>=0) { 1682 /* the overflow buffer is emptied and there is no new input: we are done */ 1683 return; 1684 } 1685 1686 /* 1687 * Do not simply return with a buffer overflow error if 1688 * !flush && t==targetLimit 1689 * because it is possible that the source will not generate any output. 1690 * For example, the skip callback may be called; 1691 * it does not output anything. 
1692 */ 1693 1694 /* prepare the converter arguments */ 1695 args.converter=cnv; 1696 args.flush=flush; 1697 args.offsets=offsets; 1698 args.source=s; 1699 args.sourceLimit=sourceLimit; 1700 args.target=*target; 1701 args.targetLimit=targetLimit; 1702 args.size=sizeof(args); 1703 1704 _toUnicodeWithCallback(&args, err); 1705 1706 *source=args.source; 1707 *target=args.target; 1708 } 1709 1710 /* ucnv_to/fromUChars() ----------------------------------------------------- */ 1711 1712 U_CAPI int32_t U_EXPORT2 1713 ucnv_fromUChars(UConverter *cnv, 1714 char *dest, int32_t destCapacity, 1715 const UChar *src, int32_t srcLength, 1716 UErrorCode *pErrorCode) { 1717 const UChar *srcLimit; 1718 char *originalDest, *destLimit; 1719 int32_t destLength; 1720 1721 /* check arguments */ 1722 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1723 return 0; 1724 } 1725 1726 if( cnv==NULL || 1727 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1728 srcLength<-1 || (srcLength!=0 && src==NULL) 1729 ) { 1730 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1731 return 0; 1732 } 1733 1734 /* initialize */ 1735 ucnv_resetFromUnicode(cnv); 1736 originalDest=dest; 1737 if(srcLength==-1) { 1738 srcLength=u_strlen(src); 1739 } 1740 if(srcLength>0) { 1741 srcLimit=src+srcLength; 1742 destLimit=dest+destCapacity; 1743 1744 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1745 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1746 destLimit=(char *)U_MAX_PTR(dest); 1747 } 1748 1749 /* perform the conversion */ 1750 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1751 destLength=(int32_t)(dest-originalDest); 1752 1753 /* if an overflow occurs, then get the preflighting length */ 1754 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 1755 char buffer[1024]; 1756 1757 destLimit=buffer+sizeof(buffer); 1758 do { 1759 dest=buffer; 1760 *pErrorCode=U_ZERO_ERROR; 1761 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1762 
destLength+=(int32_t)(dest-buffer); 1763 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1764 } 1765 } else { 1766 destLength=0; 1767 } 1768 1769 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); 1770 } 1771 1772 U_CAPI int32_t U_EXPORT2 1773 ucnv_toUChars(UConverter *cnv, 1774 UChar *dest, int32_t destCapacity, 1775 const char *src, int32_t srcLength, 1776 UErrorCode *pErrorCode) { 1777 const char *srcLimit; 1778 UChar *originalDest, *destLimit; 1779 int32_t destLength; 1780 1781 /* check arguments */ 1782 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1783 return 0; 1784 } 1785 1786 if( cnv==NULL || 1787 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1788 srcLength<-1 || (srcLength!=0 && src==NULL)) 1789 { 1790 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1791 return 0; 1792 } 1793 1794 /* initialize */ 1795 ucnv_resetToUnicode(cnv); 1796 originalDest=dest; 1797 if(srcLength==-1) { 1798 srcLength=(int32_t)uprv_strlen(src); 1799 } 1800 if(srcLength>0) { 1801 srcLimit=src+srcLength; 1802 destLimit=dest+destCapacity; 1803 1804 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1805 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1806 destLimit=(UChar *)U_MAX_PTR(dest); 1807 } 1808 1809 /* perform the conversion */ 1810 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1811 destLength=(int32_t)(dest-originalDest); 1812 1813 /* if an overflow occurs, then get the preflighting length */ 1814 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) 1815 { 1816 UChar buffer[1024]; 1817 1818 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR; 1819 do { 1820 dest=buffer; 1821 *pErrorCode=U_ZERO_ERROR; 1822 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1823 destLength+=(int32_t)(dest-buffer); 1824 } 1825 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1826 } 1827 } else { 1828 destLength=0; 1829 } 1830 1831 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); 1832 
} 1833 1834 /* ucnv_getNextUChar() ------------------------------------------------------ */ 1835 1836 U_CAPI UChar32 U_EXPORT2 1837 ucnv_getNextUChar(UConverter *cnv, 1838 const char **source, const char *sourceLimit, 1839 UErrorCode *err) { 1840 UConverterToUnicodeArgs args; 1841 UChar buffer[U16_MAX_LENGTH]; 1842 const char *s; 1843 UChar32 c; 1844 int32_t i, length; 1845 1846 /* check parameters */ 1847 if(err==NULL || U_FAILURE(*err)) { 1848 return 0xffff; 1849 } 1850 1851 if(cnv==NULL || source==NULL) { 1852 *err=U_ILLEGAL_ARGUMENT_ERROR; 1853 return 0xffff; 1854 } 1855 1856 s=*source; 1857 if(sourceLimit<s) { 1858 *err=U_ILLEGAL_ARGUMENT_ERROR; 1859 return 0xffff; 1860 } 1861 1862 /* 1863 * Make sure that the buffer sizes do not exceed the number range for 1864 * int32_t because some functions use the size (in units or bytes) 1865 * rather than comparing pointers, and because offsets are int32_t values. 1866 * 1867 * size_t is guaranteed to be unsigned and large enough for the job. 1868 * 1869 * Return with an error instead of adjusting the limits because we would 1870 * not be able to maintain the semantics that either the source must be 1871 * consumed or the target filled (unless an error occurs). 1872 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 
1873 */ 1874 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { 1875 *err=U_ILLEGAL_ARGUMENT_ERROR; 1876 return 0xffff; 1877 } 1878 1879 c=U_SENTINEL; 1880 1881 /* flush the target overflow buffer */ 1882 if(cnv->UCharErrorBufferLength>0) { 1883 UChar *overflow; 1884 1885 overflow=cnv->UCharErrorBuffer; 1886 i=0; 1887 length=cnv->UCharErrorBufferLength; 1888 U16_NEXT(overflow, i, length, c); 1889 1890 /* move the remaining overflow contents up to the beginning */ 1891 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { 1892 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, 1893 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1894 } 1895 1896 if(!U16_IS_LEAD(c) || i<length) { 1897 return c; 1898 } 1899 /* 1900 * Continue if the overflow buffer contained only a lead surrogate, 1901 * in case the converter outputs single surrogates from complete 1902 * input sequences. 1903 */ 1904 } 1905 1906 /* 1907 * flush==TRUE is implied for ucnv_getNextUChar() 1908 * 1909 * do not simply return even if s==sourceLimit because the converter may 1910 * not have seen flush==TRUE before 1911 */ 1912 1913 /* prepare the converter arguments */ 1914 args.converter=cnv; 1915 args.flush=TRUE; 1916 args.offsets=NULL; 1917 args.source=s; 1918 args.sourceLimit=sourceLimit; 1919 args.target=buffer; 1920 args.targetLimit=buffer+1; 1921 args.size=sizeof(args); 1922 1923 if(c<0) { 1924 /* 1925 * call the native getNextUChar() implementation if we are 1926 * at a character boundary (toULength==0) 1927 * 1928 * unlike with _toUnicode(), getNextUChar() implementations must set 1929 * U_TRUNCATED_CHAR_FOUND for truncated input, 1930 * in addition to setting toULength/toUBytes[] 1931 */ 1932 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { 1933 c=cnv->sharedData->impl->getNextUChar(&args, err); 1934 *source=s=args.source; 1935 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { 1936 /* reset the converter without calling the callback function */ 1937 _reset(cnv, 
UCNV_RESET_TO_UNICODE, FALSE); 1938 return 0xffff; /* no output */ 1939 } else if(U_SUCCESS(*err) && c>=0) { 1940 return c; 1941 /* 1942 * else fall through to use _toUnicode() because 1943 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all 1944 * U_FAILURE: call _toUnicode() for callback handling (do not output c) 1945 */ 1946 } 1947 } 1948 1949 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ 1950 _toUnicodeWithCallback(&args, err); 1951 1952 if(*err==U_BUFFER_OVERFLOW_ERROR) { 1953 *err=U_ZERO_ERROR; 1954 } 1955 1956 i=0; 1957 length=(int32_t)(args.target-buffer); 1958 } else { 1959 /* write the lead surrogate from the overflow buffer */ 1960 buffer[0]=(UChar)c; 1961 args.target=buffer+1; 1962 i=0; 1963 length=1; 1964 } 1965 1966 /* buffer contents starts at i and ends before length */ 1967 1968 if(U_FAILURE(*err)) { 1969 c=0xffff; /* no output */ 1970 } else if(length==0) { 1971 /* no input or only state changes */ 1972 *err=U_INDEX_OUTOFBOUNDS_ERROR; 1973 /* no need to reset explicitly because _toUnicodeWithCallback() did it */ 1974 c=0xffff; /* no output */ 1975 } else { 1976 c=buffer[0]; 1977 i=1; 1978 if(!U16_IS_LEAD(c)) { 1979 /* consume c=buffer[0], done */ 1980 } else { 1981 /* got a lead surrogate, see if a trail surrogate follows */ 1982 UChar c2; 1983 1984 if(cnv->UCharErrorBufferLength>0) { 1985 /* got overflow output from the conversion */ 1986 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { 1987 /* got a trail surrogate, too */ 1988 c=U16_GET_SUPPLEMENTARY(c, c2); 1989 1990 /* move the remaining overflow contents up to the beginning */ 1991 if((--cnv->UCharErrorBufferLength)>0) { 1992 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, 1993 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1994 } 1995 } else { 1996 /* c is an unpaired lead surrogate, just return it */ 1997 } 1998 } else if(args.source<sourceLimit) { 1999 /* convert once more, to buffer[1] */ 2000 
args.targetLimit=buffer+2; 2001 _toUnicodeWithCallback(&args, err); 2002 if(*err==U_BUFFER_OVERFLOW_ERROR) { 2003 *err=U_ZERO_ERROR; 2004 } 2005 2006 length=(int32_t)(args.target-buffer); 2007 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { 2008 /* got a trail surrogate, too */ 2009 c=U16_GET_SUPPLEMENTARY(c, c2); 2010 i=2; 2011 } 2012 } 2013 } 2014 } 2015 2016 /* 2017 * move leftover output from buffer[i..length[ 2018 * into the beginning of the overflow buffer 2019 */ 2020 if(i<length) { 2021 /* move further overflow back */ 2022 int32_t delta=length-i; 2023 if((length=cnv->UCharErrorBufferLength)>0) { 2024 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, 2025 length*U_SIZEOF_UCHAR); 2026 } 2027 cnv->UCharErrorBufferLength=(int8_t)(length+delta); 2028 2029 cnv->UCharErrorBuffer[0]=buffer[i++]; 2030 if(delta>1) { 2031 cnv->UCharErrorBuffer[1]=buffer[i]; 2032 } 2033 } 2034 2035 *source=args.source; 2036 return c; 2037 } 2038 2039 /* ucnv_convert() and siblings ---------------------------------------------- */ 2040 2041 U_CAPI void U_EXPORT2 2042 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, 2043 char **target, const char *targetLimit, 2044 const char **source, const char *sourceLimit, 2045 UChar *pivotStart, UChar **pivotSource, 2046 UChar **pivotTarget, const UChar *pivotLimit, 2047 UBool reset, UBool flush, 2048 UErrorCode *pErrorCode) { 2049 UChar pivotBuffer[CHUNK_SIZE]; 2050 const UChar *myPivotSource; 2051 UChar *myPivotTarget; 2052 const char *s; 2053 char *t; 2054 2055 UConverterToUnicodeArgs toUArgs; 2056 UConverterFromUnicodeArgs fromUArgs; 2057 UConverterConvert convert; 2058 2059 /* error checking */ 2060 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2061 return; 2062 } 2063 2064 if( targetCnv==NULL || sourceCnv==NULL || 2065 source==NULL || *source==NULL || 2066 target==NULL || *target==NULL || targetLimit==NULL 2067 ) { 2068 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2069 return; 2070 } 2071 2072 
s=*source; 2073 t=*target; 2074 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { 2075 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2076 return; 2077 } 2078 2079 /* 2080 * Make sure that the buffer sizes do not exceed the number range for 2081 * int32_t. See ucnv_toUnicode() for a more detailed comment. 2082 */ 2083 if( 2084 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || 2085 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) 2086 ) { 2087 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2088 return; 2089 } 2090 2091 if(pivotStart==NULL) { 2092 if(!flush) { 2093 /* streaming conversion requires an explicit pivot buffer */ 2094 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2095 return; 2096 } 2097 2098 /* use the stack pivot buffer */ 2099 myPivotSource=myPivotTarget=pivotStart=pivotBuffer; 2100 pivotSource=(UChar **)&myPivotSource; 2101 pivotTarget=&myPivotTarget; 2102 pivotLimit=pivotBuffer+CHUNK_SIZE; 2103 } else if( pivotStart>=pivotLimit || 2104 pivotSource==NULL || *pivotSource==NULL || 2105 pivotTarget==NULL || *pivotTarget==NULL || 2106 pivotLimit==NULL 2107 ) { 2108 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2109 return; 2110 } 2111 2112 if(sourceLimit==NULL) { 2113 /* get limit of single-byte-NUL-terminated source string */ 2114 sourceLimit=uprv_strchr(*source, 0); 2115 } 2116 2117 if(reset) { 2118 ucnv_resetToUnicode(sourceCnv); 2119 ucnv_resetFromUnicode(targetCnv); 2120 *pivotSource=*pivotTarget=pivotStart; 2121 } else if(targetCnv->charErrorBufferLength>0) { 2122 /* output the targetCnv overflow buffer */ 2123 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { 2124 /* U_BUFFER_OVERFLOW_ERROR */ 2125 return; 2126 } 2127 /* *target has moved, therefore stop using t */ 2128 2129 if( !flush && 2130 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && 2131 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit 2132 ) { 2133 /* the fromUnicode overflow 
buffer is emptied and there is no new input: we are done */ 2134 return; 2135 } 2136 } 2137 2138 /* Is direct-UTF-8 conversion available? */ 2139 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2140 targetCnv->sharedData->impl->fromUTF8!=NULL 2141 ) { 2142 convert=targetCnv->sharedData->impl->fromUTF8; 2143 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2144 sourceCnv->sharedData->impl->toUTF8!=NULL 2145 ) { 2146 convert=sourceCnv->sharedData->impl->toUTF8; 2147 } else { 2148 convert=NULL; 2149 } 2150 2151 /* 2152 * If direct-UTF-8 conversion is available, then we use a smaller 2153 * pivot buffer for error handling and partial matches 2154 * so that we quickly return to direct conversion. 2155 * 2156 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. 2157 * 2158 * We could reduce the pivot buffer size further, at the cost of 2159 * buffer overflows from callbacks. 2160 * The pivot buffer should not be smaller than the maximum number of 2161 * fromUnicode extension table input UChars 2162 * (for m:n conversion, see 2163 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) 2164 * or 2 for surrogate pairs. 2165 * 2166 * Too small a buffer can cause thrashing between pivoting and direct 2167 * conversion, with function call overhead outweighing the benefits 2168 * of direct conversion. 
2169 */ 2170 if(convert!=NULL && (pivotLimit-pivotStart)>32) { 2171 pivotLimit=pivotStart+32; 2172 } 2173 2174 /* prepare the converter arguments */ 2175 fromUArgs.converter=targetCnv; 2176 fromUArgs.flush=FALSE; 2177 fromUArgs.offsets=NULL; 2178 fromUArgs.target=*target; 2179 fromUArgs.targetLimit=targetLimit; 2180 fromUArgs.size=sizeof(fromUArgs); 2181 2182 toUArgs.converter=sourceCnv; 2183 toUArgs.flush=flush; 2184 toUArgs.offsets=NULL; 2185 toUArgs.source=s; 2186 toUArgs.sourceLimit=sourceLimit; 2187 toUArgs.targetLimit=pivotLimit; 2188 toUArgs.size=sizeof(toUArgs); 2189 2190 /* 2191 * TODO: Consider separating this function into two functions, 2192 * extracting exactly the conversion loop, 2193 * for readability and to reduce the set of visible variables. 2194 * 2195 * Otherwise stop using s and t from here on. 2196 */ 2197 s=t=NULL; 2198 2199 /* 2200 * conversion loop 2201 * 2202 * The sequence of steps in the loop may appear backward, 2203 * but the principle is simple: 2204 * In the chain of 2205 * source - sourceCnv overflow - pivot - targetCnv overflow - target 2206 * empty out later buffers before refilling them from earlier ones. 2207 * 2208 * The targetCnv overflow buffer is flushed out only once before the loop. 2209 */ 2210 for(;;) { 2211 /* 2212 * if(pivot not empty or error or replay or flush fromUnicode) { 2213 * fromUnicode(pivot -> target); 2214 * } 2215 * 2216 * For pivoting conversion; and for direct conversion for 2217 * error callback handling and flushing the replay buffer. 
2218 */ 2219 if( *pivotSource<*pivotTarget || 2220 U_FAILURE(*pErrorCode) || 2221 targetCnv->preFromULength<0 || 2222 fromUArgs.flush 2223 ) { 2224 fromUArgs.source=*pivotSource; 2225 fromUArgs.sourceLimit=*pivotTarget; 2226 _fromUnicodeWithCallback(&fromUArgs, pErrorCode); 2227 if(U_FAILURE(*pErrorCode)) { 2228 /* target overflow, or conversion error */ 2229 *pivotSource=(UChar *)fromUArgs.source; 2230 break; 2231 } 2232 2233 /* 2234 * _fromUnicodeWithCallback() must have consumed the pivot contents 2235 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() 2236 */ 2237 } 2238 2239 /* The pivot buffer is empty; reset it so we start at pivotStart. */ 2240 *pivotSource=*pivotTarget=pivotStart; 2241 2242 /* 2243 * if(sourceCnv overflow buffer not empty) { 2244 * move(sourceCnv overflow buffer -> pivot); 2245 * continue; 2246 * } 2247 */ 2248 /* output the sourceCnv overflow buffer */ 2249 if(sourceCnv->UCharErrorBufferLength>0) { 2250 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { 2251 /* U_BUFFER_OVERFLOW_ERROR */ 2252 *pErrorCode=U_ZERO_ERROR; 2253 } 2254 continue; 2255 } 2256 2257 /* 2258 * check for end of input and break if done 2259 * 2260 * Checking both flush and fromUArgs.flush ensures that the converters 2261 * have been called with the flush flag set if the ucnv_convertEx() 2262 * caller set it. 
2263 */ 2264 if( toUArgs.source==sourceLimit && 2265 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && 2266 (!flush || fromUArgs.flush) 2267 ) { 2268 /* done successfully */ 2269 break; 2270 } 2271 2272 /* 2273 * use direct conversion if available 2274 * but not if continuing a partial match 2275 * or flushing the toUnicode replay buffer 2276 */ 2277 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { 2278 if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2279 /* remove a warning that may be set by this function */ 2280 *pErrorCode=U_ZERO_ERROR; 2281 } 2282 convert(&fromUArgs, &toUArgs, pErrorCode); 2283 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2284 break; 2285 } else if(U_FAILURE(*pErrorCode)) { 2286 if(sourceCnv->toULength>0) { 2287 /* 2288 * Fall through to calling _toUnicodeWithCallback() 2289 * for callback handling. 2290 * 2291 * The pivot buffer will be reset with 2292 * *pivotSource=*pivotTarget=pivotStart; 2293 * which indicates a toUnicode error to the caller 2294 * (*pivotSource==pivotStart shows no pivot UChars consumed). 2295 */ 2296 } else { 2297 /* 2298 * Indicate a fromUnicode error to the caller 2299 * (*pivotSource>pivotStart shows some pivot UChars consumed). 2300 */ 2301 *pivotSource=*pivotTarget=pivotStart+1; 2302 /* 2303 * Loop around to calling _fromUnicodeWithCallbacks() 2304 * for callback handling. 2305 */ 2306 continue; 2307 } 2308 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2309 /* 2310 * No error, but the implementation requested to temporarily 2311 * fall back to pivoting. 2312 */ 2313 *pErrorCode=U_ZERO_ERROR; 2314 /* 2315 * The following else branches are almost identical to the end-of-input 2316 * handling in _toUnicodeWithCallback(). 2317 * Avoid calling it just for the end of input. 
2318 */ 2319 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ 2320 /* 2321 * the entire input stream is consumed 2322 * and there is a partial, truncated input sequence left 2323 */ 2324 2325 /* inject an error and continue with callback handling */ 2326 *pErrorCode=U_TRUNCATED_CHAR_FOUND; 2327 } else { 2328 /* input consumed */ 2329 if(flush) { 2330 /* reset the converters without calling the callback functions */ 2331 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); 2332 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); 2333 } 2334 2335 /* done successfully */ 2336 break; 2337 } 2338 } 2339 2340 /* 2341 * toUnicode(source -> pivot); 2342 * 2343 * For pivoting conversion; and for direct conversion for 2344 * error callback handling, continuing partial matches 2345 * and flushing the replay buffer. 2346 * 2347 * The pivot buffer is empty and reset. 2348 */ 2349 toUArgs.target=pivotStart; /* ==*pivotTarget */ 2350 /* toUArgs.targetLimit=pivotLimit; already set before the loop */ 2351 _toUnicodeWithCallback(&toUArgs, pErrorCode); 2352 *pivotTarget=toUArgs.target; 2353 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2354 /* pivot overflow: continue with the conversion loop */ 2355 *pErrorCode=U_ZERO_ERROR; 2356 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { 2357 /* conversion error, or there was nothing left to convert */ 2358 break; 2359 } 2360 /* 2361 * else: 2362 * _toUnicodeWithCallback() wrote into the pivot buffer, 2363 * continue with fromUnicode conversion. 2364 * 2365 * Set the fromUnicode flush flag if we flush and if toUnicode has 2366 * processed the end of the input. 
2367 */ 2368 if( flush && toUArgs.source==sourceLimit && 2369 sourceCnv->preToULength>=0 && 2370 sourceCnv->UCharErrorBufferLength==0 2371 ) { 2372 fromUArgs.flush=TRUE; 2373 } 2374 } 2375 2376 /* 2377 * The conversion loop is exited when one of the following is true: 2378 * - the entire source text has been converted successfully to the target buffer 2379 * - a target buffer overflow occurred 2380 * - a conversion error occurred 2381 */ 2382 2383 *source=toUArgs.source; 2384 *target=fromUArgs.target; 2385 2386 /* terminate the target buffer if possible */ 2387 if(flush && U_SUCCESS(*pErrorCode)) { 2388 if(*target!=targetLimit) { 2389 **target=0; 2390 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { 2391 *pErrorCode=U_ZERO_ERROR; 2392 } 2393 } else { 2394 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; 2395 } 2396 } 2397 } 2398 2399 /* internal implementation of ucnv_convert() etc. with preflighting */ 2400 static int32_t 2401 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, 2402 char *target, int32_t targetCapacity, 2403 const char *source, int32_t sourceLength, 2404 UErrorCode *pErrorCode) { 2405 UChar pivotBuffer[CHUNK_SIZE]; 2406 UChar *pivot, *pivot2; 2407 2408 char *myTarget; 2409 const char *sourceLimit; 2410 const char *targetLimit; 2411 int32_t targetLength=0; 2412 2413 /* set up */ 2414 if(sourceLength<0) { 2415 sourceLimit=uprv_strchr(source, 0); 2416 } else { 2417 sourceLimit=source+sourceLength; 2418 } 2419 2420 /* if there is no input data, we're done */ 2421 if(source==sourceLimit) { 2422 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2423 } 2424 2425 pivot=pivot2=pivotBuffer; 2426 myTarget=target; 2427 targetLength=0; 2428 2429 if(targetCapacity>0) { 2430 /* perform real conversion */ 2431 targetLimit=target+targetCapacity; 2432 ucnv_convertEx(outConverter, inConverter, 2433 &myTarget, targetLimit, 2434 &source, sourceLimit, 2435 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2436 FALSE, 2437 TRUE, 2438 
pErrorCode); 2439 targetLength=(int32_t)(myTarget-target); 2440 } 2441 2442 /* 2443 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing 2444 * to it but continue the conversion in order to store in targetCapacity 2445 * the number of bytes that was required. 2446 */ 2447 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) 2448 { 2449 char targetBuffer[CHUNK_SIZE]; 2450 2451 targetLimit=targetBuffer+CHUNK_SIZE; 2452 do { 2453 *pErrorCode=U_ZERO_ERROR; 2454 myTarget=targetBuffer; 2455 ucnv_convertEx(outConverter, inConverter, 2456 &myTarget, targetLimit, 2457 &source, sourceLimit, 2458 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2459 FALSE, 2460 TRUE, 2461 pErrorCode); 2462 targetLength+=(int32_t)(myTarget-targetBuffer); 2463 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 2464 2465 /* done with preflighting, set warnings and errors as appropriate */ 2466 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); 2467 } 2468 2469 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ 2470 return targetLength; 2471 } 2472 2473 U_CAPI int32_t U_EXPORT2 2474 ucnv_convert(const char *toConverterName, const char *fromConverterName, 2475 char *target, int32_t targetCapacity, 2476 const char *source, int32_t sourceLength, 2477 UErrorCode *pErrorCode) { 2478 UConverter in, out; /* stack-allocated */ 2479 UConverter *inConverter, *outConverter; 2480 int32_t targetLength; 2481 2482 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2483 return 0; 2484 } 2485 2486 if( source==NULL || sourceLength<-1 || 2487 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2488 ) { 2489 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2490 return 0; 2491 } 2492 2493 /* if there is no input data, we're done */ 2494 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2495 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2496 } 2497 2498 /* create the converters */ 2499 
inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); 2500 if(U_FAILURE(*pErrorCode)) { 2501 return 0; 2502 } 2503 2504 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); 2505 if(U_FAILURE(*pErrorCode)) { 2506 ucnv_close(inConverter); 2507 return 0; 2508 } 2509 2510 targetLength=ucnv_internalConvert(outConverter, inConverter, 2511 target, targetCapacity, 2512 source, sourceLength, 2513 pErrorCode); 2514 2515 ucnv_close(inConverter); 2516 ucnv_close(outConverter); 2517 2518 return targetLength; 2519 } 2520 2521 /* @internal */ 2522 static int32_t 2523 ucnv_convertAlgorithmic(UBool convertToAlgorithmic, 2524 UConverterType algorithmicType, 2525 UConverter *cnv, 2526 char *target, int32_t targetCapacity, 2527 const char *source, int32_t sourceLength, 2528 UErrorCode *pErrorCode) { 2529 UConverter algoConverterStatic; /* stack-allocated */ 2530 UConverter *algoConverter, *to, *from; 2531 int32_t targetLength; 2532 2533 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2534 return 0; 2535 } 2536 2537 if( cnv==NULL || source==NULL || sourceLength<-1 || 2538 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2539 ) { 2540 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2541 return 0; 2542 } 2543 2544 /* if there is no input data, we're done */ 2545 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2546 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2547 } 2548 2549 /* create the algorithmic converter */ 2550 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, 2551 "", 0, pErrorCode); 2552 if(U_FAILURE(*pErrorCode)) { 2553 return 0; 2554 } 2555 2556 /* reset the other converter */ 2557 if(convertToAlgorithmic) { 2558 /* cnv->Unicode->algo */ 2559 ucnv_resetToUnicode(cnv); 2560 to=algoConverter; 2561 from=cnv; 2562 } else { 2563 /* algo->Unicode->cnv */ 2564 ucnv_resetFromUnicode(cnv); 2565 from=algoConverter; 2566 to=cnv; 2567 } 2568 2569 targetLength=ucnv_internalConvert(to, from, 2570 
target, targetCapacity, 2571 source, sourceLength, 2572 pErrorCode); 2573 2574 ucnv_close(algoConverter); 2575 2576 return targetLength; 2577 } 2578 2579 U_CAPI int32_t U_EXPORT2 2580 ucnv_toAlgorithmic(UConverterType algorithmicType, 2581 UConverter *cnv, 2582 char *target, int32_t targetCapacity, 2583 const char *source, int32_t sourceLength, 2584 UErrorCode *pErrorCode) { 2585 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, 2586 target, targetCapacity, 2587 source, sourceLength, 2588 pErrorCode); 2589 } 2590 2591 U_CAPI int32_t U_EXPORT2 2592 ucnv_fromAlgorithmic(UConverter *cnv, 2593 UConverterType algorithmicType, 2594 char *target, int32_t targetCapacity, 2595 const char *source, int32_t sourceLength, 2596 UErrorCode *pErrorCode) { 2597 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, 2598 target, targetCapacity, 2599 source, sourceLength, 2600 pErrorCode); 2601 } 2602 2603 U_CAPI UConverterType U_EXPORT2 2604 ucnv_getType(const UConverter* converter) 2605 { 2606 int8_t type = converter->sharedData->staticData->conversionType; 2607 #if !UCONFIG_NO_LEGACY_CONVERSION 2608 if(type == UCNV_MBCS) { 2609 return ucnv_MBCSGetType(converter); 2610 } 2611 #endif 2612 return (UConverterType)type; 2613 } 2614 2615 U_CAPI void U_EXPORT2 2616 ucnv_getStarters(const UConverter* converter, 2617 UBool starters[256], 2618 UErrorCode* err) 2619 { 2620 if (err == NULL || U_FAILURE(*err)) { 2621 return; 2622 } 2623 2624 if(converter->sharedData->impl->getStarters != NULL) { 2625 converter->sharedData->impl->getStarters(converter, starters, err); 2626 } else { 2627 *err = U_ILLEGAL_ARGUMENT_ERROR; 2628 } 2629 } 2630 2631 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) 2632 { 2633 UErrorCode errorCode; 2634 const char *name; 2635 int32_t i; 2636 2637 if(cnv==NULL) { 2638 return NULL; 2639 } 2640 2641 errorCode=U_ZERO_ERROR; 2642 name=ucnv_getName(cnv, &errorCode); 2643 if(U_FAILURE(errorCode)) { 2644 return NULL; 2645 } 2646 2647 
for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i) 2648 { 2649 if(0==uprv_strcmp(name, ambiguousConverters[i].name)) 2650 { 2651 return ambiguousConverters+i; 2652 } 2653 } 2654 2655 return NULL; 2656 } 2657 2658 U_CAPI void U_EXPORT2 2659 ucnv_fixFileSeparator(const UConverter *cnv, 2660 UChar* source, 2661 int32_t sourceLength) { 2662 const UAmbiguousConverter *a; 2663 int32_t i; 2664 UChar variant5c; 2665 2666 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) 2667 { 2668 return; 2669 } 2670 2671 variant5c=a->variant5c; 2672 for(i=0; i<sourceLength; ++i) { 2673 if(source[i]==variant5c) { 2674 source[i]=0x5c; 2675 } 2676 } 2677 } 2678 2679 U_CAPI UBool U_EXPORT2 2680 ucnv_isAmbiguous(const UConverter *cnv) { 2681 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); 2682 } 2683 2684 U_CAPI void U_EXPORT2 2685 ucnv_setFallback(UConverter *cnv, UBool usesFallback) 2686 { 2687 cnv->useFallback = usesFallback; 2688 } 2689 2690 U_CAPI UBool U_EXPORT2 2691 ucnv_usesFallback(const UConverter *cnv) 2692 { 2693 return cnv->useFallback; 2694 } 2695 2696 U_CAPI void U_EXPORT2 2697 ucnv_getInvalidChars (const UConverter * converter, 2698 char *errBytes, 2699 int8_t * len, 2700 UErrorCode * err) 2701 { 2702 if (err == NULL || U_FAILURE(*err)) 2703 { 2704 return; 2705 } 2706 if (len == NULL || errBytes == NULL || converter == NULL) 2707 { 2708 *err = U_ILLEGAL_ARGUMENT_ERROR; 2709 return; 2710 } 2711 if (*len < converter->invalidCharLength) 2712 { 2713 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2714 return; 2715 } 2716 if ((*len = converter->invalidCharLength) > 0) 2717 { 2718 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); 2719 } 2720 } 2721 2722 U_CAPI void U_EXPORT2 2723 ucnv_getInvalidUChars (const UConverter * converter, 2724 UChar *errChars, 2725 int8_t * len, 2726 UErrorCode * err) 2727 { 2728 if (err == NULL || U_FAILURE(*err)) 2729 { 2730 return; 2731 } 2732 if (len == NULL || errChars == NULL || converter 
== NULL) 2733 { 2734 *err = U_ILLEGAL_ARGUMENT_ERROR; 2735 return; 2736 } 2737 if (*len < converter->invalidUCharLength) 2738 { 2739 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2740 return; 2741 } 2742 if ((*len = converter->invalidUCharLength) > 0) 2743 { 2744 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); 2745 } 2746 } 2747 2748 #define SIG_MAX_LEN 5 2749 2750 U_CAPI const char* U_EXPORT2 2751 ucnv_detectUnicodeSignature( const char* source, 2752 int32_t sourceLength, 2753 int32_t* signatureLength, 2754 UErrorCode* pErrorCode) { 2755 int32_t dummy; 2756 2757 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN 2758 * bytes we don't misdetect something 2759 */ 2760 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; 2761 int i = 0; 2762 2763 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ 2764 return NULL; 2765 } 2766 2767 if(source == NULL || sourceLength < -1){ 2768 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2769 return NULL; 2770 } 2771 2772 if(signatureLength == NULL) { 2773 signatureLength = &dummy; 2774 } 2775 2776 if(sourceLength==-1){ 2777 sourceLength=(int32_t)uprv_strlen(source); 2778 } 2779 2780 2781 while(i<sourceLength&& i<SIG_MAX_LEN){ 2782 start[i]=source[i]; 2783 i++; 2784 } 2785 2786 if(start[0] == '\xFE' && start[1] == '\xFF') { 2787 *signatureLength=2; 2788 return "UTF-16BE"; 2789 } else if(start[0] == '\xFF' && start[1] == '\xFE') { 2790 if(start[2] == '\x00' && start[3] =='\x00') { 2791 *signatureLength=4; 2792 return "UTF-32LE"; 2793 } else { 2794 *signatureLength=2; 2795 return "UTF-16LE"; 2796 } 2797 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { 2798 *signatureLength=3; 2799 return "UTF-8"; 2800 } else if(start[0] == '\x00' && start[1] == '\x00' && 2801 start[2] == '\xFE' && start[3]=='\xFF') { 2802 *signatureLength=4; 2803 return "UTF-32BE"; 2804 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { 2805 *signatureLength=3; 2806 return "SCSU"; 
2807 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { 2808 *signatureLength=3; 2809 return "BOCU-1"; 2810 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { 2811 /* 2812 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ 2813 * depending on the second UTF-16 code unit. 2814 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF 2815 * if it occurs. 2816 * 2817 * So far we have +/v 2818 */ 2819 if(start[3] == '\x38' && start[4] == '\x2D') { 2820 /* 5 bytes +/v8- */ 2821 *signatureLength=5; 2822 return "UTF-7"; 2823 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { 2824 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ 2825 *signatureLength=4; 2826 return "UTF-7"; 2827 } 2828 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ 2829 *signatureLength=4; 2830 return "UTF-EBCDIC"; 2831 } 2832 2833 2834 /* no known Unicode signature byte sequence recognized */ 2835 *signatureLength=0; 2836 return NULL; 2837 } 2838 2839 U_CAPI int32_t U_EXPORT2 2840 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) 2841 { 2842 if(status == NULL || U_FAILURE(*status)){ 2843 return -1; 2844 } 2845 if(cnv == NULL){ 2846 *status = U_ILLEGAL_ARGUMENT_ERROR; 2847 return -1; 2848 } 2849 2850 if(cnv->preFromULength > 0){ 2851 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; 2852 }else if(cnv->preFromULength < 0){ 2853 return -cnv->preFromULength ; 2854 }else if(cnv->fromUChar32 > 0){ 2855 return 1; 2856 }else if(cnv->preFromUFirstCP >0){ 2857 return U16_LENGTH(cnv->preFromUFirstCP); 2858 } 2859 return 0; 2860 2861 } 2862 2863 U_CAPI int32_t U_EXPORT2 2864 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ 2865 2866 if(status == NULL || U_FAILURE(*status)){ 2867 return -1; 2868 } 2869 if(cnv == NULL){ 2870 *status = U_ILLEGAL_ARGUMENT_ERROR; 2871 return -1; 2872 } 2873 2874 if(cnv->preToULength > 
0){ 2875 return cnv->preToULength ; 2876 }else if(cnv->preToULength < 0){ 2877 return -cnv->preToULength; 2878 }else if(cnv->toULength > 0){ 2879 return cnv->toULength; 2880 } 2881 return 0; 2882 } 2883 #endif 2884 2885 /* 2886 * Hey, Emacs, please set the following: 2887 * 2888 * Local Variables: 2889 * indent-tabs-mode: nil 2890 * End: 2891 * 2892 */ 2893