1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * ucnv.c: 10 * Implements APIs for the ICU's codeset conversion library; 11 * mostly calls through internal functions; 12 * created by Bertrand A. Damiba 13 * 14 * Modification History: 15 * 16 * Date Name Description 17 * 04/04/99 helena Fixed internal header inclusion. 18 * 05/09/00 helena Added implementation to handle fallback mappings. 19 * 06/20/2000 helena OS/400 port changes; mostly typecast. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/ustring.h" 27 #include "unicode/ucnv.h" 28 #include "unicode/ucnv_err.h" 29 #include "unicode/uset.h" 30 #include "unicode/utf.h" 31 #include "unicode/utf16.h" 32 #include "putilimp.h" 33 #include "cmemory.h" 34 #include "cstring.h" 35 #include "uassert.h" 36 #include "utracimp.h" 37 #include "ustr_imp.h" 38 #include "ucnv_imp.h" 39 #include "ucnv_cnv.h" 40 #include "ucnv_bld.h" 41 42 /* size of intermediate and preflighting buffers in ucnv_convert() */ 43 #define CHUNK_SIZE 1024 44 45 typedef struct UAmbiguousConverter { 46 const char *name; 47 const UChar variant5c; 48 } UAmbiguousConverter; 49 50 static const UAmbiguousConverter ambiguousConverters[]={ 51 { "ibm-897_P100-1995", 0xa5 }, 52 { "ibm-942_P120-1999", 0xa5 }, 53 { "ibm-943_P130-1999", 0xa5 }, 54 { "ibm-946_P100-1995", 0xa5 }, 55 { "ibm-33722_P120-1999", 0xa5 }, 56 { "ibm-1041_P100-1995", 0xa5 }, 57 /*{ "ibm-54191_P100-2006", 0xa5 },*/ 58 /*{ "ibm-62383_P100-2007", 0xa5 },*/ 59 /*{ "ibm-891_P100-1995", 0x20a9 },*/ 60 { "ibm-944_P100-1995", 0x20a9 }, 61 { "ibm-949_P110-1999", 0x20a9 }, 62 { "ibm-1363_P110-1997", 0x20a9 }, 63 { "ISO_2022,locale=ko,version=0", 0x20a9 }, 64 { "ibm-1088_P100-1995", 0x20a9 } 65 }; 66 67 /*Calls 
through createConverter */ 68 U_CAPI UConverter* U_EXPORT2 69 ucnv_open (const char *name, 70 UErrorCode * err) 71 { 72 UConverter *r; 73 74 if (err == NULL || U_FAILURE (*err)) { 75 return NULL; 76 } 77 78 r = ucnv_createConverter(NULL, name, err); 79 return r; 80 } 81 82 U_CAPI UConverter* U_EXPORT2 83 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) 84 { 85 return ucnv_createConverterFromPackage(packageName, converterName, err); 86 } 87 88 /*Extracts the UChar* to a char* and calls through createConverter */ 89 U_CAPI UConverter* U_EXPORT2 90 ucnv_openU (const UChar * name, 91 UErrorCode * err) 92 { 93 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 94 95 if (err == NULL || U_FAILURE(*err)) 96 return NULL; 97 if (name == NULL) 98 return ucnv_open (NULL, err); 99 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) 100 { 101 *err = U_ILLEGAL_ARGUMENT_ERROR; 102 return NULL; 103 } 104 return ucnv_open(u_austrcpy(asciiName, name), err); 105 } 106 107 /* Copy the string that is represented by the UConverterPlatform enum 108 * @param platformString An output buffer 109 * @param platform An enum representing a platform 110 * @return the length of the copied string. 
111 */ 112 static int32_t 113 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) 114 { 115 switch (pltfrm) 116 { 117 case UCNV_IBM: 118 uprv_strcpy(platformString, "ibm-"); 119 return 4; 120 case UCNV_UNKNOWN: 121 break; 122 } 123 124 /* default to empty string */ 125 *platformString = 0; 126 return 0; 127 } 128 129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls 130 *through createConverter*/ 131 U_CAPI UConverter* U_EXPORT2 132 ucnv_openCCSID (int32_t codepage, 133 UConverterPlatform platform, 134 UErrorCode * err) 135 { 136 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 137 int32_t myNameLen; 138 139 if (err == NULL || U_FAILURE (*err)) 140 return NULL; 141 142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */ 143 myNameLen = ucnv_copyPlatformString(myName, platform); 144 T_CString_integerToString(myName + myNameLen, codepage, 10); 145 146 return ucnv_createConverter(NULL, myName, err); 147 } 148 149 /* Creating a temporary stack-based object that can be used in one thread, 150 and created from a converter that is shared across threads. 151 */ 152 153 U_CAPI UConverter* U_EXPORT2 154 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) 155 { 156 UConverter *localConverter, *allocatedConverter; 157 int32_t bufferSizeNeeded; 158 char *stackBufferChars = (char *)stackBuffer; 159 UErrorCode cbErr; 160 UConverterToUnicodeArgs toUArgs = { 161 sizeof(UConverterToUnicodeArgs), 162 TRUE, 163 NULL, 164 NULL, 165 NULL, 166 NULL, 167 NULL, 168 NULL 169 }; 170 UConverterFromUnicodeArgs fromUArgs = { 171 sizeof(UConverterFromUnicodeArgs), 172 TRUE, 173 NULL, 174 NULL, 175 NULL, 176 NULL, 177 NULL, 178 NULL 179 }; 180 181 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); 182 183 if (status == NULL || U_FAILURE(*status)){ 184 UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); 185 return 0; 186 } 187 188 if (!pBufferSize || !cnv){ 189 *status = U_ILLEGAL_ARGUMENT_ERROR; 190 UTRACE_EXIT_STATUS(*status); 191 return 0; 192 } 193 194 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", 195 ucnv_getName(cnv, status), cnv, stackBuffer); 196 197 if (cnv->sharedData->impl->safeClone != NULL) { 198 /* call the custom safeClone function for sizing */ 199 bufferSizeNeeded = 0; 200 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); 201 } 202 else 203 { 204 /* inherent sizing */ 205 bufferSizeNeeded = sizeof(UConverter); 206 } 207 208 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 209 *pBufferSize = bufferSizeNeeded; 210 UTRACE_EXIT_VALUE(bufferSizeNeeded); 211 return 0; 212 } 213 214 215 /* Pointers on 64-bit platforms need to be aligned 216 * on a 64-bit boundary in memory. 217 */ 218 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 219 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 220 if(*pBufferSize > offsetUp) { 221 *pBufferSize -= offsetUp; 222 stackBufferChars += offsetUp; 223 } else { 224 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 225 *pBufferSize = 1; 226 } 227 } 228 229 stackBuffer = (void *)stackBufferChars; 230 231 /* Now, see if we must allocate any memory */ 232 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL) 233 { 234 /* allocate one here...*/ 235 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); 236 237 if(localConverter == NULL) { 238 *status = U_MEMORY_ALLOCATION_ERROR; 239 UTRACE_EXIT_STATUS(*status); 240 return NULL; 241 } 242 243 if (U_SUCCESS(*status)) { 244 *status = U_SAFECLONE_ALLOCATED_WARNING; 245 } 246 247 /* record the fact that memory was allocated */ 248 *pBufferSize = bufferSizeNeeded; 249 } else { 250 /* just use the stack buffer */ 251 localConverter = (UConverter*) stackBuffer; 
252 allocatedConverter = NULL; 253 } 254 255 uprv_memset(localConverter, 0, bufferSizeNeeded); 256 257 /* Copy initial state */ 258 uprv_memcpy(localConverter, cnv, sizeof(UConverter)); 259 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; 260 261 /* copy the substitution string */ 262 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 263 localConverter->subChars = (uint8_t *)localConverter->subUChars; 264 } else { 265 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 266 if (localConverter->subChars == NULL) { 267 uprv_free(allocatedConverter); 268 UTRACE_EXIT_STATUS(*status); 269 return NULL; 270 } 271 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 272 } 273 274 /* now either call the safeclone fcn or not */ 275 if (cnv->sharedData->impl->safeClone != NULL) { 276 /* call the custom safeClone function */ 277 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); 278 } 279 280 if(localConverter==NULL || U_FAILURE(*status)) { 281 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 282 uprv_free(allocatedConverter->subChars); 283 } 284 uprv_free(allocatedConverter); 285 UTRACE_EXIT_STATUS(*status); 286 return NULL; 287 } 288 289 /* increment refcount of shared data if needed */ 290 /* 291 Checking whether it's an algorithic converter is okay 292 in multithreaded applications because the value never changes. 293 Don't check referenceCounter for any other value. 
294 */ 295 if (cnv->sharedData->referenceCounter != ~0) { 296 ucnv_incrementRefCount(cnv->sharedData); 297 } 298 299 if(localConverter == (UConverter*)stackBuffer) { 300 /* we're using user provided data - set to not destroy */ 301 localConverter->isCopyLocal = TRUE; 302 } 303 304 /* allow callback functions to handle any memory allocation */ 305 toUArgs.converter = fromUArgs.converter = localConverter; 306 cbErr = U_ZERO_ERROR; 307 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); 308 cbErr = U_ZERO_ERROR; 309 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); 310 311 UTRACE_EXIT_PTR_STATUS(localConverter, *status); 312 return localConverter; 313 } 314 315 316 317 /*Decreases the reference counter in the shared immutable section of the object 318 *and frees the mutable part*/ 319 320 U_CAPI void U_EXPORT2 321 ucnv_close (UConverter * converter) 322 { 323 UErrorCode errorCode = U_ZERO_ERROR; 324 325 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); 326 327 if (converter == NULL) 328 { 329 UTRACE_EXIT(); 330 return; 331 } 332 333 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", 334 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); 335 336 /* In order to speed up the close, only call the callbacks when they have been changed. 337 This performance check will only work when the callbacks are set within a shared library 338 or from user code that statically links this code. 
*/ 339 /* first, notify the callback functions that the converter is closed */ 340 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 341 UConverterToUnicodeArgs toUArgs = { 342 sizeof(UConverterToUnicodeArgs), 343 TRUE, 344 NULL, 345 NULL, 346 NULL, 347 NULL, 348 NULL, 349 NULL 350 }; 351 352 toUArgs.converter = converter; 353 errorCode = U_ZERO_ERROR; 354 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); 355 } 356 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 357 UConverterFromUnicodeArgs fromUArgs = { 358 sizeof(UConverterFromUnicodeArgs), 359 TRUE, 360 NULL, 361 NULL, 362 NULL, 363 NULL, 364 NULL, 365 NULL 366 }; 367 fromUArgs.converter = converter; 368 errorCode = U_ZERO_ERROR; 369 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); 370 } 371 372 if (converter->sharedData->impl->close != NULL) { 373 converter->sharedData->impl->close(converter); 374 } 375 376 if (converter->subChars != (uint8_t *)converter->subUChars) { 377 uprv_free(converter->subChars); 378 } 379 380 /* 381 Checking whether it's an algorithic converter is okay 382 in multithreaded applications because the value never changes. 383 Don't check referenceCounter for any other value. 
384 */ 385 if (converter->sharedData->referenceCounter != ~0) { 386 ucnv_unloadSharedDataIfReady(converter->sharedData); 387 } 388 389 if(!converter->isCopyLocal){ 390 uprv_free(converter); 391 } 392 393 UTRACE_EXIT(); 394 } 395 396 /*returns a single Name from the list, will return NULL if out of bounds 397 */ 398 U_CAPI const char* U_EXPORT2 399 ucnv_getAvailableName (int32_t n) 400 { 401 if (0 <= n && n <= 0xffff) { 402 UErrorCode err = U_ZERO_ERROR; 403 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); 404 if (U_SUCCESS(err)) { 405 return name; 406 } 407 } 408 return NULL; 409 } 410 411 U_CAPI int32_t U_EXPORT2 412 ucnv_countAvailable () 413 { 414 UErrorCode err = U_ZERO_ERROR; 415 return ucnv_bld_countAvailableConverters(&err); 416 } 417 418 U_CAPI void U_EXPORT2 419 ucnv_getSubstChars (const UConverter * converter, 420 char *mySubChar, 421 int8_t * len, 422 UErrorCode * err) 423 { 424 if (U_FAILURE (*err)) 425 return; 426 427 if (converter->subCharLen <= 0) { 428 /* Unicode string or empty string from ucnv_setSubstString(). 
*/ 429 *len = 0; 430 return; 431 } 432 433 if (*len < converter->subCharLen) /*not enough space in subChars */ 434 { 435 *err = U_INDEX_OUTOFBOUNDS_ERROR; 436 return; 437 } 438 439 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ 440 *len = converter->subCharLen; /*store # of bytes copied to buffer */ 441 } 442 443 U_CAPI void U_EXPORT2 444 ucnv_setSubstChars (UConverter * converter, 445 const char *mySubChar, 446 int8_t len, 447 UErrorCode * err) 448 { 449 if (U_FAILURE (*err)) 450 return; 451 452 /*Makes sure that the subChar is within the codepages char length boundaries */ 453 if ((len > converter->sharedData->staticData->maxBytesPerChar) 454 || (len < converter->sharedData->staticData->minBytesPerChar)) 455 { 456 *err = U_ILLEGAL_ARGUMENT_ERROR; 457 return; 458 } 459 460 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ 461 converter->subCharLen = len; /*sets the new len */ 462 463 /* 464 * There is currently (2001Feb) no separate API to set/get subChar1. 465 * In order to always have subChar written after it is explicitly set, 466 * we set subChar1 to 0. 467 */ 468 converter->subChar1 = 0; 469 470 return; 471 } 472 473 U_CAPI void U_EXPORT2 474 ucnv_setSubstString(UConverter *cnv, 475 const UChar *s, 476 int32_t length, 477 UErrorCode *err) { 478 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; 479 char chars[UCNV_ERROR_BUFFER_LENGTH]; 480 481 UConverter *clone; 482 uint8_t *subChars; 483 int32_t cloneSize, length8; 484 485 /* Let the following functions check all arguments. 
*/ 486 cloneSize = sizeof(cloneBuffer); 487 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); 488 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); 489 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); 490 ucnv_close(clone); 491 if (U_FAILURE(*err)) { 492 return; 493 } 494 495 if (cnv->sharedData->impl->writeSub == NULL 496 #if !UCONFIG_NO_LEGACY_CONVERSION 497 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && 498 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) 499 #endif 500 ) { 501 /* The converter is not stateful. Store the charset bytes as a fixed string. */ 502 subChars = (uint8_t *)chars; 503 } else { 504 /* 505 * The converter has a non-default writeSub() function, indicating 506 * that it is stateful. 507 * Store the Unicode string for on-the-fly conversion for correct 508 * state handling. 509 */ 510 if (length > UCNV_ERROR_BUFFER_LENGTH) { 511 /* 512 * Should not occur. The converter should output at least one byte 513 * per UChar, which means that ucnv_fromUChars() should catch all 514 * overflows. 515 */ 516 *err = U_BUFFER_OVERFLOW_ERROR; 517 return; 518 } 519 subChars = (uint8_t *)s; 520 if (length < 0) { 521 length = u_strlen(s); 522 } 523 length8 = length * U_SIZEOF_UCHAR; 524 } 525 526 /* 527 * For storing the substitution string, select either the small buffer inside 528 * UConverter or allocate a subChars buffer. 529 */ 530 if (length8 > UCNV_MAX_SUBCHAR_LEN) { 531 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ 532 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 533 /* Allocate a new buffer for the string. 
*/ 534 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 535 if (cnv->subChars == NULL) { 536 cnv->subChars = (uint8_t *)cnv->subUChars; 537 *err = U_MEMORY_ALLOCATION_ERROR; 538 return; 539 } 540 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 541 } 542 } 543 544 /* Copy the substitution string into the UConverter or its subChars buffer. */ 545 if (length8 == 0) { 546 cnv->subCharLen = 0; 547 } else { 548 uprv_memcpy(cnv->subChars, subChars, length8); 549 if (subChars == (uint8_t *)chars) { 550 cnv->subCharLen = (int8_t)length8; 551 } else /* subChars == s */ { 552 cnv->subCharLen = (int8_t)-length; 553 } 554 } 555 556 /* See comment in ucnv_setSubstChars(). */ 557 cnv->subChar1 = 0; 558 } 559 560 /*resets the internal states of a converter 561 *goal : have the same behaviour than a freshly created converter 562 */ 563 static void _reset(UConverter *converter, UConverterResetChoice choice, 564 UBool callCallback) { 565 if(converter == NULL) { 566 return; 567 } 568 569 if(callCallback) { 570 /* first, notify the callback functions that the converter is reset */ 571 UErrorCode errorCode; 572 573 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 574 UConverterToUnicodeArgs toUArgs = { 575 sizeof(UConverterToUnicodeArgs), 576 TRUE, 577 NULL, 578 NULL, 579 NULL, 580 NULL, 581 NULL, 582 NULL 583 }; 584 toUArgs.converter = converter; 585 errorCode = U_ZERO_ERROR; 586 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); 587 } 588 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 589 UConverterFromUnicodeArgs fromUArgs = { 590 sizeof(UConverterFromUnicodeArgs), 591 TRUE, 592 NULL, 593 NULL, 594 NULL, 595 NULL, 596 NULL, 597 NULL 598 }; 599 fromUArgs.converter = converter; 600 errorCode = U_ZERO_ERROR; 601 
converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); 602 } 603 } 604 605 /* now reset the converter itself */ 606 if(choice<=UCNV_RESET_TO_UNICODE) { 607 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; 608 converter->mode = 0; 609 converter->toULength = 0; 610 converter->invalidCharLength = converter->UCharErrorBufferLength = 0; 611 converter->preToULength = 0; 612 } 613 if(choice!=UCNV_RESET_TO_UNICODE) { 614 converter->fromUnicodeStatus = 0; 615 converter->fromUChar32 = 0; 616 converter->invalidUCharLength = converter->charErrorBufferLength = 0; 617 converter->preFromUFirstCP = U_SENTINEL; 618 converter->preFromULength = 0; 619 } 620 621 if (converter->sharedData->impl->reset != NULL) { 622 /* call the custom reset function */ 623 converter->sharedData->impl->reset(converter, choice); 624 } 625 } 626 627 U_CAPI void U_EXPORT2 628 ucnv_reset(UConverter *converter) 629 { 630 _reset(converter, UCNV_RESET_BOTH, TRUE); 631 } 632 633 U_CAPI void U_EXPORT2 634 ucnv_resetToUnicode(UConverter *converter) 635 { 636 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); 637 } 638 639 U_CAPI void U_EXPORT2 640 ucnv_resetFromUnicode(UConverter *converter) 641 { 642 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); 643 } 644 645 U_CAPI int8_t U_EXPORT2 646 ucnv_getMaxCharSize (const UConverter * converter) 647 { 648 return converter->maxBytesPerUChar; 649 } 650 651 652 U_CAPI int8_t U_EXPORT2 653 ucnv_getMinCharSize (const UConverter * converter) 654 { 655 return converter->sharedData->staticData->minBytesPerChar; 656 } 657 658 U_CAPI const char* U_EXPORT2 659 ucnv_getName (const UConverter * converter, UErrorCode * err) 660 661 { 662 if (U_FAILURE (*err)) 663 return NULL; 664 if(converter->sharedData->impl->getName){ 665 const char* temp= converter->sharedData->impl->getName(converter); 666 if(temp) 667 return temp; 668 } 669 return converter->sharedData->staticData->name; 670 } 671 672 U_CAPI int32_t 
U_EXPORT2 673 ucnv_getCCSID(const UConverter * converter, 674 UErrorCode * err) 675 { 676 int32_t ccsid; 677 if (U_FAILURE (*err)) 678 return -1; 679 680 ccsid = converter->sharedData->staticData->codepage; 681 if (ccsid == 0) { 682 /* Rare case. This is for cases like gb18030, 683 which doesn't have an IBM canonical name, but does have an IBM alias. */ 684 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); 685 if (U_SUCCESS(*err) && standardName) { 686 const char *ccsidStr = uprv_strchr(standardName, '-'); 687 if (ccsidStr) { 688 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ 689 } 690 } 691 } 692 return ccsid; 693 } 694 695 696 U_CAPI UConverterPlatform U_EXPORT2 697 ucnv_getPlatform (const UConverter * converter, 698 UErrorCode * err) 699 { 700 if (U_FAILURE (*err)) 701 return UCNV_UNKNOWN; 702 703 return (UConverterPlatform)converter->sharedData->staticData->platform; 704 } 705 706 U_CAPI void U_EXPORT2 707 ucnv_getToUCallBack (const UConverter * converter, 708 UConverterToUCallback *action, 709 const void **context) 710 { 711 *action = converter->fromCharErrorBehaviour; 712 *context = converter->toUContext; 713 } 714 715 U_CAPI void U_EXPORT2 716 ucnv_getFromUCallBack (const UConverter * converter, 717 UConverterFromUCallback *action, 718 const void **context) 719 { 720 *action = converter->fromUCharErrorBehaviour; 721 *context = converter->fromUContext; 722 } 723 724 U_CAPI void U_EXPORT2 725 ucnv_setToUCallBack (UConverter * converter, 726 UConverterToUCallback newAction, 727 const void* newContext, 728 UConverterToUCallback *oldAction, 729 const void** oldContext, 730 UErrorCode * err) 731 { 732 if (U_FAILURE (*err)) 733 return; 734 if (oldAction) *oldAction = converter->fromCharErrorBehaviour; 735 converter->fromCharErrorBehaviour = newAction; 736 if (oldContext) *oldContext = converter->toUContext; 737 converter->toUContext = newContext; 738 } 739 740 U_CAPI void U_EXPORT2 741 ucnv_setFromUCallBack 
(UConverter * converter, 742 UConverterFromUCallback newAction, 743 const void* newContext, 744 UConverterFromUCallback *oldAction, 745 const void** oldContext, 746 UErrorCode * err) 747 { 748 if (U_FAILURE (*err)) 749 return; 750 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; 751 converter->fromUCharErrorBehaviour = newAction; 752 if (oldContext) *oldContext = converter->fromUContext; 753 converter->fromUContext = newContext; 754 } 755 756 static void 757 _updateOffsets(int32_t *offsets, int32_t length, 758 int32_t sourceIndex, int32_t errorInputLength) { 759 int32_t *limit; 760 int32_t delta, offset; 761 762 if(sourceIndex>=0) { 763 /* 764 * adjust each offset by adding the previous sourceIndex 765 * minus the length of the input sequence that caused an 766 * error, if any 767 */ 768 delta=sourceIndex-errorInputLength; 769 } else { 770 /* 771 * set each offset to -1 because this conversion function 772 * does not handle offsets 773 */ 774 delta=-1; 775 } 776 777 limit=offsets+length; 778 if(delta==0) { 779 /* most common case, nothing to do */ 780 } else if(delta>0) { 781 /* add the delta to each offset (but not if the offset is <0) */ 782 while(offsets<limit) { 783 offset=*offsets; 784 if(offset>=0) { 785 *offsets=offset+delta; 786 } 787 ++offsets; 788 } 789 } else /* delta<0 */ { 790 /* 791 * set each offset to -1 because this conversion function 792 * does not handle offsets 793 * or the error input sequence started in a previous buffer 794 */ 795 while(offsets<limit) { 796 *offsets++=-1; 797 } 798 } 799 } 800 801 /* ucnv_fromUnicode --------------------------------------------------------- */ 802 803 /* 804 * Implementation note for m:n conversions 805 * 806 * While collecting source units to find the longest match for m:n conversion, 807 * some source units may need to be stored for a partial match. 
 * When a second buffer does not yield a match on all of the previously stored
 * source units, then they must be "replayed", i.e., fed back into the converter.
 *
 * The code relies on the fact that replaying will not nest -
 * converting a replay buffer will not result in a replay.
 * This is because a replay is necessary only after the _continuation_ of a
 * partial match failed, but a replay buffer is converted as a whole.
 * It may result in some of its units being stored again for a partial match,
 * but there will not be a continuation _during_ the replay which could fail.
 *
 * It is conceivable that a callback function could call the converter
 * recursively in a way that causes another replay to be stored, but that
 * would be an error in the callback function.
 * Such violations will cause assertion failures in a debug build,
 * and wrong output, but they will not cause a crash.
 */

/*
 * Core from-Unicode conversion driver.
 *
 * Repeatedly calls the converter implementation's fromUnicode function
 * (or fromUnicodeWithOffsets when offsets are requested and available),
 * and after each call fixes up the offsets, switches to/from the replay
 * buffer for stored m:n source units, detects truncated input at the end
 * of a flushed stream, and invokes the from-Unicode error callback for
 * U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND.
 *
 * @param pArgs  conversion arguments; source, target and offsets are
 *               advanced, and source/sourceLimit/flush are temporarily
 *               redirected to the replay buffer and restored afterwards
 * @param err    in/out error code; an incoming failure skips the conversion
 *               call and goes straight to error handling
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    /* a negative preFromULength is the negated count of units waiting to be replayed */
    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    /* realSource is non-NULL in this branch, so the assertion below fails wherever U_ASSERT is active */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the fromUnicode overflow buffer.
 * Call this function if(cnv->charErrorBufferLength>0).
* @return TRUE if overflow
 *
 * Details:
 * - Bytes are copied from cnv->charErrorBuffer to *target until either the
 *   buffer is empty or the write position reaches targetLimit.
 * - If offsets are requested (pOffsets!=NULL and *pOffsets!=NULL), -1 is
 *   written for every byte that is copied.
 * - On overflow, the bytes that did not fit are moved to the front of the
 *   buffer, cnv->charErrorBufferLength is set to their count, and *err is
 *   set to U_BUFFER_OVERFLOW_ERROR.
 * - *target (and *pOffsets, if used) are updated on both return paths.
 */
static UBool
ucnv_outputOverflowFromUnicode(UConverter *cnv,
                               char **target, const char *targetLimit,
                               int32_t **pOffsets,
                               UErrorCode *err) {
    int32_t *offsets;
    char *overflow, *t;
    int32_t i, length;

    t=*target;
    if(pOffsets!=NULL) {
        offsets=*pOffsets;
    } else {
        offsets=NULL;
    }

    overflow=(char *)cnv->charErrorBuffer;
    length=cnv->charErrorBufferLength;
    i=0;
    while(i<length) {
        if(t==targetLimit) {
            /* the overflow buffer contains too much, keep the rest */
            int32_t j=0;

            /* compact the unwritten bytes [i..length[ down to index 0 */
            do {
                overflow[j++]=overflow[i++];
            } while(i<length);

            cnv->charErrorBufferLength=(int8_t)j;
            *target=t;
            if(offsets!=NULL) {
                *pOffsets=offsets;
            }
            *err=U_BUFFER_OVERFLOW_ERROR;
            return TRUE;
        }

        /* copy the overflow contents to the target */
        *t++=overflow[i++];
        if(offsets!=NULL) {
            *offsets++=-1; /* no source index available for old output */
        }
    }

    /* the overflow buffer is completely copied to the target */
    cnv->charErrorBufferLength=0;
    *target=t;
    if(offsets!=NULL) {
        *pOffsets=offsets;
    }
    return FALSE;
}

/*
 * API function: convert UChars from [*source, sourceLimit[ to bytes in
 * [*target, targetLimit[.
 *
 * Steps, in order:
 * 1. Return immediately if err is NULL or already indicates a failure.
 * 2. Set U_ILLEGAL_ARGUMENT_ERROR for a NULL cnv/target/source or for
 *    inconsistent/oversized/odd-sized buffer limits.
 * 3. Write out bytes left over in the converter's overflow buffer
 *    (cnv->charErrorBuffer); return if they do not all fit.
 * 4. Return if there is nothing more to do (no flush, no new input,
 *    no replay pending).
 * 5. Otherwise fill in a UConverterFromUnicodeArgs and call
 *    _fromUnicodeWithCallback(), then copy the updated source and target
 *    pointers back to the caller.
 */
U_CAPI void U_EXPORT2
ucnv_fromUnicode(UConverter *cnv,
                 char **target, const char *targetLimit,
                 const UChar **source, const UChar *sourceLimit,
                 int32_t *offsets,
                 UBool flush,
                 UErrorCode *err) {
    UConverterFromUnicodeArgs args;
    const UChar *s;
    char *t;

    /* check parameters */
    if(err==NULL || U_FAILURE(*err)) {
        return;
    }

    if(cnv==NULL || target==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    s=*source;
    t=*target;

    if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
        /*
        Prevent code from going into an infinite loop in case we do hit this
        limit. The limit pointer is expected to be on a UChar * boundary.
        This also prevents the next argument check from failing.
        */
        sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
    }

    /*
     * All these conditions should never happen.
     *
     * 1) Make sure that the limits are >= to the address source or target
     *
     * 2) Make sure that the buffer sizes do not exceed the number range for
     * int32_t because some functions use the size (in units or bytes)
     * rather than comparing pointers, and because offsets are int32_t values.
     *
     * size_t is guaranteed to be unsigned and large enough for the job.
     *
     * Return with an error instead of adjusting the limits because we would
     * not be able to maintain the semantics that either the source must be
     * consumed or the target filled (unless an error occurs).
     * An adjustment would be targetLimit=t+0x7fffffff; for example.
     *
     * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
     * to a char * pointer and provide an incomplete UChar code unit.
     *
     * Note: the source is measured in UChar units (limit 0x3fffffff) while
     * the target is measured in bytes (limit 0x7fffffff); presumably the
     * smaller unit limit keeps the source size in bytes within int32_t too.
     */
    if (sourceLimit<s || targetLimit<t ||
        ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
        ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
        (((const char *)sourceLimit-(const char *)s) & 1) != 0)
    {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* output the target overflow buffer */
    if( cnv->charErrorBufferLength>0 &&
        ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
    ) {
        /* U_BUFFER_OVERFLOW_ERROR */
        return;
    }
    /* *target may have moved, therefore stop using t */

    if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
        /* the overflow buffer is emptied and there is no new input: we are done */
        return;
    }

    /*
     * Do not simply return with a buffer overflow error if
     * !flush && t==targetLimit
     * because it is possible that the source will not generate any output.
     * For example, the skip callback may be called;
     * it does not output anything.
     */

    /* prepare the converter arguments */
    args.converter=cnv;
    args.flush=flush;
    args.offsets=offsets;
    args.source=s;
    args.sourceLimit=sourceLimit;
    args.target=*target;
    args.targetLimit=targetLimit;
    args.size=sizeof(args);

    _fromUnicodeWithCallback(&args, err);

    *source=args.source;
    *target=args.target;
}

/* ucnv_toUnicode() --------------------------------------------------------- */

/*
 * Core toUnicode driver.
 *
 * Repeatedly calls the converter implementation's toUnicode function
 * (toUnicodeWithOffsets if offsets are requested and such a function exists),
 * and after each call
 * - updates the offsets array and the running sourceIndex,
 * - handles the end of the input, including truncated input when flushing,
 * - calls the toUnicode error callback (cnv->fromCharErrorBehaviour) for
 *   invalid, illegal, truncated and escape-sequence errors.
 *
 * Replay handling: if cnv->preToULength<0, then -preToULength bytes stored
 * in cnv->preToU are converted first from the local replay[] buffer with
 * flush=FALSE. The caller's real source/sourceLimit/flush are saved in the
 * real* variables and restored when the replay is finished or when the
 * function returns with an unresolved error (in which case unconsumed replay
 * bytes are copied back into cnv->preToU).
 *
 * The function may be entered with *err already set to a failure (see the
 * else branch at the top of the outer loop); the conversion call is then
 * skipped and the error handling runs directly.
 *
 * @param pArgs conversion arguments; its source, target, offsets and
 *              (temporarily) sourceLimit/flush fields are modified
 * @param err   in/out error code
 */
static void
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterToUnicode toUnicode;
    UConverter *cnv;
    const char *s;
    UChar *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    char replay[UCNV_EXT_MAX_BYTES];
    const char *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        toUnicode=cnv->sharedData->impl->toUnicode;
    } else {
        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
        if(toUnicode==NULL) {
            /* there is no WithOffsets implementation */
            toUnicode=cnv->sharedData->impl->toUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preToULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preToULength is negative here, so its negation is the byte count */
        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preToULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preToULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            toUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preToULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->toULength==0);
        } else {
            /* handle error from getNextUChar() or ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                /* number of UChars written since the last pass */
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preToULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preToULength;
                    pArgs->flush=FALSE;
                    /* back up sourceIndex by the replay length; pin to -1 if that goes negative */
                    if((sourceIndex+=cnv->preToULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preToULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    /*
                     * NOTE(review): this branch is only reached with
                     * realSource!=NULL, so the asserted condition is false
                     * whenever the assertion is evaluated; presumably this is
                     * intentional, to flag the "cannot occur" state in builds
                     * with assertions enabled - confirm.
                     */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->toULength>0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                /*
                 * e is assigned inside the condition; it is only read in the
                 * sub-expressions that follow the assignment
                 */
                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND &&
                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preToULength==0);

                        /* unconsumed replay bytes; stored again with a negative length */
                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preToU, pArgs->source, length);
                            cnv->preToULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* copy toUBytes[] to invalidCharBuffer[] */
            errorInputLength=cnv->invalidCharLength=cnv->toULength;
            if(errorInputLength>0) {
                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
            }

            /* set the converter state to deal with the next character */
            cnv->toULength=0;

            /* call the callback function */
            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
                cnv->toUCallbackReason = UCNV_UNASSIGNED;
            }
            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
                cnv->invalidCharBuffer, errorInputLength,
                cnv->toUCallbackReason,
                err);
            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the toUnicode overflow buffer.
 * Call this function if(cnv->UCharErrorBufferLength>0).
1560 * @return TRUE if overflow 1561 */ 1562 static UBool 1563 ucnv_outputOverflowToUnicode(UConverter *cnv, 1564 UChar **target, const UChar *targetLimit, 1565 int32_t **pOffsets, 1566 UErrorCode *err) { 1567 int32_t *offsets; 1568 UChar *overflow, *t; 1569 int32_t i, length; 1570 1571 t=*target; 1572 if(pOffsets!=NULL) { 1573 offsets=*pOffsets; 1574 } else { 1575 offsets=NULL; 1576 } 1577 1578 overflow=cnv->UCharErrorBuffer; 1579 length=cnv->UCharErrorBufferLength; 1580 i=0; 1581 while(i<length) { 1582 if(t==targetLimit) { 1583 /* the overflow buffer contains too much, keep the rest */ 1584 int32_t j=0; 1585 1586 do { 1587 overflow[j++]=overflow[i++]; 1588 } while(i<length); 1589 1590 cnv->UCharErrorBufferLength=(int8_t)j; 1591 *target=t; 1592 if(offsets!=NULL) { 1593 *pOffsets=offsets; 1594 } 1595 *err=U_BUFFER_OVERFLOW_ERROR; 1596 return TRUE; 1597 } 1598 1599 /* copy the overflow contents to the target */ 1600 *t++=overflow[i++]; 1601 if(offsets!=NULL) { 1602 *offsets++=-1; /* no source index available for old output */ 1603 } 1604 } 1605 1606 /* the overflow buffer is completely copied to the target */ 1607 cnv->UCharErrorBufferLength=0; 1608 *target=t; 1609 if(offsets!=NULL) { 1610 *pOffsets=offsets; 1611 } 1612 return FALSE; 1613 } 1614 1615 U_CAPI void U_EXPORT2 1616 ucnv_toUnicode(UConverter *cnv, 1617 UChar **target, const UChar *targetLimit, 1618 const char **source, const char *sourceLimit, 1619 int32_t *offsets, 1620 UBool flush, 1621 UErrorCode *err) { 1622 UConverterToUnicodeArgs args; 1623 const char *s; 1624 UChar *t; 1625 1626 /* check parameters */ 1627 if(err==NULL || U_FAILURE(*err)) { 1628 return; 1629 } 1630 1631 if(cnv==NULL || target==NULL || source==NULL) { 1632 *err=U_ILLEGAL_ARGUMENT_ERROR; 1633 return; 1634 } 1635 1636 s=*source; 1637 t=*target; 1638 1639 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { 1640 /* 1641 Prevent code from going into an infinite loop in case we do hit this 1642 limit. 
The limit pointer is expected to be on a UChar * boundary. 1643 This also prevents the next argument check from failing. 1644 */ 1645 targetLimit = (const UChar *)(((const char *)targetLimit) - 1); 1646 } 1647 1648 /* 1649 * All these conditions should never happen. 1650 * 1651 * 1) Make sure that the limits are >= to the address source or target 1652 * 1653 * 2) Make sure that the buffer sizes do not exceed the number range for 1654 * int32_t because some functions use the size (in units or bytes) 1655 * rather than comparing pointers, and because offsets are int32_t values. 1656 * 1657 * size_t is guaranteed to be unsigned and large enough for the job. 1658 * 1659 * Return with an error instead of adjusting the limits because we would 1660 * not be able to maintain the semantics that either the source must be 1661 * consumed or the target filled (unless an error occurs). 1662 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1663 * 1664 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1665 * to a char * pointer and provide an incomplete UChar code unit. 
1666 */ 1667 if (sourceLimit<s || targetLimit<t || 1668 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || 1669 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || 1670 (((const char *)targetLimit-(const char *)t) & 1) != 0 1671 ) { 1672 *err=U_ILLEGAL_ARGUMENT_ERROR; 1673 return; 1674 } 1675 1676 /* output the target overflow buffer */ 1677 if( cnv->UCharErrorBufferLength>0 && 1678 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) 1679 ) { 1680 /* U_BUFFER_OVERFLOW_ERROR */ 1681 return; 1682 } 1683 /* *target may have moved, therefore stop using t */ 1684 1685 if(!flush && s==sourceLimit && cnv->preToULength>=0) { 1686 /* the overflow buffer is emptied and there is no new input: we are done */ 1687 return; 1688 } 1689 1690 /* 1691 * Do not simply return with a buffer overflow error if 1692 * !flush && t==targetLimit 1693 * because it is possible that the source will not generate any output. 1694 * For example, the skip callback may be called; 1695 * it does not output anything. 
1696 */ 1697 1698 /* prepare the converter arguments */ 1699 args.converter=cnv; 1700 args.flush=flush; 1701 args.offsets=offsets; 1702 args.source=s; 1703 args.sourceLimit=sourceLimit; 1704 args.target=*target; 1705 args.targetLimit=targetLimit; 1706 args.size=sizeof(args); 1707 1708 _toUnicodeWithCallback(&args, err); 1709 1710 *source=args.source; 1711 *target=args.target; 1712 } 1713 1714 /* ucnv_to/fromUChars() ----------------------------------------------------- */ 1715 1716 U_CAPI int32_t U_EXPORT2 1717 ucnv_fromUChars(UConverter *cnv, 1718 char *dest, int32_t destCapacity, 1719 const UChar *src, int32_t srcLength, 1720 UErrorCode *pErrorCode) { 1721 const UChar *srcLimit; 1722 char *originalDest, *destLimit; 1723 int32_t destLength; 1724 1725 /* check arguments */ 1726 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1727 return 0; 1728 } 1729 1730 if( cnv==NULL || 1731 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1732 srcLength<-1 || (srcLength!=0 && src==NULL) 1733 ) { 1734 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1735 return 0; 1736 } 1737 1738 /* initialize */ 1739 ucnv_resetFromUnicode(cnv); 1740 originalDest=dest; 1741 if(srcLength==-1) { 1742 srcLength=u_strlen(src); 1743 } 1744 if(srcLength>0) { 1745 srcLimit=src+srcLength; 1746 destLimit=dest+destCapacity; 1747 1748 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1749 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1750 destLimit=(char *)U_MAX_PTR(dest); 1751 } 1752 1753 /* perform the conversion */ 1754 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1755 destLength=(int32_t)(dest-originalDest); 1756 1757 /* if an overflow occurs, then get the preflighting length */ 1758 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 1759 char buffer[1024]; 1760 1761 destLimit=buffer+sizeof(buffer); 1762 do { 1763 dest=buffer; 1764 *pErrorCode=U_ZERO_ERROR; 1765 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1766 
destLength+=(int32_t)(dest-buffer); 1767 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1768 } 1769 } else { 1770 destLength=0; 1771 } 1772 1773 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); 1774 } 1775 1776 U_CAPI int32_t U_EXPORT2 1777 ucnv_toUChars(UConverter *cnv, 1778 UChar *dest, int32_t destCapacity, 1779 const char *src, int32_t srcLength, 1780 UErrorCode *pErrorCode) { 1781 const char *srcLimit; 1782 UChar *originalDest, *destLimit; 1783 int32_t destLength; 1784 1785 /* check arguments */ 1786 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1787 return 0; 1788 } 1789 1790 if( cnv==NULL || 1791 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1792 srcLength<-1 || (srcLength!=0 && src==NULL)) 1793 { 1794 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1795 return 0; 1796 } 1797 1798 /* initialize */ 1799 ucnv_resetToUnicode(cnv); 1800 originalDest=dest; 1801 if(srcLength==-1) { 1802 srcLength=(int32_t)uprv_strlen(src); 1803 } 1804 if(srcLength>0) { 1805 srcLimit=src+srcLength; 1806 destLimit=dest+destCapacity; 1807 1808 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ 1809 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) { 1810 destLimit=(UChar *)U_MAX_PTR(dest); 1811 } 1812 1813 /* perform the conversion */ 1814 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1815 destLength=(int32_t)(dest-originalDest); 1816 1817 /* if an overflow occurs, then get the preflighting length */ 1818 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) 1819 { 1820 UChar buffer[1024]; 1821 1822 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR; 1823 do { 1824 dest=buffer; 1825 *pErrorCode=U_ZERO_ERROR; 1826 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1827 destLength+=(int32_t)(dest-buffer); 1828 } 1829 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1830 } 1831 } else { 1832 destLength=0; 1833 } 1834 1835 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); 1836 
}

/* ucnv_getNextUChar() ------------------------------------------------------ */

/*
 * API function: convert the next character from [*source, sourceLimit[
 * and return it as a single code point.
 *
 * Order of processing:
 * 1. Output pending in cnv->UCharErrorBuffer is returned first; a lone lead
 *    surrogate from there is kept in c and completed from new input.
 * 2. Otherwise the implementation's native getNextUChar() is tried, if there
 *    is one and the converter is at a character boundary.
 * 3. Otherwise (or for error/callback handling) _toUnicodeWithCallback() is
 *    called with room for one UChar, and once more for a second UChar if the
 *    first one is a lead surrogate.
 * Output beyond the returned code point is put back at the beginning of
 * cnv->UCharErrorBuffer.
 *
 * @return the code point, or 0xffff if err is NULL or already a failure,
 *         if the arguments are invalid (U_ILLEGAL_ARGUMENT_ERROR),
 *         if the conversion failed, or if there was no output
 *         (U_INDEX_OUTOFBOUNDS_ERROR)
 */
U_CAPI UChar32 U_EXPORT2
ucnv_getNextUChar(UConverter *cnv,
                  const char **source, const char *sourceLimit,
                  UErrorCode *err) {
    UConverterToUnicodeArgs args;
    UChar buffer[U16_MAX_LENGTH];
    const char *s;
    UChar32 c;
    int32_t i, length;

    /* check parameters */
    if(err==NULL || U_FAILURE(*err)) {
        return 0xffff;
    }

    if(cnv==NULL || source==NULL) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return 0xffff;
    }

    s=*source;
    if(sourceLimit<s) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return 0xffff;
    }

    /*
     * Make sure that the buffer sizes do not exceed the number range for
     * int32_t because some functions use the size (in units or bytes)
     * rather than comparing pointers, and because offsets are int32_t values.
     *
     * size_t is guaranteed to be unsigned and large enough for the job.
     *
     * Return with an error instead of adjusting the limits because we would
     * not be able to maintain the semantics that either the source must be
     * consumed or the target filled (unless an error occurs).
     * An adjustment would be sourceLimit=s+0x7fffffff; for example.
     */
    if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
        *err=U_ILLEGAL_ARGUMENT_ERROR;
        return 0xffff;
    }

    /* c<0 below means: nothing was taken from the overflow buffer */
    c=U_SENTINEL;

    /* flush the target overflow buffer */
    if(cnv->UCharErrorBufferLength>0) {
        UChar *overflow;

        overflow=cnv->UCharErrorBuffer;
        i=0;
        length=cnv->UCharErrorBufferLength;
        U16_NEXT(overflow, i, length, c);

        /* move the remaining overflow contents up to the beginning */
        if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
            uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
                         cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
        }

        if(!U16_IS_LEAD(c) || i<length) {
            return c;
        }
        /*
         * Continue if the overflow buffer contained only a lead surrogate,
         * in case the converter outputs single surrogates from complete
         * input sequences.
         */
    }

    /*
     * flush==TRUE is implied for ucnv_getNextUChar()
     *
     * do not simply return even if s==sourceLimit because the converter may
     * not have seen flush==TRUE before
     */

    /* prepare the converter arguments */
    args.converter=cnv;
    args.flush=TRUE;
    args.offsets=NULL;
    args.source=s;
    args.sourceLimit=sourceLimit;
    args.target=buffer;
    args.targetLimit=buffer+1; /* room for exactly one UChar at first */
    args.size=sizeof(args);

    if(c<0) {
        /*
         * call the native getNextUChar() implementation if we are
         * at a character boundary (toULength==0)
         *
         * unlike with _toUnicode(), getNextUChar() implementations must set
         * U_TRUNCATED_CHAR_FOUND for truncated input,
         * in addition to setting toULength/toUBytes[]
         */
        if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
            c=cnv->sharedData->impl->getNextUChar(&args, err);
            *source=s=args.source;
            if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
                /* reset the converter without calling the callback function */
                _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                return 0xffff; /* no output */
            } else if(U_SUCCESS(*err) && c>=0) {
                return c;
            /*
             * else fall through to use _toUnicode() because
             *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
             *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
             */
            }
        }

        /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
        _toUnicodeWithCallback(&args, err);

        /* a full one-UChar buffer is expected here, not an error for the caller */
        if(*err==U_BUFFER_OVERFLOW_ERROR) {
            *err=U_ZERO_ERROR;
        }

        i=0;
        length=(int32_t)(args.target-buffer);
    } else {
        /* write the lead surrogate from the overflow buffer */
        buffer[0]=(UChar)c;
        args.target=buffer+1;
        i=0;
        length=1;
    }

    /* buffer contents starts at i and ends before length */

    if(U_FAILURE(*err)) {
        c=0xffff; /* no output */
    } else if(length==0) {
        /* no input or only state changes */
        *err=U_INDEX_OUTOFBOUNDS_ERROR;
        /* no need to reset explicitly because _toUnicodeWithCallback() did it */
        c=0xffff; /* no output */
    } else {
        c=buffer[0];
        i=1;
        if(!U16_IS_LEAD(c)) {
            /* consume c=buffer[0], done */
        } else {
            /* got a lead surrogate, see if a trail surrogate follows */
            UChar c2;

            if(cnv->UCharErrorBufferLength>0) {
                /* got overflow output from the conversion */
                if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
                    /* got a trail surrogate, too */
                    c=U16_GET_SUPPLEMENTARY(c, c2);

                    /* move the remaining overflow contents up to the beginning */
                    if((--cnv->UCharErrorBufferLength)>0) {
                        uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
                                     cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
                    }
                } else {
                    /* c is an unpaired lead surrogate, just return it */
                }
            } else if(args.source<sourceLimit) {
                /* convert once more, to buffer[1] */
                args.targetLimit=buffer+2;
                _toUnicodeWithCallback(&args, err);
                if(*err==U_BUFFER_OVERFLOW_ERROR) {
                    *err=U_ZERO_ERROR;
                }

                length=(int32_t)(args.target-buffer);
                if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
                    /* got a trail surrogate, too */
                    c=U16_GET_SUPPLEMENTARY(c, c2);
                    i=2;
                }
            }
        }
    }

    /*
     * move leftover output from buffer[i..length[
     * into the beginning of the overflow buffer
     */
    if(i<length) {
        /* move further overflow back */
        int32_t delta=length-i; /* number of UChars to put back: 1 or 2 */
        /* from here on, length is reused for the current overflow buffer length */
        if((length=cnv->UCharErrorBufferLength)>0) {
            uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
                         length*U_SIZEOF_UCHAR);
        }
        cnv->UCharErrorBufferLength=(int8_t)(length+delta);

        cnv->UCharErrorBuffer[0]=buffer[i++];
        if(delta>1) {
            cnv->UCharErrorBuffer[1]=buffer[i];
        }
    }

    *source=args.source;
    return c;
}

/* ucnv_convert() and siblings ---------------------------------------------- */

U_CAPI void U_EXPORT2
ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
               char **target, const char *targetLimit,
               const char **source, const char *sourceLimit,
               UChar *pivotStart, UChar **pivotSource,
               UChar **pivotTarget, const UChar *pivotLimit,
               UBool reset, UBool flush,
               UErrorCode *pErrorCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    const UChar *myPivotSource;
    UChar *myPivotTarget;
    const char *s;
    char *t;

    UConverterToUnicodeArgs toUArgs;
    UConverterFromUnicodeArgs fromUArgs;
    UConverterConvert convert;

    /* error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    if( targetCnv==NULL || sourceCnv==NULL ||
        source==NULL || *source==NULL ||
        target==NULL || *target==NULL || targetLimit==NULL
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

s=*source; 2077 t=*target; 2078 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { 2079 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2080 return; 2081 } 2082 2083 /* 2084 * Make sure that the buffer sizes do not exceed the number range for 2085 * int32_t. See ucnv_toUnicode() for a more detailed comment. 2086 */ 2087 if( 2088 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || 2089 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) 2090 ) { 2091 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2092 return; 2093 } 2094 2095 if(pivotStart==NULL) { 2096 if(!flush) { 2097 /* streaming conversion requires an explicit pivot buffer */ 2098 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2099 return; 2100 } 2101 2102 /* use the stack pivot buffer */ 2103 myPivotSource=myPivotTarget=pivotStart=pivotBuffer; 2104 pivotSource=(UChar **)&myPivotSource; 2105 pivotTarget=&myPivotTarget; 2106 pivotLimit=pivotBuffer+CHUNK_SIZE; 2107 } else if( pivotStart>=pivotLimit || 2108 pivotSource==NULL || *pivotSource==NULL || 2109 pivotTarget==NULL || *pivotTarget==NULL || 2110 pivotLimit==NULL 2111 ) { 2112 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2113 return; 2114 } 2115 2116 if(sourceLimit==NULL) { 2117 /* get limit of single-byte-NUL-terminated source string */ 2118 sourceLimit=uprv_strchr(*source, 0); 2119 } 2120 2121 if(reset) { 2122 ucnv_resetToUnicode(sourceCnv); 2123 ucnv_resetFromUnicode(targetCnv); 2124 *pivotSource=*pivotTarget=pivotStart; 2125 } else if(targetCnv->charErrorBufferLength>0) { 2126 /* output the targetCnv overflow buffer */ 2127 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { 2128 /* U_BUFFER_OVERFLOW_ERROR */ 2129 return; 2130 } 2131 /* *target has moved, therefore stop using t */ 2132 2133 if( !flush && 2134 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && 2135 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit 2136 ) { 2137 /* the fromUnicode overflow 
buffer is emptied and there is no new input: we are done */ 2138 return; 2139 } 2140 } 2141 2142 /* Is direct-UTF-8 conversion available? */ 2143 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2144 targetCnv->sharedData->impl->fromUTF8!=NULL 2145 ) { 2146 convert=targetCnv->sharedData->impl->fromUTF8; 2147 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2148 sourceCnv->sharedData->impl->toUTF8!=NULL 2149 ) { 2150 convert=sourceCnv->sharedData->impl->toUTF8; 2151 } else { 2152 convert=NULL; 2153 } 2154 2155 /* 2156 * If direct-UTF-8 conversion is available, then we use a smaller 2157 * pivot buffer for error handling and partial matches 2158 * so that we quickly return to direct conversion. 2159 * 2160 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. 2161 * 2162 * We could reduce the pivot buffer size further, at the cost of 2163 * buffer overflows from callbacks. 2164 * The pivot buffer should not be smaller than the maximum number of 2165 * fromUnicode extension table input UChars 2166 * (for m:n conversion, see 2167 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) 2168 * or 2 for surrogate pairs. 2169 * 2170 * Too small a buffer can cause thrashing between pivoting and direct 2171 * conversion, with function call overhead outweighing the benefits 2172 * of direct conversion. 
2173 */ 2174 if(convert!=NULL && (pivotLimit-pivotStart)>32) { 2175 pivotLimit=pivotStart+32; 2176 } 2177 2178 /* prepare the converter arguments */ 2179 fromUArgs.converter=targetCnv; 2180 fromUArgs.flush=FALSE; 2181 fromUArgs.offsets=NULL; 2182 fromUArgs.target=*target; 2183 fromUArgs.targetLimit=targetLimit; 2184 fromUArgs.size=sizeof(fromUArgs); 2185 2186 toUArgs.converter=sourceCnv; 2187 toUArgs.flush=flush; 2188 toUArgs.offsets=NULL; 2189 toUArgs.source=s; 2190 toUArgs.sourceLimit=sourceLimit; 2191 toUArgs.targetLimit=pivotLimit; 2192 toUArgs.size=sizeof(toUArgs); 2193 2194 /* 2195 * TODO: Consider separating this function into two functions, 2196 * extracting exactly the conversion loop, 2197 * for readability and to reduce the set of visible variables. 2198 * 2199 * Otherwise stop using s and t from here on. 2200 */ 2201 s=t=NULL; 2202 2203 /* 2204 * conversion loop 2205 * 2206 * The sequence of steps in the loop may appear backward, 2207 * but the principle is simple: 2208 * In the chain of 2209 * source - sourceCnv overflow - pivot - targetCnv overflow - target 2210 * empty out later buffers before refilling them from earlier ones. 2211 * 2212 * The targetCnv overflow buffer is flushed out only once before the loop. 2213 */ 2214 for(;;) { 2215 /* 2216 * if(pivot not empty or error or replay or flush fromUnicode) { 2217 * fromUnicode(pivot -> target); 2218 * } 2219 * 2220 * For pivoting conversion; and for direct conversion for 2221 * error callback handling and flushing the replay buffer. 
2222 */ 2223 if( *pivotSource<*pivotTarget || 2224 U_FAILURE(*pErrorCode) || 2225 targetCnv->preFromULength<0 || 2226 fromUArgs.flush 2227 ) { 2228 fromUArgs.source=*pivotSource; 2229 fromUArgs.sourceLimit=*pivotTarget; 2230 _fromUnicodeWithCallback(&fromUArgs, pErrorCode); 2231 if(U_FAILURE(*pErrorCode)) { 2232 /* target overflow, or conversion error */ 2233 *pivotSource=(UChar *)fromUArgs.source; 2234 break; 2235 } 2236 2237 /* 2238 * _fromUnicodeWithCallback() must have consumed the pivot contents 2239 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() 2240 */ 2241 } 2242 2243 /* The pivot buffer is empty; reset it so we start at pivotStart. */ 2244 *pivotSource=*pivotTarget=pivotStart; 2245 2246 /* 2247 * if(sourceCnv overflow buffer not empty) { 2248 * move(sourceCnv overflow buffer -> pivot); 2249 * continue; 2250 * } 2251 */ 2252 /* output the sourceCnv overflow buffer */ 2253 if(sourceCnv->UCharErrorBufferLength>0) { 2254 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { 2255 /* U_BUFFER_OVERFLOW_ERROR */ 2256 *pErrorCode=U_ZERO_ERROR; 2257 } 2258 continue; 2259 } 2260 2261 /* 2262 * check for end of input and break if done 2263 * 2264 * Checking both flush and fromUArgs.flush ensures that the converters 2265 * have been called with the flush flag set if the ucnv_convertEx() 2266 * caller set it. 
2267 */ 2268 if( toUArgs.source==sourceLimit && 2269 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && 2270 (!flush || fromUArgs.flush) 2271 ) { 2272 /* done successfully */ 2273 break; 2274 } 2275 2276 /* 2277 * use direct conversion if available 2278 * but not if continuing a partial match 2279 * or flushing the toUnicode replay buffer 2280 */ 2281 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { 2282 if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2283 /* remove a warning that may be set by this function */ 2284 *pErrorCode=U_ZERO_ERROR; 2285 } 2286 convert(&fromUArgs, &toUArgs, pErrorCode); 2287 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2288 break; 2289 } else if(U_FAILURE(*pErrorCode)) { 2290 if(sourceCnv->toULength>0) { 2291 /* 2292 * Fall through to calling _toUnicodeWithCallback() 2293 * for callback handling. 2294 * 2295 * The pivot buffer will be reset with 2296 * *pivotSource=*pivotTarget=pivotStart; 2297 * which indicates a toUnicode error to the caller 2298 * (*pivotSource==pivotStart shows no pivot UChars consumed). 2299 */ 2300 } else { 2301 /* 2302 * Indicate a fromUnicode error to the caller 2303 * (*pivotSource>pivotStart shows some pivot UChars consumed). 2304 */ 2305 *pivotSource=*pivotTarget=pivotStart+1; 2306 /* 2307 * Loop around to calling _fromUnicodeWithCallbacks() 2308 * for callback handling. 2309 */ 2310 continue; 2311 } 2312 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2313 /* 2314 * No error, but the implementation requested to temporarily 2315 * fall back to pivoting. 2316 */ 2317 *pErrorCode=U_ZERO_ERROR; 2318 /* 2319 * The following else branches are almost identical to the end-of-input 2320 * handling in _toUnicodeWithCallback(). 2321 * Avoid calling it just for the end of input. 
2322 */ 2323 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ 2324 /* 2325 * the entire input stream is consumed 2326 * and there is a partial, truncated input sequence left 2327 */ 2328 2329 /* inject an error and continue with callback handling */ 2330 *pErrorCode=U_TRUNCATED_CHAR_FOUND; 2331 } else { 2332 /* input consumed */ 2333 if(flush) { 2334 /* reset the converters without calling the callback functions */ 2335 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); 2336 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); 2337 } 2338 2339 /* done successfully */ 2340 break; 2341 } 2342 } 2343 2344 /* 2345 * toUnicode(source -> pivot); 2346 * 2347 * For pivoting conversion; and for direct conversion for 2348 * error callback handling, continuing partial matches 2349 * and flushing the replay buffer. 2350 * 2351 * The pivot buffer is empty and reset. 2352 */ 2353 toUArgs.target=pivotStart; /* ==*pivotTarget */ 2354 /* toUArgs.targetLimit=pivotLimit; already set before the loop */ 2355 _toUnicodeWithCallback(&toUArgs, pErrorCode); 2356 *pivotTarget=toUArgs.target; 2357 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2358 /* pivot overflow: continue with the conversion loop */ 2359 *pErrorCode=U_ZERO_ERROR; 2360 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { 2361 /* conversion error, or there was nothing left to convert */ 2362 break; 2363 } 2364 /* 2365 * else: 2366 * _toUnicodeWithCallback() wrote into the pivot buffer, 2367 * continue with fromUnicode conversion. 2368 * 2369 * Set the fromUnicode flush flag if we flush and if toUnicode has 2370 * processed the end of the input. 
2371 */ 2372 if( flush && toUArgs.source==sourceLimit && 2373 sourceCnv->preToULength>=0 && 2374 sourceCnv->UCharErrorBufferLength==0 2375 ) { 2376 fromUArgs.flush=TRUE; 2377 } 2378 } 2379 2380 /* 2381 * The conversion loop is exited when one of the following is true: 2382 * - the entire source text has been converted successfully to the target buffer 2383 * - a target buffer overflow occurred 2384 * - a conversion error occurred 2385 */ 2386 2387 *source=toUArgs.source; 2388 *target=fromUArgs.target; 2389 2390 /* terminate the target buffer if possible */ 2391 if(flush && U_SUCCESS(*pErrorCode)) { 2392 if(*target!=targetLimit) { 2393 **target=0; 2394 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { 2395 *pErrorCode=U_ZERO_ERROR; 2396 } 2397 } else { 2398 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; 2399 } 2400 } 2401 } 2402 2403 /* internal implementation of ucnv_convert() etc. with preflighting */ 2404 static int32_t 2405 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, 2406 char *target, int32_t targetCapacity, 2407 const char *source, int32_t sourceLength, 2408 UErrorCode *pErrorCode) { 2409 UChar pivotBuffer[CHUNK_SIZE]; 2410 UChar *pivot, *pivot2; 2411 2412 char *myTarget; 2413 const char *sourceLimit; 2414 const char *targetLimit; 2415 int32_t targetLength=0; 2416 2417 /* set up */ 2418 if(sourceLength<0) { 2419 sourceLimit=uprv_strchr(source, 0); 2420 } else { 2421 sourceLimit=source+sourceLength; 2422 } 2423 2424 /* if there is no input data, we're done */ 2425 if(source==sourceLimit) { 2426 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2427 } 2428 2429 pivot=pivot2=pivotBuffer; 2430 myTarget=target; 2431 targetLength=0; 2432 2433 if(targetCapacity>0) { 2434 /* perform real conversion */ 2435 targetLimit=target+targetCapacity; 2436 ucnv_convertEx(outConverter, inConverter, 2437 &myTarget, targetLimit, 2438 &source, sourceLimit, 2439 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2440 FALSE, 2441 TRUE, 2442 
pErrorCode); 2443 targetLength=(int32_t)(myTarget-target); 2444 } 2445 2446 /* 2447 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing 2448 * to it but continue the conversion in order to store in targetCapacity 2449 * the number of bytes that was required. 2450 */ 2451 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) 2452 { 2453 char targetBuffer[CHUNK_SIZE]; 2454 2455 targetLimit=targetBuffer+CHUNK_SIZE; 2456 do { 2457 *pErrorCode=U_ZERO_ERROR; 2458 myTarget=targetBuffer; 2459 ucnv_convertEx(outConverter, inConverter, 2460 &myTarget, targetLimit, 2461 &source, sourceLimit, 2462 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2463 FALSE, 2464 TRUE, 2465 pErrorCode); 2466 targetLength+=(int32_t)(myTarget-targetBuffer); 2467 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 2468 2469 /* done with preflighting, set warnings and errors as appropriate */ 2470 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); 2471 } 2472 2473 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ 2474 return targetLength; 2475 } 2476 2477 U_CAPI int32_t U_EXPORT2 2478 ucnv_convert(const char *toConverterName, const char *fromConverterName, 2479 char *target, int32_t targetCapacity, 2480 const char *source, int32_t sourceLength, 2481 UErrorCode *pErrorCode) { 2482 UConverter in, out; /* stack-allocated */ 2483 UConverter *inConverter, *outConverter; 2484 int32_t targetLength; 2485 2486 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2487 return 0; 2488 } 2489 2490 if( source==NULL || sourceLength<-1 || 2491 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2492 ) { 2493 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2494 return 0; 2495 } 2496 2497 /* if there is no input data, we're done */ 2498 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2499 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2500 } 2501 2502 /* create the converters */ 2503 
inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); 2504 if(U_FAILURE(*pErrorCode)) { 2505 return 0; 2506 } 2507 2508 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); 2509 if(U_FAILURE(*pErrorCode)) { 2510 ucnv_close(inConverter); 2511 return 0; 2512 } 2513 2514 targetLength=ucnv_internalConvert(outConverter, inConverter, 2515 target, targetCapacity, 2516 source, sourceLength, 2517 pErrorCode); 2518 2519 ucnv_close(inConverter); 2520 ucnv_close(outConverter); 2521 2522 return targetLength; 2523 } 2524 2525 /* @internal */ 2526 static int32_t 2527 ucnv_convertAlgorithmic(UBool convertToAlgorithmic, 2528 UConverterType algorithmicType, 2529 UConverter *cnv, 2530 char *target, int32_t targetCapacity, 2531 const char *source, int32_t sourceLength, 2532 UErrorCode *pErrorCode) { 2533 UConverter algoConverterStatic; /* stack-allocated */ 2534 UConverter *algoConverter, *to, *from; 2535 int32_t targetLength; 2536 2537 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2538 return 0; 2539 } 2540 2541 if( cnv==NULL || source==NULL || sourceLength<-1 || 2542 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2543 ) { 2544 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2545 return 0; 2546 } 2547 2548 /* if there is no input data, we're done */ 2549 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2550 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2551 } 2552 2553 /* create the algorithmic converter */ 2554 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, 2555 "", 0, pErrorCode); 2556 if(U_FAILURE(*pErrorCode)) { 2557 return 0; 2558 } 2559 2560 /* reset the other converter */ 2561 if(convertToAlgorithmic) { 2562 /* cnv->Unicode->algo */ 2563 ucnv_resetToUnicode(cnv); 2564 to=algoConverter; 2565 from=cnv; 2566 } else { 2567 /* algo->Unicode->cnv */ 2568 ucnv_resetFromUnicode(cnv); 2569 from=algoConverter; 2570 to=cnv; 2571 } 2572 2573 targetLength=ucnv_internalConvert(to, from, 2574 
target, targetCapacity, 2575 source, sourceLength, 2576 pErrorCode); 2577 2578 ucnv_close(algoConverter); 2579 2580 return targetLength; 2581 } 2582 2583 U_CAPI int32_t U_EXPORT2 2584 ucnv_toAlgorithmic(UConverterType algorithmicType, 2585 UConverter *cnv, 2586 char *target, int32_t targetCapacity, 2587 const char *source, int32_t sourceLength, 2588 UErrorCode *pErrorCode) { 2589 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, 2590 target, targetCapacity, 2591 source, sourceLength, 2592 pErrorCode); 2593 } 2594 2595 U_CAPI int32_t U_EXPORT2 2596 ucnv_fromAlgorithmic(UConverter *cnv, 2597 UConverterType algorithmicType, 2598 char *target, int32_t targetCapacity, 2599 const char *source, int32_t sourceLength, 2600 UErrorCode *pErrorCode) { 2601 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, 2602 target, targetCapacity, 2603 source, sourceLength, 2604 pErrorCode); 2605 } 2606 2607 U_CAPI UConverterType U_EXPORT2 2608 ucnv_getType(const UConverter* converter) 2609 { 2610 int8_t type = converter->sharedData->staticData->conversionType; 2611 #if !UCONFIG_NO_LEGACY_CONVERSION 2612 if(type == UCNV_MBCS) { 2613 return ucnv_MBCSGetType(converter); 2614 } 2615 #endif 2616 return (UConverterType)type; 2617 } 2618 2619 U_CAPI void U_EXPORT2 2620 ucnv_getStarters(const UConverter* converter, 2621 UBool starters[256], 2622 UErrorCode* err) 2623 { 2624 if (err == NULL || U_FAILURE(*err)) { 2625 return; 2626 } 2627 2628 if(converter->sharedData->impl->getStarters != NULL) { 2629 converter->sharedData->impl->getStarters(converter, starters, err); 2630 } else { 2631 *err = U_ILLEGAL_ARGUMENT_ERROR; 2632 } 2633 } 2634 2635 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) 2636 { 2637 UErrorCode errorCode; 2638 const char *name; 2639 int32_t i; 2640 2641 if(cnv==NULL) { 2642 return NULL; 2643 } 2644 2645 errorCode=U_ZERO_ERROR; 2646 name=ucnv_getName(cnv, &errorCode); 2647 if(U_FAILURE(errorCode)) { 2648 return NULL; 2649 } 2650 2651 
for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i) 2652 { 2653 if(0==uprv_strcmp(name, ambiguousConverters[i].name)) 2654 { 2655 return ambiguousConverters+i; 2656 } 2657 } 2658 2659 return NULL; 2660 } 2661 2662 U_CAPI void U_EXPORT2 2663 ucnv_fixFileSeparator(const UConverter *cnv, 2664 UChar* source, 2665 int32_t sourceLength) { 2666 const UAmbiguousConverter *a; 2667 int32_t i; 2668 UChar variant5c; 2669 2670 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) 2671 { 2672 return; 2673 } 2674 2675 variant5c=a->variant5c; 2676 for(i=0; i<sourceLength; ++i) { 2677 if(source[i]==variant5c) { 2678 source[i]=0x5c; 2679 } 2680 } 2681 } 2682 2683 U_CAPI UBool U_EXPORT2 2684 ucnv_isAmbiguous(const UConverter *cnv) { 2685 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); 2686 } 2687 2688 U_CAPI void U_EXPORT2 2689 ucnv_setFallback(UConverter *cnv, UBool usesFallback) 2690 { 2691 cnv->useFallback = usesFallback; 2692 } 2693 2694 U_CAPI UBool U_EXPORT2 2695 ucnv_usesFallback(const UConverter *cnv) 2696 { 2697 return cnv->useFallback; 2698 } 2699 2700 U_CAPI void U_EXPORT2 2701 ucnv_getInvalidChars (const UConverter * converter, 2702 char *errBytes, 2703 int8_t * len, 2704 UErrorCode * err) 2705 { 2706 if (err == NULL || U_FAILURE(*err)) 2707 { 2708 return; 2709 } 2710 if (len == NULL || errBytes == NULL || converter == NULL) 2711 { 2712 *err = U_ILLEGAL_ARGUMENT_ERROR; 2713 return; 2714 } 2715 if (*len < converter->invalidCharLength) 2716 { 2717 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2718 return; 2719 } 2720 if ((*len = converter->invalidCharLength) > 0) 2721 { 2722 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); 2723 } 2724 } 2725 2726 U_CAPI void U_EXPORT2 2727 ucnv_getInvalidUChars (const UConverter * converter, 2728 UChar *errChars, 2729 int8_t * len, 2730 UErrorCode * err) 2731 { 2732 if (err == NULL || U_FAILURE(*err)) 2733 { 2734 return; 2735 } 2736 if (len == NULL || errChars == NULL || converter 
== NULL) 2737 { 2738 *err = U_ILLEGAL_ARGUMENT_ERROR; 2739 return; 2740 } 2741 if (*len < converter->invalidUCharLength) 2742 { 2743 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2744 return; 2745 } 2746 if ((*len = converter->invalidUCharLength) > 0) 2747 { 2748 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len)); 2749 } 2750 } 2751 2752 #define SIG_MAX_LEN 5 2753 2754 U_CAPI const char* U_EXPORT2 2755 ucnv_detectUnicodeSignature( const char* source, 2756 int32_t sourceLength, 2757 int32_t* signatureLength, 2758 UErrorCode* pErrorCode) { 2759 int32_t dummy; 2760 2761 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN 2762 * bytes we don't misdetect something 2763 */ 2764 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; 2765 int i = 0; 2766 2767 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ 2768 return NULL; 2769 } 2770 2771 if(source == NULL || sourceLength < -1){ 2772 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2773 return NULL; 2774 } 2775 2776 if(signatureLength == NULL) { 2777 signatureLength = &dummy; 2778 } 2779 2780 if(sourceLength==-1){ 2781 sourceLength=(int32_t)uprv_strlen(source); 2782 } 2783 2784 2785 while(i<sourceLength&& i<SIG_MAX_LEN){ 2786 start[i]=source[i]; 2787 i++; 2788 } 2789 2790 if(start[0] == '\xFE' && start[1] == '\xFF') { 2791 *signatureLength=2; 2792 return "UTF-16BE"; 2793 } else if(start[0] == '\xFF' && start[1] == '\xFE') { 2794 if(start[2] == '\x00' && start[3] =='\x00') { 2795 *signatureLength=4; 2796 return "UTF-32LE"; 2797 } else { 2798 *signatureLength=2; 2799 return "UTF-16LE"; 2800 } 2801 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { 2802 *signatureLength=3; 2803 return "UTF-8"; 2804 } else if(start[0] == '\x00' && start[1] == '\x00' && 2805 start[2] == '\xFE' && start[3]=='\xFF') { 2806 *signatureLength=4; 2807 return "UTF-32BE"; 2808 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { 2809 *signatureLength=3; 2810 return "SCSU"; 
2811 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { 2812 *signatureLength=3; 2813 return "BOCU-1"; 2814 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { 2815 /* 2816 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ 2817 * depending on the second UTF-16 code unit. 2818 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF 2819 * if it occurs. 2820 * 2821 * So far we have +/v 2822 */ 2823 if(start[3] == '\x38' && start[4] == '\x2D') { 2824 /* 5 bytes +/v8- */ 2825 *signatureLength=5; 2826 return "UTF-7"; 2827 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { 2828 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ 2829 *signatureLength=4; 2830 return "UTF-7"; 2831 } 2832 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ 2833 *signatureLength=4; 2834 return "UTF-EBCDIC"; 2835 } 2836 2837 2838 /* no known Unicode signature byte sequence recognized */ 2839 *signatureLength=0; 2840 return NULL; 2841 } 2842 2843 U_CAPI int32_t U_EXPORT2 2844 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) 2845 { 2846 if(status == NULL || U_FAILURE(*status)){ 2847 return -1; 2848 } 2849 if(cnv == NULL){ 2850 *status = U_ILLEGAL_ARGUMENT_ERROR; 2851 return -1; 2852 } 2853 2854 if(cnv->preFromUFirstCP >= 0){ 2855 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; 2856 }else if(cnv->preFromULength < 0){ 2857 return -cnv->preFromULength ; 2858 }else if(cnv->fromUChar32 > 0){ 2859 return 1; 2860 } 2861 return 0; 2862 2863 } 2864 2865 U_CAPI int32_t U_EXPORT2 2866 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ 2867 2868 if(status == NULL || U_FAILURE(*status)){ 2869 return -1; 2870 } 2871 if(cnv == NULL){ 2872 *status = U_ILLEGAL_ARGUMENT_ERROR; 2873 return -1; 2874 } 2875 2876 if(cnv->preToULength > 0){ 2877 return cnv->preToULength ; 2878 }else if(cnv->preToULength < 0){ 2879 
return -cnv->preToULength; 2880 }else if(cnv->toULength > 0){ 2881 return cnv->toULength; 2882 } 2883 return 0; 2884 } 2885 2886 U_CAPI UBool U_EXPORT2 2887 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ 2888 if (U_FAILURE(*status)) { 2889 return FALSE; 2890 } 2891 2892 if (cnv == NULL) { 2893 *status = U_ILLEGAL_ARGUMENT_ERROR; 2894 return FALSE; 2895 } 2896 2897 switch (ucnv_getType(cnv)) { 2898 case UCNV_SBCS: 2899 case UCNV_DBCS: 2900 case UCNV_UTF32_BigEndian: 2901 case UCNV_UTF32_LittleEndian: 2902 case UCNV_UTF32: 2903 case UCNV_US_ASCII: 2904 return TRUE; 2905 default: 2906 return FALSE; 2907 } 2908 } 2909 #endif 2910 2911 /* 2912 * Hey, Emacs, please set the following: 2913 * 2914 * Local Variables: 2915 * indent-tabs-mode: nil 2916 * End: 2917 * 2918 */ 2919