1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * ucnv.c: 12 * Implements APIs for the ICU's codeset conversion library; 13 * mostly calls through internal functions; 14 * created by Bertrand A. Damiba 15 * 16 * Modification History: 17 * 18 * Date Name Description 19 * 04/04/99 helena Fixed internal header inclusion. 20 * 05/09/00 helena Added implementation to handle fallback mappings. 21 * 06/20/2000 helena OS/400 port changes; mostly typecast. 22 */ 23 24 #include "unicode/utypes.h" 25 26 #if !UCONFIG_NO_CONVERSION 27 28 #include "unicode/ustring.h" 29 #include "unicode/ucnv.h" 30 #include "unicode/ucnv_err.h" 31 #include "unicode/uset.h" 32 #include "unicode/utf.h" 33 #include "unicode/utf16.h" 34 #include "putilimp.h" 35 #include "cmemory.h" 36 #include "cstring.h" 37 #include "uassert.h" 38 #include "utracimp.h" 39 #include "ustr_imp.h" 40 #include "ucnv_imp.h" 41 #include "ucnv_cnv.h" 42 #include "ucnv_bld.h" 43 44 /* size of intermediate and preflighting buffers in ucnv_convert() */ 45 #define CHUNK_SIZE 1024 46 47 typedef struct UAmbiguousConverter { 48 const char *name; 49 const UChar variant5c; 50 } UAmbiguousConverter; 51 52 static const UAmbiguousConverter ambiguousConverters[]={ 53 { "ibm-897_P100-1995", 0xa5 }, 54 { "ibm-942_P120-1999", 0xa5 }, 55 { "ibm-943_P130-1999", 0xa5 }, 56 { "ibm-946_P100-1995", 0xa5 }, 57 { "ibm-33722_P120-1999", 0xa5 }, 58 { "ibm-1041_P100-1995", 0xa5 }, 59 /*{ "ibm-54191_P100-2006", 0xa5 },*/ 60 /*{ "ibm-62383_P100-2007", 0xa5 },*/ 61 /*{ "ibm-891_P100-1995", 0x20a9 },*/ 62 { "ibm-944_P100-1995", 0x20a9 }, 63 { "ibm-949_P110-1999", 0x20a9 }, 64 { "ibm-1363_P110-1997", 
0x20a9 }, 65 { "ISO_2022,locale=ko,version=0", 0x20a9 }, 66 { "ibm-1088_P100-1995", 0x20a9 } 67 }; 68 69 /*Calls through createConverter */ 70 U_CAPI UConverter* U_EXPORT2 71 ucnv_open (const char *name, 72 UErrorCode * err) 73 { 74 UConverter *r; 75 76 if (err == NULL || U_FAILURE (*err)) { 77 return NULL; 78 } 79 80 r = ucnv_createConverter(NULL, name, err); 81 return r; 82 } 83 84 U_CAPI UConverter* U_EXPORT2 85 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) 86 { 87 return ucnv_createConverterFromPackage(packageName, converterName, err); 88 } 89 90 /*Extracts the UChar* to a char* and calls through createConverter */ 91 U_CAPI UConverter* U_EXPORT2 92 ucnv_openU (const UChar * name, 93 UErrorCode * err) 94 { 95 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 96 97 if (err == NULL || U_FAILURE(*err)) 98 return NULL; 99 if (name == NULL) 100 return ucnv_open (NULL, err); 101 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) 102 { 103 *err = U_ILLEGAL_ARGUMENT_ERROR; 104 return NULL; 105 } 106 return ucnv_open(u_austrcpy(asciiName, name), err); 107 } 108 109 /* Copy the string that is represented by the UConverterPlatform enum 110 * @param platformString An output buffer 111 * @param platform An enum representing a platform 112 * @return the length of the copied string. 
113 */ 114 static int32_t 115 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) 116 { 117 switch (pltfrm) 118 { 119 case UCNV_IBM: 120 uprv_strcpy(platformString, "ibm-"); 121 return 4; 122 case UCNV_UNKNOWN: 123 break; 124 } 125 126 /* default to empty string */ 127 *platformString = 0; 128 return 0; 129 } 130 131 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls 132 *through createConverter*/ 133 U_CAPI UConverter* U_EXPORT2 134 ucnv_openCCSID (int32_t codepage, 135 UConverterPlatform platform, 136 UErrorCode * err) 137 { 138 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 139 int32_t myNameLen; 140 141 if (err == NULL || U_FAILURE (*err)) 142 return NULL; 143 144 /* ucnv_copyPlatformString could return "ibm-" or "cp" */ 145 myNameLen = ucnv_copyPlatformString(myName, platform); 146 T_CString_integerToString(myName + myNameLen, codepage, 10); 147 148 return ucnv_createConverter(NULL, myName, err); 149 } 150 151 /* Creating a temporary stack-based object that can be used in one thread, 152 and created from a converter that is shared across threads. 153 */ 154 155 U_CAPI UConverter* U_EXPORT2 156 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) 157 { 158 UConverter *localConverter, *allocatedConverter; 159 int32_t stackBufferSize; 160 int32_t bufferSizeNeeded; 161 char *stackBufferChars = (char *)stackBuffer; 162 UErrorCode cbErr; 163 UConverterToUnicodeArgs toUArgs = { 164 sizeof(UConverterToUnicodeArgs), 165 TRUE, 166 NULL, 167 NULL, 168 NULL, 169 NULL, 170 NULL, 171 NULL 172 }; 173 UConverterFromUnicodeArgs fromUArgs = { 174 sizeof(UConverterFromUnicodeArgs), 175 TRUE, 176 NULL, 177 NULL, 178 NULL, 179 NULL, 180 NULL, 181 NULL 182 }; 183 184 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); 185 186 if (status == NULL || U_FAILURE(*status)){ 187 UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); 188 return NULL; 189 } 190 191 if (cnv == NULL) { 192 *status = U_ILLEGAL_ARGUMENT_ERROR; 193 UTRACE_EXIT_STATUS(*status); 194 return NULL; 195 } 196 197 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", 198 ucnv_getName(cnv, status), cnv, stackBuffer); 199 200 if (cnv->sharedData->impl->safeClone != NULL) { 201 /* call the custom safeClone function for sizing */ 202 bufferSizeNeeded = 0; 203 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); 204 if (U_FAILURE(*status)) { 205 UTRACE_EXIT_STATUS(*status); 206 return NULL; 207 } 208 } 209 else 210 { 211 /* inherent sizing */ 212 bufferSizeNeeded = sizeof(UConverter); 213 } 214 215 if (pBufferSize == NULL) { 216 stackBufferSize = 1; 217 pBufferSize = &stackBufferSize; 218 } else { 219 stackBufferSize = *pBufferSize; 220 if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ 221 *pBufferSize = bufferSizeNeeded; 222 UTRACE_EXIT_VALUE(bufferSizeNeeded); 223 return NULL; 224 } 225 } 226 227 228 /* Pointers on 64-bit platforms need to be aligned 229 * on a 64-bit boundary in memory. 
230 */ 231 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { 232 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars); 233 if(stackBufferSize > offsetUp) { 234 stackBufferSize -= offsetUp; 235 stackBufferChars += offsetUp; 236 } else { 237 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ 238 stackBufferSize = 1; 239 } 240 } 241 242 stackBuffer = (void *)stackBufferChars; 243 244 /* Now, see if we must allocate any memory */ 245 if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) 246 { 247 /* allocate one here...*/ 248 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); 249 250 if(localConverter == NULL) { 251 *status = U_MEMORY_ALLOCATION_ERROR; 252 UTRACE_EXIT_STATUS(*status); 253 return NULL; 254 } 255 *status = U_SAFECLONE_ALLOCATED_WARNING; 256 257 /* record the fact that memory was allocated */ 258 *pBufferSize = bufferSizeNeeded; 259 } else { 260 /* just use the stack buffer */ 261 localConverter = (UConverter*) stackBuffer; 262 allocatedConverter = NULL; 263 } 264 265 uprv_memset(localConverter, 0, bufferSizeNeeded); 266 267 /* Copy initial state */ 268 uprv_memcpy(localConverter, cnv, sizeof(UConverter)); 269 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; 270 271 /* copy the substitution string */ 272 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 273 localConverter->subChars = (uint8_t *)localConverter->subUChars; 274 } else { 275 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 276 if (localConverter->subChars == NULL) { 277 uprv_free(allocatedConverter); 278 UTRACE_EXIT_STATUS(*status); 279 return NULL; 280 } 281 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 282 } 283 284 /* now either call the safeclone fcn or not */ 285 if (cnv->sharedData->impl->safeClone != NULL) { 286 /* call the custom safeClone function */ 287 localConverter = 
cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); 288 } 289 290 if(localConverter==NULL || U_FAILURE(*status)) { 291 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { 292 uprv_free(allocatedConverter->subChars); 293 } 294 uprv_free(allocatedConverter); 295 UTRACE_EXIT_STATUS(*status); 296 return NULL; 297 } 298 299 /* increment refcount of shared data if needed */ 300 if (cnv->sharedData->isReferenceCounted) { 301 ucnv_incrementRefCount(cnv->sharedData); 302 } 303 304 if(localConverter == (UConverter*)stackBuffer) { 305 /* we're using user provided data - set to not destroy */ 306 localConverter->isCopyLocal = TRUE; 307 } 308 309 /* allow callback functions to handle any memory allocation */ 310 toUArgs.converter = fromUArgs.converter = localConverter; 311 cbErr = U_ZERO_ERROR; 312 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); 313 cbErr = U_ZERO_ERROR; 314 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); 315 316 UTRACE_EXIT_PTR_STATUS(localConverter, *status); 317 return localConverter; 318 } 319 320 321 322 /*Decreases the reference counter in the shared immutable section of the object 323 *and frees the mutable part*/ 324 325 U_CAPI void U_EXPORT2 326 ucnv_close (UConverter * converter) 327 { 328 UErrorCode errorCode = U_ZERO_ERROR; 329 330 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); 331 332 if (converter == NULL) 333 { 334 UTRACE_EXIT(); 335 return; 336 } 337 338 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", 339 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); 340 341 /* In order to speed up the close, only call the callbacks when they have been changed. 342 This performance check will only work when the callbacks are set within a shared library 343 or from user code that statically links this code. 
*/ 344 /* first, notify the callback functions that the converter is closed */ 345 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 346 UConverterToUnicodeArgs toUArgs = { 347 sizeof(UConverterToUnicodeArgs), 348 TRUE, 349 NULL, 350 NULL, 351 NULL, 352 NULL, 353 NULL, 354 NULL 355 }; 356 357 toUArgs.converter = converter; 358 errorCode = U_ZERO_ERROR; 359 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); 360 } 361 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 362 UConverterFromUnicodeArgs fromUArgs = { 363 sizeof(UConverterFromUnicodeArgs), 364 TRUE, 365 NULL, 366 NULL, 367 NULL, 368 NULL, 369 NULL, 370 NULL 371 }; 372 fromUArgs.converter = converter; 373 errorCode = U_ZERO_ERROR; 374 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); 375 } 376 377 if (converter->sharedData->impl->close != NULL) { 378 converter->sharedData->impl->close(converter); 379 } 380 381 if (converter->subChars != (uint8_t *)converter->subUChars) { 382 uprv_free(converter->subChars); 383 } 384 385 if (converter->sharedData->isReferenceCounted) { 386 ucnv_unloadSharedDataIfReady(converter->sharedData); 387 } 388 389 if(!converter->isCopyLocal){ 390 uprv_free(converter); 391 } 392 393 UTRACE_EXIT(); 394 } 395 396 /*returns a single Name from the list, will return NULL if out of bounds 397 */ 398 U_CAPI const char* U_EXPORT2 399 ucnv_getAvailableName (int32_t n) 400 { 401 if (0 <= n && n <= 0xffff) { 402 UErrorCode err = U_ZERO_ERROR; 403 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); 404 if (U_SUCCESS(err)) { 405 return name; 406 } 407 } 408 return NULL; 409 } 410 411 U_CAPI int32_t U_EXPORT2 412 ucnv_countAvailable () 413 { 414 UErrorCode err = U_ZERO_ERROR; 415 return ucnv_bld_countAvailableConverters(&err); 416 } 417 418 U_CAPI void U_EXPORT2 419 ucnv_getSubstChars (const UConverter * converter, 420 char 
*mySubChar, 421 int8_t * len, 422 UErrorCode * err) 423 { 424 if (U_FAILURE (*err)) 425 return; 426 427 if (converter->subCharLen <= 0) { 428 /* Unicode string or empty string from ucnv_setSubstString(). */ 429 *len = 0; 430 return; 431 } 432 433 if (*len < converter->subCharLen) /*not enough space in subChars */ 434 { 435 *err = U_INDEX_OUTOFBOUNDS_ERROR; 436 return; 437 } 438 439 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ 440 *len = converter->subCharLen; /*store # of bytes copied to buffer */ 441 } 442 443 U_CAPI void U_EXPORT2 444 ucnv_setSubstChars (UConverter * converter, 445 const char *mySubChar, 446 int8_t len, 447 UErrorCode * err) 448 { 449 if (U_FAILURE (*err)) 450 return; 451 452 /*Makes sure that the subChar is within the codepages char length boundaries */ 453 if ((len > converter->sharedData->staticData->maxBytesPerChar) 454 || (len < converter->sharedData->staticData->minBytesPerChar)) 455 { 456 *err = U_ILLEGAL_ARGUMENT_ERROR; 457 return; 458 } 459 460 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ 461 converter->subCharLen = len; /*sets the new len */ 462 463 /* 464 * There is currently (2001Feb) no separate API to set/get subChar1. 465 * In order to always have subChar written after it is explicitly set, 466 * we set subChar1 to 0. 467 */ 468 converter->subChar1 = 0; 469 470 return; 471 } 472 473 U_CAPI void U_EXPORT2 474 ucnv_setSubstString(UConverter *cnv, 475 const UChar *s, 476 int32_t length, 477 UErrorCode *err) { 478 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1]; 479 char chars[UCNV_ERROR_BUFFER_LENGTH]; 480 481 UConverter *clone; 482 uint8_t *subChars; 483 int32_t cloneSize, length8; 484 485 /* Let the following functions check all arguments. 
*/ 486 cloneSize = sizeof(cloneBuffer); 487 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); 488 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); 489 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); 490 ucnv_close(clone); 491 if (U_FAILURE(*err)) { 492 return; 493 } 494 495 if (cnv->sharedData->impl->writeSub == NULL 496 #if !UCONFIG_NO_LEGACY_CONVERSION 497 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && 498 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) 499 #endif 500 ) { 501 /* The converter is not stateful. Store the charset bytes as a fixed string. */ 502 subChars = (uint8_t *)chars; 503 } else { 504 /* 505 * The converter has a non-default writeSub() function, indicating 506 * that it is stateful. 507 * Store the Unicode string for on-the-fly conversion for correct 508 * state handling. 509 */ 510 if (length > UCNV_ERROR_BUFFER_LENGTH) { 511 /* 512 * Should not occur. The converter should output at least one byte 513 * per UChar, which means that ucnv_fromUChars() should catch all 514 * overflows. 515 */ 516 *err = U_BUFFER_OVERFLOW_ERROR; 517 return; 518 } 519 subChars = (uint8_t *)s; 520 if (length < 0) { 521 length = u_strlen(s); 522 } 523 length8 = length * U_SIZEOF_UCHAR; 524 } 525 526 /* 527 * For storing the substitution string, select either the small buffer inside 528 * UConverter or allocate a subChars buffer. 529 */ 530 if (length8 > UCNV_MAX_SUBCHAR_LEN) { 531 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ 532 if (cnv->subChars == (uint8_t *)cnv->subUChars) { 533 /* Allocate a new buffer for the string. 
*/ 534 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 535 if (cnv->subChars == NULL) { 536 cnv->subChars = (uint8_t *)cnv->subUChars; 537 *err = U_MEMORY_ALLOCATION_ERROR; 538 return; 539 } 540 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); 541 } 542 } 543 544 /* Copy the substitution string into the UConverter or its subChars buffer. */ 545 if (length8 == 0) { 546 cnv->subCharLen = 0; 547 } else { 548 uprv_memcpy(cnv->subChars, subChars, length8); 549 if (subChars == (uint8_t *)chars) { 550 cnv->subCharLen = (int8_t)length8; 551 } else /* subChars == s */ { 552 cnv->subCharLen = (int8_t)-length; 553 } 554 } 555 556 /* See comment in ucnv_setSubstChars(). */ 557 cnv->subChar1 = 0; 558 } 559 560 /*resets the internal states of a converter 561 *goal : have the same behaviour than a freshly created converter 562 */ 563 static void _reset(UConverter *converter, UConverterResetChoice choice, 564 UBool callCallback) { 565 if(converter == NULL) { 566 return; 567 } 568 569 if(callCallback) { 570 /* first, notify the callback functions that the converter is reset */ 571 UErrorCode errorCode; 572 573 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { 574 UConverterToUnicodeArgs toUArgs = { 575 sizeof(UConverterToUnicodeArgs), 576 TRUE, 577 NULL, 578 NULL, 579 NULL, 580 NULL, 581 NULL, 582 NULL 583 }; 584 toUArgs.converter = converter; 585 errorCode = U_ZERO_ERROR; 586 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); 587 } 588 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { 589 UConverterFromUnicodeArgs fromUArgs = { 590 sizeof(UConverterFromUnicodeArgs), 591 TRUE, 592 NULL, 593 NULL, 594 NULL, 595 NULL, 596 NULL, 597 NULL 598 }; 599 fromUArgs.converter = converter; 600 errorCode = U_ZERO_ERROR; 601 
converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); 602 } 603 } 604 605 /* now reset the converter itself */ 606 if(choice<=UCNV_RESET_TO_UNICODE) { 607 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; 608 converter->mode = 0; 609 converter->toULength = 0; 610 converter->invalidCharLength = converter->UCharErrorBufferLength = 0; 611 converter->preToULength = 0; 612 } 613 if(choice!=UCNV_RESET_TO_UNICODE) { 614 converter->fromUnicodeStatus = 0; 615 converter->fromUChar32 = 0; 616 converter->invalidUCharLength = converter->charErrorBufferLength = 0; 617 converter->preFromUFirstCP = U_SENTINEL; 618 converter->preFromULength = 0; 619 } 620 621 if (converter->sharedData->impl->reset != NULL) { 622 /* call the custom reset function */ 623 converter->sharedData->impl->reset(converter, choice); 624 } 625 } 626 627 U_CAPI void U_EXPORT2 628 ucnv_reset(UConverter *converter) 629 { 630 _reset(converter, UCNV_RESET_BOTH, TRUE); 631 } 632 633 U_CAPI void U_EXPORT2 634 ucnv_resetToUnicode(UConverter *converter) 635 { 636 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); 637 } 638 639 U_CAPI void U_EXPORT2 640 ucnv_resetFromUnicode(UConverter *converter) 641 { 642 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); 643 } 644 645 U_CAPI int8_t U_EXPORT2 646 ucnv_getMaxCharSize (const UConverter * converter) 647 { 648 return converter->maxBytesPerUChar; 649 } 650 651 652 U_CAPI int8_t U_EXPORT2 653 ucnv_getMinCharSize (const UConverter * converter) 654 { 655 return converter->sharedData->staticData->minBytesPerChar; 656 } 657 658 U_CAPI const char* U_EXPORT2 659 ucnv_getName (const UConverter * converter, UErrorCode * err) 660 661 { 662 if (U_FAILURE (*err)) 663 return NULL; 664 if(converter->sharedData->impl->getName){ 665 const char* temp= converter->sharedData->impl->getName(converter); 666 if(temp) 667 return temp; 668 } 669 return converter->sharedData->staticData->name; 670 } 671 672 U_CAPI int32_t 
U_EXPORT2 673 ucnv_getCCSID(const UConverter * converter, 674 UErrorCode * err) 675 { 676 int32_t ccsid; 677 if (U_FAILURE (*err)) 678 return -1; 679 680 ccsid = converter->sharedData->staticData->codepage; 681 if (ccsid == 0) { 682 /* Rare case. This is for cases like gb18030, 683 which doesn't have an IBM canonical name, but does have an IBM alias. */ 684 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); 685 if (U_SUCCESS(*err) && standardName) { 686 const char *ccsidStr = uprv_strchr(standardName, '-'); 687 if (ccsidStr) { 688 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ 689 } 690 } 691 } 692 return ccsid; 693 } 694 695 696 U_CAPI UConverterPlatform U_EXPORT2 697 ucnv_getPlatform (const UConverter * converter, 698 UErrorCode * err) 699 { 700 if (U_FAILURE (*err)) 701 return UCNV_UNKNOWN; 702 703 return (UConverterPlatform)converter->sharedData->staticData->platform; 704 } 705 706 U_CAPI void U_EXPORT2 707 ucnv_getToUCallBack (const UConverter * converter, 708 UConverterToUCallback *action, 709 const void **context) 710 { 711 *action = converter->fromCharErrorBehaviour; 712 *context = converter->toUContext; 713 } 714 715 U_CAPI void U_EXPORT2 716 ucnv_getFromUCallBack (const UConverter * converter, 717 UConverterFromUCallback *action, 718 const void **context) 719 { 720 *action = converter->fromUCharErrorBehaviour; 721 *context = converter->fromUContext; 722 } 723 724 U_CAPI void U_EXPORT2 725 ucnv_setToUCallBack (UConverter * converter, 726 UConverterToUCallback newAction, 727 const void* newContext, 728 UConverterToUCallback *oldAction, 729 const void** oldContext, 730 UErrorCode * err) 731 { 732 if (U_FAILURE (*err)) 733 return; 734 if (oldAction) *oldAction = converter->fromCharErrorBehaviour; 735 converter->fromCharErrorBehaviour = newAction; 736 if (oldContext) *oldContext = converter->toUContext; 737 converter->toUContext = newContext; 738 } 739 740 U_CAPI void U_EXPORT2 741 ucnv_setFromUCallBack 
(UConverter * converter, 742 UConverterFromUCallback newAction, 743 const void* newContext, 744 UConverterFromUCallback *oldAction, 745 const void** oldContext, 746 UErrorCode * err) 747 { 748 if (U_FAILURE (*err)) 749 return; 750 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; 751 converter->fromUCharErrorBehaviour = newAction; 752 if (oldContext) *oldContext = converter->fromUContext; 753 converter->fromUContext = newContext; 754 } 755 756 static void 757 _updateOffsets(int32_t *offsets, int32_t length, 758 int32_t sourceIndex, int32_t errorInputLength) { 759 int32_t *limit; 760 int32_t delta, offset; 761 762 if(sourceIndex>=0) { 763 /* 764 * adjust each offset by adding the previous sourceIndex 765 * minus the length of the input sequence that caused an 766 * error, if any 767 */ 768 delta=sourceIndex-errorInputLength; 769 } else { 770 /* 771 * set each offset to -1 because this conversion function 772 * does not handle offsets 773 */ 774 delta=-1; 775 } 776 777 limit=offsets+length; 778 if(delta==0) { 779 /* most common case, nothing to do */ 780 } else if(delta>0) { 781 /* add the delta to each offset (but not if the offset is <0) */ 782 while(offsets<limit) { 783 offset=*offsets; 784 if(offset>=0) { 785 *offsets=offset+delta; 786 } 787 ++offsets; 788 } 789 } else /* delta<0 */ { 790 /* 791 * set each offset to -1 because this conversion function 792 * does not handle offsets 793 * or the error input sequence started in a previous buffer 794 */ 795 while(offsets<limit) { 796 *offsets++=-1; 797 } 798 } 799 } 800 801 /* ucnv_fromUnicode --------------------------------------------------------- */ 802 803 /* 804 * Implementation note for m:n conversions 805 * 806 * While collecting source units to find the longest match for m:n conversion, 807 * some source units may need to be stored for a partial match. 
* When a second buffer does not yield a match on all of the previously stored
 * source units, then they must be "replayed", i.e., fed back into the converter.
 *
 * The code relies on the fact that replaying will not nest -
 * converting a replay buffer will not result in a replay.
 * This is because a replay is necessary only after the _continuation_ of a
 * partial match failed, but a replay buffer is converted as a whole.
 * It may result in some of its units being stored again for a partial match,
 * but there will not be a continuation _during_ the replay which could fail.
 *
 * It is conceivable that a callback function could call the converter
 * recursively in a way that causes another replay to be stored, but that
 * would be an error in the callback function.
 * Such violations will cause assertion failures in a debug build,
 * and wrong output, but they will not cause a crash.
 */

/*
 * Runs the converter's from-Unicode implementation over pArgs and handles
 * offsets, replay of stored m:n source units, end of input, and callbacks.
 *
 * - Uses impl->fromUnicodeWithOffsets when pArgs->offsets is set and that
 *   function exists; otherwise impl->fromUnicode, in which case every
 *   offset written is -1.
 * - If cnv->preFromULength<0 on entry (or after a conversion call), the
 *   units in cnv->preFromU are converted first from a local replay buffer,
 *   then the real source arguments are restored.
 * - For U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and
 *   U_TRUNCATED_CHAR_FOUND the from-Unicode callback is called once per
 *   error; any other failure, or a failure still set after the callback,
 *   ends the function with *err set.
 * - When all input is consumed with pArgs->flush set, the from-Unicode
 *   state is reset without calling callbacks.
 *
 * @param pArgs conversion arguments; source/target/offsets pointers are
 *              advanced as the conversion proceeds
 * @param err   in/out error code; a failure on entry skips the first
 *              conversion call and goes straight to error handling
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preFromULength is negative here, so its negation is the number of stored units */
        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    /* move sourceIndex back by the number of replayed units; clamp to -1 if that precedes this buffer */
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    /* realSource!=NULL in this branch, so this assertion fires when assertions are enabled */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            u_memcpy(cnv->preFromU, pArgs->source, length);
                            /* stored negative, matching the preFromULength<0 replay checks above */
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}

/*
 * Output the fromUnicode overflow buffer.
 * Call this function if(cnv->charErrorBufferLength>0).
1114 * @return TRUE if overflow 1115 */ 1116 static UBool 1117 ucnv_outputOverflowFromUnicode(UConverter *cnv, 1118 char **target, const char *targetLimit, 1119 int32_t **pOffsets, 1120 UErrorCode *err) { 1121 int32_t *offsets; 1122 char *overflow, *t; 1123 int32_t i, length; 1124 1125 t=*target; 1126 if(pOffsets!=NULL) { 1127 offsets=*pOffsets; 1128 } else { 1129 offsets=NULL; 1130 } 1131 1132 overflow=(char *)cnv->charErrorBuffer; 1133 length=cnv->charErrorBufferLength; 1134 i=0; 1135 while(i<length) { 1136 if(t==targetLimit) { 1137 /* the overflow buffer contains too much, keep the rest */ 1138 int32_t j=0; 1139 1140 do { 1141 overflow[j++]=overflow[i++]; 1142 } while(i<length); 1143 1144 cnv->charErrorBufferLength=(int8_t)j; 1145 *target=t; 1146 if(offsets!=NULL) { 1147 *pOffsets=offsets; 1148 } 1149 *err=U_BUFFER_OVERFLOW_ERROR; 1150 return TRUE; 1151 } 1152 1153 /* copy the overflow contents to the target */ 1154 *t++=overflow[i++]; 1155 if(offsets!=NULL) { 1156 *offsets++=-1; /* no source index available for old output */ 1157 } 1158 } 1159 1160 /* the overflow buffer is completely copied to the target */ 1161 cnv->charErrorBufferLength=0; 1162 *target=t; 1163 if(offsets!=NULL) { 1164 *pOffsets=offsets; 1165 } 1166 return FALSE; 1167 } 1168 1169 U_CAPI void U_EXPORT2 1170 ucnv_fromUnicode(UConverter *cnv, 1171 char **target, const char *targetLimit, 1172 const UChar **source, const UChar *sourceLimit, 1173 int32_t *offsets, 1174 UBool flush, 1175 UErrorCode *err) { 1176 UConverterFromUnicodeArgs args; 1177 const UChar *s; 1178 char *t; 1179 1180 /* check parameters */ 1181 if(err==NULL || U_FAILURE(*err)) { 1182 return; 1183 } 1184 1185 if(cnv==NULL || target==NULL || source==NULL) { 1186 *err=U_ILLEGAL_ARGUMENT_ERROR; 1187 return; 1188 } 1189 1190 s=*source; 1191 t=*target; 1192 1193 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { 1194 /* 1195 Prevent code from going into an infinite loop in case we do hit this 1196 limit. 
The limit pointer is expected to be on a UChar * boundary. 1197 This also prevents the next argument check from failing. 1198 */ 1199 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); 1200 } 1201 1202 /* 1203 * All these conditions should never happen. 1204 * 1205 * 1) Make sure that the limits are >= to the address source or target 1206 * 1207 * 2) Make sure that the buffer sizes do not exceed the number range for 1208 * int32_t because some functions use the size (in units or bytes) 1209 * rather than comparing pointers, and because offsets are int32_t values. 1210 * 1211 * size_t is guaranteed to be unsigned and large enough for the job. 1212 * 1213 * Return with an error instead of adjusting the limits because we would 1214 * not be able to maintain the semantics that either the source must be 1215 * consumed or the target filled (unless an error occurs). 1216 * An adjustment would be targetLimit=t+0x7fffffff; for example. 1217 * 1218 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1219 * to a char * pointer and provide an incomplete UChar code unit. 
1220 */ 1221 if (sourceLimit<s || targetLimit<t || 1222 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || 1223 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || 1224 (((const char *)sourceLimit-(const char *)s) & 1) != 0) 1225 { 1226 *err=U_ILLEGAL_ARGUMENT_ERROR; 1227 return; 1228 } 1229 1230 /* output the target overflow buffer */ 1231 if( cnv->charErrorBufferLength>0 && 1232 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) 1233 ) { 1234 /* U_BUFFER_OVERFLOW_ERROR */ 1235 return; 1236 } 1237 /* *target may have moved, therefore stop using t */ 1238 1239 if(!flush && s==sourceLimit && cnv->preFromULength>=0) { 1240 /* the overflow buffer is emptied and there is no new input: we are done */ 1241 return; 1242 } 1243 1244 /* 1245 * Do not simply return with a buffer overflow error if 1246 * !flush && t==targetLimit 1247 * because it is possible that the source will not generate any output. 1248 * For example, the skip callback may be called; 1249 * it does not output anything. 
1250 */ 1251 1252 /* prepare the converter arguments */ 1253 args.converter=cnv; 1254 args.flush=flush; 1255 args.offsets=offsets; 1256 args.source=s; 1257 args.sourceLimit=sourceLimit; 1258 args.target=*target; 1259 args.targetLimit=targetLimit; 1260 args.size=sizeof(args); 1261 1262 _fromUnicodeWithCallback(&args, err); 1263 1264 *source=args.source; 1265 *target=args.target; 1266 } 1267 1268 /* ucnv_toUnicode() --------------------------------------------------------- */ 1269 1270 static void 1271 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 1272 UConverterToUnicode toUnicode; 1273 UConverter *cnv; 1274 const char *s; 1275 UChar *t; 1276 int32_t *offsets; 1277 int32_t sourceIndex; 1278 int32_t errorInputLength; 1279 UBool converterSawEndOfInput, calledCallback; 1280 1281 /* variables for m:n conversion */ 1282 char replay[UCNV_EXT_MAX_BYTES]; 1283 const char *realSource, *realSourceLimit; 1284 int32_t realSourceIndex; 1285 UBool realFlush; 1286 1287 cnv=pArgs->converter; 1288 s=pArgs->source; 1289 t=pArgs->target; 1290 offsets=pArgs->offsets; 1291 1292 /* get the converter implementation function */ 1293 sourceIndex=0; 1294 if(offsets==NULL) { 1295 toUnicode=cnv->sharedData->impl->toUnicode; 1296 } else { 1297 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; 1298 if(toUnicode==NULL) { 1299 /* there is no WithOffsets implementation */ 1300 toUnicode=cnv->sharedData->impl->toUnicode; 1301 /* we will write -1 for each offset */ 1302 sourceIndex=-1; 1303 } 1304 } 1305 1306 if(cnv->preToULength>=0) { 1307 /* normal mode */ 1308 realSource=NULL; 1309 1310 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ 1311 realSourceLimit=NULL; 1312 realFlush=FALSE; 1313 realSourceIndex=0; 1314 } else { 1315 /* 1316 * Previous m:n conversion stored source units from a partial match 1317 * and failed to consume all of them. 1318 * We need to "replay" them from a temporary buffer and convert them first. 
1319 */ 1320 realSource=pArgs->source; 1321 realSourceLimit=pArgs->sourceLimit; 1322 realFlush=pArgs->flush; 1323 realSourceIndex=sourceIndex; 1324 1325 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1326 pArgs->source=replay; 1327 pArgs->sourceLimit=replay-cnv->preToULength; 1328 pArgs->flush=FALSE; 1329 sourceIndex=-1; 1330 1331 cnv->preToULength=0; 1332 } 1333 1334 /* 1335 * loop for conversion and error handling 1336 * 1337 * loop { 1338 * convert 1339 * loop { 1340 * update offsets 1341 * handle end of input 1342 * handle errors/call callback 1343 * } 1344 * } 1345 */ 1346 for(;;) { 1347 if(U_SUCCESS(*err)) { 1348 /* convert */ 1349 toUnicode(pArgs, err); 1350 1351 /* 1352 * set a flag for whether the converter 1353 * successfully processed the end of the input 1354 * 1355 * need not check cnv->preToULength==0 because a replay (<0) will cause 1356 * s<sourceLimit before converterSawEndOfInput is checked 1357 */ 1358 converterSawEndOfInput= 1359 (UBool)(U_SUCCESS(*err) && 1360 pArgs->flush && pArgs->source==pArgs->sourceLimit && 1361 cnv->toULength==0); 1362 } else { 1363 /* handle error from getNextUChar() or ucnv_convertEx() */ 1364 converterSawEndOfInput=FALSE; 1365 } 1366 1367 /* no callback called yet for this iteration */ 1368 calledCallback=FALSE; 1369 1370 /* no sourceIndex adjustment for conversion, only for callback output */ 1371 errorInputLength=0; 1372 1373 /* 1374 * loop for offsets and error handling 1375 * 1376 * iterates at most 3 times: 1377 * 1. to clean up after the conversion function 1378 * 2. after the callback 1379 * 3. 
after the callback again if there was truncated input 1380 */ 1381 for(;;) { 1382 /* update offsets if we write any */ 1383 if(offsets!=NULL) { 1384 int32_t length=(int32_t)(pArgs->target-t); 1385 if(length>0) { 1386 _updateOffsets(offsets, length, sourceIndex, errorInputLength); 1387 1388 /* 1389 * if a converter handles offsets and updates the offsets 1390 * pointer at the end, then pArgs->offset should not change 1391 * here; 1392 * however, some converters do not handle offsets at all 1393 * (sourceIndex<0) or may not update the offsets pointer 1394 */ 1395 pArgs->offsets=offsets+=length; 1396 } 1397 1398 if(sourceIndex>=0) { 1399 sourceIndex+=(int32_t)(pArgs->source-s); 1400 } 1401 } 1402 1403 if(cnv->preToULength<0) { 1404 /* 1405 * switch the source to new replay units (cannot occur while replaying) 1406 * after offset handling and before end-of-input and callback handling 1407 */ 1408 if(realSource==NULL) { 1409 realSource=pArgs->source; 1410 realSourceLimit=pArgs->sourceLimit; 1411 realFlush=pArgs->flush; 1412 realSourceIndex=sourceIndex; 1413 1414 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); 1415 pArgs->source=replay; 1416 pArgs->sourceLimit=replay-cnv->preToULength; 1417 pArgs->flush=FALSE; 1418 if((sourceIndex+=cnv->preToULength)<0) { 1419 sourceIndex=-1; 1420 } 1421 1422 cnv->preToULength=0; 1423 } else { 1424 /* see implementation note before _fromUnicodeWithCallback() */ 1425 U_ASSERT(realSource==NULL); 1426 *err=U_INTERNAL_PROGRAM_ERROR; 1427 } 1428 } 1429 1430 /* update pointers */ 1431 s=pArgs->source; 1432 t=pArgs->target; 1433 1434 if(U_SUCCESS(*err)) { 1435 if(s<pArgs->sourceLimit) { 1436 /* 1437 * continue with the conversion loop while there is still input left 1438 * (continue converting by breaking out of only the inner loop) 1439 */ 1440 break; 1441 } else if(realSource!=NULL) { 1442 /* switch back from replaying to the real source and continue */ 1443 pArgs->source=realSource; 1444 pArgs->sourceLimit=realSourceLimit; 1445 
pArgs->flush=realFlush; 1446 sourceIndex=realSourceIndex; 1447 1448 realSource=NULL; 1449 break; 1450 } else if(pArgs->flush && cnv->toULength>0) { 1451 /* 1452 * the entire input stream is consumed 1453 * and there is a partial, truncated input sequence left 1454 */ 1455 1456 /* inject an error and continue with callback handling */ 1457 *err=U_TRUNCATED_CHAR_FOUND; 1458 calledCallback=FALSE; /* new error condition */ 1459 } else { 1460 /* input consumed */ 1461 if(pArgs->flush) { 1462 /* 1463 * return to the conversion loop once more if the flush 1464 * flag is set and the conversion function has not 1465 * successfully processed the end of the input yet 1466 * 1467 * (continue converting by breaking out of only the inner loop) 1468 */ 1469 if(!converterSawEndOfInput) { 1470 break; 1471 } 1472 1473 /* reset the converter without calling the callback function */ 1474 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); 1475 } 1476 1477 /* done successfully */ 1478 return; 1479 } 1480 } 1481 1482 /* U_FAILURE(*err) */ 1483 { 1484 UErrorCode e; 1485 1486 if( calledCallback || 1487 (e=*err)==U_BUFFER_OVERFLOW_ERROR || 1488 (e!=U_INVALID_CHAR_FOUND && 1489 e!=U_ILLEGAL_CHAR_FOUND && 1490 e!=U_TRUNCATED_CHAR_FOUND && 1491 e!=U_ILLEGAL_ESCAPE_SEQUENCE && 1492 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) 1493 ) { 1494 /* 1495 * the callback did not or cannot resolve the error: 1496 * set output pointers and return 1497 * 1498 * the check for buffer overflow is redundant but it is 1499 * a high-runner case and hopefully documents the intent 1500 * well 1501 * 1502 * if we were replaying, then the replay buffer must be 1503 * copied back into the UConverter 1504 * and the real arguments must be restored 1505 */ 1506 if(realSource!=NULL) { 1507 int32_t length; 1508 1509 U_ASSERT(cnv->preToULength==0); 1510 1511 length=(int32_t)(pArgs->sourceLimit-pArgs->source); 1512 if(length>0) { 1513 uprv_memcpy(cnv->preToU, pArgs->source, length); 1514 cnv->preToULength=(int8_t)-length; 1515 } 1516 1517 
pArgs->source=realSource; 1518 pArgs->sourceLimit=realSourceLimit; 1519 pArgs->flush=realFlush; 1520 } 1521 1522 return; 1523 } 1524 } 1525 1526 /* copy toUBytes[] to invalidCharBuffer[] */ 1527 errorInputLength=cnv->invalidCharLength=cnv->toULength; 1528 if(errorInputLength>0) { 1529 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); 1530 } 1531 1532 /* set the converter state to deal with the next character */ 1533 cnv->toULength=0; 1534 1535 /* call the callback function */ 1536 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { 1537 cnv->toUCallbackReason = UCNV_UNASSIGNED; 1538 } 1539 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, 1540 cnv->invalidCharBuffer, errorInputLength, 1541 cnv->toUCallbackReason, 1542 err); 1543 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ 1544 1545 /* 1546 * loop back to the offset handling 1547 * 1548 * this flag will indicate after offset handling 1549 * that a callback was called; 1550 * if the callback did not resolve the error, then we return 1551 */ 1552 calledCallback=TRUE; 1553 } 1554 } 1555 } 1556 1557 /* 1558 * Output the toUnicode overflow buffer. 1559 * Call this function if(cnv->UCharErrorBufferLength>0). 
1560 * @return TRUE if overflow 1561 */ 1562 static UBool 1563 ucnv_outputOverflowToUnicode(UConverter *cnv, 1564 UChar **target, const UChar *targetLimit, 1565 int32_t **pOffsets, 1566 UErrorCode *err) { 1567 int32_t *offsets; 1568 UChar *overflow, *t; 1569 int32_t i, length; 1570 1571 t=*target; 1572 if(pOffsets!=NULL) { 1573 offsets=*pOffsets; 1574 } else { 1575 offsets=NULL; 1576 } 1577 1578 overflow=cnv->UCharErrorBuffer; 1579 length=cnv->UCharErrorBufferLength; 1580 i=0; 1581 while(i<length) { 1582 if(t==targetLimit) { 1583 /* the overflow buffer contains too much, keep the rest */ 1584 int32_t j=0; 1585 1586 do { 1587 overflow[j++]=overflow[i++]; 1588 } while(i<length); 1589 1590 cnv->UCharErrorBufferLength=(int8_t)j; 1591 *target=t; 1592 if(offsets!=NULL) { 1593 *pOffsets=offsets; 1594 } 1595 *err=U_BUFFER_OVERFLOW_ERROR; 1596 return TRUE; 1597 } 1598 1599 /* copy the overflow contents to the target */ 1600 *t++=overflow[i++]; 1601 if(offsets!=NULL) { 1602 *offsets++=-1; /* no source index available for old output */ 1603 } 1604 } 1605 1606 /* the overflow buffer is completely copied to the target */ 1607 cnv->UCharErrorBufferLength=0; 1608 *target=t; 1609 if(offsets!=NULL) { 1610 *pOffsets=offsets; 1611 } 1612 return FALSE; 1613 } 1614 1615 U_CAPI void U_EXPORT2 1616 ucnv_toUnicode(UConverter *cnv, 1617 UChar **target, const UChar *targetLimit, 1618 const char **source, const char *sourceLimit, 1619 int32_t *offsets, 1620 UBool flush, 1621 UErrorCode *err) { 1622 UConverterToUnicodeArgs args; 1623 const char *s; 1624 UChar *t; 1625 1626 /* check parameters */ 1627 if(err==NULL || U_FAILURE(*err)) { 1628 return; 1629 } 1630 1631 if(cnv==NULL || target==NULL || source==NULL) { 1632 *err=U_ILLEGAL_ARGUMENT_ERROR; 1633 return; 1634 } 1635 1636 s=*source; 1637 t=*target; 1638 1639 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { 1640 /* 1641 Prevent code from going into an infinite loop in case we do hit this 1642 limit. 
The limit pointer is expected to be on a UChar * boundary. 1643 This also prevents the next argument check from failing. 1644 */ 1645 targetLimit = (const UChar *)(((const char *)targetLimit) - 1); 1646 } 1647 1648 /* 1649 * All these conditions should never happen. 1650 * 1651 * 1) Make sure that the limits are >= to the address source or target 1652 * 1653 * 2) Make sure that the buffer sizes do not exceed the number range for 1654 * int32_t because some functions use the size (in units or bytes) 1655 * rather than comparing pointers, and because offsets are int32_t values. 1656 * 1657 * size_t is guaranteed to be unsigned and large enough for the job. 1658 * 1659 * Return with an error instead of adjusting the limits because we would 1660 * not be able to maintain the semantics that either the source must be 1661 * consumed or the target filled (unless an error occurs). 1662 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1663 * 1664 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer 1665 * to a char * pointer and provide an incomplete UChar code unit. 
1666 */ 1667 if (sourceLimit<s || targetLimit<t || 1668 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || 1669 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || 1670 (((const char *)targetLimit-(const char *)t) & 1) != 0 1671 ) { 1672 *err=U_ILLEGAL_ARGUMENT_ERROR; 1673 return; 1674 } 1675 1676 /* output the target overflow buffer */ 1677 if( cnv->UCharErrorBufferLength>0 && 1678 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) 1679 ) { 1680 /* U_BUFFER_OVERFLOW_ERROR */ 1681 return; 1682 } 1683 /* *target may have moved, therefore stop using t */ 1684 1685 if(!flush && s==sourceLimit && cnv->preToULength>=0) { 1686 /* the overflow buffer is emptied and there is no new input: we are done */ 1687 return; 1688 } 1689 1690 /* 1691 * Do not simply return with a buffer overflow error if 1692 * !flush && t==targetLimit 1693 * because it is possible that the source will not generate any output. 1694 * For example, the skip callback may be called; 1695 * it does not output anything. 
1696 */ 1697 1698 /* prepare the converter arguments */ 1699 args.converter=cnv; 1700 args.flush=flush; 1701 args.offsets=offsets; 1702 args.source=s; 1703 args.sourceLimit=sourceLimit; 1704 args.target=*target; 1705 args.targetLimit=targetLimit; 1706 args.size=sizeof(args); 1707 1708 _toUnicodeWithCallback(&args, err); 1709 1710 *source=args.source; 1711 *target=args.target; 1712 } 1713 1714 /* ucnv_to/fromUChars() ----------------------------------------------------- */ 1715 1716 U_CAPI int32_t U_EXPORT2 1717 ucnv_fromUChars(UConverter *cnv, 1718 char *dest, int32_t destCapacity, 1719 const UChar *src, int32_t srcLength, 1720 UErrorCode *pErrorCode) { 1721 const UChar *srcLimit; 1722 char *originalDest, *destLimit; 1723 int32_t destLength; 1724 1725 /* check arguments */ 1726 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1727 return 0; 1728 } 1729 1730 if( cnv==NULL || 1731 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1732 srcLength<-1 || (srcLength!=0 && src==NULL) 1733 ) { 1734 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1735 return 0; 1736 } 1737 1738 /* initialize */ 1739 ucnv_resetFromUnicode(cnv); 1740 originalDest=dest; 1741 if(srcLength==-1) { 1742 srcLength=u_strlen(src); 1743 } 1744 if(srcLength>0) { 1745 srcLimit=src+srcLength; 1746 destCapacity=pinCapacity(dest, destCapacity); 1747 destLimit=dest+destCapacity; 1748 1749 /* perform the conversion */ 1750 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1751 destLength=(int32_t)(dest-originalDest); 1752 1753 /* if an overflow occurs, then get the preflighting length */ 1754 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 1755 char buffer[1024]; 1756 1757 destLimit=buffer+sizeof(buffer); 1758 do { 1759 dest=buffer; 1760 *pErrorCode=U_ZERO_ERROR; 1761 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1762 destLength+=(int32_t)(dest-buffer); 1763 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1764 } 1765 } else { 1766 destLength=0; 1767 } 1768 1769 
return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); 1770 } 1771 1772 U_CAPI int32_t U_EXPORT2 1773 ucnv_toUChars(UConverter *cnv, 1774 UChar *dest, int32_t destCapacity, 1775 const char *src, int32_t srcLength, 1776 UErrorCode *pErrorCode) { 1777 const char *srcLimit; 1778 UChar *originalDest, *destLimit; 1779 int32_t destLength; 1780 1781 /* check arguments */ 1782 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1783 return 0; 1784 } 1785 1786 if( cnv==NULL || 1787 destCapacity<0 || (destCapacity>0 && dest==NULL) || 1788 srcLength<-1 || (srcLength!=0 && src==NULL)) 1789 { 1790 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1791 return 0; 1792 } 1793 1794 /* initialize */ 1795 ucnv_resetToUnicode(cnv); 1796 originalDest=dest; 1797 if(srcLength==-1) { 1798 srcLength=(int32_t)uprv_strlen(src); 1799 } 1800 if(srcLength>0) { 1801 srcLimit=src+srcLength; 1802 destCapacity=pinCapacity(dest, destCapacity); 1803 destLimit=dest+destCapacity; 1804 1805 /* perform the conversion */ 1806 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1807 destLength=(int32_t)(dest-originalDest); 1808 1809 /* if an overflow occurs, then get the preflighting length */ 1810 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) 1811 { 1812 UChar buffer[1024]; 1813 1814 destLimit=buffer+UPRV_LENGTHOF(buffer); 1815 do { 1816 dest=buffer; 1817 *pErrorCode=U_ZERO_ERROR; 1818 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); 1819 destLength+=(int32_t)(dest-buffer); 1820 } 1821 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 1822 } 1823 } else { 1824 destLength=0; 1825 } 1826 1827 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); 1828 } 1829 1830 /* ucnv_getNextUChar() ------------------------------------------------------ */ 1831 1832 U_CAPI UChar32 U_EXPORT2 1833 ucnv_getNextUChar(UConverter *cnv, 1834 const char **source, const char *sourceLimit, 1835 UErrorCode *err) { 1836 UConverterToUnicodeArgs args; 1837 UChar 
buffer[U16_MAX_LENGTH]; 1838 const char *s; 1839 UChar32 c; 1840 int32_t i, length; 1841 1842 /* check parameters */ 1843 if(err==NULL || U_FAILURE(*err)) { 1844 return 0xffff; 1845 } 1846 1847 if(cnv==NULL || source==NULL) { 1848 *err=U_ILLEGAL_ARGUMENT_ERROR; 1849 return 0xffff; 1850 } 1851 1852 s=*source; 1853 if(sourceLimit<s) { 1854 *err=U_ILLEGAL_ARGUMENT_ERROR; 1855 return 0xffff; 1856 } 1857 1858 /* 1859 * Make sure that the buffer sizes do not exceed the number range for 1860 * int32_t because some functions use the size (in units or bytes) 1861 * rather than comparing pointers, and because offsets are int32_t values. 1862 * 1863 * size_t is guaranteed to be unsigned and large enough for the job. 1864 * 1865 * Return with an error instead of adjusting the limits because we would 1866 * not be able to maintain the semantics that either the source must be 1867 * consumed or the target filled (unless an error occurs). 1868 * An adjustment would be sourceLimit=t+0x7fffffff; for example. 1869 */ 1870 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { 1871 *err=U_ILLEGAL_ARGUMENT_ERROR; 1872 return 0xffff; 1873 } 1874 1875 c=U_SENTINEL; 1876 1877 /* flush the target overflow buffer */ 1878 if(cnv->UCharErrorBufferLength>0) { 1879 UChar *overflow; 1880 1881 overflow=cnv->UCharErrorBuffer; 1882 i=0; 1883 length=cnv->UCharErrorBufferLength; 1884 U16_NEXT(overflow, i, length, c); 1885 1886 /* move the remaining overflow contents up to the beginning */ 1887 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { 1888 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, 1889 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1890 } 1891 1892 if(!U16_IS_LEAD(c) || i<length) { 1893 return c; 1894 } 1895 /* 1896 * Continue if the overflow buffer contained only a lead surrogate, 1897 * in case the converter outputs single surrogates from complete 1898 * input sequences. 
1899 */ 1900 } 1901 1902 /* 1903 * flush==TRUE is implied for ucnv_getNextUChar() 1904 * 1905 * do not simply return even if s==sourceLimit because the converter may 1906 * not have seen flush==TRUE before 1907 */ 1908 1909 /* prepare the converter arguments */ 1910 args.converter=cnv; 1911 args.flush=TRUE; 1912 args.offsets=NULL; 1913 args.source=s; 1914 args.sourceLimit=sourceLimit; 1915 args.target=buffer; 1916 args.targetLimit=buffer+1; 1917 args.size=sizeof(args); 1918 1919 if(c<0) { 1920 /* 1921 * call the native getNextUChar() implementation if we are 1922 * at a character boundary (toULength==0) 1923 * 1924 * unlike with _toUnicode(), getNextUChar() implementations must set 1925 * U_TRUNCATED_CHAR_FOUND for truncated input, 1926 * in addition to setting toULength/toUBytes[] 1927 */ 1928 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { 1929 c=cnv->sharedData->impl->getNextUChar(&args, err); 1930 *source=s=args.source; 1931 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { 1932 /* reset the converter without calling the callback function */ 1933 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); 1934 return 0xffff; /* no output */ 1935 } else if(U_SUCCESS(*err) && c>=0) { 1936 return c; 1937 /* 1938 * else fall through to use _toUnicode() because 1939 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all 1940 * U_FAILURE: call _toUnicode() for callback handling (do not output c) 1941 */ 1942 } 1943 } 1944 1945 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ 1946 _toUnicodeWithCallback(&args, err); 1947 1948 if(*err==U_BUFFER_OVERFLOW_ERROR) { 1949 *err=U_ZERO_ERROR; 1950 } 1951 1952 i=0; 1953 length=(int32_t)(args.target-buffer); 1954 } else { 1955 /* write the lead surrogate from the overflow buffer */ 1956 buffer[0]=(UChar)c; 1957 args.target=buffer+1; 1958 i=0; 1959 length=1; 1960 } 1961 1962 /* buffer contents starts at i and ends before length */ 1963 1964 if(U_FAILURE(*err)) { 1965 c=0xffff; /* 
no output */ 1966 } else if(length==0) { 1967 /* no input or only state changes */ 1968 *err=U_INDEX_OUTOFBOUNDS_ERROR; 1969 /* no need to reset explicitly because _toUnicodeWithCallback() did it */ 1970 c=0xffff; /* no output */ 1971 } else { 1972 c=buffer[0]; 1973 i=1; 1974 if(!U16_IS_LEAD(c)) { 1975 /* consume c=buffer[0], done */ 1976 } else { 1977 /* got a lead surrogate, see if a trail surrogate follows */ 1978 UChar c2; 1979 1980 if(cnv->UCharErrorBufferLength>0) { 1981 /* got overflow output from the conversion */ 1982 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { 1983 /* got a trail surrogate, too */ 1984 c=U16_GET_SUPPLEMENTARY(c, c2); 1985 1986 /* move the remaining overflow contents up to the beginning */ 1987 if((--cnv->UCharErrorBufferLength)>0) { 1988 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, 1989 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); 1990 } 1991 } else { 1992 /* c is an unpaired lead surrogate, just return it */ 1993 } 1994 } else if(args.source<sourceLimit) { 1995 /* convert once more, to buffer[1] */ 1996 args.targetLimit=buffer+2; 1997 _toUnicodeWithCallback(&args, err); 1998 if(*err==U_BUFFER_OVERFLOW_ERROR) { 1999 *err=U_ZERO_ERROR; 2000 } 2001 2002 length=(int32_t)(args.target-buffer); 2003 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { 2004 /* got a trail surrogate, too */ 2005 c=U16_GET_SUPPLEMENTARY(c, c2); 2006 i=2; 2007 } 2008 } 2009 } 2010 } 2011 2012 /* 2013 * move leftover output from buffer[i..length[ 2014 * into the beginning of the overflow buffer 2015 */ 2016 if(i<length) { 2017 /* move further overflow back */ 2018 int32_t delta=length-i; 2019 if((length=cnv->UCharErrorBufferLength)>0) { 2020 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, 2021 length*U_SIZEOF_UCHAR); 2022 } 2023 cnv->UCharErrorBufferLength=(int8_t)(length+delta); 2024 2025 cnv->UCharErrorBuffer[0]=buffer[i++]; 2026 if(delta>1) { 2027 cnv->UCharErrorBuffer[1]=buffer[i]; 2028 } 2029 } 2030 2031 
*source=args.source; 2032 return c; 2033 } 2034 2035 /* ucnv_convert() and siblings ---------------------------------------------- */ 2036 2037 U_CAPI void U_EXPORT2 2038 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, 2039 char **target, const char *targetLimit, 2040 const char **source, const char *sourceLimit, 2041 UChar *pivotStart, UChar **pivotSource, 2042 UChar **pivotTarget, const UChar *pivotLimit, 2043 UBool reset, UBool flush, 2044 UErrorCode *pErrorCode) { 2045 UChar pivotBuffer[CHUNK_SIZE]; 2046 const UChar *myPivotSource; 2047 UChar *myPivotTarget; 2048 const char *s; 2049 char *t; 2050 2051 UConverterToUnicodeArgs toUArgs; 2052 UConverterFromUnicodeArgs fromUArgs; 2053 UConverterConvert convert; 2054 2055 /* error checking */ 2056 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2057 return; 2058 } 2059 2060 if( targetCnv==NULL || sourceCnv==NULL || 2061 source==NULL || *source==NULL || 2062 target==NULL || *target==NULL || targetLimit==NULL 2063 ) { 2064 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2065 return; 2066 } 2067 2068 s=*source; 2069 t=*target; 2070 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { 2071 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2072 return; 2073 } 2074 2075 /* 2076 * Make sure that the buffer sizes do not exceed the number range for 2077 * int32_t. See ucnv_toUnicode() for a more detailed comment. 
2078 */ 2079 if( 2080 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || 2081 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) 2082 ) { 2083 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2084 return; 2085 } 2086 2087 if(pivotStart==NULL) { 2088 if(!flush) { 2089 /* streaming conversion requires an explicit pivot buffer */ 2090 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2091 return; 2092 } 2093 2094 /* use the stack pivot buffer */ 2095 myPivotSource=myPivotTarget=pivotStart=pivotBuffer; 2096 pivotSource=(UChar **)&myPivotSource; 2097 pivotTarget=&myPivotTarget; 2098 pivotLimit=pivotBuffer+CHUNK_SIZE; 2099 } else if( pivotStart>=pivotLimit || 2100 pivotSource==NULL || *pivotSource==NULL || 2101 pivotTarget==NULL || *pivotTarget==NULL || 2102 pivotLimit==NULL 2103 ) { 2104 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2105 return; 2106 } 2107 2108 if(sourceLimit==NULL) { 2109 /* get limit of single-byte-NUL-terminated source string */ 2110 sourceLimit=uprv_strchr(*source, 0); 2111 } 2112 2113 if(reset) { 2114 ucnv_resetToUnicode(sourceCnv); 2115 ucnv_resetFromUnicode(targetCnv); 2116 *pivotSource=*pivotTarget=pivotStart; 2117 } else if(targetCnv->charErrorBufferLength>0) { 2118 /* output the targetCnv overflow buffer */ 2119 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { 2120 /* U_BUFFER_OVERFLOW_ERROR */ 2121 return; 2122 } 2123 /* *target has moved, therefore stop using t */ 2124 2125 if( !flush && 2126 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && 2127 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit 2128 ) { 2129 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ 2130 return; 2131 } 2132 } 2133 2134 /* Is direct-UTF-8 conversion available? 
*/ 2135 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2136 targetCnv->sharedData->impl->fromUTF8!=NULL 2137 ) { 2138 convert=targetCnv->sharedData->impl->fromUTF8; 2139 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && 2140 sourceCnv->sharedData->impl->toUTF8!=NULL 2141 ) { 2142 convert=sourceCnv->sharedData->impl->toUTF8; 2143 } else { 2144 convert=NULL; 2145 } 2146 2147 /* 2148 * If direct-UTF-8 conversion is available, then we use a smaller 2149 * pivot buffer for error handling and partial matches 2150 * so that we quickly return to direct conversion. 2151 * 2152 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. 2153 * 2154 * We could reduce the pivot buffer size further, at the cost of 2155 * buffer overflows from callbacks. 2156 * The pivot buffer should not be smaller than the maximum number of 2157 * fromUnicode extension table input UChars 2158 * (for m:n conversion, see 2159 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) 2160 * or 2 for surrogate pairs. 2161 * 2162 * Too small a buffer can cause thrashing between pivoting and direct 2163 * conversion, with function call overhead outweighing the benefits 2164 * of direct conversion. 
2165 */ 2166 if(convert!=NULL && (pivotLimit-pivotStart)>32) { 2167 pivotLimit=pivotStart+32; 2168 } 2169 2170 /* prepare the converter arguments */ 2171 fromUArgs.converter=targetCnv; 2172 fromUArgs.flush=FALSE; 2173 fromUArgs.offsets=NULL; 2174 fromUArgs.target=*target; 2175 fromUArgs.targetLimit=targetLimit; 2176 fromUArgs.size=sizeof(fromUArgs); 2177 2178 toUArgs.converter=sourceCnv; 2179 toUArgs.flush=flush; 2180 toUArgs.offsets=NULL; 2181 toUArgs.source=s; 2182 toUArgs.sourceLimit=sourceLimit; 2183 toUArgs.targetLimit=pivotLimit; 2184 toUArgs.size=sizeof(toUArgs); 2185 2186 /* 2187 * TODO: Consider separating this function into two functions, 2188 * extracting exactly the conversion loop, 2189 * for readability and to reduce the set of visible variables. 2190 * 2191 * Otherwise stop using s and t from here on. 2192 */ 2193 s=t=NULL; 2194 2195 /* 2196 * conversion loop 2197 * 2198 * The sequence of steps in the loop may appear backward, 2199 * but the principle is simple: 2200 * In the chain of 2201 * source - sourceCnv overflow - pivot - targetCnv overflow - target 2202 * empty out later buffers before refilling them from earlier ones. 2203 * 2204 * The targetCnv overflow buffer is flushed out only once before the loop. 2205 */ 2206 for(;;) { 2207 /* 2208 * if(pivot not empty or error or replay or flush fromUnicode) { 2209 * fromUnicode(pivot -> target); 2210 * } 2211 * 2212 * For pivoting conversion; and for direct conversion for 2213 * error callback handling and flushing the replay buffer. 
2214 */ 2215 if( *pivotSource<*pivotTarget || 2216 U_FAILURE(*pErrorCode) || 2217 targetCnv->preFromULength<0 || 2218 fromUArgs.flush 2219 ) { 2220 fromUArgs.source=*pivotSource; 2221 fromUArgs.sourceLimit=*pivotTarget; 2222 _fromUnicodeWithCallback(&fromUArgs, pErrorCode); 2223 if(U_FAILURE(*pErrorCode)) { 2224 /* target overflow, or conversion error */ 2225 *pivotSource=(UChar *)fromUArgs.source; 2226 break; 2227 } 2228 2229 /* 2230 * _fromUnicodeWithCallback() must have consumed the pivot contents 2231 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() 2232 */ 2233 } 2234 2235 /* The pivot buffer is empty; reset it so we start at pivotStart. */ 2236 *pivotSource=*pivotTarget=pivotStart; 2237 2238 /* 2239 * if(sourceCnv overflow buffer not empty) { 2240 * move(sourceCnv overflow buffer -> pivot); 2241 * continue; 2242 * } 2243 */ 2244 /* output the sourceCnv overflow buffer */ 2245 if(sourceCnv->UCharErrorBufferLength>0) { 2246 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { 2247 /* U_BUFFER_OVERFLOW_ERROR */ 2248 *pErrorCode=U_ZERO_ERROR; 2249 } 2250 continue; 2251 } 2252 2253 /* 2254 * check for end of input and break if done 2255 * 2256 * Checking both flush and fromUArgs.flush ensures that the converters 2257 * have been called with the flush flag set if the ucnv_convertEx() 2258 * caller set it. 
2259 */ 2260 if( toUArgs.source==sourceLimit && 2261 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && 2262 (!flush || fromUArgs.flush) 2263 ) { 2264 /* done successfully */ 2265 break; 2266 } 2267 2268 /* 2269 * use direct conversion if available 2270 * but not if continuing a partial match 2271 * or flushing the toUnicode replay buffer 2272 */ 2273 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { 2274 if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2275 /* remove a warning that may be set by this function */ 2276 *pErrorCode=U_ZERO_ERROR; 2277 } 2278 convert(&fromUArgs, &toUArgs, pErrorCode); 2279 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2280 break; 2281 } else if(U_FAILURE(*pErrorCode)) { 2282 if(sourceCnv->toULength>0) { 2283 /* 2284 * Fall through to calling _toUnicodeWithCallback() 2285 * for callback handling. 2286 * 2287 * The pivot buffer will be reset with 2288 * *pivotSource=*pivotTarget=pivotStart; 2289 * which indicates a toUnicode error to the caller 2290 * (*pivotSource==pivotStart shows no pivot UChars consumed). 2291 */ 2292 } else { 2293 /* 2294 * Indicate a fromUnicode error to the caller 2295 * (*pivotSource>pivotStart shows some pivot UChars consumed). 2296 */ 2297 *pivotSource=*pivotTarget=pivotStart+1; 2298 /* 2299 * Loop around to calling _fromUnicodeWithCallbacks() 2300 * for callback handling. 2301 */ 2302 continue; 2303 } 2304 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { 2305 /* 2306 * No error, but the implementation requested to temporarily 2307 * fall back to pivoting. 2308 */ 2309 *pErrorCode=U_ZERO_ERROR; 2310 /* 2311 * The following else branches are almost identical to the end-of-input 2312 * handling in _toUnicodeWithCallback(). 2313 * Avoid calling it just for the end of input. 
2314 */ 2315 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ 2316 /* 2317 * the entire input stream is consumed 2318 * and there is a partial, truncated input sequence left 2319 */ 2320 2321 /* inject an error and continue with callback handling */ 2322 *pErrorCode=U_TRUNCATED_CHAR_FOUND; 2323 } else { 2324 /* input consumed */ 2325 if(flush) { 2326 /* reset the converters without calling the callback functions */ 2327 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); 2328 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); 2329 } 2330 2331 /* done successfully */ 2332 break; 2333 } 2334 } 2335 2336 /* 2337 * toUnicode(source -> pivot); 2338 * 2339 * For pivoting conversion; and for direct conversion for 2340 * error callback handling, continuing partial matches 2341 * and flushing the replay buffer. 2342 * 2343 * The pivot buffer is empty and reset. 2344 */ 2345 toUArgs.target=pivotStart; /* ==*pivotTarget */ 2346 /* toUArgs.targetLimit=pivotLimit; already set before the loop */ 2347 _toUnicodeWithCallback(&toUArgs, pErrorCode); 2348 *pivotTarget=toUArgs.target; 2349 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2350 /* pivot overflow: continue with the conversion loop */ 2351 *pErrorCode=U_ZERO_ERROR; 2352 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { 2353 /* conversion error, or there was nothing left to convert */ 2354 break; 2355 } 2356 /* 2357 * else: 2358 * _toUnicodeWithCallback() wrote into the pivot buffer, 2359 * continue with fromUnicode conversion. 2360 * 2361 * Set the fromUnicode flush flag if we flush and if toUnicode has 2362 * processed the end of the input. 
2363 */ 2364 if( flush && toUArgs.source==sourceLimit && 2365 sourceCnv->preToULength>=0 && 2366 sourceCnv->UCharErrorBufferLength==0 2367 ) { 2368 fromUArgs.flush=TRUE; 2369 } 2370 } 2371 2372 /* 2373 * The conversion loop is exited when one of the following is true: 2374 * - the entire source text has been converted successfully to the target buffer 2375 * - a target buffer overflow occurred 2376 * - a conversion error occurred 2377 */ 2378 2379 *source=toUArgs.source; 2380 *target=fromUArgs.target; 2381 2382 /* terminate the target buffer if possible */ 2383 if(flush && U_SUCCESS(*pErrorCode)) { 2384 if(*target!=targetLimit) { 2385 **target=0; 2386 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { 2387 *pErrorCode=U_ZERO_ERROR; 2388 } 2389 } else { 2390 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; 2391 } 2392 } 2393 } 2394 2395 /* internal implementation of ucnv_convert() etc. with preflighting */ 2396 static int32_t 2397 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, 2398 char *target, int32_t targetCapacity, 2399 const char *source, int32_t sourceLength, 2400 UErrorCode *pErrorCode) { 2401 UChar pivotBuffer[CHUNK_SIZE]; 2402 UChar *pivot, *pivot2; 2403 2404 char *myTarget; 2405 const char *sourceLimit; 2406 const char *targetLimit; 2407 int32_t targetLength=0; 2408 2409 /* set up */ 2410 if(sourceLength<0) { 2411 sourceLimit=uprv_strchr(source, 0); 2412 } else { 2413 sourceLimit=source+sourceLength; 2414 } 2415 2416 /* if there is no input data, we're done */ 2417 if(source==sourceLimit) { 2418 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2419 } 2420 2421 pivot=pivot2=pivotBuffer; 2422 myTarget=target; 2423 targetLength=0; 2424 2425 if(targetCapacity>0) { 2426 /* perform real conversion */ 2427 targetLimit=target+targetCapacity; 2428 ucnv_convertEx(outConverter, inConverter, 2429 &myTarget, targetLimit, 2430 &source, sourceLimit, 2431 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2432 FALSE, 2433 TRUE, 2434 
pErrorCode); 2435 targetLength=(int32_t)(myTarget-target); 2436 } 2437 2438 /* 2439 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing 2440 * to it but continue the conversion in order to store in targetCapacity 2441 * the number of bytes that was required. 2442 */ 2443 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) 2444 { 2445 char targetBuffer[CHUNK_SIZE]; 2446 2447 targetLimit=targetBuffer+CHUNK_SIZE; 2448 do { 2449 *pErrorCode=U_ZERO_ERROR; 2450 myTarget=targetBuffer; 2451 ucnv_convertEx(outConverter, inConverter, 2452 &myTarget, targetLimit, 2453 &source, sourceLimit, 2454 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, 2455 FALSE, 2456 TRUE, 2457 pErrorCode); 2458 targetLength+=(int32_t)(myTarget-targetBuffer); 2459 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); 2460 2461 /* done with preflighting, set warnings and errors as appropriate */ 2462 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); 2463 } 2464 2465 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ 2466 return targetLength; 2467 } 2468 2469 U_CAPI int32_t U_EXPORT2 2470 ucnv_convert(const char *toConverterName, const char *fromConverterName, 2471 char *target, int32_t targetCapacity, 2472 const char *source, int32_t sourceLength, 2473 UErrorCode *pErrorCode) { 2474 UConverter in, out; /* stack-allocated */ 2475 UConverter *inConverter, *outConverter; 2476 int32_t targetLength; 2477 2478 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2479 return 0; 2480 } 2481 2482 if( source==NULL || sourceLength<-1 || 2483 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2484 ) { 2485 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2486 return 0; 2487 } 2488 2489 /* if there is no input data, we're done */ 2490 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2491 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2492 } 2493 2494 /* create the converters */ 2495 
inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); 2496 if(U_FAILURE(*pErrorCode)) { 2497 return 0; 2498 } 2499 2500 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); 2501 if(U_FAILURE(*pErrorCode)) { 2502 ucnv_close(inConverter); 2503 return 0; 2504 } 2505 2506 targetLength=ucnv_internalConvert(outConverter, inConverter, 2507 target, targetCapacity, 2508 source, sourceLength, 2509 pErrorCode); 2510 2511 ucnv_close(inConverter); 2512 ucnv_close(outConverter); 2513 2514 return targetLength; 2515 } 2516 2517 /* @internal */ 2518 static int32_t 2519 ucnv_convertAlgorithmic(UBool convertToAlgorithmic, 2520 UConverterType algorithmicType, 2521 UConverter *cnv, 2522 char *target, int32_t targetCapacity, 2523 const char *source, int32_t sourceLength, 2524 UErrorCode *pErrorCode) { 2525 UConverter algoConverterStatic; /* stack-allocated */ 2526 UConverter *algoConverter, *to, *from; 2527 int32_t targetLength; 2528 2529 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2530 return 0; 2531 } 2532 2533 if( cnv==NULL || source==NULL || sourceLength<-1 || 2534 targetCapacity<0 || (targetCapacity>0 && target==NULL) 2535 ) { 2536 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2537 return 0; 2538 } 2539 2540 /* if there is no input data, we're done */ 2541 if(sourceLength==0 || (sourceLength<0 && *source==0)) { 2542 return u_terminateChars(target, targetCapacity, 0, pErrorCode); 2543 } 2544 2545 /* create the algorithmic converter */ 2546 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, 2547 "", 0, pErrorCode); 2548 if(U_FAILURE(*pErrorCode)) { 2549 return 0; 2550 } 2551 2552 /* reset the other converter */ 2553 if(convertToAlgorithmic) { 2554 /* cnv->Unicode->algo */ 2555 ucnv_resetToUnicode(cnv); 2556 to=algoConverter; 2557 from=cnv; 2558 } else { 2559 /* algo->Unicode->cnv */ 2560 ucnv_resetFromUnicode(cnv); 2561 from=algoConverter; 2562 to=cnv; 2563 } 2564 2565 targetLength=ucnv_internalConvert(to, from, 2566 
target, targetCapacity, 2567 source, sourceLength, 2568 pErrorCode); 2569 2570 ucnv_close(algoConverter); 2571 2572 return targetLength; 2573 } 2574 2575 U_CAPI int32_t U_EXPORT2 2576 ucnv_toAlgorithmic(UConverterType algorithmicType, 2577 UConverter *cnv, 2578 char *target, int32_t targetCapacity, 2579 const char *source, int32_t sourceLength, 2580 UErrorCode *pErrorCode) { 2581 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, 2582 target, targetCapacity, 2583 source, sourceLength, 2584 pErrorCode); 2585 } 2586 2587 U_CAPI int32_t U_EXPORT2 2588 ucnv_fromAlgorithmic(UConverter *cnv, 2589 UConverterType algorithmicType, 2590 char *target, int32_t targetCapacity, 2591 const char *source, int32_t sourceLength, 2592 UErrorCode *pErrorCode) { 2593 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, 2594 target, targetCapacity, 2595 source, sourceLength, 2596 pErrorCode); 2597 } 2598 2599 U_CAPI UConverterType U_EXPORT2 2600 ucnv_getType(const UConverter* converter) 2601 { 2602 int8_t type = converter->sharedData->staticData->conversionType; 2603 #if !UCONFIG_NO_LEGACY_CONVERSION 2604 if(type == UCNV_MBCS) { 2605 return ucnv_MBCSGetType(converter); 2606 } 2607 #endif 2608 return (UConverterType)type; 2609 } 2610 2611 U_CAPI void U_EXPORT2 2612 ucnv_getStarters(const UConverter* converter, 2613 UBool starters[256], 2614 UErrorCode* err) 2615 { 2616 if (err == NULL || U_FAILURE(*err)) { 2617 return; 2618 } 2619 2620 if(converter->sharedData->impl->getStarters != NULL) { 2621 converter->sharedData->impl->getStarters(converter, starters, err); 2622 } else { 2623 *err = U_ILLEGAL_ARGUMENT_ERROR; 2624 } 2625 } 2626 2627 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) 2628 { 2629 UErrorCode errorCode; 2630 const char *name; 2631 int32_t i; 2632 2633 if(cnv==NULL) { 2634 return NULL; 2635 } 2636 2637 errorCode=U_ZERO_ERROR; 2638 name=ucnv_getName(cnv, &errorCode); 2639 if(U_FAILURE(errorCode)) { 2640 return NULL; 2641 } 2642 2643 
for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i) 2644 { 2645 if(0==uprv_strcmp(name, ambiguousConverters[i].name)) 2646 { 2647 return ambiguousConverters+i; 2648 } 2649 } 2650 2651 return NULL; 2652 } 2653 2654 U_CAPI void U_EXPORT2 2655 ucnv_fixFileSeparator(const UConverter *cnv, 2656 UChar* source, 2657 int32_t sourceLength) { 2658 const UAmbiguousConverter *a; 2659 int32_t i; 2660 UChar variant5c; 2661 2662 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) 2663 { 2664 return; 2665 } 2666 2667 variant5c=a->variant5c; 2668 for(i=0; i<sourceLength; ++i) { 2669 if(source[i]==variant5c) { 2670 source[i]=0x5c; 2671 } 2672 } 2673 } 2674 2675 U_CAPI UBool U_EXPORT2 2676 ucnv_isAmbiguous(const UConverter *cnv) { 2677 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); 2678 } 2679 2680 U_CAPI void U_EXPORT2 2681 ucnv_setFallback(UConverter *cnv, UBool usesFallback) 2682 { 2683 cnv->useFallback = usesFallback; 2684 } 2685 2686 U_CAPI UBool U_EXPORT2 2687 ucnv_usesFallback(const UConverter *cnv) 2688 { 2689 return cnv->useFallback; 2690 } 2691 2692 U_CAPI void U_EXPORT2 2693 ucnv_getInvalidChars (const UConverter * converter, 2694 char *errBytes, 2695 int8_t * len, 2696 UErrorCode * err) 2697 { 2698 if (err == NULL || U_FAILURE(*err)) 2699 { 2700 return; 2701 } 2702 if (len == NULL || errBytes == NULL || converter == NULL) 2703 { 2704 *err = U_ILLEGAL_ARGUMENT_ERROR; 2705 return; 2706 } 2707 if (*len < converter->invalidCharLength) 2708 { 2709 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2710 return; 2711 } 2712 if ((*len = converter->invalidCharLength) > 0) 2713 { 2714 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); 2715 } 2716 } 2717 2718 U_CAPI void U_EXPORT2 2719 ucnv_getInvalidUChars (const UConverter * converter, 2720 UChar *errChars, 2721 int8_t * len, 2722 UErrorCode * err) 2723 { 2724 if (err == NULL || U_FAILURE(*err)) 2725 { 2726 return; 2727 } 2728 if (len == NULL || errChars == NULL || converter == NULL) 2729 { 2730 *err = 
U_ILLEGAL_ARGUMENT_ERROR; 2731 return; 2732 } 2733 if (*len < converter->invalidUCharLength) 2734 { 2735 *err = U_INDEX_OUTOFBOUNDS_ERROR; 2736 return; 2737 } 2738 if ((*len = converter->invalidUCharLength) > 0) 2739 { 2740 u_memcpy (errChars, converter->invalidUCharBuffer, *len); 2741 } 2742 } 2743 2744 #define SIG_MAX_LEN 5 2745 2746 U_CAPI const char* U_EXPORT2 2747 ucnv_detectUnicodeSignature( const char* source, 2748 int32_t sourceLength, 2749 int32_t* signatureLength, 2750 UErrorCode* pErrorCode) { 2751 int32_t dummy; 2752 2753 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN 2754 * bytes we don't misdetect something 2755 */ 2756 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; 2757 int i = 0; 2758 2759 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ 2760 return NULL; 2761 } 2762 2763 if(source == NULL || sourceLength < -1){ 2764 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 2765 return NULL; 2766 } 2767 2768 if(signatureLength == NULL) { 2769 signatureLength = &dummy; 2770 } 2771 2772 if(sourceLength==-1){ 2773 sourceLength=(int32_t)uprv_strlen(source); 2774 } 2775 2776 2777 while(i<sourceLength&& i<SIG_MAX_LEN){ 2778 start[i]=source[i]; 2779 i++; 2780 } 2781 2782 if(start[0] == '\xFE' && start[1] == '\xFF') { 2783 *signatureLength=2; 2784 return "UTF-16BE"; 2785 } else if(start[0] == '\xFF' && start[1] == '\xFE') { 2786 if(start[2] == '\x00' && start[3] =='\x00') { 2787 *signatureLength=4; 2788 return "UTF-32LE"; 2789 } else { 2790 *signatureLength=2; 2791 return "UTF-16LE"; 2792 } 2793 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { 2794 *signatureLength=3; 2795 return "UTF-8"; 2796 } else if(start[0] == '\x00' && start[1] == '\x00' && 2797 start[2] == '\xFE' && start[3]=='\xFF') { 2798 *signatureLength=4; 2799 return "UTF-32BE"; 2800 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { 2801 *signatureLength=3; 2802 return "SCSU"; 2803 } else if(start[0] == '\xFB' && start[1] == 
'\xEE' && start[2] == '\x28') { 2804 *signatureLength=3; 2805 return "BOCU-1"; 2806 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { 2807 /* 2808 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ 2809 * depending on the second UTF-16 code unit. 2810 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF 2811 * if it occurs. 2812 * 2813 * So far we have +/v 2814 */ 2815 if(start[3] == '\x38' && start[4] == '\x2D') { 2816 /* 5 bytes +/v8- */ 2817 *signatureLength=5; 2818 return "UTF-7"; 2819 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { 2820 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ 2821 *signatureLength=4; 2822 return "UTF-7"; 2823 } 2824 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ 2825 *signatureLength=4; 2826 return "UTF-EBCDIC"; 2827 } 2828 2829 2830 /* no known Unicode signature byte sequence recognized */ 2831 *signatureLength=0; 2832 return NULL; 2833 } 2834 2835 U_CAPI int32_t U_EXPORT2 2836 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) 2837 { 2838 if(status == NULL || U_FAILURE(*status)){ 2839 return -1; 2840 } 2841 if(cnv == NULL){ 2842 *status = U_ILLEGAL_ARGUMENT_ERROR; 2843 return -1; 2844 } 2845 2846 if(cnv->preFromUFirstCP >= 0){ 2847 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; 2848 }else if(cnv->preFromULength < 0){ 2849 return -cnv->preFromULength ; 2850 }else if(cnv->fromUChar32 > 0){ 2851 return 1; 2852 } 2853 return 0; 2854 2855 } 2856 2857 U_CAPI int32_t U_EXPORT2 2858 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ 2859 2860 if(status == NULL || U_FAILURE(*status)){ 2861 return -1; 2862 } 2863 if(cnv == NULL){ 2864 *status = U_ILLEGAL_ARGUMENT_ERROR; 2865 return -1; 2866 } 2867 2868 if(cnv->preToULength > 0){ 2869 return cnv->preToULength ; 2870 }else if(cnv->preToULength < 0){ 2871 return -cnv->preToULength; 2872 }else 
if(cnv->toULength > 0){ 2873 return cnv->toULength; 2874 } 2875 return 0; 2876 } 2877 2878 U_CAPI UBool U_EXPORT2 2879 ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ 2880 if (U_FAILURE(*status)) { 2881 return FALSE; 2882 } 2883 2884 if (cnv == NULL) { 2885 *status = U_ILLEGAL_ARGUMENT_ERROR; 2886 return FALSE; 2887 } 2888 2889 switch (ucnv_getType(cnv)) { 2890 case UCNV_SBCS: 2891 case UCNV_DBCS: 2892 case UCNV_UTF32_BigEndian: 2893 case UCNV_UTF32_LittleEndian: 2894 case UCNV_UTF32: 2895 case UCNV_US_ASCII: 2896 return TRUE; 2897 default: 2898 return FALSE; 2899 } 2900 } 2901 #endif 2902 2903 /* 2904 * Hey, Emacs, please set the following: 2905 * 2906 * Local Variables: 2907 * indent-tabs-mode: nil 2908 * End: 2909 * 2910 */ 2911