// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 ********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1996-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 *
 *  ucnv_bld.cpp:
 *
 *  Defines functions that are used in the creation/initialization/deletion
 *  of converters and related structures.
 *  uses uconv_io.h routines to access disk information
 *  is used by ucnv.h to implement public API create/delete/flushCache routines
 * Modification History:
 *
 *   Date        Name        Description
 *
 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
 *   06/29/2000  helena      Major rewrite of the callback interface.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/putil.h"
#include "unicode/udata.h"
#include "unicode/ucnv.h"
#include "unicode/uloc.h"
#include "mutex.h"
#include "putilimp.h"
#include "uassert.h"
#include "utracimp.h"
#include "ucnv_io.h"
#include "ucnv_bld.h"
#include "ucnvmbcs.h"
#include "ucnv_ext.h"
#include "ucnv_cnv.h"
#include "ucnv_imp.h"
#include "uhash.h"
#include "umutex.h"
#include "cstring.h"
#include "cmemory.h"
#include "ucln_cmn.h"
#include "ustr_cnv.h"


/*
 * Debug logging hook for the converter cache.  It is compiled out by default
 * (the "#if 0" branch is never taken), so UCNV_DEBUG_LOG(...) expands to nothing.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
# define UCNV_DEBUG_LOG(x,y,z)
#endif

/*
 * Static shared-data structures, one slot per converter type.
 * The functions in this file index the table with a UConverterType value
 * (converterData[type]), so the order of the entries is significant and must
 * not be changed.  A NULL slot means that no built-in shared data exists for
 * that type in the current build configuration (see the UCONFIG_* switches).
 */
static const UConverterSharedData * const
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    NULL, NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_MBCSData,
#endif

    &_Latin1Data,
    &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL,
#else
    &_UTF32BEData, &_UTF32LEData,
#endif
    NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_ISO2022Data,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
#else
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
#endif

#if UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_SCSUData,
#endif


#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_ISCIIData,
#endif

    &_ASCIIData,
#if UCONFIG_ONLY_HTML_CONVERSION
    NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
#else
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
#endif

#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
    NULL,
#else
    &_CompoundTextData
#endif
};

/* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
   Also the name should be in lower case and all spaces, dashes and underscores
   removed
*/
static struct {
    const char *name;           /* stripped, lower-case converter name used as the search key */
    const UConverterType type;  /* converter type; used as an index into converterData[] */
} const cnvNameType[] = {
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "bocu1", UCNV_BOCU1 },
    { "cesu8", UCNV_CESU8 },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
    { "hz",UCNV_HZ },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "imapmailboxname", UCNV_IMAP_MAILBOX },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
    { "iscii", UCNV_ISCII },
#endif
#if !UCONFIG_NO_LEGACY_CONVERSION
    { "iso2022", UCNV_ISO_2022 },
#endif
    { "iso88591", UCNV_LATIN_1 },
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
    { "lmbcs1", UCNV_LMBCS_1 },
    { "lmbcs11",UCNV_LMBCS_11 },
    { "lmbcs16",UCNV_LMBCS_16 },
    { "lmbcs17",UCNV_LMBCS_17 },
    { "lmbcs18",UCNV_LMBCS_18 },
    { "lmbcs19",UCNV_LMBCS_19 },
    { "lmbcs2", UCNV_LMBCS_2 },
    { "lmbcs3", UCNV_LMBCS_3 },
    { "lmbcs4", UCNV_LMBCS_4 },
    { "lmbcs5", UCNV_LMBCS_5 },
    { "lmbcs6", UCNV_LMBCS_6 },
    { "lmbcs8", UCNV_LMBCS_8 },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "scsu", UCNV_SCSU },
#endif
    { "usascii", UCNV_US_ASCII },
    { "utf16", UCNV_UTF16 },
    { "utf16be", UCNV_UTF16_BigEndian },
    { "utf16le", UCNV_UTF16_LittleEndian },
    /* The "opposite"/"platform" endian aliases resolve at compile time from U_IS_BIG_ENDIAN. */
#if U_IS_BIG_ENDIAN
    { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
    { "utf16platformendian", UCNV_UTF16_BigEndian },
#else
    { "utf16oppositeendian", UCNV_UTF16_BigEndian},
    { "utf16platformendian", UCNV_UTF16_LittleEndian },
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "utf32", UCNV_UTF32 },
    { "utf32be", UCNV_UTF32_BigEndian },
    { "utf32le", UCNV_UTF32_LittleEndian },
#if U_IS_BIG_ENDIAN
    { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
    { "utf32platformendian", UCNV_UTF32_BigEndian },
#else
    { "utf32oppositeendian", UCNV_UTF32_BigEndian },
    { "utf32platformendian", UCNV_UTF32_LittleEndian },
#endif
#endif
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "utf7", UCNV_UTF7 },
#endif
    { "utf8", UCNV_UTF8 },
#if !UCONFIG_ONLY_HTML_CONVERSION
    { "x11compoundtext", UCNV_COMPOUND_TEXT}
#endif
};


/*initializes some global variables */
/* Cache of loaded shared converter data; created lazily by ucnv_shareConverterData(). */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER;  /* Mutex for synchronizing cnv cache access. */
                                                    /*  Note:  the global mutex is used for      */
                                                    /*         reference count updates.          */

/* List of available converter names and its length; released by ucnv_flushAvailableConverterCache(). */
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;
static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER;

#if !U_CHARSET_IS_UTF8

/* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *gDefaultConverterName = NULL;

/*
If the default converter is an algorithmic converter, this is the cached value.
We don't cache a full UConverter and clone it because ucnv_clone doesn't have
less overhead than an algorithmic open. We don't cache non-algorithmic converters
because ucnv_flushCache must be able to unload the default converter and its table.
*/
static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;

/* Does gDefaultConverterName have a converter option and require extra parsing? */
static UBool gDefaultConverterContainsOption;

#endif  /* !U_CHARSET_IS_UTF8 */

/* Data item type passed to udata_openChoice() when loading a converter file. */
static const char DATA_TYPE[] = "cnv";

/* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup().
 *                       If it is ever to be called from elsewhere, synchronization
 *                       will need to be considered.
229 */ 230 static void 231 ucnv_flushAvailableConverterCache() { 232 gAvailableConverterCount = 0; 233 if (gAvailableConverters) { 234 uprv_free((char **)gAvailableConverters); 235 gAvailableConverters = NULL; 236 } 237 gAvailableConvertersInitOnce.reset(); 238 } 239 240 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 241 /* in use by open converters. */ 242 /* Not thread safe. */ 243 /* Not supported API. */ 244 static UBool U_CALLCONV ucnv_cleanup(void) { 245 ucnv_flushCache(); 246 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 247 uhash_close(SHARED_DATA_HASHTABLE); 248 SHARED_DATA_HASHTABLE = NULL; 249 } 250 251 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 252 ucnv_flushAvailableConverterCache(); 253 254 #if !U_CHARSET_IS_UTF8 255 gDefaultConverterName = NULL; 256 gDefaultConverterNameBuffer[0] = 0; 257 gDefaultConverterContainsOption = FALSE; 258 gDefaultAlgorithmicSharedData = NULL; 259 #endif 260 261 return (SHARED_DATA_HASHTABLE == NULL); 262 } 263 264 U_CAPI void U_EXPORT2 265 ucnv_enableCleanup() { 266 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 267 } 268 269 static UBool U_CALLCONV 270 isCnvAcceptable(void * /*context*/, 271 const char * /*type*/, const char * /*name*/, 272 const UDataInfo *pInfo) { 273 return (UBool)( 274 pInfo->size>=20 && 275 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 276 pInfo->charsetFamily==U_CHARSET_FAMILY && 277 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 278 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 279 pInfo->dataFormat[1]==0x6e && 280 pInfo->dataFormat[2]==0x76 && 281 pInfo->dataFormat[3]==0x74 && 282 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 283 } 284 285 /** 286 * Un flatten shared data from a UDATA.. 
287 */ 288 static UConverterSharedData* 289 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 290 { 291 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 292 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 293 const UConverterStaticData *source = (const UConverterStaticData *) raw; 294 UConverterSharedData *data; 295 UConverterType type = (UConverterType)source->conversionType; 296 297 if(U_FAILURE(*status)) 298 return NULL; 299 300 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 301 converterData[type] == NULL || 302 !converterData[type]->isReferenceCounted || 303 converterData[type]->referenceCounter != 1 || 304 source->structSize != sizeof(UConverterStaticData)) 305 { 306 *status = U_INVALID_TABLE_FORMAT; 307 return NULL; 308 } 309 310 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 311 if(data == NULL) { 312 *status = U_MEMORY_ALLOCATION_ERROR; 313 return NULL; 314 } 315 316 /* copy initial values from the static structure for this type */ 317 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 318 319 data->staticData = source; 320 321 data->sharedDataCached = FALSE; 322 323 /* fill in fields from the loaded data */ 324 data->dataMemory = (void*)pData; /* for future use */ 325 326 if(data->impl->load != NULL) { 327 data->impl->load(data, pArgs, raw + source->structSize, status); 328 if(U_FAILURE(*status)) { 329 uprv_free(data); 330 return NULL; 331 } 332 } 333 return data; 334 } 335 336 /*Takes an alias name gets an actual converter file name 337 *goes to disk and opens it. 
338 *allocates the memory and returns a new UConverter object 339 */ 340 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 341 { 342 UDataMemory *data; 343 UConverterSharedData *sharedData; 344 345 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 346 347 if (U_FAILURE (*err)) { 348 UTRACE_EXIT_STATUS(*err); 349 return NULL; 350 } 351 352 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 353 354 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 355 if(U_FAILURE(*err)) 356 { 357 UTRACE_EXIT_STATUS(*err); 358 return NULL; 359 } 360 361 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 362 if(U_FAILURE(*err)) 363 { 364 udata_close(data); 365 UTRACE_EXIT_STATUS(*err); 366 return NULL; 367 } 368 369 /* 370 * TODO Store pkg in a field in the shared data so that delta-only converters 371 * can load base converters from the same package. 372 * If the pkg name is longer than the field, then either do not load the converter 373 * in the first place, or just set the pkg field to "". 374 */ 375 376 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 377 return sharedData; 378 } 379 380 /*returns a converter type from a string 381 */ 382 static const UConverterSharedData * 383 getAlgorithmicTypeFromName(const char *realName) 384 { 385 uint32_t mid, start, limit; 386 uint32_t lastMid; 387 int result; 388 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 389 390 /* Lower case and remove ignoreable characters. */ 391 ucnv_io_stripForCompare(strippedName, realName); 392 393 /* do a binary search for the alias */ 394 start = 0; 395 limit = UPRV_LENGTHOF(cnvNameType); 396 mid = limit; 397 lastMid = UINT32_MAX; 398 399 for (;;) { 400 mid = (uint32_t)((start + limit) / 2); 401 if (lastMid == mid) { /* Have we moved? */ 402 break; /* We haven't moved, and it wasn't found. 
*/ 403 } 404 lastMid = mid; 405 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 406 407 if (result < 0) { 408 limit = mid; 409 } else if (result > 0) { 410 start = mid; 411 } else { 412 return converterData[cnvNameType[mid].type]; 413 } 414 } 415 416 return NULL; 417 } 418 419 /* 420 * Based on the number of known converters, this determines how many times larger 421 * the shared data hash table should be. When on small platforms, or just a couple 422 * of converters are used, this number should be 2. When memory is plentiful, or 423 * when ucnv_countAvailable is ever used with a lot of available converters, 424 * this should be 4. 425 * Larger numbers reduce the number of hash collisions, but use more memory. 426 */ 427 #define UCNV_CACHE_LOAD_FACTOR 2 428 429 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 430 /* Will always be called with the cnvCacheMutex alrady being held */ 431 /* by the calling function. */ 432 /* Stores the shared data in the SHARED_DATA_HASHTABLE 433 * @param data The shared data 434 */ 435 static void 436 ucnv_shareConverterData(UConverterSharedData * data) 437 { 438 UErrorCode err = U_ZERO_ERROR; 439 /*Lazy evaluates the Hashtable itself */ 440 /*void *sanity = NULL;*/ 441 442 if (SHARED_DATA_HASHTABLE == NULL) 443 { 444 SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, 445 ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, 446 &err); 447 ucnv_enableCleanup(); 448 449 if (U_FAILURE(err)) 450 return; 451 } 452 453 /* ### check to see if the element is not already there! 
*/ 454 455 /* 456 sanity = ucnv_getSharedConverterData (data->staticData->name); 457 if(sanity != NULL) 458 { 459 UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); 460 } 461 UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); 462 */ 463 464 /* Mark it shared */ 465 data->sharedDataCached = TRUE; 466 467 uhash_put(SHARED_DATA_HASHTABLE, 468 (void*) data->staticData->name, /* Okay to cast away const as long as 469 keyDeleter == NULL */ 470 data, 471 &err); 472 UCNV_DEBUG_LOG("put", data->staticData->name,data); 473 474 } 475 476 /* Look up a converter name in the shared data cache. */ 477 /* cnvCacheMutex must be held by the caller to protect the hash table. */ 478 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 479 * @param name The name of the shared data 480 * @return the shared data from the SHARED_DATA_HASHTABLE 481 */ 482 static UConverterSharedData * 483 ucnv_getSharedConverterData(const char *name) 484 { 485 /*special case when no Table has yet been created we return NULL */ 486 if (SHARED_DATA_HASHTABLE == NULL) 487 { 488 return NULL; 489 } 490 else 491 { 492 UConverterSharedData *rc; 493 494 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 495 UCNV_DEBUG_LOG("get",name,rc); 496 return rc; 497 } 498 } 499 500 /*frees the string of memory blocks associates with a sharedConverter 501 *if and only if the referenceCounter == 0 502 */ 503 /* Deletes (frees) the Shared data it's passed. 
first it checks the referenceCounter to 504 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and 505 * returns TRUE, 506 * otherwise returns FALSE 507 * @param sharedConverterData The shared data 508 * @return if not it frees all the memory stemming from sharedConverterData and 509 * returns TRUE, otherwise returns FALSE 510 */ 511 static UBool 512 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) 513 { 514 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); 515 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); 516 517 if (deadSharedData->referenceCounter > 0) { 518 UTRACE_EXIT_VALUE((int32_t)FALSE); 519 return FALSE; 520 } 521 522 if (deadSharedData->impl->unload != NULL) { 523 deadSharedData->impl->unload(deadSharedData); 524 } 525 526 if(deadSharedData->dataMemory != NULL) 527 { 528 UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; 529 udata_close(data); 530 } 531 532 uprv_free(deadSharedData); 533 534 UTRACE_EXIT_VALUE((int32_t)TRUE); 535 return TRUE; 536 } 537 538 /** 539 * Load a non-algorithmic converter. 540 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 
541 */ 542 UConverterSharedData * 543 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 544 UConverterSharedData *mySharedConverterData; 545 546 if(err == NULL || U_FAILURE(*err)) { 547 return NULL; 548 } 549 550 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 551 /* application-provided converters are not currently cached */ 552 return createConverterFromFile(pArgs, err); 553 } 554 555 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 556 if (mySharedConverterData == NULL) 557 { 558 /*Not cached, we need to stream it in from file */ 559 mySharedConverterData = createConverterFromFile(pArgs, err); 560 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 561 { 562 return NULL; 563 } 564 else if (!pArgs->onlyTestIsLoadable) 565 { 566 /* share it with other library clients */ 567 ucnv_shareConverterData(mySharedConverterData); 568 } 569 } 570 else 571 { 572 /* The data for this converter was already in the cache. */ 573 /* Update the reference counter on the shared data: one more client */ 574 mySharedConverterData->referenceCounter++; 575 } 576 577 return mySharedConverterData; 578 } 579 580 /** 581 * Unload a non-algorithmic converter. 582 * It must be sharedData->isReferenceCounted 583 * and this function must be called inside umtx_lock(&cnvCacheMutex). 
584 */ 585 U_CAPI void 586 ucnv_unload(UConverterSharedData *sharedData) { 587 if(sharedData != NULL) { 588 if (sharedData->referenceCounter > 0) { 589 sharedData->referenceCounter--; 590 } 591 592 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 593 ucnv_deleteSharedConverterData(sharedData); 594 } 595 } 596 } 597 598 U_CFUNC void 599 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 600 { 601 if(sharedData != NULL && sharedData->isReferenceCounted) { 602 umtx_lock(&cnvCacheMutex); 603 ucnv_unload(sharedData); 604 umtx_unlock(&cnvCacheMutex); 605 } 606 } 607 608 U_CFUNC void 609 ucnv_incrementRefCount(UConverterSharedData *sharedData) 610 { 611 if(sharedData != NULL && sharedData->isReferenceCounted) { 612 umtx_lock(&cnvCacheMutex); 613 sharedData->referenceCounter++; 614 umtx_unlock(&cnvCacheMutex); 615 } 616 } 617 618 /* 619 * *pPieces must be initialized. 620 * The name without options will be copied to pPieces->cnvName. 621 * The locale and options will be copied to pPieces only if present in inName, 622 * otherwise the existing values in pPieces remain. 623 * *pArgs will be set to the pPieces values. 624 */ 625 static void 626 parseConverterOptions(const char *inName, 627 UConverterNamePieces *pPieces, 628 UConverterLoadArgs *pArgs, 629 UErrorCode *err) 630 { 631 char *cnvName = pPieces->cnvName; 632 char c; 633 int32_t len = 0; 634 635 pArgs->name=inName; 636 pArgs->locale=pPieces->locale; 637 pArgs->options=pPieces->options; 638 639 /* copy the converter name itself to cnvName */ 640 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 641 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 642 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 643 pPieces->cnvName[0]=0; 644 return; 645 } 646 *cnvName++=c; 647 inName++; 648 } 649 *cnvName=0; 650 pArgs->name=pPieces->cnvName; 651 652 /* parse options. No more name copying should occur. 
*/ 653 while((c=*inName)!=0) { 654 if(c==UCNV_OPTION_SEP_CHAR) { 655 ++inName; 656 } 657 658 /* inName is behind an option separator */ 659 if(uprv_strncmp(inName, "locale=", 7)==0) { 660 /* do not modify locale itself in case we have multiple locale options */ 661 char *dest=pPieces->locale; 662 663 /* copy the locale option value */ 664 inName+=7; 665 len=0; 666 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 667 ++inName; 668 669 if(++len>=ULOC_FULLNAME_CAPACITY) { 670 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 671 pPieces->locale[0]=0; 672 return; 673 } 674 675 *dest++=c; 676 } 677 *dest=0; 678 } else if(uprv_strncmp(inName, "version=", 8)==0) { 679 /* copy the version option value into bits 3..0 of pPieces->options */ 680 inName+=8; 681 c=*inName; 682 if(c==0) { 683 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 684 return; 685 } else if((uint8_t)(c-'0')<10) { 686 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 687 ++inName; 688 } 689 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 690 inName+=8; 691 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 692 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 693 } else { 694 /* ignore any other options until we define some */ 695 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 696 } 697 if(c==0) { 698 return; 699 } 700 } 701 } 702 } 703 704 /*Logic determines if the converter is Algorithmic AND/OR cached 705 *depending on that: 706 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 707 * -Get it from a Hashtable (Data=X, Cached=TRUE) 708 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 709 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 710 */ 711 U_CFUNC UConverterSharedData * 712 ucnv_loadSharedData(const char *converterName, 713 UConverterNamePieces *pPieces, 714 UConverterLoadArgs *pArgs, 715 UErrorCode * err) { 
716 UConverterNamePieces stackPieces; 717 UConverterLoadArgs stackArgs; 718 UConverterSharedData *mySharedConverterData = NULL; 719 UErrorCode internalErrorCode = U_ZERO_ERROR; 720 UBool mayContainOption = TRUE; 721 UBool checkForAlgorithmic = TRUE; 722 723 if (U_FAILURE (*err)) { 724 return NULL; 725 } 726 727 if(pPieces == NULL) { 728 if(pArgs != NULL) { 729 /* 730 * Bad: We may set pArgs pointers to stackPieces fields 731 * which will be invalid after this function returns. 732 */ 733 *err = U_INTERNAL_PROGRAM_ERROR; 734 return NULL; 735 } 736 pPieces = &stackPieces; 737 } 738 if(pArgs == NULL) { 739 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 740 stackArgs.size = (int32_t)sizeof(stackArgs); 741 pArgs = &stackArgs; 742 } 743 744 pPieces->cnvName[0] = 0; 745 pPieces->locale[0] = 0; 746 pPieces->options = 0; 747 748 pArgs->name = converterName; 749 pArgs->locale = pPieces->locale; 750 pArgs->options = pPieces->options; 751 752 /* In case "name" is NULL we want to open the default converter. */ 753 if (converterName == NULL) { 754 #if U_CHARSET_IS_UTF8 755 pArgs->name = "UTF-8"; 756 return (UConverterSharedData *)converterData[UCNV_UTF8]; 757 #else 758 /* Call ucnv_getDefaultName first to query the name from the OS. */ 759 pArgs->name = ucnv_getDefaultName(); 760 if (pArgs->name == NULL) { 761 *err = U_MISSING_RESOURCE_ERROR; 762 return NULL; 763 } 764 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 765 checkForAlgorithmic = FALSE; 766 mayContainOption = gDefaultConverterContainsOption; 767 /* the default converter name is already canonical */ 768 #endif 769 } 770 else if(UCNV_FAST_IS_UTF8(converterName)) { 771 /* fastpath for UTF-8 */ 772 pArgs->name = "UTF-8"; 773 return (UConverterSharedData *)converterData[UCNV_UTF8]; 774 } 775 else { 776 /* separate the converter name from the options */ 777 parseConverterOptions(converterName, pPieces, pArgs, err); 778 if (U_FAILURE(*err)) { 779 /* Very bad name used. 
*/ 780 return NULL; 781 } 782 783 /* get the canonical converter name */ 784 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 785 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 786 /* 787 * set the input name in case the converter was added 788 * without updating the alias table, or when there is no alias table 789 */ 790 pArgs->name = pPieces->cnvName; 791 } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { 792 *err = U_AMBIGUOUS_ALIAS_WARNING; 793 } 794 } 795 796 /* separate the converter name from the options */ 797 if(mayContainOption && pArgs->name != pPieces->cnvName) { 798 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 799 } 800 801 /* get the shared data for an algorithmic converter, if it is one */ 802 if (checkForAlgorithmic) { 803 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 804 } 805 if (mySharedConverterData == NULL) 806 { 807 /* it is a data-based converter, get its shared data. */ 808 /* Hold the cnvCacheMutex through the whole process of checking the */ 809 /* converter data cache, and adding new entries to the cache */ 810 /* to prevent other threads from modifying the cache during the */ 811 /* process. 
*/ 812 pArgs->nestedLoads=1; 813 pArgs->pkg=NULL; 814 815 umtx_lock(&cnvCacheMutex); 816 mySharedConverterData = ucnv_load(pArgs, err); 817 umtx_unlock(&cnvCacheMutex); 818 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 819 { 820 return NULL; 821 } 822 } 823 824 return mySharedConverterData; 825 } 826 827 U_CAPI UConverter * 828 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 829 { 830 UConverterNamePieces stackPieces; 831 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 832 UConverterSharedData *mySharedConverterData; 833 834 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 835 836 if(U_SUCCESS(*err)) { 837 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 838 839 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 840 841 myUConverter = ucnv_createConverterFromSharedData( 842 myUConverter, mySharedConverterData, 843 &stackArgs, 844 err); 845 846 if(U_SUCCESS(*err)) { 847 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 848 return myUConverter; 849 } 850 } 851 852 /* exit with error */ 853 UTRACE_EXIT_STATUS(*err); 854 return NULL; 855 } 856 857 U_CFUNC UBool 858 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 859 UConverter myUConverter; 860 UConverterNamePieces stackPieces; 861 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 862 UConverterSharedData *mySharedConverterData; 863 864 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 865 866 if(U_SUCCESS(*err)) { 867 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 868 869 stackArgs.onlyTestIsLoadable=TRUE; 870 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 871 ucnv_createConverterFromSharedData( 872 &myUConverter, mySharedConverterData, 873 &stackArgs, 874 err); 875 ucnv_unloadSharedDataIfReady(mySharedConverterData); 876 } 877 878 UTRACE_EXIT_STATUS(*err); 879 return U_SUCCESS(*err); 880 } 881 882 UConverter * 883 
ucnv_createAlgorithmicConverter(UConverter *myUConverter, 884 UConverterType type, 885 const char *locale, uint32_t options, 886 UErrorCode *err) { 887 UConverter *cnv; 888 const UConverterSharedData *sharedData; 889 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 890 891 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 892 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 893 894 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 895 *err = U_ILLEGAL_ARGUMENT_ERROR; 896 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 897 return NULL; 898 } 899 900 sharedData = converterData[type]; 901 if(sharedData == NULL || sharedData->isReferenceCounted) { 902 /* not a valid type, or not an algorithmic converter */ 903 *err = U_ILLEGAL_ARGUMENT_ERROR; 904 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 905 return NULL; 906 } 907 908 stackArgs.name = ""; 909 stackArgs.options = options; 910 stackArgs.locale=locale; 911 cnv = ucnv_createConverterFromSharedData( 912 myUConverter, (UConverterSharedData *)sharedData, 913 &stackArgs, err); 914 915 UTRACE_EXIT_PTR_STATUS(cnv, *err); 916 return cnv; 917 } 918 919 U_CFUNC UConverter* 920 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 921 { 922 UConverter *myUConverter; 923 UConverterSharedData *mySharedConverterData; 924 UConverterNamePieces stackPieces; 925 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 926 927 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 928 929 if(U_FAILURE(*err)) { 930 UTRACE_EXIT_STATUS(*err); 931 return NULL; 932 } 933 934 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 935 936 /* first, get the options out of the converterName string */ 937 stackPieces.cnvName[0] = 0; 938 stackPieces.locale[0] = 0; 939 stackPieces.options = 0; 940 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 941 if (U_FAILURE(*err)) { 942 /* Very bad name used. 
*/ 943 UTRACE_EXIT_STATUS(*err); 944 return NULL; 945 } 946 stackArgs.nestedLoads=1; 947 stackArgs.pkg=packageName; 948 949 /* open the data, unflatten the shared structure */ 950 mySharedConverterData = createConverterFromFile(&stackArgs, err); 951 952 if (U_FAILURE(*err)) { 953 UTRACE_EXIT_STATUS(*err); 954 return NULL; 955 } 956 957 /* create the actual converter */ 958 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 959 960 if (U_FAILURE(*err)) { 961 ucnv_close(myUConverter); 962 UTRACE_EXIT_STATUS(*err); 963 return NULL; 964 } 965 966 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 967 return myUConverter; 968 } 969 970 971 U_CFUNC UConverter* 972 ucnv_createConverterFromSharedData(UConverter *myUConverter, 973 UConverterSharedData *mySharedConverterData, 974 UConverterLoadArgs *pArgs, 975 UErrorCode *err) 976 { 977 UBool isCopyLocal; 978 979 if(U_FAILURE(*err)) { 980 ucnv_unloadSharedDataIfReady(mySharedConverterData); 981 return myUConverter; 982 } 983 if(myUConverter == NULL) 984 { 985 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 986 if(myUConverter == NULL) 987 { 988 *err = U_MEMORY_ALLOCATION_ERROR; 989 ucnv_unloadSharedDataIfReady(mySharedConverterData); 990 return NULL; 991 } 992 isCopyLocal = FALSE; 993 } else { 994 isCopyLocal = TRUE; 995 } 996 997 /* initialize the converter */ 998 uprv_memset(myUConverter, 0, sizeof(UConverter)); 999 myUConverter->isCopyLocal = isCopyLocal; 1000 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 1001 myUConverter->sharedData = mySharedConverterData; 1002 myUConverter->options = pArgs->options; 1003 if(!pArgs->onlyTestIsLoadable) { 1004 myUConverter->preFromUFirstCP = U_SENTINEL; 1005 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 1006 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1007 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1008 myUConverter->maxBytesPerUChar 
= mySharedConverterData->staticData->maxBytesPerChar; 1009 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1010 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1011 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1012 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1013 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1014 } 1015 1016 if(mySharedConverterData->impl->open != NULL) { 1017 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1018 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1019 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1020 ucnv_close(myUConverter); 1021 return NULL; 1022 } 1023 } 1024 1025 return myUConverter; 1026 } 1027 1028 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1029 */ 1030 U_CAPI int32_t U_EXPORT2 1031 ucnv_flushCache () 1032 { 1033 UConverterSharedData *mySharedData = NULL; 1034 int32_t pos; 1035 int32_t tableDeletedNum = 0; 1036 const UHashElement *e; 1037 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1038 int32_t i, remaining; 1039 1040 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1041 1042 /* Close the default converter without creating a new one so that everything will be flushed. */ 1043 u_flushDefaultConverter(); 1044 1045 /*if shared data hasn't even been lazy evaluated yet 1046 * return 0 1047 */ 1048 if (SHARED_DATA_HASHTABLE == NULL) { 1049 UTRACE_EXIT_VALUE((int32_t)0); 1050 return 0; 1051 } 1052 1053 /*creates an enumeration to iterate through every element in the 1054 * table 1055 * 1056 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1057 * accessing or modifying the hash table during the iteration. 
1058 * The reference count of an entry may be decremented by 1059 * ucnv_close while the iteration is in process, but this is 1060 * benign. It can't be incremented (in ucnv_createConverter()) 1061 * because the sequence of looking up in the cache + incrementing 1062 * is protected by cnvCacheMutex. 1063 */ 1064 umtx_lock(&cnvCacheMutex); 1065 /* 1066 * double loop: A delta/extension-only converter has a pointer to its base table's 1067 * shared data; the first iteration of the outer loop may see the delta converter 1068 * before the base converter, and unloading the delta converter may get the base 1069 * converter's reference counter down to 0. 1070 */ 1071 i = 0; 1072 do { 1073 remaining = 0; 1074 pos = UHASH_FIRST; 1075 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1076 { 1077 mySharedData = (UConverterSharedData *) e->value.pointer; 1078 /*deletes only if reference counter == 0 */ 1079 if (mySharedData->referenceCounter == 0) 1080 { 1081 tableDeletedNum++; 1082 1083 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1084 1085 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1086 mySharedData->sharedDataCached = FALSE; 1087 ucnv_deleteSharedConverterData (mySharedData); 1088 } else { 1089 ++remaining; 1090 } 1091 } 1092 } while(++i == 1 && remaining > 0); 1093 umtx_unlock(&cnvCacheMutex); 1094 1095 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1096 1097 UTRACE_EXIT_VALUE(tableDeletedNum); 1098 return tableDeletedNum; 1099 } 1100 1101 /* available converters list --------------------------------------------------- */ 1102 1103 static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { 1104 U_ASSERT(gAvailableConverterCount == 0); 1105 U_ASSERT(gAvailableConverters == NULL); 1106 1107 ucnv_enableCleanup(); 1108 UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); 1109 int32_t allConverterCount = uenum_count(allConvEnum, &errCode); 1110 if (U_FAILURE(errCode)) 
{ 1111 return; 1112 } 1113 1114 /* We can't have more than "*converterTable" converters to open */ 1115 gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1116 if (!gAvailableConverters) { 1117 errCode = U_MEMORY_ALLOCATION_ERROR; 1118 return; 1119 } 1120 1121 /* Open the default converter to make sure that it has first dibs in the hash table. */ 1122 UErrorCode localStatus = U_ZERO_ERROR; 1123 UConverter tempConverter; 1124 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1125 1126 gAvailableConverterCount = 0; 1127 1128 for (int32_t idx = 0; idx < allConverterCount; idx++) { 1129 localStatus = U_ZERO_ERROR; 1130 const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); 1131 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1132 gAvailableConverters[gAvailableConverterCount++] = converterName; 1133 } 1134 } 1135 1136 uenum_close(allConvEnum); 1137 } 1138 1139 1140 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1141 umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); 1142 return U_SUCCESS(*pErrorCode); 1143 } 1144 1145 U_CFUNC uint16_t 1146 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1147 if (haveAvailableConverterList(pErrorCode)) { 1148 return gAvailableConverterCount; 1149 } 1150 return 0; 1151 } 1152 1153 U_CFUNC const char * 1154 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1155 if (haveAvailableConverterList(pErrorCode)) { 1156 if (n < gAvailableConverterCount) { 1157 return gAvailableConverters[n]; 1158 } 1159 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1160 } 1161 return NULL; 1162 } 1163 1164 /* default converter name --------------------------------------------------- */ 1165 1166 #if !U_CHARSET_IS_UTF8 1167 /* 1168 Copy the canonical converter name. 1169 ucnv_getDefaultName must be thread safe, which can call this function. 
1170 1171 ucnv_setDefaultName calls this function and it doesn't have to be 1172 thread safe because there is no reliable/safe way to reset the 1173 converter in use in all threads. If you did reset the converter, you 1174 would not be sure that retrieving a default converter for one string 1175 would be the same type of default converter for a successive string. 1176 Since the name is a returned via ucnv_getDefaultName without copying, 1177 you shouldn't be modifying or deleting the string from a separate thread. 1178 */ 1179 static inline void 1180 internalSetName(const char *name, UErrorCode *status) { 1181 UConverterNamePieces stackPieces; 1182 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 1183 int32_t length=(int32_t)(uprv_strlen(name)); 1184 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1185 const UConverterSharedData *algorithmicSharedData; 1186 1187 stackArgs.name = name; 1188 if(containsOption) { 1189 stackPieces.cnvName[0] = 0; 1190 stackPieces.locale[0] = 0; 1191 stackPieces.options = 0; 1192 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1193 if(U_FAILURE(*status)) { 1194 return; 1195 } 1196 } 1197 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1198 1199 umtx_lock(&cnvCacheMutex); 1200 1201 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1202 gDefaultConverterContainsOption = containsOption; 1203 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1204 gDefaultConverterNameBuffer[length]=0; 1205 1206 /* gDefaultConverterName MUST be the last global var set by this function. */ 1207 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1208 // But there is nothing here preventing that from being reordered, either by the compiler 1209 // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. 
1210 // -- Andy 1211 gDefaultConverterName = gDefaultConverterNameBuffer; 1212 1213 ucnv_enableCleanup(); 1214 1215 umtx_unlock(&cnvCacheMutex); 1216 } 1217 #endif 1218 1219 /* 1220 * In order to be really thread-safe, the get function would have to take 1221 * a buffer parameter and copy the current string inside a mutex block. 1222 * This implementation only tries to be really thread-safe while 1223 * setting the name. 1224 * It assumes that setting a pointer is atomic. 1225 */ 1226 1227 U_CAPI const char* U_EXPORT2 1228 ucnv_getDefaultName() { 1229 #if U_CHARSET_IS_UTF8 1230 return "UTF-8"; 1231 #else 1232 /* local variable to be thread-safe */ 1233 const char *name; 1234 1235 /* 1236 Concurrent calls to ucnv_getDefaultName must be thread safe, 1237 but ucnv_setDefaultName is not thread safe. 1238 */ 1239 { 1240 icu::Mutex lock(&cnvCacheMutex); 1241 name = gDefaultConverterName; 1242 } 1243 if(name==NULL) { 1244 UErrorCode errorCode = U_ZERO_ERROR; 1245 UConverter *cnv = NULL; 1246 1247 name = uprv_getDefaultCodepage(); 1248 1249 /* if the name is there, test it out and get the canonical name with options */ 1250 if(name != NULL) { 1251 cnv = ucnv_open(name, &errorCode); 1252 if(U_SUCCESS(errorCode) && cnv != NULL) { 1253 name = ucnv_getName(cnv, &errorCode); 1254 } 1255 } 1256 1257 if(name == NULL || name[0] == 0 1258 || U_FAILURE(errorCode) || cnv == NULL 1259 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1260 { 1261 /* Panic time, let's use a fallback. */ 1262 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1263 name = "US-ASCII"; 1264 /* there is no 'algorithmic' converter for EBCDIC */ 1265 #elif U_PLATFORM == U_PF_OS390 1266 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1267 #else 1268 name = "ibm-37_P100-1995"; 1269 #endif 1270 } 1271 1272 internalSetName(name, &errorCode); 1273 1274 /* The close may make the current name go away. 
*/ 1275 ucnv_close(cnv); 1276 } 1277 1278 return name; 1279 #endif 1280 } 1281 1282 #if U_CHARSET_IS_UTF8 1283 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} 1284 #else 1285 /* 1286 This function is not thread safe, and it can't be thread safe. 1287 See internalSetName or the API reference for details. 1288 */ 1289 U_CAPI void U_EXPORT2 1290 ucnv_setDefaultName(const char *converterName) { 1291 if(converterName==NULL) { 1292 /* reset to the default codepage */ 1293 gDefaultConverterName=NULL; 1294 } else { 1295 UErrorCode errorCode = U_ZERO_ERROR; 1296 UConverter *cnv = NULL; 1297 const char *name = NULL; 1298 1299 /* if the name is there, test it out and get the canonical name with options */ 1300 cnv = ucnv_open(converterName, &errorCode); 1301 if(U_SUCCESS(errorCode) && cnv != NULL) { 1302 name = ucnv_getName(cnv, &errorCode); 1303 } 1304 1305 if(U_SUCCESS(errorCode) && name!=NULL) { 1306 internalSetName(name, &errorCode); 1307 } 1308 /* else this converter is bad to use. Don't change it to a bad value. */ 1309 1310 /* The close may make the current name go away. 
/**
 * Swaps an ICU .cnv conversion table (data format "cnvt").
 *
 * Only format version 6.2 or higher (major version 6) with conversionType
 * UCNV_MBCS is handled. The MBCS header must be version 4.1+ or version 5.3+
 * without unknown incompatible option bits.
 *
 * When length is negative this function itself copies and swaps nothing; it
 * only reads the headers and computes the size. When length is non-negative,
 * it is checked against the sizes read from the data before anything is
 * copied or swapped.
 *
 * @param ds         swapper providing readUInt32, swapArray16/32 and swapInvChars
 * @param inData     input data, starting with the data header
 * @param length     number of input bytes, or negative to only compute the size
 * @param outData    output buffer; may be the same memory as inData (the copy
 *                   steps are skipped when the pointers are equal)
 * @param pErrorCode in/out error code; set to U_UNSUPPORTED_ERROR for
 *                   unrecognized formats/versions/types and to
 *                   U_INDEX_OUTOFBOUNDS_ERROR when length is too small
 * @return headerSize + static data size + MBCS data size, or 0 on error
 */
U_CAPI int32_t U_EXPORT2
ucnv_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t offset, count, staticDataSize;
    int32_t size;

    const UConverterStaticData *inStaticData;
    UConverterStaticData *outStaticData;

    const _MBCSHeader *inMBCSHeader;
    _MBCSHeader *outMBCSHeader;
    _MBCSHeader mbcsHeader;
    uint32_t mbcsHeaderLength;
    UBool noFromU=FALSE;

    uint8_t outputType;

    int32_t maxFastUChar, mbcsIndexLength;

    const int32_t *inExtIndexes;
    int32_t extOffset;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x74 &&
        pInfo->formatVersion[0]==6 &&
        pInfo->formatVersion[1]>=2
    )) {
        udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* from here on, inBytes/outBytes point just past the data header */
    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    /* read the initial UConverterStaticData structure after the UDataInfo header */
    inStaticData=(const UConverterStaticData *)inBytes;
    outStaticData=(UConverterStaticData *)outBytes;

    if(length<0) {
        /* size-only mode: just read the structure size, no length checks */
        staticDataSize=ds->readUInt32(inStaticData->structSize);
    } else {
        /* length now counts the bytes after the data header */
        length-=headerSize;
        if( length<(int32_t)sizeof(UConverterStaticData) ||
            (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
        ) {
            udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length>=0) {
        /* swap the static data */
        if(inStaticData!=outStaticData) {
            uprv_memcpy(outStaticData, inStaticData, staticDataSize);
        }

        /* only structSize, codepage and name are swapped; the other bytes stay as copied */
        ds->swapArray32(ds, &inStaticData->structSize, 4,
                           &outStaticData->structSize, pErrorCode);
        ds->swapArray32(ds, &inStaticData->codepage, 4,
                           &outStaticData->codepage, pErrorCode);

        ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
                            outStaticData->name, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
            return 0;
        }
    }

    /* advance past the static data; length (if given) now counts the bytes after it */
    inBytes+=staticDataSize;
    outBytes+=staticDataSize;
    if(length>=0) {
        length-=(int32_t)staticDataSize;
    }

    /* check for supported conversionType values */
    if(inStaticData->conversionType==UCNV_MBCS) {
        /* swap MBCS data */
        inMBCSHeader=(const _MBCSHeader *)inBytes;
        outMBCSHeader=(_MBCSHeader *)outBytes;

        if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) {
            udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
                                length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
        /* determine the header length (in 32-bit units) from the MBCS header version */
        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
                  ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
        ) {
            /* version 5.3+: the options word carries the header length and the no-fromU flag */
            mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
            noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
        } else {
            udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* build a local copy of the header with all fields in readable byte order */
        uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
        mbcsHeader.countStates=         ds->readUInt32(inMBCSHeader->countStates);
        mbcsHeader.countToUFallbacks=   ds->readUInt32(inMBCSHeader->countToUFallbacks);
        mbcsHeader.offsetToUCodeUnits=  ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
        mbcsHeader.offsetFromUTable=    ds->readUInt32(inMBCSHeader->offsetFromUTable);
        mbcsHeader.offsetFromUBytes=    ds->readUInt32(inMBCSHeader->offsetFromUBytes);
        mbcsHeader.flags=               ds->readUInt32(inMBCSHeader->flags);
        mbcsHeader.fromUBytesLength=    ds->readUInt32(inMBCSHeader->fromUBytesLength);
        /* mbcsHeader.options have been read above */

        /* flags: low byte is the output type, the upper bits are the extension data offset */
        extOffset=(int32_t)(mbcsHeader.flags>>8);
        outputType=(uint8_t)mbcsHeader.flags;
        if(noFromU && outputType==MBCS_OUTPUT_1) {
            udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* make sure that the output type is known */
        switch(outputType) {
        case MBCS_OUTPUT_1:
        case MBCS_OUTPUT_2:
        case MBCS_OUTPUT_3:
        case MBCS_OUTPUT_4:
        case MBCS_OUTPUT_3_EUC:
        case MBCS_OUTPUT_4_EUC:
        case MBCS_OUTPUT_2_SISO:
        case MBCS_OUTPUT_EXT_ONLY:
            /* OK */
            break;
        default:
            udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
                             outputType);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* calculate the length of the MBCS data */

        /*
         * utf8Friendly MBCS files (mbcsHeader.version 4.3)
         * contain an additional mbcsIndex table:
         *   uint16_t[(maxFastUChar+1)>>6];
         * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
         */
        maxFastUChar=0;
        mbcsIndexLength=0;
        if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
            mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
        ) {
            maxFastUChar=(maxFastUChar<<8)|0xff;
            mbcsIndexLength=((maxFastUChar+1)>>6)*2;  /* number of bytes */
        }

        if(extOffset==0) {
            /* no extension data: the size follows from the base table offsets and lengths */
            size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
            if(!noFromU) {
                size+=(int32_t)mbcsHeader.fromUBytesLength;
            }

            /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
            inExtIndexes=NULL;
        } else {
            /* there is extension data after the base data, see ucnv_ext.h */
            if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
                udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
                                 length);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            /* the total size is the extension offset plus the size stored in the extension indexes */
            inExtIndexes=(const int32_t *)(inBytes+extOffset);
            size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
        }

        if(length>=0) {
            if(length<size) {
                udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
                                 length);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            /* copy the data for inaccessible bytes */
            if(inBytes!=outBytes) {
                uprv_memcpy(outBytes, inBytes, size);
            }

            /* swap the MBCSHeader, except for the version field */
            count=mbcsHeaderLength*4;
            ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
                               &outMBCSHeader->countStates, pErrorCode);

            if(outputType==MBCS_OUTPUT_EXT_ONLY) {
                /*
                 * extension-only file,
                 * contains a base name instead of normal base table data
                 */

                /* swap the base name, between the header and the extension data */
                const char *inBaseName=(const char *)inBytes+count;
                char *outBaseName=(char *)outBytes+count;
                ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
                                    outBaseName, pErrorCode);
            } else {
                /* normal file with base table data */

                /* swap the state table, 1kB per state */
                offset=count;
                count=mbcsHeader.countStates*1024;
                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* swap the toUFallbacks[] */
                offset+=count;
                count=mbcsHeader.countToUFallbacks*8;
                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* swap the unicodeCodeUnits[] */
                offset=mbcsHeader.offsetToUCodeUnits;
                count=mbcsHeader.offsetFromUTable-offset;
                ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* offset to the stage 1 table, independent of the outputType */
                offset=mbcsHeader.offsetFromUTable;

                if(outputType==MBCS_OUTPUT_1) {
                    /* SBCS: swap the fromU tables, all 16 bits wide */
                    count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
                    ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);
                } else {
                    /* otherwise: swap the stage tables separately */

                    /* stage 1 table: uint16_t[0x440 or 0x40] */
                    if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
                        count=0x440*2; /* for all of Unicode */
                    } else {
                        count=0x40*2; /* only BMP */
                    }
                    ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);

                    /* stage 2 table: uint32_t[] */
                    offset+=count;
                    count=mbcsHeader.offsetFromUBytes-offset;
                    ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);

                    /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
                    offset=mbcsHeader.offsetFromUBytes;
                    count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
                    switch(outputType) {
                    case MBCS_OUTPUT_2:
                    case MBCS_OUTPUT_3_EUC:
                    case MBCS_OUTPUT_2_SISO:
                        ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                        break;
                    case MBCS_OUTPUT_4:
                        ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                        break;
                    default:
                        /* just uint8_t[], nothing to swap */
                        break;
                    }

                    /* the mbcsIndex table, if present, follows the stage 3/result bytes */
                    if(mbcsIndexLength!=0) {
                        offset+=count;
                        count=mbcsIndexLength;
                        ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                    }
                }
            }

            if(extOffset!=0) {
                /* swap the extension data */
                inBytes+=extOffset;
                outBytes+=extOffset;

                /*
                 * From here on, length is reused as the element count read from
                 * the extension indexes; it no longer holds the remaining byte
                 * count. Offsets are relative to the start of the extension data.
                 */

                /* swap toUTable[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* swap toUUChars[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUTableUChars[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUTableValues[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
                /* same length as for fromUTableUChars[] */
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* no need to swap fromUBytes[] */

                /* swap fromUStage12[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUStage3[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUStage3b[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* swap indexes[] (last, after all values have been read from the input indexes) */
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
                ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
            }
        }
    } else {
        udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
                         inStaticData->conversionType);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* total: data header + static data + MBCS (and extension) data */
    return headerSize+(int32_t)staticDataSize+size;
}