1 /* 2 ******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 1996-2012, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************** 7 * 8 * uconv_bld.cpp: 9 * 10 * Defines functions that are used in the creation/initialization/deletion 11 * of converters and related structures. 12 * uses uconv_io.h routines to access disk information 13 * is used by ucnv.h to implement public API create/delete/flushCache routines 14 * Modification History: 15 * 16 * Date Name Description 17 * 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 * 06/29/2000 helena Major rewrite of the callback interface. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/putil.h" 27 #include "unicode/udata.h" 28 #include "unicode/ucnv.h" 29 #include "unicode/uloc.h" 30 #include "putilimp.h" 31 #include "utracimp.h" 32 #include "ucnv_io.h" 33 #include "ucnv_bld.h" 34 #include "ucnvmbcs.h" 35 #include "ucnv_ext.h" 36 #include "ucnv_cnv.h" 37 #include "ucnv_imp.h" 38 #include "uhash.h" 39 #include "umutex.h" 40 #include "cstring.h" 41 #include "cmemory.h" 42 #include "ucln_cmn.h" 43 #include "ustr_cnv.h" 44 45 46 47 #if 0 48 #include <stdio.h> 49 extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); 50 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) 51 #else 52 # define UCNV_DEBUG_LOG(x,y,z) 53 #endif 54 55 static const UConverterSharedData * const 56 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ 57 NULL, NULL, 58 59 #if UCONFIG_NO_LEGACY_CONVERSION 60 NULL, 61 #else 62 &_MBCSData, 63 #endif 64 65 &_Latin1Data, 66 &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData, 67 NULL, 68 69 #if UCONFIG_NO_LEGACY_CONVERSION 70 NULL, 71 NULL, NULL, NULL, NULL, NULL, NULL, 72 NULL, NULL, NULL, NULL, NULL, NULL, 73 NULL, 74 #else 75 &_ISO2022Data, 76 
&_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, 77 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, 78 &_HZData, 79 #endif 80 81 &_SCSUData, 82 83 #if UCONFIG_NO_LEGACY_CONVERSION 84 NULL, 85 #else 86 &_ISCIIData, 87 #endif 88 89 &_ASCIIData, 90 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 91 92 #if UCONFIG_NO_LEGACY_CONVERSION 93 NULL, 94 #else 95 &_CompoundTextData 96 #endif 97 }; 98 99 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. 100 Also the name should be in lower case and all spaces, dashes and underscores 101 removed 102 */ 103 static struct { 104 const char *name; 105 const UConverterType type; 106 } const cnvNameType[] = { 107 { "bocu1", UCNV_BOCU1 }, 108 { "cesu8", UCNV_CESU8 }, 109 #if !UCONFIG_NO_LEGACY_CONVERSION 110 { "hz",UCNV_HZ }, 111 #endif 112 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 113 #if !UCONFIG_NO_LEGACY_CONVERSION 114 { "iscii", UCNV_ISCII }, 115 { "iso2022", UCNV_ISO_2022 }, 116 #endif 117 { "iso88591", UCNV_LATIN_1 }, 118 #if !UCONFIG_NO_LEGACY_CONVERSION 119 { "lmbcs1", UCNV_LMBCS_1 }, 120 { "lmbcs11",UCNV_LMBCS_11 }, 121 { "lmbcs16",UCNV_LMBCS_16 }, 122 { "lmbcs17",UCNV_LMBCS_17 }, 123 { "lmbcs18",UCNV_LMBCS_18 }, 124 { "lmbcs19",UCNV_LMBCS_19 }, 125 { "lmbcs2", UCNV_LMBCS_2 }, 126 { "lmbcs3", UCNV_LMBCS_3 }, 127 { "lmbcs4", UCNV_LMBCS_4 }, 128 { "lmbcs5", UCNV_LMBCS_5 }, 129 { "lmbcs6", UCNV_LMBCS_6 }, 130 { "lmbcs8", UCNV_LMBCS_8 }, 131 #endif 132 { "scsu", UCNV_SCSU }, 133 { "usascii", UCNV_US_ASCII }, 134 { "utf16", UCNV_UTF16 }, 135 { "utf16be", UCNV_UTF16_BigEndian }, 136 { "utf16le", UCNV_UTF16_LittleEndian }, 137 #if U_IS_BIG_ENDIAN 138 { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, 139 { "utf16platformendian", UCNV_UTF16_BigEndian }, 140 #else 141 { "utf16oppositeendian", UCNV_UTF16_BigEndian}, 142 { "utf16platformendian", UCNV_UTF16_LittleEndian }, 143 #endif 144 { "utf32", UCNV_UTF32 }, 145 
{ "utf32be", UCNV_UTF32_BigEndian }, 146 { "utf32le", UCNV_UTF32_LittleEndian }, 147 #if U_IS_BIG_ENDIAN 148 { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, 149 { "utf32platformendian", UCNV_UTF32_BigEndian }, 150 #else 151 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, 152 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 153 #endif 154 { "utf7", UCNV_UTF7 }, 155 { "utf8", UCNV_UTF8 }, 156 { "x11compoundtext", UCNV_COMPOUND_TEXT} 157 }; 158 159 160 /*initializes some global variables */ 161 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 162 static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */ 163 /* Note: the global mutex is used for */ 164 /* reference count updates. */ 165 166 static const char **gAvailableConverters = NULL; 167 static uint16_t gAvailableConverterCount = 0; 168 169 #if !U_CHARSET_IS_UTF8 170 171 /* This contains the resolved converter name. So no further alias lookup is needed again. */ 172 static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ 173 static const char *gDefaultConverterName = NULL; 174 175 /* 176 If the default converter is an algorithmic converter, this is the cached value. 177 We don't cache a full UConverter and clone it because ucnv_clone doesn't have 178 less overhead than an algorithmic open. We don't cache non-algorithmic converters 179 because ucnv_flushCache must be able to unload the default converter and its table. 180 */ 181 static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; 182 183 /* Does gDefaultConverterName have a converter option and require extra parsing? 
*/ 184 static UBool gDefaultConverterContainsOption; 185 186 #endif /* !U_CHARSET_IS_UTF8 */ 187 188 static const char DATA_TYPE[] = "cnv"; 189 190 static void 191 ucnv_flushAvailableConverterCache() { 192 if (gAvailableConverters) { 193 umtx_lock(&cnvCacheMutex); 194 gAvailableConverterCount = 0; 195 uprv_free((char **)gAvailableConverters); 196 gAvailableConverters = NULL; 197 umtx_unlock(&cnvCacheMutex); 198 } 199 } 200 201 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 202 /* in use by open converters. */ 203 /* Not thread safe. */ 204 /* Not supported API. */ 205 static UBool U_CALLCONV ucnv_cleanup(void) { 206 ucnv_flushCache(); 207 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 208 uhash_close(SHARED_DATA_HASHTABLE); 209 SHARED_DATA_HASHTABLE = NULL; 210 } 211 212 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 213 ucnv_flushAvailableConverterCache(); 214 215 #if !U_CHARSET_IS_UTF8 216 gDefaultConverterName = NULL; 217 gDefaultConverterNameBuffer[0] = 0; 218 gDefaultConverterContainsOption = FALSE; 219 gDefaultAlgorithmicSharedData = NULL; 220 #endif 221 222 return (SHARED_DATA_HASHTABLE == NULL); 223 } 224 225 static UBool U_CALLCONV 226 isCnvAcceptable(void * /*context*/, 227 const char * /*type*/, const char * /*name*/, 228 const UDataInfo *pInfo) { 229 return (UBool)( 230 pInfo->size>=20 && 231 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 232 pInfo->charsetFamily==U_CHARSET_FAMILY && 233 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 234 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 235 pInfo->dataFormat[1]==0x6e && 236 pInfo->dataFormat[2]==0x76 && 237 pInfo->dataFormat[3]==0x74 && 238 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 239 } 240 241 /** 242 * Un flatten shared data from a UDATA.. 
243 */ 244 static UConverterSharedData* 245 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 246 { 247 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 248 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 249 const UConverterStaticData *source = (const UConverterStaticData *) raw; 250 UConverterSharedData *data; 251 UConverterType type = (UConverterType)source->conversionType; 252 253 if(U_FAILURE(*status)) 254 return NULL; 255 256 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 257 converterData[type] == NULL || 258 converterData[type]->referenceCounter != 1 || 259 source->structSize != sizeof(UConverterStaticData)) 260 { 261 *status = U_INVALID_TABLE_FORMAT; 262 return NULL; 263 } 264 265 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 266 if(data == NULL) { 267 *status = U_MEMORY_ALLOCATION_ERROR; 268 return NULL; 269 } 270 271 /* copy initial values from the static structure for this type */ 272 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 273 274 #if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */ 275 /* 276 * It would be much more efficient if the table were a direct member, not a pointer. 277 * However, that would add to the size of all UConverterSharedData objects 278 * even if they do not use this table (especially algorithmic ones). 279 * If this changes, then the static templates from converterData[type] 280 * need more entries. 281 * 282 * In principle, it would be cleaner if the load() function below 283 * allocated the table. 
284 */ 285 data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable)); 286 if(data->table == NULL) { 287 uprv_free(data); 288 *status = U_MEMORY_ALLOCATION_ERROR; 289 return NULL; 290 } 291 uprv_memset(data->table, 0, sizeof(UConverterTable)); 292 #endif 293 294 data->staticData = source; 295 296 data->sharedDataCached = FALSE; 297 298 /* fill in fields from the loaded data */ 299 data->dataMemory = (void*)pData; /* for future use */ 300 301 if(data->impl->load != NULL) { 302 data->impl->load(data, pArgs, raw + source->structSize, status); 303 if(U_FAILURE(*status)) { 304 uprv_free(data->table); 305 uprv_free(data); 306 return NULL; 307 } 308 } 309 return data; 310 } 311 312 /*Takes an alias name gets an actual converter file name 313 *goes to disk and opens it. 314 *allocates the memory and returns a new UConverter object 315 */ 316 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 317 { 318 UDataMemory *data; 319 UConverterSharedData *sharedData; 320 321 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 322 323 if (U_FAILURE (*err)) { 324 UTRACE_EXIT_STATUS(*err); 325 return NULL; 326 } 327 328 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 329 330 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 331 if(U_FAILURE(*err)) 332 { 333 UTRACE_EXIT_STATUS(*err); 334 return NULL; 335 } 336 337 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 338 if(U_FAILURE(*err)) 339 { 340 udata_close(data); 341 UTRACE_EXIT_STATUS(*err); 342 return NULL; 343 } 344 345 /* 346 * TODO Store pkg in a field in the shared data so that delta-only converters 347 * can load base converters from the same package. 348 * If the pkg name is longer than the field, then either do not load the converter 349 * in the first place, or just set the pkg field to "". 
350 */ 351 352 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 353 return sharedData; 354 } 355 356 /*returns a converter type from a string 357 */ 358 static const UConverterSharedData * 359 getAlgorithmicTypeFromName(const char *realName) 360 { 361 uint32_t mid, start, limit; 362 uint32_t lastMid; 363 int result; 364 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 365 366 /* Lower case and remove ignoreable characters. */ 367 ucnv_io_stripForCompare(strippedName, realName); 368 369 /* do a binary search for the alias */ 370 start = 0; 371 limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]); 372 mid = limit; 373 lastMid = UINT32_MAX; 374 375 for (;;) { 376 mid = (uint32_t)((start + limit) / 2); 377 if (lastMid == mid) { /* Have we moved? */ 378 break; /* We haven't moved, and it wasn't found. */ 379 } 380 lastMid = mid; 381 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 382 383 if (result < 0) { 384 limit = mid; 385 } else if (result > 0) { 386 start = mid; 387 } else { 388 return converterData[cnvNameType[mid].type]; 389 } 390 } 391 392 return NULL; 393 } 394 395 /* 396 * Based on the number of known converters, this determines how many times larger 397 * the shared data hash table should be. When on small platforms, or just a couple 398 * of converters are used, this number should be 2. When memory is plentiful, or 399 * when ucnv_countAvailable is ever used with a lot of available converters, 400 * this should be 4. 401 * Larger numbers reduce the number of hash collisions, but use more memory. 402 */ 403 #define UCNV_CACHE_LOAD_FACTOR 2 404 405 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 406 /* Will always be called with the cnvCacheMutex alrady being held */ 407 /* by the calling function. 
*/
/**
 * Stores the shared data in SHARED_DATA_HASHTABLE, keyed by its static
 * converter name. The table is created on first use, at which point the
 * ucnv_cleanup hook is registered as well.
 *
 * data->sharedDataCached ends up TRUE only if the entry really went into the
 * table. If the table cannot be created or the insertion fails, the data is
 * left uncached, so that ucnv_unload() can still delete it when its last
 * reference goes away.
 *
 * @param data The shared data
 */
static void
ucnv_shareConverterData(UConverterSharedData * data)
{
    UErrorCode err = U_ZERO_ERROR;

    /* Lazily create the hash table itself */
    if (SHARED_DATA_HASHTABLE == NULL)
    {
        SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
                            ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
                            &err);
        ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);

        if (U_FAILURE(err)) {
            /* no table, so the data stays uncached */
            return;
        }
    }

    /* ### check to see if the element is not already there! */

    uhash_put(SHARED_DATA_HASHTABLE,
              (void*) data->staticData->name, /* Okay to cast away const as long as
                                                 keyDeleter == NULL */
              data,
              &err);

    /* Mark it shared only when the table actually holds it. */
    data->sharedDataCached = (UBool)U_SUCCESS(err);

    UCNV_DEBUG_LOG("put", data->staticData->name,data);
}

/* Look up a converter name in the shared data cache.                  */
/*   cnvCacheMutex must be held by the caller to protect the hash table.
*/ 454 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 455 * @param name The name of the shared data 456 * @return the shared data from the SHARED_DATA_HASHTABLE 457 */ 458 static UConverterSharedData * 459 ucnv_getSharedConverterData(const char *name) 460 { 461 /*special case when no Table has yet been created we return NULL */ 462 if (SHARED_DATA_HASHTABLE == NULL) 463 { 464 return NULL; 465 } 466 else 467 { 468 UConverterSharedData *rc; 469 470 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 471 UCNV_DEBUG_LOG("get",name,rc); 472 return rc; 473 } 474 } 475 476 /*frees the string of memory blocks associates with a sharedConverter 477 *if and only if the referenceCounter == 0 478 */ 479 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to 480 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and 481 * returns TRUE, 482 * otherwise returns FALSE 483 * @param sharedConverterData The shared data 484 * @return if not it frees all the memory stemming from sharedConverterData and 485 * returns TRUE, otherwise returns FALSE 486 */ 487 static UBool 488 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) 489 { 490 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); 491 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); 492 493 if (deadSharedData->referenceCounter > 0) { 494 UTRACE_EXIT_VALUE((int32_t)FALSE); 495 return FALSE; 496 } 497 498 if (deadSharedData->impl->unload != NULL) { 499 deadSharedData->impl->unload(deadSharedData); 500 } 501 502 if(deadSharedData->dataMemory != NULL) 503 { 504 UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; 505 udata_close(data); 506 } 507 508 if(deadSharedData->table != NULL) 509 { 510 uprv_free(deadSharedData->table); 511 } 512 513 #if 0 514 /* if the static data is actually owned by the shared data */ 515 /* 
enable if we ever have this situation. */ 516 if(deadSharedData->staticDataOwned == TRUE) /* see ucnv_bld.h */ 517 { 518 uprv_free((void*)deadSharedData->staticData); 519 } 520 #endif 521 522 #if 0 523 /* Zap it ! */ 524 uprv_memset(deadSharedData->0, sizeof(*deadSharedData)); 525 #endif 526 527 uprv_free(deadSharedData); 528 529 UTRACE_EXIT_VALUE((int32_t)TRUE); 530 return TRUE; 531 } 532 533 /** 534 * Load a non-algorithmic converter. 535 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 536 */ 537 UConverterSharedData * 538 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 539 UConverterSharedData *mySharedConverterData; 540 541 if(err == NULL || U_FAILURE(*err)) { 542 return NULL; 543 } 544 545 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 546 /* application-provided converters are not currently cached */ 547 return createConverterFromFile(pArgs, err); 548 } 549 550 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 551 if (mySharedConverterData == NULL) 552 { 553 /*Not cached, we need to stream it in from file */ 554 mySharedConverterData = createConverterFromFile(pArgs, err); 555 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 556 { 557 return NULL; 558 } 559 else if (!pArgs->onlyTestIsLoadable) 560 { 561 /* share it with other library clients */ 562 ucnv_shareConverterData(mySharedConverterData); 563 } 564 } 565 else 566 { 567 /* The data for this converter was already in the cache. */ 568 /* Update the reference counter on the shared data: one more client */ 569 mySharedConverterData->referenceCounter++; 570 } 571 572 return mySharedConverterData; 573 } 574 575 /** 576 * Unload a non-algorithmic converter. 577 * It must be sharedData->referenceCounter != ~0 578 * and this function must be called inside umtx_lock(&cnvCacheMutex). 
579 */ 580 U_CAPI void 581 ucnv_unload(UConverterSharedData *sharedData) { 582 if(sharedData != NULL) { 583 if (sharedData->referenceCounter > 0) { 584 sharedData->referenceCounter--; 585 } 586 587 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 588 ucnv_deleteSharedConverterData(sharedData); 589 } 590 } 591 } 592 593 U_CFUNC void 594 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 595 { 596 /* 597 Checking whether it's an algorithic converter is okay 598 in multithreaded applications because the value never changes. 599 Don't check referenceCounter for any other value. 600 */ 601 if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { 602 umtx_lock(&cnvCacheMutex); 603 ucnv_unload(sharedData); 604 umtx_unlock(&cnvCacheMutex); 605 } 606 } 607 608 U_CFUNC void 609 ucnv_incrementRefCount(UConverterSharedData *sharedData) 610 { 611 /* 612 Checking whether it's an algorithic converter is okay 613 in multithreaded applications because the value never changes. 614 Don't check referenceCounter for any other value. 615 */ 616 if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { 617 umtx_lock(&cnvCacheMutex); 618 sharedData->referenceCounter++; 619 umtx_unlock(&cnvCacheMutex); 620 } 621 } 622 623 /* 624 * *pPieces must be initialized. 625 * The name without options will be copied to pPieces->cnvName. 626 * The locale and options will be copied to pPieces only if present in inName, 627 * otherwise the existing values in pPieces remain. 628 * *pArgs will be set to the pPieces values. 
629 */ 630 static void 631 parseConverterOptions(const char *inName, 632 UConverterNamePieces *pPieces, 633 UConverterLoadArgs *pArgs, 634 UErrorCode *err) 635 { 636 char *cnvName = pPieces->cnvName; 637 char c; 638 int32_t len = 0; 639 640 pArgs->name=inName; 641 pArgs->locale=pPieces->locale; 642 pArgs->options=pPieces->options; 643 644 /* copy the converter name itself to cnvName */ 645 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 646 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 647 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 648 pPieces->cnvName[0]=0; 649 return; 650 } 651 *cnvName++=c; 652 inName++; 653 } 654 *cnvName=0; 655 pArgs->name=pPieces->cnvName; 656 657 /* parse options. No more name copying should occur. */ 658 while((c=*inName)!=0) { 659 if(c==UCNV_OPTION_SEP_CHAR) { 660 ++inName; 661 } 662 663 /* inName is behind an option separator */ 664 if(uprv_strncmp(inName, "locale=", 7)==0) { 665 /* do not modify locale itself in case we have multiple locale options */ 666 char *dest=pPieces->locale; 667 668 /* copy the locale option value */ 669 inName+=7; 670 len=0; 671 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 672 ++inName; 673 674 if(++len>=ULOC_FULLNAME_CAPACITY) { 675 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 676 pPieces->locale[0]=0; 677 return; 678 } 679 680 *dest++=c; 681 } 682 *dest=0; 683 } else if(uprv_strncmp(inName, "version=", 8)==0) { 684 /* copy the version option value into bits 3..0 of pPieces->options */ 685 inName+=8; 686 c=*inName; 687 if(c==0) { 688 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 689 return; 690 } else if((uint8_t)(c-'0')<10) { 691 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 692 ++inName; 693 } 694 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 695 inName+=8; 696 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 697 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 
698 } else { 699 /* ignore any other options until we define some */ 700 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 701 } 702 if(c==0) { 703 return; 704 } 705 } 706 } 707 } 708 709 /*Logic determines if the converter is Algorithmic AND/OR cached 710 *depending on that: 711 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 712 * -Get it from a Hashtable (Data=X, Cached=TRUE) 713 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 714 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 715 */ 716 U_CFUNC UConverterSharedData * 717 ucnv_loadSharedData(const char *converterName, 718 UConverterNamePieces *pPieces, 719 UConverterLoadArgs *pArgs, 720 UErrorCode * err) { 721 UConverterNamePieces stackPieces; 722 UConverterLoadArgs stackArgs; 723 UConverterSharedData *mySharedConverterData = NULL; 724 UErrorCode internalErrorCode = U_ZERO_ERROR; 725 UBool mayContainOption = TRUE; 726 UBool checkForAlgorithmic = TRUE; 727 728 if (U_FAILURE (*err)) { 729 return NULL; 730 } 731 732 if(pPieces == NULL) { 733 if(pArgs != NULL) { 734 /* 735 * Bad: We may set pArgs pointers to stackPieces fields 736 * which will be invalid after this function returns. 737 */ 738 *err = U_INTERNAL_PROGRAM_ERROR; 739 return NULL; 740 } 741 pPieces = &stackPieces; 742 } 743 if(pArgs == NULL) { 744 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 745 stackArgs.size = (int32_t)sizeof(stackArgs); 746 pArgs = &stackArgs; 747 } 748 749 pPieces->cnvName[0] = 0; 750 pPieces->locale[0] = 0; 751 pPieces->options = 0; 752 753 pArgs->name = converterName; 754 pArgs->locale = pPieces->locale; 755 pArgs->options = pPieces->options; 756 757 /* In case "name" is NULL we want to open the default converter. */ 758 if (converterName == NULL) { 759 #if U_CHARSET_IS_UTF8 760 pArgs->name = "UTF-8"; 761 return (UConverterSharedData *)converterData[UCNV_UTF8]; 762 #else 763 /* Call ucnv_getDefaultName first to query the name from the OS. 
*/ 764 pArgs->name = ucnv_getDefaultName(); 765 if (pArgs->name == NULL) { 766 *err = U_MISSING_RESOURCE_ERROR; 767 return NULL; 768 } 769 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 770 checkForAlgorithmic = FALSE; 771 mayContainOption = gDefaultConverterContainsOption; 772 /* the default converter name is already canonical */ 773 #endif 774 } 775 else if(UCNV_FAST_IS_UTF8(converterName)) { 776 /* fastpath for UTF-8 */ 777 pArgs->name = "UTF-8"; 778 return (UConverterSharedData *)converterData[UCNV_UTF8]; 779 } 780 else { 781 /* separate the converter name from the options */ 782 parseConverterOptions(converterName, pPieces, pArgs, err); 783 if (U_FAILURE(*err)) { 784 /* Very bad name used. */ 785 return NULL; 786 } 787 788 /* get the canonical converter name */ 789 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 790 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 791 /* 792 * set the input name in case the converter was added 793 * without updating the alias table, or when there is no alias table 794 */ 795 pArgs->name = pPieces->cnvName; 796 } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { 797 *err = U_AMBIGUOUS_ALIAS_WARNING; 798 } 799 } 800 801 /* separate the converter name from the options */ 802 if(mayContainOption && pArgs->name != pPieces->cnvName) { 803 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 804 } 805 806 /* get the shared data for an algorithmic converter, if it is one */ 807 if (checkForAlgorithmic) { 808 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 809 } 810 if (mySharedConverterData == NULL) 811 { 812 /* it is a data-based converter, get its shared data. */ 813 /* Hold the cnvCacheMutex through the whole process of checking the */ 814 /* converter data cache, and adding new entries to the cache */ 815 /* to prevent other threads from modifying the cache during the */ 816 /* process. 
*/ 817 pArgs->nestedLoads=1; 818 pArgs->pkg=NULL; 819 820 umtx_lock(&cnvCacheMutex); 821 mySharedConverterData = ucnv_load(pArgs, err); 822 umtx_unlock(&cnvCacheMutex); 823 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 824 { 825 return NULL; 826 } 827 } 828 829 return mySharedConverterData; 830 } 831 832 U_CAPI UConverter * 833 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 834 { 835 UConverterNamePieces stackPieces; 836 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 837 UConverterSharedData *mySharedConverterData; 838 839 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 840 841 if(U_SUCCESS(*err)) { 842 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 843 844 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 845 846 myUConverter = ucnv_createConverterFromSharedData( 847 myUConverter, mySharedConverterData, 848 &stackArgs, 849 err); 850 851 if(U_SUCCESS(*err)) { 852 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 853 return myUConverter; 854 } 855 } 856 857 /* exit with error */ 858 UTRACE_EXIT_STATUS(*err); 859 return NULL; 860 } 861 862 U_CFUNC UBool 863 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 864 UConverter myUConverter; 865 UConverterNamePieces stackPieces; 866 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 867 UConverterSharedData *mySharedConverterData; 868 869 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 870 871 if(U_SUCCESS(*err)) { 872 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 873 874 stackArgs.onlyTestIsLoadable=TRUE; 875 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 876 ucnv_createConverterFromSharedData( 877 &myUConverter, mySharedConverterData, 878 &stackArgs, 879 err); 880 ucnv_unloadSharedDataIfReady(mySharedConverterData); 881 } 882 883 UTRACE_EXIT_STATUS(*err); 884 return U_SUCCESS(*err); 885 } 886 887 UConverter * 888 
ucnv_createAlgorithmicConverter(UConverter *myUConverter, 889 UConverterType type, 890 const char *locale, uint32_t options, 891 UErrorCode *err) { 892 UConverter *cnv; 893 const UConverterSharedData *sharedData; 894 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 895 896 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 897 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 898 899 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 900 *err = U_ILLEGAL_ARGUMENT_ERROR; 901 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 902 return NULL; 903 } 904 905 sharedData = converterData[type]; 906 /* 907 Checking whether it's an algorithic converter is okay 908 in multithreaded applications because the value never changes. 909 Don't check referenceCounter for any other value. 910 */ 911 if(sharedData == NULL || sharedData->referenceCounter != (uint32_t)~0) { 912 /* not a valid type, or not an algorithmic converter */ 913 *err = U_ILLEGAL_ARGUMENT_ERROR; 914 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 915 return NULL; 916 } 917 918 stackArgs.name = ""; 919 stackArgs.options = options; 920 stackArgs.locale=locale; 921 cnv = ucnv_createConverterFromSharedData( 922 myUConverter, (UConverterSharedData *)sharedData, 923 &stackArgs, err); 924 925 UTRACE_EXIT_PTR_STATUS(cnv, *err); 926 return cnv; 927 } 928 929 U_CFUNC UConverter* 930 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 931 { 932 UConverter *myUConverter; 933 UConverterSharedData *mySharedConverterData; 934 UConverterNamePieces stackPieces; 935 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 936 937 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 938 939 if(U_FAILURE(*err)) { 940 UTRACE_EXIT_STATUS(*err); 941 return NULL; 942 } 943 944 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 945 946 /* first, get the options out of the converterName string */ 947 
stackPieces.cnvName[0] = 0; 948 stackPieces.locale[0] = 0; 949 stackPieces.options = 0; 950 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 951 if (U_FAILURE(*err)) { 952 /* Very bad name used. */ 953 UTRACE_EXIT_STATUS(*err); 954 return NULL; 955 } 956 stackArgs.nestedLoads=1; 957 stackArgs.pkg=packageName; 958 959 /* open the data, unflatten the shared structure */ 960 mySharedConverterData = createConverterFromFile(&stackArgs, err); 961 962 if (U_FAILURE(*err)) { 963 UTRACE_EXIT_STATUS(*err); 964 return NULL; 965 } 966 967 /* create the actual converter */ 968 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 969 970 if (U_FAILURE(*err)) { 971 ucnv_close(myUConverter); 972 UTRACE_EXIT_STATUS(*err); 973 return NULL; 974 } 975 976 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 977 return myUConverter; 978 } 979 980 981 U_CFUNC UConverter* 982 ucnv_createConverterFromSharedData(UConverter *myUConverter, 983 UConverterSharedData *mySharedConverterData, 984 UConverterLoadArgs *pArgs, 985 UErrorCode *err) 986 { 987 UBool isCopyLocal; 988 989 if(U_FAILURE(*err)) { 990 ucnv_unloadSharedDataIfReady(mySharedConverterData); 991 return myUConverter; 992 } 993 if(myUConverter == NULL) 994 { 995 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 996 if(myUConverter == NULL) 997 { 998 *err = U_MEMORY_ALLOCATION_ERROR; 999 ucnv_unloadSharedDataIfReady(mySharedConverterData); 1000 return NULL; 1001 } 1002 isCopyLocal = FALSE; 1003 } else { 1004 isCopyLocal = TRUE; 1005 } 1006 1007 /* initialize the converter */ 1008 uprv_memset(myUConverter, 0, sizeof(UConverter)); 1009 myUConverter->isCopyLocal = isCopyLocal; 1010 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 1011 myUConverter->sharedData = mySharedConverterData; 1012 myUConverter->options = pArgs->options; 1013 if(!pArgs->onlyTestIsLoadable) { 1014 myUConverter->preFromUFirstCP = U_SENTINEL; 1015 
myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 1016 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1017 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1018 myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; 1019 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1020 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1021 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1022 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1023 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1024 } 1025 1026 if(mySharedConverterData->impl->open != NULL) { 1027 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1028 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1029 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1030 ucnv_close(myUConverter); 1031 return NULL; 1032 } 1033 } 1034 1035 return myUConverter; 1036 } 1037 1038 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1039 */ 1040 U_CAPI int32_t U_EXPORT2 1041 ucnv_flushCache () 1042 { 1043 UConverterSharedData *mySharedData = NULL; 1044 int32_t pos; 1045 int32_t tableDeletedNum = 0; 1046 const UHashElement *e; 1047 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1048 int32_t i, remaining; 1049 1050 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1051 1052 /* Close the default converter without creating a new one so that everything will be flushed. 
*/ 1053 u_flushDefaultConverter(); 1054 1055 /*if shared data hasn't even been lazy evaluated yet 1056 * return 0 1057 */ 1058 if (SHARED_DATA_HASHTABLE == NULL) { 1059 UTRACE_EXIT_VALUE((int32_t)0); 1060 return 0; 1061 } 1062 1063 /*creates an enumeration to iterate through every element in the 1064 * table 1065 * 1066 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1067 * accessing or modifying the hash table during the iteration. 1068 * The reference count of an entry may be decremented by 1069 * ucnv_close while the iteration is in process, but this is 1070 * benign. It can't be incremented (in ucnv_createConverter()) 1071 * because the sequence of looking up in the cache + incrementing 1072 * is protected by cnvCacheMutex. 1073 */ 1074 umtx_lock(&cnvCacheMutex); 1075 /* 1076 * double loop: A delta/extension-only converter has a pointer to its base table's 1077 * shared data; the first iteration of the outer loop may see the delta converter 1078 * before the base converter, and unloading the delta converter may get the base 1079 * converter's reference counter down to 0. 
1080 */ 1081 i = 0; 1082 do { 1083 remaining = 0; 1084 pos = -1; 1085 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1086 { 1087 mySharedData = (UConverterSharedData *) e->value.pointer; 1088 /*deletes only if reference counter == 0 */ 1089 if (mySharedData->referenceCounter == 0) 1090 { 1091 tableDeletedNum++; 1092 1093 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1094 1095 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1096 mySharedData->sharedDataCached = FALSE; 1097 ucnv_deleteSharedConverterData (mySharedData); 1098 } else { 1099 ++remaining; 1100 } 1101 } 1102 } while(++i == 1 && remaining > 0); 1103 umtx_unlock(&cnvCacheMutex); 1104 1105 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1106 1107 UTRACE_EXIT_VALUE(tableDeletedNum); 1108 return tableDeletedNum; 1109 } 1110 1111 /* available converters list --------------------------------------------------- */ 1112 1113 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1114 int needInit; 1115 UMTX_CHECK(&cnvCacheMutex, (gAvailableConverters == NULL), needInit); 1116 if (needInit) { 1117 UConverter tempConverter; 1118 UEnumeration *allConvEnum = NULL; 1119 uint16_t idx; 1120 uint16_t localConverterCount; 1121 uint16_t allConverterCount; 1122 UErrorCode localStatus; 1123 const char *converterName; 1124 const char **localConverterList; 1125 1126 allConvEnum = ucnv_openAllNames(pErrorCode); 1127 allConverterCount = uenum_count(allConvEnum, pErrorCode); 1128 if (U_FAILURE(*pErrorCode)) { 1129 return FALSE; 1130 } 1131 1132 /* We can't have more than "*converterTable" converters to open */ 1133 localConverterList = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1134 if (!localConverterList) { 1135 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1136 return FALSE; 1137 } 1138 1139 /* Open the default converter to make sure that it has first dibs in the hash table. 
*/ 1140 localStatus = U_ZERO_ERROR; 1141 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1142 1143 localConverterCount = 0; 1144 1145 for (idx = 0; idx < allConverterCount; idx++) { 1146 localStatus = U_ZERO_ERROR; 1147 converterName = uenum_next(allConvEnum, NULL, &localStatus); 1148 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1149 localConverterList[localConverterCount++] = converterName; 1150 } 1151 } 1152 uenum_close(allConvEnum); 1153 1154 umtx_lock(&cnvCacheMutex); 1155 if (gAvailableConverters == NULL) { 1156 gAvailableConverterCount = localConverterCount; 1157 gAvailableConverters = localConverterList; 1158 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1159 } 1160 else { 1161 uprv_free((char **)localConverterList); 1162 } 1163 umtx_unlock(&cnvCacheMutex); 1164 } 1165 return TRUE; 1166 } 1167 1168 U_CFUNC uint16_t 1169 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1170 if (haveAvailableConverterList(pErrorCode)) { 1171 return gAvailableConverterCount; 1172 } 1173 return 0; 1174 } 1175 1176 U_CFUNC const char * 1177 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1178 if (haveAvailableConverterList(pErrorCode)) { 1179 if (n < gAvailableConverterCount) { 1180 return gAvailableConverters[n]; 1181 } 1182 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1183 } 1184 return NULL; 1185 } 1186 1187 /* default converter name --------------------------------------------------- */ 1188 1189 #if !U_CHARSET_IS_UTF8 1190 /* 1191 Copy the canonical converter name. 1192 ucnv_getDefaultName must be thread safe, which can call this function. 1193 1194 ucnv_setDefaultName calls this function and it doesn't have to be 1195 thread safe because there is no reliable/safe way to reset the 1196 converter in use in all threads. 
If you did reset the converter, you 1197 would not be sure that retrieving a default converter for one string 1198 would be the same type of default converter for a successive string. 1199 Since the name is a returned via ucnv_getDefaultName without copying, 1200 you shouldn't be modifying or deleting the string from a separate thread. 1201 */ 1202 static inline void 1203 internalSetName(const char *name, UErrorCode *status) { 1204 UConverterNamePieces stackPieces; 1205 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 1206 int32_t length=(int32_t)(uprv_strlen(name)); 1207 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1208 const UConverterSharedData *algorithmicSharedData; 1209 1210 stackArgs.name = name; 1211 if(containsOption) { 1212 stackPieces.cnvName[0] = 0; 1213 stackPieces.locale[0] = 0; 1214 stackPieces.options = 0; 1215 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1216 if(U_FAILURE(*status)) { 1217 return; 1218 } 1219 } 1220 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1221 1222 umtx_lock(&cnvCacheMutex); 1223 1224 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1225 gDefaultConverterContainsOption = containsOption; 1226 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1227 gDefaultConverterNameBuffer[length]=0; 1228 1229 /* gDefaultConverterName MUST be the last global var set by this function. */ 1230 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1231 gDefaultConverterName = gDefaultConverterNameBuffer; 1232 1233 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1234 1235 umtx_unlock(&cnvCacheMutex); 1236 } 1237 #endif 1238 1239 /* 1240 * In order to be really thread-safe, the get function would have to take 1241 * a buffer parameter and copy the current string inside a mutex block. 1242 * This implementation only tries to be really thread-safe while 1243 * setting the name. 
1244 * It assumes that setting a pointer is atomic. 1245 */ 1246 1247 U_CAPI const char* U_EXPORT2 1248 ucnv_getDefaultName() { 1249 #if U_CHARSET_IS_UTF8 1250 return "UTF-8"; 1251 #else 1252 /* local variable to be thread-safe */ 1253 const char *name; 1254 1255 /* 1256 Multiple calls to ucnv_getDefaultName must be thread safe, 1257 but ucnv_setDefaultName is not thread safe. 1258 */ 1259 UMTX_CHECK(&cnvCacheMutex, gDefaultConverterName, name); 1260 if(name==NULL) { 1261 UErrorCode errorCode = U_ZERO_ERROR; 1262 UConverter *cnv = NULL; 1263 1264 name = uprv_getDefaultCodepage(); 1265 1266 /* if the name is there, test it out and get the canonical name with options */ 1267 if(name != NULL) { 1268 cnv = ucnv_open(name, &errorCode); 1269 if(U_SUCCESS(errorCode) && cnv != NULL) { 1270 name = ucnv_getName(cnv, &errorCode); 1271 } 1272 } 1273 1274 if(name == NULL || name[0] == 0 1275 || U_FAILURE(errorCode) || cnv == NULL 1276 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1277 { 1278 /* Panic time, let's use a fallback. */ 1279 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1280 name = "US-ASCII"; 1281 /* there is no 'algorithmic' converter for EBCDIC */ 1282 #elif U_PLATFORM == U_PF_OS390 1283 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1284 #else 1285 name = "ibm-37_P100-1995"; 1286 #endif 1287 } 1288 1289 internalSetName(name, &errorCode); 1290 1291 /* The close may make the current name go away. */ 1292 ucnv_close(cnv); 1293 } 1294 1295 return name; 1296 #endif 1297 } 1298 1299 #if U_CHARSET_IS_UTF8 1300 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} 1301 #else 1302 /* 1303 This function is not thread safe, and it can't be thread safe. 1304 See internalSetName or the API reference for details. 
1305 */ 1306 U_CAPI void U_EXPORT2 1307 ucnv_setDefaultName(const char *converterName) { 1308 if(converterName==NULL) { 1309 /* reset to the default codepage */ 1310 gDefaultConverterName=NULL; 1311 } else { 1312 UErrorCode errorCode = U_ZERO_ERROR; 1313 UConverter *cnv = NULL; 1314 const char *name = NULL; 1315 1316 /* if the name is there, test it out and get the canonical name with options */ 1317 cnv = ucnv_open(converterName, &errorCode); 1318 if(U_SUCCESS(errorCode) && cnv != NULL) { 1319 name = ucnv_getName(cnv, &errorCode); 1320 } 1321 1322 if(U_SUCCESS(errorCode) && name!=NULL) { 1323 internalSetName(name, &errorCode); 1324 } 1325 /* else this converter is bad to use. Don't change it to a bad value. */ 1326 1327 /* The close may make the current name go away. */ 1328 ucnv_close(cnv); 1329 1330 /* reset the converter cache */ 1331 u_flushDefaultConverter(); 1332 } 1333 } 1334 #endif 1335 1336 /* data swapping ------------------------------------------------------------ */ 1337 1338 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ 1339 1340 #if !UCONFIG_NO_LEGACY_CONVERSION 1341 1342 U_CAPI int32_t U_EXPORT2 1343 ucnv_swap(const UDataSwapper *ds, 1344 const void *inData, int32_t length, void *outData, 1345 UErrorCode *pErrorCode) { 1346 const UDataInfo *pInfo; 1347 int32_t headerSize; 1348 1349 const uint8_t *inBytes; 1350 uint8_t *outBytes; 1351 1352 uint32_t offset, count, staticDataSize; 1353 int32_t size; 1354 1355 const UConverterStaticData *inStaticData; 1356 UConverterStaticData *outStaticData; 1357 1358 const _MBCSHeader *inMBCSHeader; 1359 _MBCSHeader *outMBCSHeader; 1360 _MBCSHeader mbcsHeader; 1361 uint32_t mbcsHeaderLength; 1362 UBool noFromU=FALSE; 1363 1364 uint8_t outputType; 1365 1366 int32_t maxFastUChar, mbcsIndexLength; 1367 1368 const int32_t *inExtIndexes; 1369 int32_t extOffset; 1370 1371 /* udata_swapDataHeader checks the arguments */ 1372 headerSize=udata_swapDataHeader(ds, inData, length, outData, 
pErrorCode); 1373 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1374 return 0; 1375 } 1376 1377 /* check data format and format version */ 1378 pInfo=(const UDataInfo *)((const char *)inData+4); 1379 if(!( 1380 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 1381 pInfo->dataFormat[1]==0x6e && 1382 pInfo->dataFormat[2]==0x76 && 1383 pInfo->dataFormat[3]==0x74 && 1384 pInfo->formatVersion[0]==6 && 1385 pInfo->formatVersion[1]>=2 1386 )) { 1387 udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", 1388 pInfo->dataFormat[0], pInfo->dataFormat[1], 1389 pInfo->dataFormat[2], pInfo->dataFormat[3], 1390 pInfo->formatVersion[0], pInfo->formatVersion[1]); 1391 *pErrorCode=U_UNSUPPORTED_ERROR; 1392 return 0; 1393 } 1394 1395 inBytes=(const uint8_t *)inData+headerSize; 1396 outBytes=(uint8_t *)outData+headerSize; 1397 1398 /* read the initial UConverterStaticData structure after the UDataInfo header */ 1399 inStaticData=(const UConverterStaticData *)inBytes; 1400 outStaticData=(UConverterStaticData *)outBytes; 1401 1402 if(length<0) { 1403 staticDataSize=ds->readUInt32(inStaticData->structSize); 1404 } else { 1405 length-=headerSize; 1406 if( length<(int32_t)sizeof(UConverterStaticData) || 1407 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 1408 ) { 1409 udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 1410 length); 1411 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1412 return 0; 1413 } 1414 } 1415 1416 if(length>=0) { 1417 /* swap the static data */ 1418 if(inStaticData!=outStaticData) { 1419 uprv_memcpy(outStaticData, inStaticData, staticDataSize); 1420 } 1421 1422 ds->swapArray32(ds, &inStaticData->structSize, 4, 1423 &outStaticData->structSize, pErrorCode); 1424 ds->swapArray32(ds, &inStaticData->codepage, 4, 1425 &outStaticData->codepage, pErrorCode); 1426 1427 ds->swapInvChars(ds, 
inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), 1428 outStaticData->name, pErrorCode); 1429 if(U_FAILURE(*pErrorCode)) { 1430 udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); 1431 return 0; 1432 } 1433 } 1434 1435 inBytes+=staticDataSize; 1436 outBytes+=staticDataSize; 1437 if(length>=0) { 1438 length-=(int32_t)staticDataSize; 1439 } 1440 1441 /* check for supported conversionType values */ 1442 if(inStaticData->conversionType==UCNV_MBCS) { 1443 /* swap MBCS data */ 1444 inMBCSHeader=(const _MBCSHeader *)inBytes; 1445 outMBCSHeader=(_MBCSHeader *)outBytes; 1446 1447 if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { 1448 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1449 length); 1450 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1451 return 0; 1452 } 1453 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 1454 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 1455 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 1456 ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& 1457 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 1458 ) { 1459 mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; 1460 noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); 1461 } else { 1462 udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", 1463 inMBCSHeader->version[0], inMBCSHeader->version[1]); 1464 *pErrorCode=U_UNSUPPORTED_ERROR; 1465 return 0; 1466 } 1467 1468 uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); 1469 mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); 1470 mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); 1471 mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); 1472 mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); 1473 mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); 1474 
mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); 1475 mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); 1476 /* mbcsHeader.options have been read above */ 1477 1478 extOffset=(int32_t)(mbcsHeader.flags>>8); 1479 outputType=(uint8_t)mbcsHeader.flags; 1480 if(noFromU && outputType==MBCS_OUTPUT_1) { 1481 udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); 1482 *pErrorCode=U_UNSUPPORTED_ERROR; 1483 return 0; 1484 } 1485 1486 /* make sure that the output type is known */ 1487 switch(outputType) { 1488 case MBCS_OUTPUT_1: 1489 case MBCS_OUTPUT_2: 1490 case MBCS_OUTPUT_3: 1491 case MBCS_OUTPUT_4: 1492 case MBCS_OUTPUT_3_EUC: 1493 case MBCS_OUTPUT_4_EUC: 1494 case MBCS_OUTPUT_2_SISO: 1495 case MBCS_OUTPUT_EXT_ONLY: 1496 /* OK */ 1497 break; 1498 default: 1499 udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", 1500 outputType); 1501 *pErrorCode=U_UNSUPPORTED_ERROR; 1502 return 0; 1503 } 1504 1505 /* calculate the length of the MBCS data */ 1506 1507 /* 1508 * utf8Friendly MBCS files (mbcsHeader.version 4.3) 1509 * contain an additional mbcsIndex table: 1510 * uint16_t[(maxFastUChar+1)>>6]; 1511 * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). 
1512 */ 1513 maxFastUChar=0; 1514 mbcsIndexLength=0; 1515 if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && 1516 mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 1517 ) { 1518 maxFastUChar=(maxFastUChar<<8)|0xff; 1519 mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ 1520 } 1521 1522 if(extOffset==0) { 1523 size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); 1524 if(!noFromU) { 1525 size+=(int32_t)mbcsHeader.fromUBytesLength; 1526 } 1527 1528 /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ 1529 inExtIndexes=NULL; 1530 } else { 1531 /* there is extension data after the base data, see ucnv_ext.h */ 1532 if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 1533 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 1534 length); 1535 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1536 return 0; 1537 } 1538 1539 inExtIndexes=(const int32_t *)(inBytes+extOffset); 1540 size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); 1541 } 1542 1543 if(length>=0) { 1544 if(length<size) { 1545 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1546 length); 1547 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1548 return 0; 1549 } 1550 1551 /* copy the data for inaccessible bytes */ 1552 if(inBytes!=outBytes) { 1553 uprv_memcpy(outBytes, inBytes, size); 1554 } 1555 1556 /* swap the MBCSHeader, except for the version field */ 1557 count=mbcsHeaderLength*4; 1558 ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, 1559 &outMBCSHeader->countStates, pErrorCode); 1560 1561 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 1562 /* 1563 * extension-only file, 1564 * contains a base name instead of normal base table data 1565 */ 1566 1567 /* swap the base name, between the header and the extension data */ 1568 const char *inBaseName=(const char 
*)inBytes+count; 1569 char *outBaseName=(char *)outBytes+count; 1570 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), 1571 outBaseName, pErrorCode); 1572 } else { 1573 /* normal file with base table data */ 1574 1575 /* swap the state table, 1kB per state */ 1576 offset=count; 1577 count=mbcsHeader.countStates*1024; 1578 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1579 outBytes+offset, pErrorCode); 1580 1581 /* swap the toUFallbacks[] */ 1582 offset+=count; 1583 count=mbcsHeader.countToUFallbacks*8; 1584 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1585 outBytes+offset, pErrorCode); 1586 1587 /* swap the unicodeCodeUnits[] */ 1588 offset=mbcsHeader.offsetToUCodeUnits; 1589 count=mbcsHeader.offsetFromUTable-offset; 1590 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1591 outBytes+offset, pErrorCode); 1592 1593 /* offset to the stage 1 table, independent of the outputType */ 1594 offset=mbcsHeader.offsetFromUTable; 1595 1596 if(outputType==MBCS_OUTPUT_1) { 1597 /* SBCS: swap the fromU tables, all 16 bits wide */ 1598 count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; 1599 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1600 outBytes+offset, pErrorCode); 1601 } else { 1602 /* otherwise: swap the stage tables separately */ 1603 1604 /* stage 1 table: uint16_t[0x440 or 0x40] */ 1605 if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 1606 count=0x440*2; /* for all of Unicode */ 1607 } else { 1608 count=0x40*2; /* only BMP */ 1609 } 1610 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1611 outBytes+offset, pErrorCode); 1612 1613 /* stage 2 table: uint32_t[] */ 1614 offset+=count; 1615 count=mbcsHeader.offsetFromUBytes-offset; 1616 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1617 outBytes+offset, pErrorCode); 1618 1619 /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ 1620 offset=mbcsHeader.offsetFromUBytes; 1621 count= noFromU ? 
0 : mbcsHeader.fromUBytesLength; 1622 switch(outputType) { 1623 case MBCS_OUTPUT_2: 1624 case MBCS_OUTPUT_3_EUC: 1625 case MBCS_OUTPUT_2_SISO: 1626 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1627 outBytes+offset, pErrorCode); 1628 break; 1629 case MBCS_OUTPUT_4: 1630 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1631 outBytes+offset, pErrorCode); 1632 break; 1633 default: 1634 /* just uint8_t[], nothing to swap */ 1635 break; 1636 } 1637 1638 if(mbcsIndexLength!=0) { 1639 offset+=count; 1640 count=mbcsIndexLength; 1641 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1642 outBytes+offset, pErrorCode); 1643 } 1644 } 1645 } 1646 1647 if(extOffset!=0) { 1648 /* swap the extension data */ 1649 inBytes+=extOffset; 1650 outBytes+=extOffset; 1651 1652 /* swap toUTable[] */ 1653 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); 1654 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); 1655 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1656 1657 /* swap toUUChars[] */ 1658 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); 1659 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); 1660 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1661 1662 /* swap fromUTableUChars[] */ 1663 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); 1664 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); 1665 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1666 1667 /* swap fromUTableValues[] */ 1668 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); 1669 /* same length as for fromUTableUChars[] */ 1670 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1671 1672 /* no need to swap fromUBytes[] */ 1673 1674 /* swap fromUStage12[] */ 1675 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); 1676 length=udata_readInt32(ds, 
inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); 1677 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1678 1679 /* swap fromUStage3[] */ 1680 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); 1681 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); 1682 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1683 1684 /* swap fromUStage3b[] */ 1685 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); 1686 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); 1687 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1688 1689 /* swap indexes[] */ 1690 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); 1691 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); 1692 } 1693 } 1694 } else { 1695 udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", 1696 inStaticData->conversionType); 1697 *pErrorCode=U_UNSUPPORTED_ERROR; 1698 return 0; 1699 } 1700 1701 return headerSize+(int32_t)staticDataSize+size; 1702 } 1703 1704 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1705 1706 #endif 1707