1 /* 2 ******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 1996-2010, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************** 7 * 8 * uconv_bld.c: 9 * 10 * Defines functions that are used in the creation/initialization/deletion 11 * of converters and related structures. 12 * uses uconv_io.h routines to access disk information 13 * is used by ucnv.h to implement public API create/delete/flushCache routines 14 * Modification History: 15 * 16 * Date Name Description 17 * 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 * 06/29/2000 helena Major rewrite of the callback interface. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/putil.h" 27 #include "unicode/udata.h" 28 #include "unicode/ucnv.h" 29 #include "unicode/uloc.h" 30 #include "utracimp.h" 31 #include "ucnv_io.h" 32 #include "ucnv_bld.h" 33 #include "ucnvmbcs.h" 34 #include "ucnv_ext.h" 35 #include "ucnv_cnv.h" 36 #include "ucnv_imp.h" 37 #include "uhash.h" 38 #include "umutex.h" 39 #include "cstring.h" 40 #include "cmemory.h" 41 #include "ucln_cmn.h" 42 #include "ustr_cnv.h" 43 44 45 46 #if 0 47 #include <stdio.h> 48 extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); 49 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) 50 #else 51 # define UCNV_DEBUG_LOG(x,y,z) 52 #endif 53 54 static const UConverterSharedData * const 55 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ 56 NULL, NULL, 57 58 #if UCONFIG_NO_LEGACY_CONVERSION 59 NULL, 60 #else 61 &_MBCSData, 62 #endif 63 64 &_Latin1Data, 65 &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData, 66 NULL, 67 68 #if UCONFIG_NO_LEGACY_CONVERSION 69 NULL, 70 NULL, NULL, NULL, NULL, NULL, NULL, 71 NULL, NULL, NULL, NULL, NULL, NULL, 72 NULL, 73 #else 74 &_ISO2022Data, 75 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, 
&_LMBCSData4, &_LMBCSData5, &_LMBCSData6, 76 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, 77 &_HZData, 78 #endif 79 80 &_SCSUData, 81 82 #if UCONFIG_NO_LEGACY_CONVERSION 83 NULL, 84 #else 85 &_ISCIIData, 86 #endif 87 88 &_ASCIIData, 89 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData 90 }; 91 92 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. 93 Also the name should be in lower case and all spaces, dashes and underscores 94 removed 95 */ 96 static struct { 97 const char *name; 98 const UConverterType type; 99 } const cnvNameType[] = { 100 { "bocu1", UCNV_BOCU1 }, 101 { "cesu8", UCNV_CESU8 }, 102 #if !UCONFIG_NO_LEGACY_CONVERSION 103 { "hz",UCNV_HZ }, 104 #endif 105 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 106 #if !UCONFIG_NO_LEGACY_CONVERSION 107 { "iscii", UCNV_ISCII }, 108 { "iso2022", UCNV_ISO_2022 }, 109 #endif 110 { "iso88591", UCNV_LATIN_1 }, 111 #if !UCONFIG_NO_LEGACY_CONVERSION 112 { "lmbcs1", UCNV_LMBCS_1 }, 113 { "lmbcs11",UCNV_LMBCS_11 }, 114 { "lmbcs16",UCNV_LMBCS_16 }, 115 { "lmbcs17",UCNV_LMBCS_17 }, 116 { "lmbcs18",UCNV_LMBCS_18 }, 117 { "lmbcs19",UCNV_LMBCS_19 }, 118 { "lmbcs2", UCNV_LMBCS_2 }, 119 { "lmbcs3", UCNV_LMBCS_3 }, 120 { "lmbcs4", UCNV_LMBCS_4 }, 121 { "lmbcs5", UCNV_LMBCS_5 }, 122 { "lmbcs6", UCNV_LMBCS_6 }, 123 { "lmbcs8", UCNV_LMBCS_8 }, 124 #endif 125 { "scsu", UCNV_SCSU }, 126 { "usascii", UCNV_US_ASCII }, 127 { "utf16", UCNV_UTF16 }, 128 { "utf16be", UCNV_UTF16_BigEndian }, 129 { "utf16le", UCNV_UTF16_LittleEndian }, 130 #if U_IS_BIG_ENDIAN 131 { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, 132 { "utf16platformendian", UCNV_UTF16_BigEndian }, 133 #else 134 { "utf16oppositeendian", UCNV_UTF16_BigEndian}, 135 { "utf16platformendian", UCNV_UTF16_LittleEndian }, 136 #endif 137 { "utf32", UCNV_UTF32 }, 138 { "utf32be", UCNV_UTF32_BigEndian }, 139 { "utf32le", UCNV_UTF32_LittleEndian }, 140 #if U_IS_BIG_ENDIAN 141 { "utf32oppositeendian", 
UCNV_UTF32_LittleEndian }, 142 { "utf32platformendian", UCNV_UTF32_BigEndian }, 143 #else 144 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, 145 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 146 #endif 147 { "utf7", UCNV_UTF7 }, 148 { "utf8", UCNV_UTF8 } 149 }; 150 151 152 /*initializes some global variables */ 153 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 154 static UMTX cnvCacheMutex = NULL; /* Mutex for synchronizing cnv cache access. */ 155 /* Note: the global mutex is used for */ 156 /* reference count updates. */ 157 158 static const char **gAvailableConverters = NULL; 159 static uint16_t gAvailableConverterCount = 0; 160 161 #if !U_CHARSET_IS_UTF8 162 163 /* This contains the resolved converter name. So no further alias lookup is needed again. */ 164 static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ 165 static const char *gDefaultConverterName = NULL; 166 167 /* 168 If the default converter is an algorithmic converter, this is the cached value. 169 We don't cache a full UConverter and clone it because ucnv_clone doesn't have 170 less overhead than an algorithmic open. We don't cache non-algorithmic converters 171 because ucnv_flushCache must be able to unload the default converter and its table. 172 */ 173 static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; 174 175 /* Does gDefaultConverterName have a converter option and require extra parsing? */ 176 static UBool gDefaultConverterContainsOption; 177 178 #endif /* !U_CHARSET_IS_UTF8 */ 179 180 static const char DATA_TYPE[] = "cnv"; 181 182 static void 183 ucnv_flushAvailableConverterCache() { 184 if (gAvailableConverters) { 185 umtx_lock(&cnvCacheMutex); 186 gAvailableConverterCount = 0; 187 uprv_free((char **)gAvailableConverters); 188 gAvailableConverters = NULL; 189 umtx_unlock(&cnvCacheMutex); 190 } 191 } 192 193 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 194 /* in use by open converters. 
*/ 195 /* Not thread safe. */ 196 /* Not supported API. */ 197 static UBool U_CALLCONV ucnv_cleanup(void) { 198 ucnv_flushCache(); 199 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 200 uhash_close(SHARED_DATA_HASHTABLE); 201 SHARED_DATA_HASHTABLE = NULL; 202 } 203 204 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 205 ucnv_flushAvailableConverterCache(); 206 207 #if !U_CHARSET_IS_UTF8 208 gDefaultConverterName = NULL; 209 gDefaultConverterNameBuffer[0] = 0; 210 gDefaultConverterContainsOption = FALSE; 211 gDefaultAlgorithmicSharedData = NULL; 212 #endif 213 214 umtx_destroy(&cnvCacheMutex); /* Don't worry about destroying the mutex even */ 215 /* if the hash table still exists. The mutex */ 216 /* will lazily re-init itself if needed. */ 217 return (SHARED_DATA_HASHTABLE == NULL); 218 } 219 220 static UBool U_CALLCONV 221 isCnvAcceptable(void *context, 222 const char *type, const char *name, 223 const UDataInfo *pInfo) { 224 return (UBool)( 225 pInfo->size>=20 && 226 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 227 pInfo->charsetFamily==U_CHARSET_FAMILY && 228 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 229 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 230 pInfo->dataFormat[1]==0x6e && 231 pInfo->dataFormat[2]==0x76 && 232 pInfo->dataFormat[3]==0x74 && 233 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 234 } 235 236 /** 237 * Un flatten shared data from a UDATA.. 
238 */ 239 static UConverterSharedData* 240 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 241 { 242 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 243 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 244 const UConverterStaticData *source = (const UConverterStaticData *) raw; 245 UConverterSharedData *data; 246 UConverterType type = (UConverterType)source->conversionType; 247 248 if(U_FAILURE(*status)) 249 return NULL; 250 251 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 252 converterData[type] == NULL || 253 converterData[type]->referenceCounter != 1 || 254 source->structSize != sizeof(UConverterStaticData)) 255 { 256 *status = U_INVALID_TABLE_FORMAT; 257 return NULL; 258 } 259 260 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 261 if(data == NULL) { 262 *status = U_MEMORY_ALLOCATION_ERROR; 263 return NULL; 264 } 265 266 /* copy initial values from the static structure for this type */ 267 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 268 269 #if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */ 270 /* 271 * It would be much more efficient if the table were a direct member, not a pointer. 272 * However, that would add to the size of all UConverterSharedData objects 273 * even if they do not use this table (especially algorithmic ones). 274 * If this changes, then the static templates from converterData[type] 275 * need more entries. 276 * 277 * In principle, it would be cleaner if the load() function below 278 * allocated the table. 
279 */ 280 data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable)); 281 if(data->table == NULL) { 282 uprv_free(data); 283 *status = U_MEMORY_ALLOCATION_ERROR; 284 return NULL; 285 } 286 uprv_memset(data->table, 0, sizeof(UConverterTable)); 287 #endif 288 289 data->staticData = source; 290 291 data->sharedDataCached = FALSE; 292 293 /* fill in fields from the loaded data */ 294 data->dataMemory = (void*)pData; /* for future use */ 295 296 if(data->impl->load != NULL) { 297 data->impl->load(data, pArgs, raw + source->structSize, status); 298 if(U_FAILURE(*status)) { 299 uprv_free(data->table); 300 uprv_free(data); 301 return NULL; 302 } 303 } 304 return data; 305 } 306 307 /*Takes an alias name gets an actual converter file name 308 *goes to disk and opens it. 309 *allocates the memory and returns a new UConverter object 310 */ 311 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 312 { 313 UDataMemory *data; 314 UConverterSharedData *sharedData; 315 316 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 317 318 if (U_FAILURE (*err)) { 319 UTRACE_EXIT_STATUS(*err); 320 return NULL; 321 } 322 323 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 324 325 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 326 if(U_FAILURE(*err)) 327 { 328 UTRACE_EXIT_STATUS(*err); 329 return NULL; 330 } 331 332 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 333 if(U_FAILURE(*err)) 334 { 335 udata_close(data); 336 UTRACE_EXIT_STATUS(*err); 337 return NULL; 338 } 339 340 /* 341 * TODO Store pkg in a field in the shared data so that delta-only converters 342 * can load base converters from the same package. 343 * If the pkg name is longer than the field, then either do not load the converter 344 * in the first place, or just set the pkg field to "". 
345 */ 346 347 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 348 return sharedData; 349 } 350 351 /*returns a converter type from a string 352 */ 353 static const UConverterSharedData * 354 getAlgorithmicTypeFromName(const char *realName) 355 { 356 uint32_t mid, start, limit; 357 uint32_t lastMid; 358 int result; 359 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 360 361 /* Lower case and remove ignoreable characters. */ 362 ucnv_io_stripForCompare(strippedName, realName); 363 364 /* do a binary search for the alias */ 365 start = 0; 366 limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]); 367 mid = limit; 368 lastMid = UINT32_MAX; 369 370 for (;;) { 371 mid = (uint32_t)((start + limit) / 2); 372 if (lastMid == mid) { /* Have we moved? */ 373 break; /* We haven't moved, and it wasn't found. */ 374 } 375 lastMid = mid; 376 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 377 378 if (result < 0) { 379 limit = mid; 380 } else if (result > 0) { 381 start = mid; 382 } else { 383 return converterData[cnvNameType[mid].type]; 384 } 385 } 386 387 return NULL; 388 } 389 390 /* 391 * Based on the number of known converters, this determines how many times larger 392 * the shared data hash table should be. When on small platforms, or just a couple 393 * of converters are used, this number should be 2. When memory is plentiful, or 394 * when ucnv_countAvailable is ever used with a lot of available converters, 395 * this should be 4. 396 * Larger numbers reduce the number of hash collisions, but use more memory. 397 */ 398 #define UCNV_CACHE_LOAD_FACTOR 2 399 400 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 401 /* Will always be called with the cnvCacheMutex alrady being held */ 402 /* by the calling function. 
*/
/* Stores the shared data in the SHARED_DATA_HASHTABLE, creating the table
 * (and registering ucnv_cleanup) on first use.
 *
 * data->sharedDataCached is set to TRUE only if the table really holds this
 * entry afterwards. If the table could not be created or the entry could not
 * be stored, the flag is left/reset to FALSE so that ucnv_unload() will free
 * the data once its reference count drops to zero.
 *
 * @param data The shared data
 */
static void
ucnv_shareConverterData(UConverterSharedData * data)
{
    UErrorCode err = U_ZERO_ERROR;

    /* Lazily create the hash table itself. */
    if (SHARED_DATA_HASHTABLE == NULL)
    {
        SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
                            ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
                            &err);
        ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);

        if (U_FAILURE(err))
            return;
    }

    /* ### check to see if the element is not already there! */

    uhash_put(SHARED_DATA_HASHTABLE,
            (void*) data->staticData->name, /* Okay to cast away const as long as
            keyDeleter == NULL */
            data,
            &err);

    /*
     * Mark it shared only if it actually is in the table. The status from
     * uhash_put() used to be ignored and the flag set unconditionally, which
     * would leave an un-cached entry that nobody ever frees. Looking the entry
     * up keeps the flag consistent with the table whatever the put reported.
     */
    data->sharedDataCached =
        (UBool)(uhash_get(SHARED_DATA_HASHTABLE, data->staticData->name) == (void *)data);

    UCNV_DEBUG_LOG("put", data->staticData->name,data);

}

/*  Look up a converter name in the shared data cache.                    */
/*    cnvCacheMutex must be held by the caller to protect the hash table.
*/ 449 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 450 * @param name The name of the shared data 451 * @return the shared data from the SHARED_DATA_HASHTABLE 452 */ 453 static UConverterSharedData * 454 ucnv_getSharedConverterData(const char *name) 455 { 456 /*special case when no Table has yet been created we return NULL */ 457 if (SHARED_DATA_HASHTABLE == NULL) 458 { 459 return NULL; 460 } 461 else 462 { 463 UConverterSharedData *rc; 464 465 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 466 UCNV_DEBUG_LOG("get",name,rc); 467 return rc; 468 } 469 } 470 471 /*frees the string of memory blocks associates with a sharedConverter 472 *if and only if the referenceCounter == 0 473 */ 474 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to 475 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and 476 * returns TRUE, 477 * otherwise returns FALSE 478 * @param sharedConverterData The shared data 479 * @return if not it frees all the memory stemming from sharedConverterData and 480 * returns TRUE, otherwise returns FALSE 481 */ 482 static UBool 483 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) 484 { 485 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); 486 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); 487 488 if (deadSharedData->referenceCounter > 0) { 489 UTRACE_EXIT_VALUE((int32_t)FALSE); 490 return FALSE; 491 } 492 493 if (deadSharedData->impl->unload != NULL) { 494 deadSharedData->impl->unload(deadSharedData); 495 } 496 497 if(deadSharedData->dataMemory != NULL) 498 { 499 UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; 500 udata_close(data); 501 } 502 503 if(deadSharedData->table != NULL) 504 { 505 uprv_free(deadSharedData->table); 506 } 507 508 #if 0 509 /* if the static data is actually owned by the shared data */ 510 /* 
enable if we ever have this situation. */ 511 if(deadSharedData->staticDataOwned == TRUE) /* see ucnv_bld.h */ 512 { 513 uprv_free((void*)deadSharedData->staticData); 514 } 515 #endif 516 517 #if 0 518 /* Zap it ! */ 519 uprv_memset(deadSharedData->0, sizeof(*deadSharedData)); 520 #endif 521 522 uprv_free(deadSharedData); 523 524 UTRACE_EXIT_VALUE((int32_t)TRUE); 525 return TRUE; 526 } 527 528 /** 529 * Load a non-algorithmic converter. 530 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 531 */ 532 UConverterSharedData * 533 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 534 UConverterSharedData *mySharedConverterData; 535 536 if(err == NULL || U_FAILURE(*err)) { 537 return NULL; 538 } 539 540 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 541 /* application-provided converters are not currently cached */ 542 return createConverterFromFile(pArgs, err); 543 } 544 545 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 546 if (mySharedConverterData == NULL) 547 { 548 /*Not cached, we need to stream it in from file */ 549 mySharedConverterData = createConverterFromFile(pArgs, err); 550 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 551 { 552 return NULL; 553 } 554 else if (!pArgs->onlyTestIsLoadable) 555 { 556 /* share it with other library clients */ 557 ucnv_shareConverterData(mySharedConverterData); 558 } 559 } 560 else 561 { 562 /* The data for this converter was already in the cache. */ 563 /* Update the reference counter on the shared data: one more client */ 564 mySharedConverterData->referenceCounter++; 565 } 566 567 return mySharedConverterData; 568 } 569 570 /** 571 * Unload a non-algorithmic converter. 572 * It must be sharedData->referenceCounter != ~0 573 * and this function must be called inside umtx_lock(&cnvCacheMutex). 
574 */ 575 void 576 ucnv_unload(UConverterSharedData *sharedData) { 577 if(sharedData != NULL) { 578 if (sharedData->referenceCounter > 0) { 579 sharedData->referenceCounter--; 580 } 581 582 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 583 ucnv_deleteSharedConverterData(sharedData); 584 } 585 } 586 } 587 588 void 589 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 590 { 591 /* 592 Checking whether it's an algorithic converter is okay 593 in multithreaded applications because the value never changes. 594 Don't check referenceCounter for any other value. 595 */ 596 if(sharedData != NULL && sharedData->referenceCounter != ~0) { 597 umtx_lock(&cnvCacheMutex); 598 ucnv_unload(sharedData); 599 umtx_unlock(&cnvCacheMutex); 600 } 601 } 602 603 void 604 ucnv_incrementRefCount(UConverterSharedData *sharedData) 605 { 606 /* 607 Checking whether it's an algorithic converter is okay 608 in multithreaded applications because the value never changes. 609 Don't check referenceCounter for any other value. 610 */ 611 if(sharedData != NULL && sharedData->referenceCounter != ~0) { 612 umtx_lock(&cnvCacheMutex); 613 sharedData->referenceCounter++; 614 umtx_unlock(&cnvCacheMutex); 615 } 616 } 617 618 /* 619 * *pPieces must be initialized. 620 * The name without options will be copied to pPieces->cnvName. 621 * The locale and options will be copied to pPieces only if present in inName, 622 * otherwise the existing values in pPieces remain. 623 * *pArgs will be set to the pPieces values. 
624 */ 625 static void 626 parseConverterOptions(const char *inName, 627 UConverterNamePieces *pPieces, 628 UConverterLoadArgs *pArgs, 629 UErrorCode *err) 630 { 631 char *cnvName = pPieces->cnvName; 632 char c; 633 int32_t len = 0; 634 635 pArgs->name=inName; 636 pArgs->locale=pPieces->locale; 637 pArgs->options=pPieces->options; 638 639 /* copy the converter name itself to cnvName */ 640 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 641 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 642 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 643 pPieces->cnvName[0]=0; 644 return; 645 } 646 *cnvName++=c; 647 inName++; 648 } 649 *cnvName=0; 650 pArgs->name=pPieces->cnvName; 651 652 /* parse options. No more name copying should occur. */ 653 while((c=*inName)!=0) { 654 if(c==UCNV_OPTION_SEP_CHAR) { 655 ++inName; 656 } 657 658 /* inName is behind an option separator */ 659 if(uprv_strncmp(inName, "locale=", 7)==0) { 660 /* do not modify locale itself in case we have multiple locale options */ 661 char *dest=pPieces->locale; 662 663 /* copy the locale option value */ 664 inName+=7; 665 len=0; 666 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 667 ++inName; 668 669 if(++len>=ULOC_FULLNAME_CAPACITY) { 670 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 671 pPieces->locale[0]=0; 672 return; 673 } 674 675 *dest++=c; 676 } 677 *dest=0; 678 } else if(uprv_strncmp(inName, "version=", 8)==0) { 679 /* copy the version option value into bits 3..0 of pPieces->options */ 680 inName+=8; 681 c=*inName; 682 if(c==0) { 683 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 684 return; 685 } else if((uint8_t)(c-'0')<10) { 686 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 687 ++inName; 688 } 689 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 690 inName+=8; 691 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 692 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 
693 } else { 694 /* ignore any other options until we define some */ 695 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 696 } 697 if(c==0) { 698 return; 699 } 700 } 701 } 702 } 703 704 /*Logic determines if the converter is Algorithmic AND/OR cached 705 *depending on that: 706 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 707 * -Get it from a Hashtable (Data=X, Cached=TRUE) 708 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 709 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 710 */ 711 UConverterSharedData * 712 ucnv_loadSharedData(const char *converterName, 713 UConverterNamePieces *pPieces, 714 UConverterLoadArgs *pArgs, 715 UErrorCode * err) { 716 UConverterNamePieces stackPieces; 717 UConverterLoadArgs stackArgs; 718 UConverterSharedData *mySharedConverterData = NULL; 719 UErrorCode internalErrorCode = U_ZERO_ERROR; 720 UBool mayContainOption = TRUE; 721 UBool checkForAlgorithmic = TRUE; 722 723 if (U_FAILURE (*err)) { 724 return NULL; 725 } 726 727 if(pPieces == NULL) { 728 if(pArgs != NULL) { 729 /* 730 * Bad: We may set pArgs pointers to stackPieces fields 731 * which will be invalid after this function returns. 732 */ 733 *err = U_INTERNAL_PROGRAM_ERROR; 734 return NULL; 735 } 736 pPieces = &stackPieces; 737 } 738 if(pArgs == NULL) { 739 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 740 stackArgs.size = (int32_t)sizeof(stackArgs); 741 pArgs = &stackArgs; 742 } 743 744 pPieces->cnvName[0] = 0; 745 pPieces->locale[0] = 0; 746 pPieces->options = 0; 747 748 pArgs->name = converterName; 749 pArgs->locale = pPieces->locale; 750 pArgs->options = pPieces->options; 751 752 /* In case "name" is NULL we want to open the default converter. */ 753 if (converterName == NULL) { 754 #if U_CHARSET_IS_UTF8 755 pArgs->name = "UTF-8"; 756 return (UConverterSharedData *)converterData[UCNV_UTF8]; 757 #else 758 /* Call ucnv_getDefaultName first to query the name from the OS. 
*/ 759 pArgs->name = ucnv_getDefaultName(); 760 if (pArgs->name == NULL) { 761 *err = U_MISSING_RESOURCE_ERROR; 762 return NULL; 763 } 764 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 765 checkForAlgorithmic = FALSE; 766 mayContainOption = gDefaultConverterContainsOption; 767 /* the default converter name is already canonical */ 768 #endif 769 } 770 else if(UCNV_FAST_IS_UTF8(converterName)) { 771 /* fastpath for UTF-8 */ 772 pArgs->name = "UTF-8"; 773 return (UConverterSharedData *)converterData[UCNV_UTF8]; 774 } 775 else { 776 /* separate the converter name from the options */ 777 parseConverterOptions(converterName, pPieces, pArgs, err); 778 if (U_FAILURE(*err)) { 779 /* Very bad name used. */ 780 return NULL; 781 } 782 783 /* get the canonical converter name */ 784 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 785 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 786 /* 787 * set the input name in case the converter was added 788 * without updating the alias table, or when there is no alias table 789 */ 790 pArgs->name = pPieces->cnvName; 791 } 792 } 793 794 /* separate the converter name from the options */ 795 if(mayContainOption && pArgs->name != pPieces->cnvName) { 796 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 797 } 798 799 /* get the shared data for an algorithmic converter, if it is one */ 800 if (checkForAlgorithmic) { 801 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 802 } 803 if (mySharedConverterData == NULL) 804 { 805 /* it is a data-based converter, get its shared data. */ 806 /* Hold the cnvCacheMutex through the whole process of checking the */ 807 /* converter data cache, and adding new entries to the cache */ 808 /* to prevent other threads from modifying the cache during the */ 809 /* process. 
*/ 810 pArgs->nestedLoads=1; 811 pArgs->pkg=NULL; 812 813 umtx_lock(&cnvCacheMutex); 814 mySharedConverterData = ucnv_load(pArgs, err); 815 umtx_unlock(&cnvCacheMutex); 816 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 817 { 818 return NULL; 819 } 820 } 821 822 return mySharedConverterData; 823 } 824 825 UConverter * 826 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 827 { 828 UConverterNamePieces stackPieces; 829 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 830 UConverterSharedData *mySharedConverterData; 831 832 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 833 834 if(U_SUCCESS(*err)) { 835 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 836 837 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 838 839 myUConverter = ucnv_createConverterFromSharedData( 840 myUConverter, mySharedConverterData, 841 &stackArgs, 842 err); 843 844 if(U_SUCCESS(*err)) { 845 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 846 return myUConverter; 847 } 848 } 849 850 /* exit with error */ 851 UTRACE_EXIT_STATUS(*err); 852 return NULL; 853 } 854 855 U_CFUNC UBool 856 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 857 UConverter myUConverter; 858 UConverterNamePieces stackPieces; 859 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 860 UConverterSharedData *mySharedConverterData; 861 862 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 863 864 if(U_SUCCESS(*err)) { 865 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 866 867 stackArgs.onlyTestIsLoadable=TRUE; 868 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 869 ucnv_createConverterFromSharedData( 870 &myUConverter, mySharedConverterData, 871 &stackArgs, 872 err); 873 ucnv_unloadSharedDataIfReady(mySharedConverterData); 874 } 875 876 UTRACE_EXIT_STATUS(*err); 877 return U_SUCCESS(*err); 878 } 879 880 
UConverter * 881 ucnv_createAlgorithmicConverter(UConverter *myUConverter, 882 UConverterType type, 883 const char *locale, uint32_t options, 884 UErrorCode *err) { 885 UConverter *cnv; 886 const UConverterSharedData *sharedData; 887 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 888 889 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 890 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 891 892 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 893 *err = U_ILLEGAL_ARGUMENT_ERROR; 894 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 895 return NULL; 896 } 897 898 sharedData = converterData[type]; 899 /* 900 Checking whether it's an algorithic converter is okay 901 in multithreaded applications because the value never changes. 902 Don't check referenceCounter for any other value. 903 */ 904 if(sharedData == NULL || sharedData->referenceCounter != ~0) { 905 /* not a valid type, or not an algorithmic converter */ 906 *err = U_ILLEGAL_ARGUMENT_ERROR; 907 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 908 return NULL; 909 } 910 911 stackArgs.name = ""; 912 stackArgs.options = options; 913 stackArgs.locale=locale; 914 cnv = ucnv_createConverterFromSharedData( 915 myUConverter, (UConverterSharedData *)sharedData, 916 &stackArgs, err); 917 918 UTRACE_EXIT_PTR_STATUS(cnv, *err); 919 return cnv; 920 } 921 922 UConverter* 923 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 924 { 925 UConverter *myUConverter; 926 UConverterSharedData *mySharedConverterData; 927 UConverterNamePieces stackPieces; 928 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 929 930 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 931 932 if(U_FAILURE(*err)) { 933 UTRACE_EXIT_STATUS(*err); 934 return NULL; 935 } 936 937 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 938 939 /* first, get the options out of the 
converterName string */ 940 stackPieces.cnvName[0] = 0; 941 stackPieces.locale[0] = 0; 942 stackPieces.options = 0; 943 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 944 if (U_FAILURE(*err)) { 945 /* Very bad name used. */ 946 UTRACE_EXIT_STATUS(*err); 947 return NULL; 948 } 949 stackArgs.nestedLoads=1; 950 stackArgs.pkg=packageName; 951 952 /* open the data, unflatten the shared structure */ 953 mySharedConverterData = createConverterFromFile(&stackArgs, err); 954 955 if (U_FAILURE(*err)) { 956 UTRACE_EXIT_STATUS(*err); 957 return NULL; 958 } 959 960 /* create the actual converter */ 961 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 962 963 if (U_FAILURE(*err)) { 964 ucnv_close(myUConverter); 965 UTRACE_EXIT_STATUS(*err); 966 return NULL; 967 } 968 969 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 970 return myUConverter; 971 } 972 973 974 UConverter* 975 ucnv_createConverterFromSharedData(UConverter *myUConverter, 976 UConverterSharedData *mySharedConverterData, 977 UConverterLoadArgs *pArgs, 978 UErrorCode *err) 979 { 980 UBool isCopyLocal; 981 982 if(U_FAILURE(*err)) { 983 ucnv_unloadSharedDataIfReady(mySharedConverterData); 984 return myUConverter; 985 } 986 if(myUConverter == NULL) 987 { 988 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 989 if(myUConverter == NULL) 990 { 991 *err = U_MEMORY_ALLOCATION_ERROR; 992 ucnv_unloadSharedDataIfReady(mySharedConverterData); 993 return NULL; 994 } 995 isCopyLocal = FALSE; 996 } else { 997 isCopyLocal = TRUE; 998 } 999 1000 /* initialize the converter */ 1001 uprv_memset(myUConverter, 0, sizeof(UConverter)); 1002 myUConverter->isCopyLocal = isCopyLocal; 1003 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 1004 myUConverter->sharedData = mySharedConverterData; 1005 myUConverter->options = pArgs->options; 1006 if(!pArgs->onlyTestIsLoadable) { 1007 myUConverter->preFromUFirstCP = U_SENTINEL; 1008 
myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 1009 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1010 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1011 myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; 1012 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1013 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1014 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1015 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1016 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1017 } 1018 1019 if(mySharedConverterData->impl->open != NULL) { 1020 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1021 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1022 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1023 ucnv_close(myUConverter); 1024 return NULL; 1025 } 1026 } 1027 1028 return myUConverter; 1029 } 1030 1031 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1032 */ 1033 U_CAPI int32_t U_EXPORT2 1034 ucnv_flushCache () 1035 { 1036 UConverterSharedData *mySharedData = NULL; 1037 int32_t pos; 1038 int32_t tableDeletedNum = 0; 1039 const UHashElement *e; 1040 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1041 int32_t i, remaining; 1042 1043 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1044 1045 /* Close the default converter without creating a new one so that everything will be flushed. 
*/ 1046 u_flushDefaultConverter(); 1047 1048 /*if shared data hasn't even been lazy evaluated yet 1049 * return 0 1050 */ 1051 if (SHARED_DATA_HASHTABLE == NULL) { 1052 UTRACE_EXIT_VALUE((int32_t)0); 1053 return 0; 1054 } 1055 1056 /*creates an enumeration to iterate through every element in the 1057 * table 1058 * 1059 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1060 * accessing or modifying the hash table during the iteration. 1061 * The reference count of an entry may be decremented by 1062 * ucnv_close while the iteration is in process, but this is 1063 * benign. It can't be incremented (in ucnv_createConverter()) 1064 * because the sequence of looking up in the cache + incrementing 1065 * is protected by cnvCacheMutex. 1066 */ 1067 umtx_lock(&cnvCacheMutex); 1068 /* 1069 * double loop: A delta/extension-only converter has a pointer to its base table's 1070 * shared data; the first iteration of the outer loop may see the delta converter 1071 * before the base converter, and unloading the delta converter may get the base 1072 * converter's reference counter down to 0. 
1073 */ 1074 i = 0; 1075 do { 1076 remaining = 0; 1077 pos = -1; 1078 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1079 { 1080 mySharedData = (UConverterSharedData *) e->value.pointer; 1081 /*deletes only if reference counter == 0 */ 1082 if (mySharedData->referenceCounter == 0) 1083 { 1084 tableDeletedNum++; 1085 1086 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1087 1088 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1089 mySharedData->sharedDataCached = FALSE; 1090 ucnv_deleteSharedConverterData (mySharedData); 1091 } else { 1092 ++remaining; 1093 } 1094 } 1095 } while(++i == 1 && remaining > 0); 1096 umtx_unlock(&cnvCacheMutex); 1097 1098 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1099 1100 UTRACE_EXIT_VALUE(tableDeletedNum); 1101 return tableDeletedNum; 1102 } 1103 1104 /* available converters list --------------------------------------------------- */ 1105 1106 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1107 int needInit; 1108 UMTX_CHECK(&cnvCacheMutex, (gAvailableConverters == NULL), needInit); 1109 if (needInit) { 1110 UConverter tempConverter; 1111 UEnumeration *allConvEnum = NULL; 1112 uint16_t idx; 1113 uint16_t localConverterCount; 1114 uint16_t allConverterCount; 1115 UErrorCode localStatus; 1116 const char *converterName; 1117 const char **localConverterList; 1118 1119 allConvEnum = ucnv_openAllNames(pErrorCode); 1120 allConverterCount = uenum_count(allConvEnum, pErrorCode); 1121 if (U_FAILURE(*pErrorCode)) { 1122 return FALSE; 1123 } 1124 1125 /* We can't have more than "*converterTable" converters to open */ 1126 localConverterList = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1127 if (!localConverterList) { 1128 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1129 return FALSE; 1130 } 1131 1132 /* Open the default converter to make sure that it has first dibs in the hash table. 
*/ 1133 localStatus = U_ZERO_ERROR; 1134 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1135 1136 localConverterCount = 0; 1137 1138 for (idx = 0; idx < allConverterCount; idx++) { 1139 localStatus = U_ZERO_ERROR; 1140 converterName = uenum_next(allConvEnum, NULL, &localStatus); 1141 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1142 localConverterList[localConverterCount++] = converterName; 1143 } 1144 } 1145 uenum_close(allConvEnum); 1146 1147 umtx_lock(&cnvCacheMutex); 1148 if (gAvailableConverters == NULL) { 1149 gAvailableConverterCount = localConverterCount; 1150 gAvailableConverters = localConverterList; 1151 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1152 } 1153 else { 1154 uprv_free((char **)localConverterList); 1155 } 1156 umtx_unlock(&cnvCacheMutex); 1157 } 1158 return TRUE; 1159 } 1160 1161 U_CFUNC uint16_t 1162 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1163 if (haveAvailableConverterList(pErrorCode)) { 1164 return gAvailableConverterCount; 1165 } 1166 return 0; 1167 } 1168 1169 U_CFUNC const char * 1170 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1171 if (haveAvailableConverterList(pErrorCode)) { 1172 if (n < gAvailableConverterCount) { 1173 return gAvailableConverters[n]; 1174 } 1175 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1176 } 1177 return NULL; 1178 } 1179 1180 /* default converter name --------------------------------------------------- */ 1181 1182 #if !U_CHARSET_IS_UTF8 1183 /* 1184 Copy the canonical converter name. 1185 ucnv_getDefaultName must be thread safe, which can call this function. 1186 1187 ucnv_setDefaultName calls this function and it doesn't have to be 1188 thread safe because there is no reliable/safe way to reset the 1189 converter in use in all threads. 
If you did reset the converter, you 1190 would not be sure that retrieving a default converter for one string 1191 would be the same type of default converter for a successive string. 1192 Since the name is a returned via ucnv_getDefaultName without copying, 1193 you shouldn't be modifying or deleting the string from a separate thread. 1194 */ 1195 static U_INLINE void 1196 internalSetName(const char *name, UErrorCode *status) { 1197 UConverterNamePieces stackPieces; 1198 UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 1199 int32_t length=(int32_t)(uprv_strlen(name)); 1200 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1201 const UConverterSharedData *algorithmicSharedData; 1202 1203 stackArgs.name = name; 1204 if(containsOption) { 1205 stackPieces.cnvName[0] = 0; 1206 stackPieces.locale[0] = 0; 1207 stackPieces.options = 0; 1208 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1209 if(U_FAILURE(*status)) { 1210 return; 1211 } 1212 } 1213 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1214 1215 umtx_lock(&cnvCacheMutex); 1216 1217 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1218 gDefaultConverterContainsOption = containsOption; 1219 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1220 gDefaultConverterNameBuffer[length]=0; 1221 1222 /* gDefaultConverterName MUST be the last global var set by this function. */ 1223 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1224 gDefaultConverterName = gDefaultConverterNameBuffer; 1225 1226 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1227 1228 umtx_unlock(&cnvCacheMutex); 1229 } 1230 #endif 1231 1232 /* 1233 * In order to be really thread-safe, the get function would have to take 1234 * a buffer parameter and copy the current string inside a mutex block. 1235 * This implementation only tries to be really thread-safe while 1236 * setting the name. 
1237 * It assumes that setting a pointer is atomic. 1238 */ 1239 1240 U_CAPI const char* U_EXPORT2 1241 ucnv_getDefaultName() { 1242 #if U_CHARSET_IS_UTF8 1243 return "UTF-8"; 1244 #else 1245 /* local variable to be thread-safe */ 1246 const char *name; 1247 1248 /* 1249 Multiple calls to ucnv_getDefaultName must be thread safe, 1250 but ucnv_setDefaultName is not thread safe. 1251 */ 1252 UMTX_CHECK(&cnvCacheMutex, gDefaultConverterName, name); 1253 if(name==NULL) { 1254 UErrorCode errorCode = U_ZERO_ERROR; 1255 UConverter *cnv = NULL; 1256 1257 name = uprv_getDefaultCodepage(); 1258 1259 /* if the name is there, test it out and get the canonical name with options */ 1260 if(name != NULL) { 1261 cnv = ucnv_open(name, &errorCode); 1262 if(U_SUCCESS(errorCode) && cnv != NULL) { 1263 name = ucnv_getName(cnv, &errorCode); 1264 } 1265 } 1266 1267 if(name == NULL || name[0] == 0 1268 || U_FAILURE(errorCode) || cnv == NULL 1269 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1270 { 1271 /* Panic time, let's use a fallback. */ 1272 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1273 name = "US-ASCII"; 1274 /* there is no 'algorithmic' converter for EBCDIC */ 1275 #elif defined(OS390) 1276 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1277 #else 1278 name = "ibm-37_P100-1995"; 1279 #endif 1280 } 1281 1282 internalSetName(name, &errorCode); 1283 1284 /* The close may make the current name go away. */ 1285 ucnv_close(cnv); 1286 } 1287 1288 return name; 1289 #endif 1290 } 1291 1292 /* 1293 This function is not thread safe, and it can't be thread safe. 1294 See internalSetName or the API reference for details. 
1295 */ 1296 U_CAPI void U_EXPORT2 1297 ucnv_setDefaultName(const char *converterName) { 1298 #if !U_CHARSET_IS_UTF8 1299 if(converterName==NULL) { 1300 /* reset to the default codepage */ 1301 gDefaultConverterName=NULL; 1302 } else { 1303 UErrorCode errorCode = U_ZERO_ERROR; 1304 UConverter *cnv = NULL; 1305 const char *name = NULL; 1306 1307 /* if the name is there, test it out and get the canonical name with options */ 1308 cnv = ucnv_open(converterName, &errorCode); 1309 if(U_SUCCESS(errorCode) && cnv != NULL) { 1310 name = ucnv_getName(cnv, &errorCode); 1311 } 1312 1313 if(U_SUCCESS(errorCode) && name!=NULL) { 1314 internalSetName(name, &errorCode); 1315 } 1316 /* else this converter is bad to use. Don't change it to a bad value. */ 1317 1318 /* The close may make the current name go away. */ 1319 ucnv_close(cnv); 1320 1321 /* reset the converter cache */ 1322 u_flushDefaultConverter(); 1323 } 1324 #endif 1325 } 1326 1327 /* data swapping ------------------------------------------------------------ */ 1328 1329 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ 1330 1331 #if !UCONFIG_NO_LEGACY_CONVERSION 1332 1333 U_CAPI int32_t U_EXPORT2 1334 ucnv_swap(const UDataSwapper *ds, 1335 const void *inData, int32_t length, void *outData, 1336 UErrorCode *pErrorCode) { 1337 const UDataInfo *pInfo; 1338 int32_t headerSize; 1339 1340 const uint8_t *inBytes; 1341 uint8_t *outBytes; 1342 1343 uint32_t offset, count, staticDataSize; 1344 int32_t size; 1345 1346 const UConverterStaticData *inStaticData; 1347 UConverterStaticData *outStaticData; 1348 1349 const _MBCSHeader *inMBCSHeader; 1350 _MBCSHeader *outMBCSHeader; 1351 _MBCSHeader mbcsHeader; 1352 uint32_t mbcsHeaderLength; 1353 UBool noFromU=FALSE; 1354 1355 uint8_t outputType; 1356 1357 int32_t maxFastUChar, mbcsIndexLength; 1358 1359 const int32_t *inExtIndexes; 1360 int32_t extOffset; 1361 1362 /* udata_swapDataHeader checks the arguments */ 1363 
/*
 * Swaps an ICU .cnv conversion table with the given UDataSwapper.
 *
 * ds          swapper providing the read/swap/print callbacks used below
 * inData      input data, starting with the data header
 * length      number of input bytes; when negative, nothing is copied or
 *             swapped and only the sizes are read to compute the result
 * outData     output buffer; may be the same memory as inData
 * pErrorCode  in/out error code
 *
 * Only dataFormat "cnvt" with formatVersion 6.x (x>=2) and
 * conversionType UCNV_MBCS is accepted; anything else yields
 * U_UNSUPPORTED_ERROR. Too-short input yields U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * Returns headerSize + static data size + MBCS data size, or 0 on error.
 */
U_CAPI int32_t U_EXPORT2
ucnv_swap(const UDataSwapper *ds,
          const void *inData, int32_t length, void *outData,
          UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t offset, count, staticDataSize;
    int32_t size;

    const UConverterStaticData *inStaticData;
    UConverterStaticData *outStaticData;

    const _MBCSHeader *inMBCSHeader;
    _MBCSHeader *outMBCSHeader;
    _MBCSHeader mbcsHeader;     /* local copy of the header with fields read through ds */
    uint32_t mbcsHeaderLength;
    UBool noFromU=FALSE;

    uint8_t outputType;

    int32_t maxFastUChar, mbcsIndexLength;

    const int32_t *inExtIndexes;
    int32_t extOffset;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x63 &&   /* dataFormat="cnvt" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x76 &&
        pInfo->dataFormat[3]==0x74 &&
        pInfo->formatVersion[0]==6 &&
        pInfo->formatVersion[1]>=2
    )) {
        udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0], pInfo->formatVersion[1]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;

    /* read the initial UConverterStaticData structure after the UDataInfo header */
    inStaticData=(const UConverterStaticData *)inBytes;
    outStaticData=(UConverterStaticData *)outBytes;

    if(length<0) {
        /* negative length: no bounds to check, just read the structure size */
        staticDataSize=ds->readUInt32(inStaticData->structSize);
    } else {
        /* from here on, length counts the bytes after the data header */
        length-=headerSize;
        if( length<sizeof(UConverterStaticData) ||
            (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
        ) {
            udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length>=0) {
        /* swap the static data */
        if(inStaticData!=outStaticData) {
            /* copy everything first; only selected fields are swapped below */
            uprv_memcpy(outStaticData, inStaticData, staticDataSize);
        }

        ds->swapArray32(ds, &inStaticData->structSize, 4,
                           &outStaticData->structSize, pErrorCode);
        ds->swapArray32(ds, &inStaticData->codepage, 4,
                           &outStaticData->codepage, pErrorCode);

        ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name),
                            outStaticData->name, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swap(): error swapping converter name\n");
            return 0;
        }
    }

    /* advance past the static data; length now counts the bytes after both headers */
    inBytes+=staticDataSize;
    outBytes+=staticDataSize;
    if(length>=0) {
        length-=(int32_t)staticDataSize;
    }

    /* check for supported conversionType values */
    if(inStaticData->conversionType==UCNV_MBCS) {
        /* swap MBCS data */
        inMBCSHeader=(const _MBCSHeader *)inBytes;
        outMBCSHeader=(_MBCSHeader *)outBytes;

        if(0<=length && length<sizeof(_MBCSHeader)) {
            udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
                                length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
        /* accepted header versions: 4.x with x>=1, or 5.x with x>=3 and no unknown incompatible option bits */
        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
                  ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))&
                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
        ) {
            mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK;
            noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0);
        } else {
            udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n",
                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* the version bytes are copied as they are; the other fields are read through ds */
        uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4);
        mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates);
        mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks);
        mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits);
        mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable);
        mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes);
        mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags);
        mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength);
        /* mbcsHeader.options have been read above */

        /* flags: the low 8 bits are the output type, the remaining bits the extension offset */
        extOffset=(int32_t)(mbcsHeader.flags>>8);
        outputType=(uint8_t)mbcsHeader.flags;
        if(noFromU && outputType==MBCS_OUTPUT_1) {
            udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n");
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* make sure that the output type is known */
        switch(outputType) {
        case MBCS_OUTPUT_1:
        case MBCS_OUTPUT_2:
        case MBCS_OUTPUT_3:
        case MBCS_OUTPUT_4:
        case MBCS_OUTPUT_3_EUC:
        case MBCS_OUTPUT_4_EUC:
        case MBCS_OUTPUT_2_SISO:
        case MBCS_OUTPUT_EXT_ONLY:
            /* OK */
            break;
        default:
            udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n",
                             outputType);
            *pErrorCode=U_UNSUPPORTED_ERROR;
            return 0;
        }

        /* calculate the length of the MBCS data */

        /*
         * utf8Friendly MBCS files (mbcsHeader.version 4.3)
         * contain an additional mbcsIndex table:
         *   uint16_t[(maxFastUChar+1)>>6];
         * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff).
         */
        maxFastUChar=0;
        mbcsIndexLength=0;
        if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 &&
            mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0
        ) {
            maxFastUChar=(maxFastUChar<<8)|0xff;
            mbcsIndexLength=((maxFastUChar+1)>>6)*2;  /* number of bytes */
        }

        if(extOffset==0) {
            /* no extension data: the size follows from the fromU bytes offset, the index and (unless noFromU) the fromU bytes */
            size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength);
            if(!noFromU) {
                size+=(int32_t)mbcsHeader.fromUBytesLength;
            }

            /* avoid compiler warnings - not otherwise necessary, and the value does not matter */
            inExtIndexes=NULL;
        } else {
            /* there is extension data after the base data, see ucnv_ext.h */
            if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
                udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
                                 length);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            inExtIndexes=(const int32_t *)(inBytes+extOffset);
            size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]);
        }

        /* everything below writes output and is skipped for a negative length */
        if(length>=0) {
            if(length<size) {
                udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
                                 length);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            /* copy the data for inaccessible bytes */
            if(inBytes!=outBytes) {
                uprv_memcpy(outBytes, inBytes, size);
            }

            /* swap the MBCSHeader, except for the version field */
            count=mbcsHeaderLength*4;
            ds->swapArray32(ds, &inMBCSHeader->countStates, count-4,
                               &outMBCSHeader->countStates, pErrorCode);

            if(outputType==MBCS_OUTPUT_EXT_ONLY) {
                /*
                 * extension-only file,
                 * contains a base name instead of normal base table data
                 */

                /* swap the base name, between the header and the extension data */
                const char *inBaseName=(const char *)inBytes+count;
                char *outBaseName=(char *)outBytes+count;
                ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName),
                                    outBaseName, pErrorCode);
            } else {
                /* normal file with base table data */

                /* swap the state table, 1kB per state */
                offset=count;
                count=mbcsHeader.countStates*1024;
                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* swap the toUFallbacks[] */
                offset+=count;
                count=mbcsHeader.countToUFallbacks*8;
                ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* swap the unicodeCodeUnits[] */
                offset=mbcsHeader.offsetToUCodeUnits;
                count=mbcsHeader.offsetFromUTable-offset;
                ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                   outBytes+offset, pErrorCode);

                /* offset to the stage 1 table, independent of the outputType */
                offset=mbcsHeader.offsetFromUTable;

                if(outputType==MBCS_OUTPUT_1) {
                    /* SBCS: swap the fromU tables, all 16 bits wide */
                    count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength;
                    ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);
                } else {
                    /* otherwise: swap the stage tables separately */

                    /* stage 1 table: uint16_t[0x440 or 0x40] */
                    if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
                        count=0x440*2; /* for all of Unicode */
                    } else {
                        count=0x40*2; /* only BMP */
                    }
                    ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);

                    /* stage 2 table: uint32_t[] */
                    offset+=count;
                    count=mbcsHeader.offsetFromUBytes-offset;
                    ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                       outBytes+offset, pErrorCode);

                    /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */
                    offset=mbcsHeader.offsetFromUBytes;
                    count= noFromU ? 0 : mbcsHeader.fromUBytesLength;
                    switch(outputType) {
                    case MBCS_OUTPUT_2:
                    case MBCS_OUTPUT_3_EUC:
                    case MBCS_OUTPUT_2_SISO:
                        ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                        break;
                    case MBCS_OUTPUT_4:
                        ds->swapArray32(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                        break;
                    default:
                        /* just uint8_t[], nothing to swap */
                        break;
                    }

                    /* the mbcsIndex table, if present, directly follows the result bytes */
                    if(mbcsIndexLength!=0) {
                        offset+=count;
                        count=mbcsIndexLength;
                        ds->swapArray16(ds, inBytes+offset, (int32_t)count,
                                           outBytes+offset, pErrorCode);
                    }
                }
            }

            if(extOffset!=0) {
                /* swap the extension data */
                /* note: from here on, "length" is reused as an element count read from the indexes */
                inBytes+=extOffset;
                outBytes+=extOffset;

                /* swap toUTable[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]);
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* swap toUUChars[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUTableUChars[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUTableValues[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]);
                /* same length as for fromUTableUChars[] */
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* no need to swap fromUBytes[] */

                /* swap fromUStage12[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUStage3[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]);
                ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode);

                /* swap fromUStage3b[] */
                offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]);
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]);
                ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode);

                /* swap indexes[] */
                /* done last: the unswapped input indexes were still being read above */
                length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]);
                ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode);
            }
        }
    } else {
        udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n",
                         inStaticData->conversionType);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    return headerSize+(int32_t)staticDataSize+size;
}