/*
 ********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1996-2012, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 *
 *  uconv_bld.cpp:
 *
 *  Defines functions that are used in the creation/initialization/deletion
 *  of converters and related structures.
 *  uses uconv_io.h routines to access disk information
 *  is used by ucnv.h to implement public API create/delete/flushCache routines
 * Modification History:
 *
 *   Date        Name        Description
 *
 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
 *   06/29/2000  helena      Major rewrite of the callback interface.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/putil.h"
#include "unicode/udata.h"
#include "unicode/ucnv.h"
#include "unicode/uloc.h"
#include "putilimp.h"
#include "utracimp.h"
#include "ucnv_io.h"
#include "ucnv_bld.h"
#include "ucnvmbcs.h"
#include "ucnv_ext.h"
#include "ucnv_cnv.h"
#include "ucnv_imp.h"
#include "uhash.h"
#include "umutex.h"
#include "cstring.h"
#include "cmemory.h"
#include "ucln_cmn.h"
#include "ustr_cnv.h"



/*
 * Debug logging hook for cache operations ("put", "get", "del").
 * Disabled by default: the macro expands to nothing unless the "#if 0" is flipped.
 */
#if 0
#include <stdio.h>
extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l);
#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__)
#else
# define UCNV_DEBUG_LOG(x,y,z)
#endif

/*
 * Built-in shared-data templates, indexed by UConverterType.
 * A NULL slot means there is no implementation for that type in this build
 * (several slots are NULL only when UCONFIG_NO_LEGACY_CONVERSION is set).
 * The functions in this file tell the entries apart by referenceCounter:
 * (uint32_t)~0 marks an algorithmic converter, 1 marks a template that may be
 * cloned for a data file.
 * The order of the entries must stay in sync with the UConverterType values
 * because the table is indexed directly with them.
 */
static const UConverterSharedData * const
converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
    NULL, NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_MBCSData,
#endif

    &_Latin1Data,
    &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
    NULL,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
#else
    &_ISO2022Data,
    &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
    &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
    &_HZData,
#endif

    &_SCSUData,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_ISCIIData,
#endif

    &_ASCIIData,
    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,

#if UCONFIG_NO_LEGACY_CONVERSION
    NULL,
#else
    &_CompoundTextData
#endif
};

/* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
   Also the name should be in lower case and all spaces, dashes and underscores
   removed.
   The "platformendian"/"oppositeendian" names are resolved at compile time
   through U_IS_BIG_ENDIAN.
*/
static struct {
  const char *name;
  const UConverterType type;
} const cnvNameType[] = {
  { "bocu1", UCNV_BOCU1 },
  { "cesu8", UCNV_CESU8 },
#if !UCONFIG_NO_LEGACY_CONVERSION
  { "hz",UCNV_HZ },
#endif
  { "imapmailboxname", UCNV_IMAP_MAILBOX },
#if !UCONFIG_NO_LEGACY_CONVERSION
  { "iscii", UCNV_ISCII },
  { "iso2022", UCNV_ISO_2022 },
#endif
  { "iso88591", UCNV_LATIN_1 },
#if !UCONFIG_NO_LEGACY_CONVERSION
  { "lmbcs1", UCNV_LMBCS_1 },
  { "lmbcs11",UCNV_LMBCS_11 },
  { "lmbcs16",UCNV_LMBCS_16 },
  { "lmbcs17",UCNV_LMBCS_17 },
  { "lmbcs18",UCNV_LMBCS_18 },
  { "lmbcs19",UCNV_LMBCS_19 },
  { "lmbcs2", UCNV_LMBCS_2 },
  { "lmbcs3", UCNV_LMBCS_3 },
  { "lmbcs4", UCNV_LMBCS_4 },
  { "lmbcs5", UCNV_LMBCS_5 },
  { "lmbcs6", UCNV_LMBCS_6 },
  { "lmbcs8", UCNV_LMBCS_8 },
#endif
  { "scsu", UCNV_SCSU },
  { "usascii", UCNV_US_ASCII },
  { "utf16", UCNV_UTF16 },
  { "utf16be", UCNV_UTF16_BigEndian },
  { "utf16le", UCNV_UTF16_LittleEndian },
#if U_IS_BIG_ENDIAN
  { "utf16oppositeendian", UCNV_UTF16_LittleEndian },
  { "utf16platformendian", UCNV_UTF16_BigEndian },
#else
  { "utf16oppositeendian", UCNV_UTF16_BigEndian},
  { "utf16platformendian", UCNV_UTF16_LittleEndian },
#endif
  { "utf32", UCNV_UTF32 },
  { "utf32be", UCNV_UTF32_BigEndian },
  { "utf32le", UCNV_UTF32_LittleEndian },
#if U_IS_BIG_ENDIAN
  { "utf32oppositeendian", UCNV_UTF32_LittleEndian },
  { "utf32platformendian", UCNV_UTF32_BigEndian },
#else
  { "utf32oppositeendian", UCNV_UTF32_BigEndian },
  { "utf32platformendian", UCNV_UTF32_LittleEndian },
#endif
  { "utf7", UCNV_UTF7 },
  { "utf8", UCNV_UTF8 },
  { "x11compoundtext", UCNV_COMPOUND_TEXT}
};


/*initializes some global variables */
/* Cache of loaded (data-based) shared converter data, keyed by converter name. Created lazily. */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER;  /* Mutex for synchronizing cnv cache access. */
                                                    /*  Note:  the global mutex is used for      */
                                                    /*         reference count updates.          */

/* Lazily built list of available converter names; freed by ucnv_flushAvailableConverterCache(). */
static const char **gAvailableConverters = NULL;
static uint16_t gAvailableConverterCount = 0;

#if !U_CHARSET_IS_UTF8

/* This contains the resolved converter name. So no further alias lookup is needed again. */
static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */
static const char *gDefaultConverterName = NULL;

/*
If the default converter is an algorithmic converter, this is the cached value.
We don't cache a full UConverter and clone it because ucnv_clone doesn't have
less overhead than an algorithmic open. We don't cache non-algorithmic converters
because ucnv_flushCache must be able to unload the default converter and its table.
*/
static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL;

/* Does gDefaultConverterName have a converter option and require extra parsing?
*/ 184 static UBool gDefaultConverterContainsOption; 185 186 #endif /* !U_CHARSET_IS_UTF8 */ 187 188 static const char DATA_TYPE[] = "cnv"; 189 190 static void 191 ucnv_flushAvailableConverterCache() { 192 if (gAvailableConverters) { 193 umtx_lock(&cnvCacheMutex); 194 gAvailableConverterCount = 0; 195 uprv_free((char **)gAvailableConverters); 196 gAvailableConverters = NULL; 197 umtx_unlock(&cnvCacheMutex); 198 } 199 } 200 201 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 202 /* in use by open converters. */ 203 /* Not thread safe. */ 204 /* Not supported API. */ 205 static UBool U_CALLCONV ucnv_cleanup(void) { 206 ucnv_flushCache(); 207 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 208 uhash_close(SHARED_DATA_HASHTABLE); 209 SHARED_DATA_HASHTABLE = NULL; 210 } 211 212 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 213 ucnv_flushAvailableConverterCache(); 214 215 #if !U_CHARSET_IS_UTF8 216 gDefaultConverterName = NULL; 217 gDefaultConverterNameBuffer[0] = 0; 218 gDefaultConverterContainsOption = FALSE; 219 gDefaultAlgorithmicSharedData = NULL; 220 #endif 221 222 return (SHARED_DATA_HASHTABLE == NULL); 223 } 224 225 static UBool U_CALLCONV 226 isCnvAcceptable(void * /*context*/, 227 const char * /*type*/, const char * /*name*/, 228 const UDataInfo *pInfo) { 229 return (UBool)( 230 pInfo->size>=20 && 231 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 232 pInfo->charsetFamily==U_CHARSET_FAMILY && 233 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 234 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 235 pInfo->dataFormat[1]==0x6e && 236 pInfo->dataFormat[2]==0x76 && 237 pInfo->dataFormat[3]==0x74 && 238 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 239 } 240 241 /** 242 * Un flatten shared data from a UDATA.. 
243 */ 244 static UConverterSharedData* 245 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 246 { 247 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 248 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 249 const UConverterStaticData *source = (const UConverterStaticData *) raw; 250 UConverterSharedData *data; 251 UConverterType type = (UConverterType)source->conversionType; 252 253 if(U_FAILURE(*status)) 254 return NULL; 255 256 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 257 converterData[type] == NULL || 258 converterData[type]->referenceCounter != 1 || 259 source->structSize != sizeof(UConverterStaticData)) 260 { 261 *status = U_INVALID_TABLE_FORMAT; 262 return NULL; 263 } 264 265 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 266 if(data == NULL) { 267 *status = U_MEMORY_ALLOCATION_ERROR; 268 return NULL; 269 } 270 271 /* copy initial values from the static structure for this type */ 272 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 273 274 #if 0 /* made UConverterMBCSTable part of UConverterSharedData -- markus 20031107 */ 275 /* 276 * It would be much more efficient if the table were a direct member, not a pointer. 277 * However, that would add to the size of all UConverterSharedData objects 278 * even if they do not use this table (especially algorithmic ones). 279 * If this changes, then the static templates from converterData[type] 280 * need more entries. 281 * 282 * In principle, it would be cleaner if the load() function below 283 * allocated the table. 
284 */ 285 data->table = (UConverterTable *)uprv_malloc(sizeof(UConverterTable)); 286 if(data->table == NULL) { 287 uprv_free(data); 288 *status = U_MEMORY_ALLOCATION_ERROR; 289 return NULL; 290 } 291 uprv_memset(data->table, 0, sizeof(UConverterTable)); 292 #endif 293 294 data->staticData = source; 295 296 data->sharedDataCached = FALSE; 297 298 /* fill in fields from the loaded data */ 299 data->dataMemory = (void*)pData; /* for future use */ 300 301 if(data->impl->load != NULL) { 302 data->impl->load(data, pArgs, raw + source->structSize, status); 303 if(U_FAILURE(*status)) { 304 uprv_free(data->table); 305 uprv_free(data); 306 return NULL; 307 } 308 } 309 return data; 310 } 311 312 /*Takes an alias name gets an actual converter file name 313 *goes to disk and opens it. 314 *allocates the memory and returns a new UConverter object 315 */ 316 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 317 { 318 UDataMemory *data; 319 UConverterSharedData *sharedData; 320 321 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 322 323 if (U_FAILURE (*err)) { 324 UTRACE_EXIT_STATUS(*err); 325 return NULL; 326 } 327 328 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 329 330 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 331 if(U_FAILURE(*err)) 332 { 333 UTRACE_EXIT_STATUS(*err); 334 return NULL; 335 } 336 337 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 338 if(U_FAILURE(*err)) 339 { 340 udata_close(data); 341 UTRACE_EXIT_STATUS(*err); 342 return NULL; 343 } 344 345 /* 346 * TODO Store pkg in a field in the shared data so that delta-only converters 347 * can load base converters from the same package. 348 * If the pkg name is longer than the field, then either do not load the converter 349 * in the first place, or just set the pkg field to "". 
350 */ 351 352 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 353 return sharedData; 354 } 355 356 /*returns a converter type from a string 357 */ 358 static const UConverterSharedData * 359 getAlgorithmicTypeFromName(const char *realName) 360 { 361 uint32_t mid, start, limit; 362 uint32_t lastMid; 363 int result; 364 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 365 366 /* Lower case and remove ignoreable characters. */ 367 ucnv_io_stripForCompare(strippedName, realName); 368 369 /* do a binary search for the alias */ 370 start = 0; 371 limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]); 372 mid = limit; 373 lastMid = UINT32_MAX; 374 375 for (;;) { 376 mid = (uint32_t)((start + limit) / 2); 377 if (lastMid == mid) { /* Have we moved? */ 378 break; /* We haven't moved, and it wasn't found. */ 379 } 380 lastMid = mid; 381 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 382 383 if (result < 0) { 384 limit = mid; 385 } else if (result > 0) { 386 start = mid; 387 } else { 388 return converterData[cnvNameType[mid].type]; 389 } 390 } 391 392 return NULL; 393 } 394 395 /* 396 * Based on the number of known converters, this determines how many times larger 397 * the shared data hash table should be. When on small platforms, or just a couple 398 * of converters are used, this number should be 2. When memory is plentiful, or 399 * when ucnv_countAvailable is ever used with a lot of available converters, 400 * this should be 4. 401 * Larger numbers reduce the number of hash collisions, but use more memory. 402 */ 403 #define UCNV_CACHE_LOAD_FACTOR 2 404 405 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 406 /* Will always be called with the cnvCacheMutex alrady being held */ 407 /* by the calling function. 
*/
/* Stores the shared data in the SHARED_DATA_HASHTABLE, keyed by its static
 * converter name, creating the table on first use.
 *
 * data->sharedDataCached is set to TRUE only if the data really ended up in
 * the table. If the table cannot be created or the insertion fails, the flag
 * stays FALSE so that ucnv_unload() frees the data when its last user closes
 * it; otherwise it would be marked "cached" while being unreachable from the
 * cache, and never be released.
 *
 * @param data The shared data
 */
static void
ucnv_shareConverterData(UConverterSharedData * data)
{
    UErrorCode err = U_ZERO_ERROR;
    /*Lazy evaluates the Hashtable itself */
    /*void *sanity = NULL;*/

    if (SHARED_DATA_HASHTABLE == NULL)
    {
        SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL,
                            ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR,
                            &err);
        ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup);

        if (U_FAILURE(err))
            return;
    }

    /* ### check to see if the element is not already there! */

    /*
    sanity =   ucnv_getSharedConverterData (data->staticData->name);
    if(sanity != NULL)
    {
    UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity);
    }
    UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity);
    */

    uhash_put(SHARED_DATA_HASHTABLE,
            (void*) data->staticData->name, /* Okay to cast away const as long as
            keyDeleter == NULL */
            data,
            &err);

    /* Mark it shared -- but only if it is actually in the table now. */
    data->sharedDataCached = (UBool)U_SUCCESS(err);

    UCNV_DEBUG_LOG("put", data->staticData->name,data);

}

/*  Look up a converter name in the shared data cache.                    */
/*    cnvCacheMutex must be held by the caller to protect the hash table.
*/ 454 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 455 * @param name The name of the shared data 456 * @return the shared data from the SHARED_DATA_HASHTABLE 457 */ 458 static UConverterSharedData * 459 ucnv_getSharedConverterData(const char *name) 460 { 461 /*special case when no Table has yet been created we return NULL */ 462 if (SHARED_DATA_HASHTABLE == NULL) 463 { 464 return NULL; 465 } 466 else 467 { 468 UConverterSharedData *rc; 469 470 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 471 UCNV_DEBUG_LOG("get",name,rc); 472 return rc; 473 } 474 } 475 476 /*frees the string of memory blocks associates with a sharedConverter 477 *if and only if the referenceCounter == 0 478 */ 479 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to 480 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and 481 * returns TRUE, 482 * otherwise returns FALSE 483 * @param sharedConverterData The shared data 484 * @return if not it frees all the memory stemming from sharedConverterData and 485 * returns TRUE, otherwise returns FALSE 486 */ 487 static UBool 488 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) 489 { 490 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); 491 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); 492 493 if (deadSharedData->referenceCounter > 0) { 494 UTRACE_EXIT_VALUE((int32_t)FALSE); 495 return FALSE; 496 } 497 498 if (deadSharedData->impl->unload != NULL) { 499 deadSharedData->impl->unload(deadSharedData); 500 } 501 502 if(deadSharedData->dataMemory != NULL) 503 { 504 UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; 505 udata_close(data); 506 } 507 508 if(deadSharedData->table != NULL) 509 { 510 uprv_free(deadSharedData->table); 511 } 512 513 #if 0 514 /* if the static data is actually owned by the shared data */ 515 /* 
enable if we ever have this situation. */ 516 if(deadSharedData->staticDataOwned == TRUE) /* see ucnv_bld.h */ 517 { 518 uprv_free((void*)deadSharedData->staticData); 519 } 520 #endif 521 522 #if 0 523 /* Zap it ! */ 524 uprv_memset(deadSharedData->0, sizeof(*deadSharedData)); 525 #endif 526 527 uprv_free(deadSharedData); 528 529 UTRACE_EXIT_VALUE((int32_t)TRUE); 530 return TRUE; 531 } 532 533 /** 534 * Load a non-algorithmic converter. 535 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 536 */ 537 UConverterSharedData * 538 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 539 UConverterSharedData *mySharedConverterData; 540 541 if(err == NULL || U_FAILURE(*err)) { 542 return NULL; 543 } 544 545 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 546 /* application-provided converters are not currently cached */ 547 return createConverterFromFile(pArgs, err); 548 } 549 550 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 551 if (mySharedConverterData == NULL) 552 { 553 /*Not cached, we need to stream it in from file */ 554 mySharedConverterData = createConverterFromFile(pArgs, err); 555 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 556 { 557 return NULL; 558 } 559 else if (!pArgs->onlyTestIsLoadable) 560 { 561 /* share it with other library clients */ 562 ucnv_shareConverterData(mySharedConverterData); 563 } 564 } 565 else 566 { 567 /* The data for this converter was already in the cache. */ 568 /* Update the reference counter on the shared data: one more client */ 569 mySharedConverterData->referenceCounter++; 570 } 571 572 return mySharedConverterData; 573 } 574 575 /** 576 * Unload a non-algorithmic converter. 577 * It must be sharedData->referenceCounter != ~0 578 * and this function must be called inside umtx_lock(&cnvCacheMutex). 
579 */ 580 U_CAPI void 581 ucnv_unload(UConverterSharedData *sharedData) { 582 if(sharedData != NULL) { 583 if (sharedData->referenceCounter > 0) { 584 sharedData->referenceCounter--; 585 } 586 587 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 588 ucnv_deleteSharedConverterData(sharedData); 589 } 590 } 591 } 592 593 U_CFUNC void 594 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 595 { 596 /* 597 Checking whether it's an algorithic converter is okay 598 in multithreaded applications because the value never changes. 599 Don't check referenceCounter for any other value. 600 */ 601 if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { 602 umtx_lock(&cnvCacheMutex); 603 ucnv_unload(sharedData); 604 umtx_unlock(&cnvCacheMutex); 605 } 606 } 607 608 U_CFUNC void 609 ucnv_incrementRefCount(UConverterSharedData *sharedData) 610 { 611 /* 612 Checking whether it's an algorithic converter is okay 613 in multithreaded applications because the value never changes. 614 Don't check referenceCounter for any other value. 615 */ 616 if(sharedData != NULL && sharedData->referenceCounter != (uint32_t)~0) { 617 umtx_lock(&cnvCacheMutex); 618 sharedData->referenceCounter++; 619 umtx_unlock(&cnvCacheMutex); 620 } 621 } 622 623 /* 624 * *pPieces must be initialized. 625 * The name without options will be copied to pPieces->cnvName. 626 * The locale and options will be copied to pPieces only if present in inName, 627 * otherwise the existing values in pPieces remain. 628 * *pArgs will be set to the pPieces values. 
629 */ 630 static void 631 parseConverterOptions(const char *inName, 632 UConverterNamePieces *pPieces, 633 UConverterLoadArgs *pArgs, 634 UErrorCode *err) 635 { 636 char *cnvName = pPieces->cnvName; 637 char c; 638 int32_t len = 0; 639 640 pArgs->name=inName; 641 pArgs->locale=pPieces->locale; 642 pArgs->options=pPieces->options; 643 644 /* copy the converter name itself to cnvName */ 645 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 646 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 647 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 648 pPieces->cnvName[0]=0; 649 return; 650 } 651 *cnvName++=c; 652 inName++; 653 } 654 *cnvName=0; 655 pArgs->name=pPieces->cnvName; 656 657 /* parse options. No more name copying should occur. */ 658 while((c=*inName)!=0) { 659 if(c==UCNV_OPTION_SEP_CHAR) { 660 ++inName; 661 } 662 663 /* inName is behind an option separator */ 664 if(uprv_strncmp(inName, "locale=", 7)==0) { 665 /* do not modify locale itself in case we have multiple locale options */ 666 char *dest=pPieces->locale; 667 668 /* copy the locale option value */ 669 inName+=7; 670 len=0; 671 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 672 ++inName; 673 674 if(++len>=ULOC_FULLNAME_CAPACITY) { 675 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 676 pPieces->locale[0]=0; 677 return; 678 } 679 680 *dest++=c; 681 } 682 *dest=0; 683 } else if(uprv_strncmp(inName, "version=", 8)==0) { 684 /* copy the version option value into bits 3..0 of pPieces->options */ 685 inName+=8; 686 c=*inName; 687 if(c==0) { 688 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 689 return; 690 } else if((uint8_t)(c-'0')<10) { 691 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 692 ++inName; 693 } 694 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 695 inName+=8; 696 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 697 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 
698 } else { 699 /* ignore any other options until we define some */ 700 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 701 } 702 if(c==0) { 703 return; 704 } 705 } 706 } 707 } 708 709 /*Logic determines if the converter is Algorithmic AND/OR cached 710 *depending on that: 711 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 712 * -Get it from a Hashtable (Data=X, Cached=TRUE) 713 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 714 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 715 */ 716 U_CFUNC UConverterSharedData * 717 ucnv_loadSharedData(const char *converterName, 718 UConverterNamePieces *pPieces, 719 UConverterLoadArgs *pArgs, 720 UErrorCode * err) { 721 UConverterNamePieces stackPieces; 722 UConverterLoadArgs stackArgs; 723 UConverterSharedData *mySharedConverterData = NULL; 724 UErrorCode internalErrorCode = U_ZERO_ERROR; 725 UBool mayContainOption = TRUE; 726 UBool checkForAlgorithmic = TRUE; 727 728 if (U_FAILURE (*err)) { 729 return NULL; 730 } 731 732 if(pPieces == NULL) { 733 if(pArgs != NULL) { 734 /* 735 * Bad: We may set pArgs pointers to stackPieces fields 736 * which will be invalid after this function returns. 737 */ 738 *err = U_INTERNAL_PROGRAM_ERROR; 739 return NULL; 740 } 741 pPieces = &stackPieces; 742 } 743 if(pArgs == NULL) { 744 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 745 stackArgs.size = (int32_t)sizeof(stackArgs); 746 pArgs = &stackArgs; 747 } 748 749 pPieces->cnvName[0] = 0; 750 pPieces->locale[0] = 0; 751 pPieces->options = 0; 752 753 pArgs->name = converterName; 754 pArgs->locale = pPieces->locale; 755 pArgs->options = pPieces->options; 756 757 /* In case "name" is NULL we want to open the default converter. */ 758 if (converterName == NULL) { 759 #if U_CHARSET_IS_UTF8 760 pArgs->name = "UTF-8"; 761 return (UConverterSharedData *)converterData[UCNV_UTF8]; 762 #else 763 /* Call ucnv_getDefaultName first to query the name from the OS. 
*/ 764 pArgs->name = ucnv_getDefaultName(); 765 if (pArgs->name == NULL) { 766 *err = U_MISSING_RESOURCE_ERROR; 767 return NULL; 768 } 769 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 770 checkForAlgorithmic = FALSE; 771 mayContainOption = gDefaultConverterContainsOption; 772 /* the default converter name is already canonical */ 773 #endif 774 } 775 else if(UCNV_FAST_IS_UTF8(converterName)) { 776 /* fastpath for UTF-8 */ 777 pArgs->name = "UTF-8"; 778 return (UConverterSharedData *)converterData[UCNV_UTF8]; 779 } 780 else { 781 /* separate the converter name from the options */ 782 parseConverterOptions(converterName, pPieces, pArgs, err); 783 if (U_FAILURE(*err)) { 784 /* Very bad name used. */ 785 return NULL; 786 } 787 788 /* get the canonical converter name */ 789 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 790 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 791 /* 792 * set the input name in case the converter was added 793 * without updating the alias table, or when there is no alias table 794 */ 795 pArgs->name = pPieces->cnvName; 796 } 797 } 798 799 /* separate the converter name from the options */ 800 if(mayContainOption && pArgs->name != pPieces->cnvName) { 801 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 802 } 803 804 /* get the shared data for an algorithmic converter, if it is one */ 805 if (checkForAlgorithmic) { 806 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 807 } 808 if (mySharedConverterData == NULL) 809 { 810 /* it is a data-based converter, get its shared data. */ 811 /* Hold the cnvCacheMutex through the whole process of checking the */ 812 /* converter data cache, and adding new entries to the cache */ 813 /* to prevent other threads from modifying the cache during the */ 814 /* process. 
*/ 815 pArgs->nestedLoads=1; 816 pArgs->pkg=NULL; 817 818 umtx_lock(&cnvCacheMutex); 819 mySharedConverterData = ucnv_load(pArgs, err); 820 umtx_unlock(&cnvCacheMutex); 821 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 822 { 823 return NULL; 824 } 825 } 826 827 return mySharedConverterData; 828 } 829 830 U_CAPI UConverter * 831 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 832 { 833 UConverterNamePieces stackPieces; 834 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 835 UConverterSharedData *mySharedConverterData; 836 837 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 838 839 if(U_SUCCESS(*err)) { 840 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 841 842 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 843 844 myUConverter = ucnv_createConverterFromSharedData( 845 myUConverter, mySharedConverterData, 846 &stackArgs, 847 err); 848 849 if(U_SUCCESS(*err)) { 850 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 851 return myUConverter; 852 } 853 } 854 855 /* exit with error */ 856 UTRACE_EXIT_STATUS(*err); 857 return NULL; 858 } 859 860 U_CFUNC UBool 861 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 862 UConverter myUConverter; 863 UConverterNamePieces stackPieces; 864 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 865 UConverterSharedData *mySharedConverterData; 866 867 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 868 869 if(U_SUCCESS(*err)) { 870 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 871 872 stackArgs.onlyTestIsLoadable=TRUE; 873 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 874 ucnv_createConverterFromSharedData( 875 &myUConverter, mySharedConverterData, 876 &stackArgs, 877 err); 878 ucnv_unloadSharedDataIfReady(mySharedConverterData); 879 } 880 881 UTRACE_EXIT_STATUS(*err); 882 return U_SUCCESS(*err); 883 } 884 885 UConverter * 886 
ucnv_createAlgorithmicConverter(UConverter *myUConverter, 887 UConverterType type, 888 const char *locale, uint32_t options, 889 UErrorCode *err) { 890 UConverter *cnv; 891 const UConverterSharedData *sharedData; 892 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 893 894 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 895 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 896 897 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 898 *err = U_ILLEGAL_ARGUMENT_ERROR; 899 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 900 return NULL; 901 } 902 903 sharedData = converterData[type]; 904 /* 905 Checking whether it's an algorithic converter is okay 906 in multithreaded applications because the value never changes. 907 Don't check referenceCounter for any other value. 908 */ 909 if(sharedData == NULL || sharedData->referenceCounter != (uint32_t)~0) { 910 /* not a valid type, or not an algorithmic converter */ 911 *err = U_ILLEGAL_ARGUMENT_ERROR; 912 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 913 return NULL; 914 } 915 916 stackArgs.name = ""; 917 stackArgs.options = options; 918 stackArgs.locale=locale; 919 cnv = ucnv_createConverterFromSharedData( 920 myUConverter, (UConverterSharedData *)sharedData, 921 &stackArgs, err); 922 923 UTRACE_EXIT_PTR_STATUS(cnv, *err); 924 return cnv; 925 } 926 927 U_CFUNC UConverter* 928 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 929 { 930 UConverter *myUConverter; 931 UConverterSharedData *mySharedConverterData; 932 UConverterNamePieces stackPieces; 933 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 934 935 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 936 937 if(U_FAILURE(*err)) { 938 UTRACE_EXIT_STATUS(*err); 939 return NULL; 940 } 941 942 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 943 944 /* first, get the options out of the converterName string */ 945 
stackPieces.cnvName[0] = 0; 946 stackPieces.locale[0] = 0; 947 stackPieces.options = 0; 948 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 949 if (U_FAILURE(*err)) { 950 /* Very bad name used. */ 951 UTRACE_EXIT_STATUS(*err); 952 return NULL; 953 } 954 stackArgs.nestedLoads=1; 955 stackArgs.pkg=packageName; 956 957 /* open the data, unflatten the shared structure */ 958 mySharedConverterData = createConverterFromFile(&stackArgs, err); 959 960 if (U_FAILURE(*err)) { 961 UTRACE_EXIT_STATUS(*err); 962 return NULL; 963 } 964 965 /* create the actual converter */ 966 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 967 968 if (U_FAILURE(*err)) { 969 ucnv_close(myUConverter); 970 UTRACE_EXIT_STATUS(*err); 971 return NULL; 972 } 973 974 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 975 return myUConverter; 976 } 977 978 979 U_CFUNC UConverter* 980 ucnv_createConverterFromSharedData(UConverter *myUConverter, 981 UConverterSharedData *mySharedConverterData, 982 UConverterLoadArgs *pArgs, 983 UErrorCode *err) 984 { 985 UBool isCopyLocal; 986 987 if(U_FAILURE(*err)) { 988 ucnv_unloadSharedDataIfReady(mySharedConverterData); 989 return myUConverter; 990 } 991 if(myUConverter == NULL) 992 { 993 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 994 if(myUConverter == NULL) 995 { 996 *err = U_MEMORY_ALLOCATION_ERROR; 997 ucnv_unloadSharedDataIfReady(mySharedConverterData); 998 return NULL; 999 } 1000 isCopyLocal = FALSE; 1001 } else { 1002 isCopyLocal = TRUE; 1003 } 1004 1005 /* initialize the converter */ 1006 uprv_memset(myUConverter, 0, sizeof(UConverter)); 1007 myUConverter->isCopyLocal = isCopyLocal; 1008 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 1009 myUConverter->sharedData = mySharedConverterData; 1010 myUConverter->options = pArgs->options; 1011 if(!pArgs->onlyTestIsLoadable) { 1012 myUConverter->preFromUFirstCP = U_SENTINEL; 1013 
myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 1014 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1015 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1016 myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; 1017 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1018 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1019 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1020 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1021 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1022 } 1023 1024 if(mySharedConverterData->impl->open != NULL) { 1025 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1026 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1027 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1028 ucnv_close(myUConverter); 1029 return NULL; 1030 } 1031 } 1032 1033 return myUConverter; 1034 } 1035 1036 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1037 */ 1038 U_CAPI int32_t U_EXPORT2 1039 ucnv_flushCache () 1040 { 1041 UConverterSharedData *mySharedData = NULL; 1042 int32_t pos; 1043 int32_t tableDeletedNum = 0; 1044 const UHashElement *e; 1045 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1046 int32_t i, remaining; 1047 1048 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1049 1050 /* Close the default converter without creating a new one so that everything will be flushed. 
*/ 1051 u_flushDefaultConverter(); 1052 1053 /*if shared data hasn't even been lazy evaluated yet 1054 * return 0 1055 */ 1056 if (SHARED_DATA_HASHTABLE == NULL) { 1057 UTRACE_EXIT_VALUE((int32_t)0); 1058 return 0; 1059 } 1060 1061 /*creates an enumeration to iterate through every element in the 1062 * table 1063 * 1064 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1065 * accessing or modifying the hash table during the iteration. 1066 * The reference count of an entry may be decremented by 1067 * ucnv_close while the iteration is in process, but this is 1068 * benign. It can't be incremented (in ucnv_createConverter()) 1069 * because the sequence of looking up in the cache + incrementing 1070 * is protected by cnvCacheMutex. 1071 */ 1072 umtx_lock(&cnvCacheMutex); 1073 /* 1074 * double loop: A delta/extension-only converter has a pointer to its base table's 1075 * shared data; the first iteration of the outer loop may see the delta converter 1076 * before the base converter, and unloading the delta converter may get the base 1077 * converter's reference counter down to 0. 
1078 */ 1079 i = 0; 1080 do { 1081 remaining = 0; 1082 pos = -1; 1083 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1084 { 1085 mySharedData = (UConverterSharedData *) e->value.pointer; 1086 /*deletes only if reference counter == 0 */ 1087 if (mySharedData->referenceCounter == 0) 1088 { 1089 tableDeletedNum++; 1090 1091 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1092 1093 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1094 mySharedData->sharedDataCached = FALSE; 1095 ucnv_deleteSharedConverterData (mySharedData); 1096 } else { 1097 ++remaining; 1098 } 1099 } 1100 } while(++i == 1 && remaining > 0); 1101 umtx_unlock(&cnvCacheMutex); 1102 1103 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1104 1105 UTRACE_EXIT_VALUE(tableDeletedNum); 1106 return tableDeletedNum; 1107 } 1108 1109 /* available converters list --------------------------------------------------- */ 1110 1111 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1112 int needInit; 1113 UMTX_CHECK(&cnvCacheMutex, (gAvailableConverters == NULL), needInit); 1114 if (needInit) { 1115 UConverter tempConverter; 1116 UEnumeration *allConvEnum = NULL; 1117 uint16_t idx; 1118 uint16_t localConverterCount; 1119 uint16_t allConverterCount; 1120 UErrorCode localStatus; 1121 const char *converterName; 1122 const char **localConverterList; 1123 1124 allConvEnum = ucnv_openAllNames(pErrorCode); 1125 allConverterCount = uenum_count(allConvEnum, pErrorCode); 1126 if (U_FAILURE(*pErrorCode)) { 1127 return FALSE; 1128 } 1129 1130 /* We can't have more than "*converterTable" converters to open */ 1131 localConverterList = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1132 if (!localConverterList) { 1133 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1134 return FALSE; 1135 } 1136 1137 /* Open the default converter to make sure that it has first dibs in the hash table. 
*/ 1138 localStatus = U_ZERO_ERROR; 1139 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1140 1141 localConverterCount = 0; 1142 1143 for (idx = 0; idx < allConverterCount; idx++) { 1144 localStatus = U_ZERO_ERROR; 1145 converterName = uenum_next(allConvEnum, NULL, &localStatus); 1146 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1147 localConverterList[localConverterCount++] = converterName; 1148 } 1149 } 1150 uenum_close(allConvEnum); 1151 1152 umtx_lock(&cnvCacheMutex); 1153 if (gAvailableConverters == NULL) { 1154 gAvailableConverterCount = localConverterCount; 1155 gAvailableConverters = localConverterList; 1156 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1157 } 1158 else { 1159 uprv_free((char **)localConverterList); 1160 } 1161 umtx_unlock(&cnvCacheMutex); 1162 } 1163 return TRUE; 1164 } 1165 1166 U_CFUNC uint16_t 1167 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1168 if (haveAvailableConverterList(pErrorCode)) { 1169 return gAvailableConverterCount; 1170 } 1171 return 0; 1172 } 1173 1174 U_CFUNC const char * 1175 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1176 if (haveAvailableConverterList(pErrorCode)) { 1177 if (n < gAvailableConverterCount) { 1178 return gAvailableConverters[n]; 1179 } 1180 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1181 } 1182 return NULL; 1183 } 1184 1185 /* default converter name --------------------------------------------------- */ 1186 1187 #if !U_CHARSET_IS_UTF8 1188 /* 1189 Copy the canonical converter name. 1190 ucnv_getDefaultName must be thread safe, which can call this function. 1191 1192 ucnv_setDefaultName calls this function and it doesn't have to be 1193 thread safe because there is no reliable/safe way to reset the 1194 converter in use in all threads. 
If you did reset the converter, you 1195 would not be sure that retrieving a default converter for one string 1196 would be the same type of default converter for a successive string. 1197 Since the name is a returned via ucnv_getDefaultName without copying, 1198 you shouldn't be modifying or deleting the string from a separate thread. 1199 */ 1200 static inline void 1201 internalSetName(const char *name, UErrorCode *status) { 1202 UConverterNamePieces stackPieces; 1203 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 1204 int32_t length=(int32_t)(uprv_strlen(name)); 1205 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1206 const UConverterSharedData *algorithmicSharedData; 1207 1208 stackArgs.name = name; 1209 if(containsOption) { 1210 stackPieces.cnvName[0] = 0; 1211 stackPieces.locale[0] = 0; 1212 stackPieces.options = 0; 1213 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1214 if(U_FAILURE(*status)) { 1215 return; 1216 } 1217 } 1218 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1219 1220 umtx_lock(&cnvCacheMutex); 1221 1222 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1223 gDefaultConverterContainsOption = containsOption; 1224 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1225 gDefaultConverterNameBuffer[length]=0; 1226 1227 /* gDefaultConverterName MUST be the last global var set by this function. */ 1228 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1229 gDefaultConverterName = gDefaultConverterNameBuffer; 1230 1231 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1232 1233 umtx_unlock(&cnvCacheMutex); 1234 } 1235 #endif 1236 1237 /* 1238 * In order to be really thread-safe, the get function would have to take 1239 * a buffer parameter and copy the current string inside a mutex block. 1240 * This implementation only tries to be really thread-safe while 1241 * setting the name. 
1242 * It assumes that setting a pointer is atomic. 1243 */ 1244 1245 U_CAPI const char* U_EXPORT2 1246 ucnv_getDefaultName() { 1247 #if U_CHARSET_IS_UTF8 1248 return "UTF-8"; 1249 #else 1250 /* local variable to be thread-safe */ 1251 const char *name; 1252 1253 /* 1254 Multiple calls to ucnv_getDefaultName must be thread safe, 1255 but ucnv_setDefaultName is not thread safe. 1256 */ 1257 UMTX_CHECK(&cnvCacheMutex, gDefaultConverterName, name); 1258 if(name==NULL) { 1259 UErrorCode errorCode = U_ZERO_ERROR; 1260 UConverter *cnv = NULL; 1261 1262 name = uprv_getDefaultCodepage(); 1263 1264 /* if the name is there, test it out and get the canonical name with options */ 1265 if(name != NULL) { 1266 cnv = ucnv_open(name, &errorCode); 1267 if(U_SUCCESS(errorCode) && cnv != NULL) { 1268 name = ucnv_getName(cnv, &errorCode); 1269 } 1270 } 1271 1272 if(name == NULL || name[0] == 0 1273 || U_FAILURE(errorCode) || cnv == NULL 1274 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1275 { 1276 /* Panic time, let's use a fallback. */ 1277 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1278 name = "US-ASCII"; 1279 /* there is no 'algorithmic' converter for EBCDIC */ 1280 #elif U_PLATFORM == U_PF_OS390 1281 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1282 #else 1283 name = "ibm-37_P100-1995"; 1284 #endif 1285 } 1286 1287 internalSetName(name, &errorCode); 1288 1289 /* The close may make the current name go away. */ 1290 ucnv_close(cnv); 1291 } 1292 1293 return name; 1294 #endif 1295 } 1296 1297 #if U_CHARSET_IS_UTF8 1298 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} 1299 #else 1300 /* 1301 This function is not thread safe, and it can't be thread safe. 1302 See internalSetName or the API reference for details. 
1303 */ 1304 U_CAPI void U_EXPORT2 1305 ucnv_setDefaultName(const char *converterName) { 1306 if(converterName==NULL) { 1307 /* reset to the default codepage */ 1308 gDefaultConverterName=NULL; 1309 } else { 1310 UErrorCode errorCode = U_ZERO_ERROR; 1311 UConverter *cnv = NULL; 1312 const char *name = NULL; 1313 1314 /* if the name is there, test it out and get the canonical name with options */ 1315 cnv = ucnv_open(converterName, &errorCode); 1316 if(U_SUCCESS(errorCode) && cnv != NULL) { 1317 name = ucnv_getName(cnv, &errorCode); 1318 } 1319 1320 if(U_SUCCESS(errorCode) && name!=NULL) { 1321 internalSetName(name, &errorCode); 1322 } 1323 /* else this converter is bad to use. Don't change it to a bad value. */ 1324 1325 /* The close may make the current name go away. */ 1326 ucnv_close(cnv); 1327 1328 /* reset the converter cache */ 1329 u_flushDefaultConverter(); 1330 } 1331 } 1332 #endif 1333 1334 /* data swapping ------------------------------------------------------------ */ 1335 1336 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ 1337 1338 #if !UCONFIG_NO_LEGACY_CONVERSION 1339 1340 U_CAPI int32_t U_EXPORT2 1341 ucnv_swap(const UDataSwapper *ds, 1342 const void *inData, int32_t length, void *outData, 1343 UErrorCode *pErrorCode) { 1344 const UDataInfo *pInfo; 1345 int32_t headerSize; 1346 1347 const uint8_t *inBytes; 1348 uint8_t *outBytes; 1349 1350 uint32_t offset, count, staticDataSize; 1351 int32_t size; 1352 1353 const UConverterStaticData *inStaticData; 1354 UConverterStaticData *outStaticData; 1355 1356 const _MBCSHeader *inMBCSHeader; 1357 _MBCSHeader *outMBCSHeader; 1358 _MBCSHeader mbcsHeader; 1359 uint32_t mbcsHeaderLength; 1360 UBool noFromU=FALSE; 1361 1362 uint8_t outputType; 1363 1364 int32_t maxFastUChar, mbcsIndexLength; 1365 1366 const int32_t *inExtIndexes; 1367 int32_t extOffset; 1368 1369 /* udata_swapDataHeader checks the arguments */ 1370 headerSize=udata_swapDataHeader(ds, inData, length, outData, 
pErrorCode); 1371 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1372 return 0; 1373 } 1374 1375 /* check data format and format version */ 1376 pInfo=(const UDataInfo *)((const char *)inData+4); 1377 if(!( 1378 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 1379 pInfo->dataFormat[1]==0x6e && 1380 pInfo->dataFormat[2]==0x76 && 1381 pInfo->dataFormat[3]==0x74 && 1382 pInfo->formatVersion[0]==6 && 1383 pInfo->formatVersion[1]>=2 1384 )) { 1385 udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", 1386 pInfo->dataFormat[0], pInfo->dataFormat[1], 1387 pInfo->dataFormat[2], pInfo->dataFormat[3], 1388 pInfo->formatVersion[0], pInfo->formatVersion[1]); 1389 *pErrorCode=U_UNSUPPORTED_ERROR; 1390 return 0; 1391 } 1392 1393 inBytes=(const uint8_t *)inData+headerSize; 1394 outBytes=(uint8_t *)outData+headerSize; 1395 1396 /* read the initial UConverterStaticData structure after the UDataInfo header */ 1397 inStaticData=(const UConverterStaticData *)inBytes; 1398 outStaticData=(UConverterStaticData *)outBytes; 1399 1400 if(length<0) { 1401 staticDataSize=ds->readUInt32(inStaticData->structSize); 1402 } else { 1403 length-=headerSize; 1404 if( length<(int32_t)sizeof(UConverterStaticData) || 1405 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 1406 ) { 1407 udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 1408 length); 1409 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1410 return 0; 1411 } 1412 } 1413 1414 if(length>=0) { 1415 /* swap the static data */ 1416 if(inStaticData!=outStaticData) { 1417 uprv_memcpy(outStaticData, inStaticData, staticDataSize); 1418 } 1419 1420 ds->swapArray32(ds, &inStaticData->structSize, 4, 1421 &outStaticData->structSize, pErrorCode); 1422 ds->swapArray32(ds, &inStaticData->codepage, 4, 1423 &outStaticData->codepage, pErrorCode); 1424 1425 ds->swapInvChars(ds, 
inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), 1426 outStaticData->name, pErrorCode); 1427 if(U_FAILURE(*pErrorCode)) { 1428 udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); 1429 return 0; 1430 } 1431 } 1432 1433 inBytes+=staticDataSize; 1434 outBytes+=staticDataSize; 1435 if(length>=0) { 1436 length-=(int32_t)staticDataSize; 1437 } 1438 1439 /* check for supported conversionType values */ 1440 if(inStaticData->conversionType==UCNV_MBCS) { 1441 /* swap MBCS data */ 1442 inMBCSHeader=(const _MBCSHeader *)inBytes; 1443 outMBCSHeader=(_MBCSHeader *)outBytes; 1444 1445 if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { 1446 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1447 length); 1448 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1449 return 0; 1450 } 1451 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 1452 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 1453 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 1454 ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& 1455 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 1456 ) { 1457 mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; 1458 noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); 1459 } else { 1460 udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", 1461 inMBCSHeader->version[0], inMBCSHeader->version[1]); 1462 *pErrorCode=U_UNSUPPORTED_ERROR; 1463 return 0; 1464 } 1465 1466 uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); 1467 mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); 1468 mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); 1469 mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); 1470 mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); 1471 mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); 1472 
mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); 1473 mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); 1474 /* mbcsHeader.options have been read above */ 1475 1476 extOffset=(int32_t)(mbcsHeader.flags>>8); 1477 outputType=(uint8_t)mbcsHeader.flags; 1478 if(noFromU && outputType==MBCS_OUTPUT_1) { 1479 udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); 1480 *pErrorCode=U_UNSUPPORTED_ERROR; 1481 return 0; 1482 } 1483 1484 /* make sure that the output type is known */ 1485 switch(outputType) { 1486 case MBCS_OUTPUT_1: 1487 case MBCS_OUTPUT_2: 1488 case MBCS_OUTPUT_3: 1489 case MBCS_OUTPUT_4: 1490 case MBCS_OUTPUT_3_EUC: 1491 case MBCS_OUTPUT_4_EUC: 1492 case MBCS_OUTPUT_2_SISO: 1493 case MBCS_OUTPUT_EXT_ONLY: 1494 /* OK */ 1495 break; 1496 default: 1497 udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", 1498 outputType); 1499 *pErrorCode=U_UNSUPPORTED_ERROR; 1500 return 0; 1501 } 1502 1503 /* calculate the length of the MBCS data */ 1504 1505 /* 1506 * utf8Friendly MBCS files (mbcsHeader.version 4.3) 1507 * contain an additional mbcsIndex table: 1508 * uint16_t[(maxFastUChar+1)>>6]; 1509 * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). 
1510 */ 1511 maxFastUChar=0; 1512 mbcsIndexLength=0; 1513 if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && 1514 mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 1515 ) { 1516 maxFastUChar=(maxFastUChar<<8)|0xff; 1517 mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ 1518 } 1519 1520 if(extOffset==0) { 1521 size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); 1522 if(!noFromU) { 1523 size+=(int32_t)mbcsHeader.fromUBytesLength; 1524 } 1525 1526 /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ 1527 inExtIndexes=NULL; 1528 } else { 1529 /* there is extension data after the base data, see ucnv_ext.h */ 1530 if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 1531 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 1532 length); 1533 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1534 return 0; 1535 } 1536 1537 inExtIndexes=(const int32_t *)(inBytes+extOffset); 1538 size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); 1539 } 1540 1541 if(length>=0) { 1542 if(length<size) { 1543 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1544 length); 1545 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1546 return 0; 1547 } 1548 1549 /* copy the data for inaccessible bytes */ 1550 if(inBytes!=outBytes) { 1551 uprv_memcpy(outBytes, inBytes, size); 1552 } 1553 1554 /* swap the MBCSHeader, except for the version field */ 1555 count=mbcsHeaderLength*4; 1556 ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, 1557 &outMBCSHeader->countStates, pErrorCode); 1558 1559 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 1560 /* 1561 * extension-only file, 1562 * contains a base name instead of normal base table data 1563 */ 1564 1565 /* swap the base name, between the header and the extension data */ 1566 const char *inBaseName=(const char 
*)inBytes+count; 1567 char *outBaseName=(char *)outBytes+count; 1568 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), 1569 outBaseName, pErrorCode); 1570 } else { 1571 /* normal file with base table data */ 1572 1573 /* swap the state table, 1kB per state */ 1574 offset=count; 1575 count=mbcsHeader.countStates*1024; 1576 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1577 outBytes+offset, pErrorCode); 1578 1579 /* swap the toUFallbacks[] */ 1580 offset+=count; 1581 count=mbcsHeader.countToUFallbacks*8; 1582 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1583 outBytes+offset, pErrorCode); 1584 1585 /* swap the unicodeCodeUnits[] */ 1586 offset=mbcsHeader.offsetToUCodeUnits; 1587 count=mbcsHeader.offsetFromUTable-offset; 1588 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1589 outBytes+offset, pErrorCode); 1590 1591 /* offset to the stage 1 table, independent of the outputType */ 1592 offset=mbcsHeader.offsetFromUTable; 1593 1594 if(outputType==MBCS_OUTPUT_1) { 1595 /* SBCS: swap the fromU tables, all 16 bits wide */ 1596 count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; 1597 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1598 outBytes+offset, pErrorCode); 1599 } else { 1600 /* otherwise: swap the stage tables separately */ 1601 1602 /* stage 1 table: uint16_t[0x440 or 0x40] */ 1603 if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 1604 count=0x440*2; /* for all of Unicode */ 1605 } else { 1606 count=0x40*2; /* only BMP */ 1607 } 1608 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1609 outBytes+offset, pErrorCode); 1610 1611 /* stage 2 table: uint32_t[] */ 1612 offset+=count; 1613 count=mbcsHeader.offsetFromUBytes-offset; 1614 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1615 outBytes+offset, pErrorCode); 1616 1617 /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ 1618 offset=mbcsHeader.offsetFromUBytes; 1619 count= noFromU ? 
0 : mbcsHeader.fromUBytesLength; 1620 switch(outputType) { 1621 case MBCS_OUTPUT_2: 1622 case MBCS_OUTPUT_3_EUC: 1623 case MBCS_OUTPUT_2_SISO: 1624 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1625 outBytes+offset, pErrorCode); 1626 break; 1627 case MBCS_OUTPUT_4: 1628 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1629 outBytes+offset, pErrorCode); 1630 break; 1631 default: 1632 /* just uint8_t[], nothing to swap */ 1633 break; 1634 } 1635 1636 if(mbcsIndexLength!=0) { 1637 offset+=count; 1638 count=mbcsIndexLength; 1639 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1640 outBytes+offset, pErrorCode); 1641 } 1642 } 1643 } 1644 1645 if(extOffset!=0) { 1646 /* swap the extension data */ 1647 inBytes+=extOffset; 1648 outBytes+=extOffset; 1649 1650 /* swap toUTable[] */ 1651 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); 1652 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); 1653 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1654 1655 /* swap toUUChars[] */ 1656 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); 1657 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); 1658 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1659 1660 /* swap fromUTableUChars[] */ 1661 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); 1662 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); 1663 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1664 1665 /* swap fromUTableValues[] */ 1666 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); 1667 /* same length as for fromUTableUChars[] */ 1668 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1669 1670 /* no need to swap fromUBytes[] */ 1671 1672 /* swap fromUStage12[] */ 1673 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); 1674 length=udata_readInt32(ds, 
inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); 1675 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1676 1677 /* swap fromUStage3[] */ 1678 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); 1679 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); 1680 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1681 1682 /* swap fromUStage3b[] */ 1683 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); 1684 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); 1685 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1686 1687 /* swap indexes[] */ 1688 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); 1689 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); 1690 } 1691 } 1692 } else { 1693 udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", 1694 inStaticData->conversionType); 1695 *pErrorCode=U_UNSUPPORTED_ERROR; 1696 return 0; 1697 } 1698 1699 return headerSize+(int32_t)staticDataSize+size; 1700 } 1701 1702 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1703 1704 #endif 1705