1 /* 2 ******************************************************************** 3 * COPYRIGHT: 4 * Copyright (c) 1996-2015, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************** 7 * 8 * ucnv_bld.cpp: 9 * 10 * Defines functions that are used in the creation/initialization/deletion 11 * of converters and related structures. 12 * uses uconv_io.h routines to access disk information 13 * is used by ucnv.h to implement public API create/delete/flushCache routines 14 * Modification History: 15 * 16 * Date Name Description 17 * 18 * 06/20/2000 helena OS/400 port changes; mostly typecast. 19 * 06/29/2000 helena Major rewrite of the callback interface. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_CONVERSION 25 26 #include "unicode/putil.h" 27 #include "unicode/udata.h" 28 #include "unicode/ucnv.h" 29 #include "unicode/uloc.h" 30 #include "mutex.h" 31 #include "putilimp.h" 32 #include "uassert.h" 33 #include "utracimp.h" 34 #include "ucnv_io.h" 35 #include "ucnv_bld.h" 36 #include "ucnvmbcs.h" 37 #include "ucnv_ext.h" 38 #include "ucnv_cnv.h" 39 #include "ucnv_imp.h" 40 #include "uhash.h" 41 #include "umutex.h" 42 #include "cstring.h" 43 #include "cmemory.h" 44 #include "ucln_cmn.h" 45 #include "ustr_cnv.h" 46 47 48 #if 0 49 #include <stdio.h> 50 extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); 51 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) 52 #else 53 # define UCNV_DEBUG_LOG(x,y,z) 54 #endif 55 56 static const UConverterSharedData * const 57 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ 58 NULL, NULL, 59 60 #if UCONFIG_NO_LEGACY_CONVERSION 61 NULL, 62 #else 63 &_MBCSData, 64 #endif 65 66 &_Latin1Data, 67 &_UTF8Data, &_UTF16BEData, &_UTF16LEData, 68 #if UCONFIG_ONLY_HTML_CONVERSION 69 NULL, NULL, 70 #else 71 &_UTF32BEData, &_UTF32LEData, 72 #endif 73 NULL, 74 75 #if UCONFIG_NO_LEGACY_CONVERSION 76 NULL, 77 #else 78 
&_ISO2022Data, 79 #endif 80 81 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 82 NULL, NULL, NULL, NULL, NULL, NULL, 83 NULL, NULL, NULL, NULL, NULL, NULL, 84 NULL, 85 #else 86 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, 87 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, 88 &_HZData, 89 #endif 90 91 #if UCONFIG_ONLY_HTML_CONVERSION 92 NULL, 93 #else 94 &_SCSUData, 95 #endif 96 97 98 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 99 NULL, 100 #else 101 &_ISCIIData, 102 #endif 103 104 &_ASCIIData, 105 #if UCONFIG_ONLY_HTML_CONVERSION 106 NULL, NULL, &_UTF16Data, NULL, NULL, NULL, 107 #else 108 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 109 #endif 110 111 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 112 NULL, 113 #else 114 &_CompoundTextData 115 #endif 116 }; 117 118 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. 
119 Also the name should be in lower case and all spaces, dashes and underscores 120 removed 121 */ 122 static struct { 123 const char *name; 124 const UConverterType type; 125 } const cnvNameType[] = { 126 #if !UCONFIG_ONLY_HTML_CONVERSION 127 { "bocu1", UCNV_BOCU1 }, 128 { "cesu8", UCNV_CESU8 }, 129 #endif 130 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 131 { "hz",UCNV_HZ }, 132 #endif 133 #if !UCONFIG_ONLY_HTML_CONVERSION 134 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 135 #endif 136 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 137 { "iscii", UCNV_ISCII }, 138 #endif 139 #if !UCONFIG_NO_LEGACY_CONVERSION 140 { "iso2022", UCNV_ISO_2022 }, 141 #endif 142 { "iso88591", UCNV_LATIN_1 }, 143 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 144 { "lmbcs1", UCNV_LMBCS_1 }, 145 { "lmbcs11",UCNV_LMBCS_11 }, 146 { "lmbcs16",UCNV_LMBCS_16 }, 147 { "lmbcs17",UCNV_LMBCS_17 }, 148 { "lmbcs18",UCNV_LMBCS_18 }, 149 { "lmbcs19",UCNV_LMBCS_19 }, 150 { "lmbcs2", UCNV_LMBCS_2 }, 151 { "lmbcs3", UCNV_LMBCS_3 }, 152 { "lmbcs4", UCNV_LMBCS_4 }, 153 { "lmbcs5", UCNV_LMBCS_5 }, 154 { "lmbcs6", UCNV_LMBCS_6 }, 155 { "lmbcs8", UCNV_LMBCS_8 }, 156 #endif 157 #if !UCONFIG_ONLY_HTML_CONVERSION 158 { "scsu", UCNV_SCSU }, 159 #endif 160 { "usascii", UCNV_US_ASCII }, 161 { "utf16", UCNV_UTF16 }, 162 { "utf16be", UCNV_UTF16_BigEndian }, 163 { "utf16le", UCNV_UTF16_LittleEndian }, 164 #if U_IS_BIG_ENDIAN 165 { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, 166 { "utf16platformendian", UCNV_UTF16_BigEndian }, 167 #else 168 { "utf16oppositeendian", UCNV_UTF16_BigEndian}, 169 { "utf16platformendian", UCNV_UTF16_LittleEndian }, 170 #endif 171 #if !UCONFIG_ONLY_HTML_CONVERSION 172 { "utf32", UCNV_UTF32 }, 173 { "utf32be", UCNV_UTF32_BigEndian }, 174 { "utf32le", UCNV_UTF32_LittleEndian }, 175 #if U_IS_BIG_ENDIAN 176 { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, 177 { "utf32platformendian", UCNV_UTF32_BigEndian }, 178 #else 179 { 
"utf32oppositeendian", UCNV_UTF32_BigEndian }, 180 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 181 #endif 182 #endif 183 #if !UCONFIG_ONLY_HTML_CONVERSION 184 { "utf7", UCNV_UTF7 }, 185 #endif 186 { "utf8", UCNV_UTF8 }, 187 #if !UCONFIG_ONLY_HTML_CONVERSION 188 { "x11compoundtext", UCNV_COMPOUND_TEXT} 189 #endif 190 }; 191 192 193 /*initializes some global variables */ 194 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 195 static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */ 196 /* Note: the global mutex is used for */ 197 /* reference count updates. */ 198 199 static const char **gAvailableConverters = NULL; 200 static uint16_t gAvailableConverterCount = 0; 201 static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; 202 203 #if !U_CHARSET_IS_UTF8 204 205 /* This contains the resolved converter name. So no further alias lookup is needed again. */ 206 static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ 207 static const char *gDefaultConverterName = NULL; 208 209 /* 210 If the default converter is an algorithmic converter, this is the cached value. 211 We don't cache a full UConverter and clone it because ucnv_clone doesn't have 212 less overhead than an algorithmic open. We don't cache non-algorithmic converters 213 because ucnv_flushCache must be able to unload the default converter and its table. 214 */ 215 static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; 216 217 /* Does gDefaultConverterName have a converter option and require extra parsing? */ 218 static UBool gDefaultConverterContainsOption; 219 220 #endif /* !U_CHARSET_IS_UTF8 */ 221 222 static const char DATA_TYPE[] = "cnv"; 223 224 /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). 225 * If it is ever to be called from elsewhere, synchronization 226 * will need to be considered. 
227 */ 228 static void 229 ucnv_flushAvailableConverterCache() { 230 gAvailableConverterCount = 0; 231 if (gAvailableConverters) { 232 uprv_free((char **)gAvailableConverters); 233 gAvailableConverters = NULL; 234 } 235 gAvailableConvertersInitOnce.reset(); 236 } 237 238 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 239 /* in use by open converters. */ 240 /* Not thread safe. */ 241 /* Not supported API. */ 242 static UBool U_CALLCONV ucnv_cleanup(void) { 243 ucnv_flushCache(); 244 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 245 uhash_close(SHARED_DATA_HASHTABLE); 246 SHARED_DATA_HASHTABLE = NULL; 247 } 248 249 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 250 ucnv_flushAvailableConverterCache(); 251 252 #if !U_CHARSET_IS_UTF8 253 gDefaultConverterName = NULL; 254 gDefaultConverterNameBuffer[0] = 0; 255 gDefaultConverterContainsOption = FALSE; 256 gDefaultAlgorithmicSharedData = NULL; 257 #endif 258 259 return (SHARED_DATA_HASHTABLE == NULL); 260 } 261 262 static UBool U_CALLCONV 263 isCnvAcceptable(void * /*context*/, 264 const char * /*type*/, const char * /*name*/, 265 const UDataInfo *pInfo) { 266 return (UBool)( 267 pInfo->size>=20 && 268 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 269 pInfo->charsetFamily==U_CHARSET_FAMILY && 270 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 271 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 272 pInfo->dataFormat[1]==0x6e && 273 pInfo->dataFormat[2]==0x76 && 274 pInfo->dataFormat[3]==0x74 && 275 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 276 } 277 278 /** 279 * Un flatten shared data from a UDATA.. 
280 */ 281 static UConverterSharedData* 282 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 283 { 284 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 285 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 286 const UConverterStaticData *source = (const UConverterStaticData *) raw; 287 UConverterSharedData *data; 288 UConverterType type = (UConverterType)source->conversionType; 289 290 if(U_FAILURE(*status)) 291 return NULL; 292 293 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 294 converterData[type] == NULL || 295 !converterData[type]->isReferenceCounted || 296 converterData[type]->referenceCounter != 1 || 297 source->structSize != sizeof(UConverterStaticData)) 298 { 299 *status = U_INVALID_TABLE_FORMAT; 300 return NULL; 301 } 302 303 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 304 if(data == NULL) { 305 *status = U_MEMORY_ALLOCATION_ERROR; 306 return NULL; 307 } 308 309 /* copy initial values from the static structure for this type */ 310 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 311 312 data->staticData = source; 313 314 data->sharedDataCached = FALSE; 315 316 /* fill in fields from the loaded data */ 317 data->dataMemory = (void*)pData; /* for future use */ 318 319 if(data->impl->load != NULL) { 320 data->impl->load(data, pArgs, raw + source->structSize, status); 321 if(U_FAILURE(*status)) { 322 uprv_free(data); 323 return NULL; 324 } 325 } 326 return data; 327 } 328 329 /*Takes an alias name gets an actual converter file name 330 *goes to disk and opens it. 
331 *allocates the memory and returns a new UConverter object 332 */ 333 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 334 { 335 UDataMemory *data; 336 UConverterSharedData *sharedData; 337 338 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 339 340 if (U_FAILURE (*err)) { 341 UTRACE_EXIT_STATUS(*err); 342 return NULL; 343 } 344 345 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 346 347 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 348 if(U_FAILURE(*err)) 349 { 350 UTRACE_EXIT_STATUS(*err); 351 return NULL; 352 } 353 354 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 355 if(U_FAILURE(*err)) 356 { 357 udata_close(data); 358 UTRACE_EXIT_STATUS(*err); 359 return NULL; 360 } 361 362 /* 363 * TODO Store pkg in a field in the shared data so that delta-only converters 364 * can load base converters from the same package. 365 * If the pkg name is longer than the field, then either do not load the converter 366 * in the first place, or just set the pkg field to "". 367 */ 368 369 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 370 return sharedData; 371 } 372 373 /*returns a converter type from a string 374 */ 375 static const UConverterSharedData * 376 getAlgorithmicTypeFromName(const char *realName) 377 { 378 uint32_t mid, start, limit; 379 uint32_t lastMid; 380 int result; 381 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 382 383 /* Lower case and remove ignoreable characters. */ 384 ucnv_io_stripForCompare(strippedName, realName); 385 386 /* do a binary search for the alias */ 387 start = 0; 388 limit = sizeof(cnvNameType)/sizeof(cnvNameType[0]); 389 mid = limit; 390 lastMid = UINT32_MAX; 391 392 for (;;) { 393 mid = (uint32_t)((start + limit) / 2); 394 if (lastMid == mid) { /* Have we moved? */ 395 break; /* We haven't moved, and it wasn't found. 
*/ 396 } 397 lastMid = mid; 398 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 399 400 if (result < 0) { 401 limit = mid; 402 } else if (result > 0) { 403 start = mid; 404 } else { 405 return converterData[cnvNameType[mid].type]; 406 } 407 } 408 409 return NULL; 410 } 411 412 /* 413 * Based on the number of known converters, this determines how many times larger 414 * the shared data hash table should be. When on small platforms, or just a couple 415 * of converters are used, this number should be 2. When memory is plentiful, or 416 * when ucnv_countAvailable is ever used with a lot of available converters, 417 * this should be 4. 418 * Larger numbers reduce the number of hash collisions, but use more memory. 419 */ 420 #define UCNV_CACHE_LOAD_FACTOR 2 421 422 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 423 /* Will always be called with the cnvCacheMutex alrady being held */ 424 /* by the calling function. */ 425 /* Stores the shared data in the SHARED_DATA_HASHTABLE 426 * @param data The shared data 427 */ 428 static void 429 ucnv_shareConverterData(UConverterSharedData * data) 430 { 431 UErrorCode err = U_ZERO_ERROR; 432 /*Lazy evaluates the Hashtable itself */ 433 /*void *sanity = NULL;*/ 434 435 if (SHARED_DATA_HASHTABLE == NULL) 436 { 437 SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, 438 ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, 439 &err); 440 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 441 442 if (U_FAILURE(err)) 443 return; 444 } 445 446 /* ### check to see if the element is not already there! 
*/ 447 448 /* 449 sanity = ucnv_getSharedConverterData (data->staticData->name); 450 if(sanity != NULL) 451 { 452 UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); 453 } 454 UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); 455 */ 456 457 /* Mark it shared */ 458 data->sharedDataCached = TRUE; 459 460 uhash_put(SHARED_DATA_HASHTABLE, 461 (void*) data->staticData->name, /* Okay to cast away const as long as 462 keyDeleter == NULL */ 463 data, 464 &err); 465 UCNV_DEBUG_LOG("put", data->staticData->name,data); 466 467 } 468 469 /* Look up a converter name in the shared data cache. */ 470 /* cnvCacheMutex must be held by the caller to protect the hash table. */ 471 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 472 * @param name The name of the shared data 473 * @return the shared data from the SHARED_DATA_HASHTABLE 474 */ 475 static UConverterSharedData * 476 ucnv_getSharedConverterData(const char *name) 477 { 478 /*special case when no Table has yet been created we return NULL */ 479 if (SHARED_DATA_HASHTABLE == NULL) 480 { 481 return NULL; 482 } 483 else 484 { 485 UConverterSharedData *rc; 486 487 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 488 UCNV_DEBUG_LOG("get",name,rc); 489 return rc; 490 } 491 } 492 493 /*frees the string of memory blocks associates with a sharedConverter 494 *if and only if the referenceCounter == 0 495 */ 496 /* Deletes (frees) the Shared data it's passed. 
first it checks the referenceCounter to
 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and
 * returns TRUE,
 * otherwise returns FALSE
 * @param sharedConverterData The shared data
 * @return if not it frees all the memory stemming from sharedConverterData and
 * returns TRUE, otherwise returns FALSE
 */
/*
 * Teardown order when the data is unreferenced: the type-specific unload()
 * hook runs first, then the backing data item is closed, and finally the
 * structure itself is freed.
 */
static UBool
ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData)
{
    UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD);
    UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData);

    if (!(deadSharedData->referenceCounter > 0)) {
        UDataMemory *backingData = (UDataMemory*)deadSharedData->dataMemory;

        /* let the implementation release what its load() set up */
        if (deadSharedData->impl->unload != NULL) {
            deadSharedData->impl->unload(deadSharedData);
        }

        /* release the data item that the static data lives in */
        if (backingData != NULL) {
            udata_close(backingData);
        }

        uprv_free(deadSharedData);

        UTRACE_EXIT_VALUE((int32_t)TRUE);
        return TRUE;
    }

    /* still referenced by someone: leave everything untouched */
    UTRACE_EXIT_VALUE((int32_t)FALSE);
    return FALSE;
}

/**
 * Load a non-algorithmic converter.
 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex).
534 */ 535 UConverterSharedData * 536 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 537 UConverterSharedData *mySharedConverterData; 538 539 if(err == NULL || U_FAILURE(*err)) { 540 return NULL; 541 } 542 543 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 544 /* application-provided converters are not currently cached */ 545 return createConverterFromFile(pArgs, err); 546 } 547 548 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 549 if (mySharedConverterData == NULL) 550 { 551 /*Not cached, we need to stream it in from file */ 552 mySharedConverterData = createConverterFromFile(pArgs, err); 553 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 554 { 555 return NULL; 556 } 557 else if (!pArgs->onlyTestIsLoadable) 558 { 559 /* share it with other library clients */ 560 ucnv_shareConverterData(mySharedConverterData); 561 } 562 } 563 else 564 { 565 /* The data for this converter was already in the cache. */ 566 /* Update the reference counter on the shared data: one more client */ 567 mySharedConverterData->referenceCounter++; 568 } 569 570 return mySharedConverterData; 571 } 572 573 /** 574 * Unload a non-algorithmic converter. 575 * It must be sharedData->isReferenceCounted 576 * and this function must be called inside umtx_lock(&cnvCacheMutex). 
577 */ 578 U_CAPI void 579 ucnv_unload(UConverterSharedData *sharedData) { 580 if(sharedData != NULL) { 581 if (sharedData->referenceCounter > 0) { 582 sharedData->referenceCounter--; 583 } 584 585 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 586 ucnv_deleteSharedConverterData(sharedData); 587 } 588 } 589 } 590 591 U_CFUNC void 592 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 593 { 594 if(sharedData != NULL && sharedData->isReferenceCounted) { 595 umtx_lock(&cnvCacheMutex); 596 ucnv_unload(sharedData); 597 umtx_unlock(&cnvCacheMutex); 598 } 599 } 600 601 U_CFUNC void 602 ucnv_incrementRefCount(UConverterSharedData *sharedData) 603 { 604 if(sharedData != NULL && sharedData->isReferenceCounted) { 605 umtx_lock(&cnvCacheMutex); 606 sharedData->referenceCounter++; 607 umtx_unlock(&cnvCacheMutex); 608 } 609 } 610 611 /* 612 * *pPieces must be initialized. 613 * The name without options will be copied to pPieces->cnvName. 614 * The locale and options will be copied to pPieces only if present in inName, 615 * otherwise the existing values in pPieces remain. 616 * *pArgs will be set to the pPieces values. 617 */ 618 static void 619 parseConverterOptions(const char *inName, 620 UConverterNamePieces *pPieces, 621 UConverterLoadArgs *pArgs, 622 UErrorCode *err) 623 { 624 char *cnvName = pPieces->cnvName; 625 char c; 626 int32_t len = 0; 627 628 pArgs->name=inName; 629 pArgs->locale=pPieces->locale; 630 pArgs->options=pPieces->options; 631 632 /* copy the converter name itself to cnvName */ 633 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 634 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 635 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 636 pPieces->cnvName[0]=0; 637 return; 638 } 639 *cnvName++=c; 640 inName++; 641 } 642 *cnvName=0; 643 pArgs->name=pPieces->cnvName; 644 645 /* parse options. No more name copying should occur. 
*/ 646 while((c=*inName)!=0) { 647 if(c==UCNV_OPTION_SEP_CHAR) { 648 ++inName; 649 } 650 651 /* inName is behind an option separator */ 652 if(uprv_strncmp(inName, "locale=", 7)==0) { 653 /* do not modify locale itself in case we have multiple locale options */ 654 char *dest=pPieces->locale; 655 656 /* copy the locale option value */ 657 inName+=7; 658 len=0; 659 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 660 ++inName; 661 662 if(++len>=ULOC_FULLNAME_CAPACITY) { 663 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 664 pPieces->locale[0]=0; 665 return; 666 } 667 668 *dest++=c; 669 } 670 *dest=0; 671 } else if(uprv_strncmp(inName, "version=", 8)==0) { 672 /* copy the version option value into bits 3..0 of pPieces->options */ 673 inName+=8; 674 c=*inName; 675 if(c==0) { 676 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 677 return; 678 } else if((uint8_t)(c-'0')<10) { 679 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 680 ++inName; 681 } 682 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 683 inName+=8; 684 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 685 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 686 } else { 687 /* ignore any other options until we define some */ 688 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 689 } 690 if(c==0) { 691 return; 692 } 693 } 694 } 695 } 696 697 /*Logic determines if the converter is Algorithmic AND/OR cached 698 *depending on that: 699 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 700 * -Get it from a Hashtable (Data=X, Cached=TRUE) 701 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 702 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 703 */ 704 U_CFUNC UConverterSharedData * 705 ucnv_loadSharedData(const char *converterName, 706 UConverterNamePieces *pPieces, 707 UConverterLoadArgs *pArgs, 708 UErrorCode * err) { 
709 UConverterNamePieces stackPieces; 710 UConverterLoadArgs stackArgs; 711 UConverterSharedData *mySharedConverterData = NULL; 712 UErrorCode internalErrorCode = U_ZERO_ERROR; 713 UBool mayContainOption = TRUE; 714 UBool checkForAlgorithmic = TRUE; 715 716 if (U_FAILURE (*err)) { 717 return NULL; 718 } 719 720 if(pPieces == NULL) { 721 if(pArgs != NULL) { 722 /* 723 * Bad: We may set pArgs pointers to stackPieces fields 724 * which will be invalid after this function returns. 725 */ 726 *err = U_INTERNAL_PROGRAM_ERROR; 727 return NULL; 728 } 729 pPieces = &stackPieces; 730 } 731 if(pArgs == NULL) { 732 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 733 stackArgs.size = (int32_t)sizeof(stackArgs); 734 pArgs = &stackArgs; 735 } 736 737 pPieces->cnvName[0] = 0; 738 pPieces->locale[0] = 0; 739 pPieces->options = 0; 740 741 pArgs->name = converterName; 742 pArgs->locale = pPieces->locale; 743 pArgs->options = pPieces->options; 744 745 /* In case "name" is NULL we want to open the default converter. */ 746 if (converterName == NULL) { 747 #if U_CHARSET_IS_UTF8 748 pArgs->name = "UTF-8"; 749 return (UConverterSharedData *)converterData[UCNV_UTF8]; 750 #else 751 /* Call ucnv_getDefaultName first to query the name from the OS. */ 752 pArgs->name = ucnv_getDefaultName(); 753 if (pArgs->name == NULL) { 754 *err = U_MISSING_RESOURCE_ERROR; 755 return NULL; 756 } 757 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 758 checkForAlgorithmic = FALSE; 759 mayContainOption = gDefaultConverterContainsOption; 760 /* the default converter name is already canonical */ 761 #endif 762 } 763 else if(UCNV_FAST_IS_UTF8(converterName)) { 764 /* fastpath for UTF-8 */ 765 pArgs->name = "UTF-8"; 766 return (UConverterSharedData *)converterData[UCNV_UTF8]; 767 } 768 else { 769 /* separate the converter name from the options */ 770 parseConverterOptions(converterName, pPieces, pArgs, err); 771 if (U_FAILURE(*err)) { 772 /* Very bad name used. 
*/ 773 return NULL; 774 } 775 776 /* get the canonical converter name */ 777 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 778 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 779 /* 780 * set the input name in case the converter was added 781 * without updating the alias table, or when there is no alias table 782 */ 783 pArgs->name = pPieces->cnvName; 784 } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { 785 *err = U_AMBIGUOUS_ALIAS_WARNING; 786 } 787 } 788 789 /* separate the converter name from the options */ 790 if(mayContainOption && pArgs->name != pPieces->cnvName) { 791 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 792 } 793 794 /* get the shared data for an algorithmic converter, if it is one */ 795 if (checkForAlgorithmic) { 796 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 797 } 798 if (mySharedConverterData == NULL) 799 { 800 /* it is a data-based converter, get its shared data. */ 801 /* Hold the cnvCacheMutex through the whole process of checking the */ 802 /* converter data cache, and adding new entries to the cache */ 803 /* to prevent other threads from modifying the cache during the */ 804 /* process. 
*/ 805 pArgs->nestedLoads=1; 806 pArgs->pkg=NULL; 807 808 umtx_lock(&cnvCacheMutex); 809 mySharedConverterData = ucnv_load(pArgs, err); 810 umtx_unlock(&cnvCacheMutex); 811 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 812 { 813 return NULL; 814 } 815 } 816 817 return mySharedConverterData; 818 } 819 820 U_CAPI UConverter * 821 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 822 { 823 UConverterNamePieces stackPieces; 824 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 825 UConverterSharedData *mySharedConverterData; 826 827 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 828 829 if(U_SUCCESS(*err)) { 830 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 831 832 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 833 834 myUConverter = ucnv_createConverterFromSharedData( 835 myUConverter, mySharedConverterData, 836 &stackArgs, 837 err); 838 839 if(U_SUCCESS(*err)) { 840 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 841 return myUConverter; 842 } 843 } 844 845 /* exit with error */ 846 UTRACE_EXIT_STATUS(*err); 847 return NULL; 848 } 849 850 U_CFUNC UBool 851 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 852 UConverter myUConverter; 853 UConverterNamePieces stackPieces; 854 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 855 UConverterSharedData *mySharedConverterData; 856 857 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 858 859 if(U_SUCCESS(*err)) { 860 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 861 862 stackArgs.onlyTestIsLoadable=TRUE; 863 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 864 ucnv_createConverterFromSharedData( 865 &myUConverter, mySharedConverterData, 866 &stackArgs, 867 err); 868 ucnv_unloadSharedDataIfReady(mySharedConverterData); 869 } 870 871 UTRACE_EXIT_STATUS(*err); 872 return U_SUCCESS(*err); 873 } 874 875 UConverter * 876 
ucnv_createAlgorithmicConverter(UConverter *myUConverter, 877 UConverterType type, 878 const char *locale, uint32_t options, 879 UErrorCode *err) { 880 UConverter *cnv; 881 const UConverterSharedData *sharedData; 882 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 883 884 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 885 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 886 887 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 888 *err = U_ILLEGAL_ARGUMENT_ERROR; 889 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 890 return NULL; 891 } 892 893 sharedData = converterData[type]; 894 if(sharedData == NULL || sharedData->isReferenceCounted) { 895 /* not a valid type, or not an algorithmic converter */ 896 *err = U_ILLEGAL_ARGUMENT_ERROR; 897 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 898 return NULL; 899 } 900 901 stackArgs.name = ""; 902 stackArgs.options = options; 903 stackArgs.locale=locale; 904 cnv = ucnv_createConverterFromSharedData( 905 myUConverter, (UConverterSharedData *)sharedData, 906 &stackArgs, err); 907 908 UTRACE_EXIT_PTR_STATUS(cnv, *err); 909 return cnv; 910 } 911 912 U_CFUNC UConverter* 913 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 914 { 915 UConverter *myUConverter; 916 UConverterSharedData *mySharedConverterData; 917 UConverterNamePieces stackPieces; 918 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 919 920 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 921 922 if(U_FAILURE(*err)) { 923 UTRACE_EXIT_STATUS(*err); 924 return NULL; 925 } 926 927 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 928 929 /* first, get the options out of the converterName string */ 930 stackPieces.cnvName[0] = 0; 931 stackPieces.locale[0] = 0; 932 stackPieces.options = 0; 933 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 934 if (U_FAILURE(*err)) { 935 /* Very bad name used. 
*/ 936 UTRACE_EXIT_STATUS(*err); 937 return NULL; 938 } 939 stackArgs.nestedLoads=1; 940 stackArgs.pkg=packageName; 941 942 /* open the data, unflatten the shared structure */ 943 mySharedConverterData = createConverterFromFile(&stackArgs, err); 944 945 if (U_FAILURE(*err)) { 946 UTRACE_EXIT_STATUS(*err); 947 return NULL; 948 } 949 950 /* create the actual converter */ 951 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 952 953 if (U_FAILURE(*err)) { 954 ucnv_close(myUConverter); 955 UTRACE_EXIT_STATUS(*err); 956 return NULL; 957 } 958 959 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 960 return myUConverter; 961 } 962 963 964 U_CFUNC UConverter* 965 ucnv_createConverterFromSharedData(UConverter *myUConverter, 966 UConverterSharedData *mySharedConverterData, 967 UConverterLoadArgs *pArgs, 968 UErrorCode *err) 969 { 970 UBool isCopyLocal; 971 972 if(U_FAILURE(*err)) { 973 ucnv_unloadSharedDataIfReady(mySharedConverterData); 974 return myUConverter; 975 } 976 if(myUConverter == NULL) 977 { 978 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 979 if(myUConverter == NULL) 980 { 981 *err = U_MEMORY_ALLOCATION_ERROR; 982 ucnv_unloadSharedDataIfReady(mySharedConverterData); 983 return NULL; 984 } 985 isCopyLocal = FALSE; 986 } else { 987 isCopyLocal = TRUE; 988 } 989 990 /* initialize the converter */ 991 uprv_memset(myUConverter, 0, sizeof(UConverter)); 992 myUConverter->isCopyLocal = isCopyLocal; 993 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 994 myUConverter->sharedData = mySharedConverterData; 995 myUConverter->options = pArgs->options; 996 if(!pArgs->onlyTestIsLoadable) { 997 myUConverter->preFromUFirstCP = U_SENTINEL; 998 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 999 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1000 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1001 myUConverter->maxBytesPerUChar = 
mySharedConverterData->staticData->maxBytesPerChar; 1002 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1003 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1004 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1005 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1006 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1007 } 1008 1009 if(mySharedConverterData->impl->open != NULL) { 1010 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1011 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1012 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1013 ucnv_close(myUConverter); 1014 return NULL; 1015 } 1016 } 1017 1018 return myUConverter; 1019 } 1020 1021 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1022 */ 1023 U_CAPI int32_t U_EXPORT2 1024 ucnv_flushCache () 1025 { 1026 UConverterSharedData *mySharedData = NULL; 1027 int32_t pos; 1028 int32_t tableDeletedNum = 0; 1029 const UHashElement *e; 1030 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1031 int32_t i, remaining; 1032 1033 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1034 1035 /* Close the default converter without creating a new one so that everything will be flushed. */ 1036 u_flushDefaultConverter(); 1037 1038 /*if shared data hasn't even been lazy evaluated yet 1039 * return 0 1040 */ 1041 if (SHARED_DATA_HASHTABLE == NULL) { 1042 UTRACE_EXIT_VALUE((int32_t)0); 1043 return 0; 1044 } 1045 1046 /*creates an enumeration to iterate through every element in the 1047 * table 1048 * 1049 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1050 * accessing or modifying the hash table during the iteration. 1051 * The reference count of an entry may be decremented by 1052 * ucnv_close while the iteration is in process, but this is 1053 * benign. 
It can't be incremented (in ucnv_createConverter()) 1054 * because the sequence of looking up in the cache + incrementing 1055 * is protected by cnvCacheMutex. 1056 */ 1057 umtx_lock(&cnvCacheMutex); 1058 /* 1059 * double loop: A delta/extension-only converter has a pointer to its base table's 1060 * shared data; the first iteration of the outer loop may see the delta converter 1061 * before the base converter, and unloading the delta converter may get the base 1062 * converter's reference counter down to 0. 1063 */ 1064 i = 0; 1065 do { 1066 remaining = 0; 1067 pos = UHASH_FIRST; 1068 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1069 { 1070 mySharedData = (UConverterSharedData *) e->value.pointer; 1071 /*deletes only if reference counter == 0 */ 1072 if (mySharedData->referenceCounter == 0) 1073 { 1074 tableDeletedNum++; 1075 1076 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1077 1078 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1079 mySharedData->sharedDataCached = FALSE; 1080 ucnv_deleteSharedConverterData (mySharedData); 1081 } else { 1082 ++remaining; 1083 } 1084 } 1085 } while(++i == 1 && remaining > 0); 1086 umtx_unlock(&cnvCacheMutex); 1087 1088 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1089 1090 UTRACE_EXIT_VALUE(tableDeletedNum); 1091 return tableDeletedNum; 1092 } 1093 1094 /* available converters list --------------------------------------------------- */ 1095 1096 static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { 1097 U_ASSERT(gAvailableConverterCount == 0); 1098 U_ASSERT(gAvailableConverters == NULL); 1099 1100 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1101 UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); 1102 int32_t allConverterCount = uenum_count(allConvEnum, &errCode); 1103 if (U_FAILURE(errCode)) { 1104 return; 1105 } 1106 1107 /* We can't have more than "*converterTable" converters to open */ 
1108 gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1109 if (!gAvailableConverters) { 1110 errCode = U_MEMORY_ALLOCATION_ERROR; 1111 return; 1112 } 1113 1114 /* Open the default converter to make sure that it has first dibs in the hash table. */ 1115 UErrorCode localStatus = U_ZERO_ERROR; 1116 UConverter tempConverter; 1117 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1118 1119 gAvailableConverterCount = 0; 1120 1121 for (int32_t idx = 0; idx < allConverterCount; idx++) { 1122 localStatus = U_ZERO_ERROR; 1123 const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); 1124 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1125 gAvailableConverters[gAvailableConverterCount++] = converterName; 1126 } 1127 } 1128 1129 uenum_close(allConvEnum); 1130 } 1131 1132 1133 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1134 umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); 1135 return U_SUCCESS(*pErrorCode); 1136 } 1137 1138 U_CFUNC uint16_t 1139 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1140 if (haveAvailableConverterList(pErrorCode)) { 1141 return gAvailableConverterCount; 1142 } 1143 return 0; 1144 } 1145 1146 U_CFUNC const char * 1147 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1148 if (haveAvailableConverterList(pErrorCode)) { 1149 if (n < gAvailableConverterCount) { 1150 return gAvailableConverters[n]; 1151 } 1152 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1153 } 1154 return NULL; 1155 } 1156 1157 /* default converter name --------------------------------------------------- */ 1158 1159 #if !U_CHARSET_IS_UTF8 1160 /* 1161 Copy the canonical converter name. 1162 ucnv_getDefaultName must be thread safe, which can call this function. 
1163 1164 ucnv_setDefaultName calls this function and it doesn't have to be 1165 thread safe because there is no reliable/safe way to reset the 1166 converter in use in all threads. If you did reset the converter, you 1167 would not be sure that retrieving a default converter for one string 1168 would be the same type of default converter for a successive string. 1169 Since the name is a returned via ucnv_getDefaultName without copying, 1170 you shouldn't be modifying or deleting the string from a separate thread. 1171 */ 1172 static inline void 1173 internalSetName(const char *name, UErrorCode *status) { 1174 UConverterNamePieces stackPieces; 1175 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 1176 int32_t length=(int32_t)(uprv_strlen(name)); 1177 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1178 const UConverterSharedData *algorithmicSharedData; 1179 1180 stackArgs.name = name; 1181 if(containsOption) { 1182 stackPieces.cnvName[0] = 0; 1183 stackPieces.locale[0] = 0; 1184 stackPieces.options = 0; 1185 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1186 if(U_FAILURE(*status)) { 1187 return; 1188 } 1189 } 1190 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1191 1192 umtx_lock(&cnvCacheMutex); 1193 1194 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1195 gDefaultConverterContainsOption = containsOption; 1196 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1197 gDefaultConverterNameBuffer[length]=0; 1198 1199 /* gDefaultConverterName MUST be the last global var set by this function. */ 1200 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1201 // But there is nothing here preventing that from being reordered, either by the compiler 1202 // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. 
1203 // -- Andy 1204 gDefaultConverterName = gDefaultConverterNameBuffer; 1205 1206 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1207 1208 umtx_unlock(&cnvCacheMutex); 1209 } 1210 #endif 1211 1212 /* 1213 * In order to be really thread-safe, the get function would have to take 1214 * a buffer parameter and copy the current string inside a mutex block. 1215 * This implementation only tries to be really thread-safe while 1216 * setting the name. 1217 * It assumes that setting a pointer is atomic. 1218 */ 1219 1220 U_CAPI const char* U_EXPORT2 1221 ucnv_getDefaultName() { 1222 #if U_CHARSET_IS_UTF8 1223 return "UTF-8"; 1224 #else 1225 /* local variable to be thread-safe */ 1226 const char *name; 1227 1228 /* 1229 Concurrent calls to ucnv_getDefaultName must be thread safe, 1230 but ucnv_setDefaultName is not thread safe. 1231 */ 1232 { 1233 icu::Mutex lock(&cnvCacheMutex); 1234 name = gDefaultConverterName; 1235 } 1236 if(name==NULL) { 1237 UErrorCode errorCode = U_ZERO_ERROR; 1238 UConverter *cnv = NULL; 1239 1240 name = uprv_getDefaultCodepage(); 1241 1242 /* if the name is there, test it out and get the canonical name with options */ 1243 if(name != NULL) { 1244 cnv = ucnv_open(name, &errorCode); 1245 if(U_SUCCESS(errorCode) && cnv != NULL) { 1246 name = ucnv_getName(cnv, &errorCode); 1247 } 1248 } 1249 1250 if(name == NULL || name[0] == 0 1251 || U_FAILURE(errorCode) || cnv == NULL 1252 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1253 { 1254 /* Panic time, let's use a fallback. */ 1255 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1256 name = "US-ASCII"; 1257 /* there is no 'algorithmic' converter for EBCDIC */ 1258 #elif U_PLATFORM == U_PF_OS390 1259 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1260 #else 1261 name = "ibm-37_P100-1995"; 1262 #endif 1263 } 1264 1265 internalSetName(name, &errorCode); 1266 1267 /* The close may make the current name go away. 
*/ 1268 ucnv_close(cnv); 1269 } 1270 1271 return name; 1272 #endif 1273 } 1274 1275 #if U_CHARSET_IS_UTF8 1276 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} 1277 #else 1278 /* 1279 This function is not thread safe, and it can't be thread safe. 1280 See internalSetName or the API reference for details. 1281 */ 1282 U_CAPI void U_EXPORT2 1283 ucnv_setDefaultName(const char *converterName) { 1284 if(converterName==NULL) { 1285 /* reset to the default codepage */ 1286 gDefaultConverterName=NULL; 1287 } else { 1288 UErrorCode errorCode = U_ZERO_ERROR; 1289 UConverter *cnv = NULL; 1290 const char *name = NULL; 1291 1292 /* if the name is there, test it out and get the canonical name with options */ 1293 cnv = ucnv_open(converterName, &errorCode); 1294 if(U_SUCCESS(errorCode) && cnv != NULL) { 1295 name = ucnv_getName(cnv, &errorCode); 1296 } 1297 1298 if(U_SUCCESS(errorCode) && name!=NULL) { 1299 internalSetName(name, &errorCode); 1300 } 1301 /* else this converter is bad to use. Don't change it to a bad value. */ 1302 1303 /* The close may make the current name go away. 
*/ 1304 ucnv_close(cnv); 1305 1306 /* reset the converter cache */ 1307 u_flushDefaultConverter(); 1308 } 1309 } 1310 #endif 1311 1312 /* data swapping ------------------------------------------------------------ */ 1313 1314 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ 1315 1316 #if !UCONFIG_NO_LEGACY_CONVERSION 1317 1318 U_CAPI int32_t U_EXPORT2 1319 ucnv_swap(const UDataSwapper *ds, 1320 const void *inData, int32_t length, void *outData, 1321 UErrorCode *pErrorCode) { 1322 const UDataInfo *pInfo; 1323 int32_t headerSize; 1324 1325 const uint8_t *inBytes; 1326 uint8_t *outBytes; 1327 1328 uint32_t offset, count, staticDataSize; 1329 int32_t size; 1330 1331 const UConverterStaticData *inStaticData; 1332 UConverterStaticData *outStaticData; 1333 1334 const _MBCSHeader *inMBCSHeader; 1335 _MBCSHeader *outMBCSHeader; 1336 _MBCSHeader mbcsHeader; 1337 uint32_t mbcsHeaderLength; 1338 UBool noFromU=FALSE; 1339 1340 uint8_t outputType; 1341 1342 int32_t maxFastUChar, mbcsIndexLength; 1343 1344 const int32_t *inExtIndexes; 1345 int32_t extOffset; 1346 1347 /* udata_swapDataHeader checks the arguments */ 1348 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1349 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1350 return 0; 1351 } 1352 1353 /* check data format and format version */ 1354 pInfo=(const UDataInfo *)((const char *)inData+4); 1355 if(!( 1356 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 1357 pInfo->dataFormat[1]==0x6e && 1358 pInfo->dataFormat[2]==0x76 && 1359 pInfo->dataFormat[3]==0x74 && 1360 pInfo->formatVersion[0]==6 && 1361 pInfo->formatVersion[1]>=2 1362 )) { 1363 udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", 1364 pInfo->dataFormat[0], pInfo->dataFormat[1], 1365 pInfo->dataFormat[2], pInfo->dataFormat[3], 1366 pInfo->formatVersion[0], pInfo->formatVersion[1]); 1367 
*pErrorCode=U_UNSUPPORTED_ERROR; 1368 return 0; 1369 } 1370 1371 inBytes=(const uint8_t *)inData+headerSize; 1372 outBytes=(uint8_t *)outData+headerSize; 1373 1374 /* read the initial UConverterStaticData structure after the UDataInfo header */ 1375 inStaticData=(const UConverterStaticData *)inBytes; 1376 outStaticData=(UConverterStaticData *)outBytes; 1377 1378 if(length<0) { 1379 staticDataSize=ds->readUInt32(inStaticData->structSize); 1380 } else { 1381 length-=headerSize; 1382 if( length<(int32_t)sizeof(UConverterStaticData) || 1383 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 1384 ) { 1385 udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 1386 length); 1387 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1388 return 0; 1389 } 1390 } 1391 1392 if(length>=0) { 1393 /* swap the static data */ 1394 if(inStaticData!=outStaticData) { 1395 uprv_memcpy(outStaticData, inStaticData, staticDataSize); 1396 } 1397 1398 ds->swapArray32(ds, &inStaticData->structSize, 4, 1399 &outStaticData->structSize, pErrorCode); 1400 ds->swapArray32(ds, &inStaticData->codepage, 4, 1401 &outStaticData->codepage, pErrorCode); 1402 1403 ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), 1404 outStaticData->name, pErrorCode); 1405 if(U_FAILURE(*pErrorCode)) { 1406 udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); 1407 return 0; 1408 } 1409 } 1410 1411 inBytes+=staticDataSize; 1412 outBytes+=staticDataSize; 1413 if(length>=0) { 1414 length-=(int32_t)staticDataSize; 1415 } 1416 1417 /* check for supported conversionType values */ 1418 if(inStaticData->conversionType==UCNV_MBCS) { 1419 /* swap MBCS data */ 1420 inMBCSHeader=(const _MBCSHeader *)inBytes; 1421 outMBCSHeader=(_MBCSHeader *)outBytes; 1422 1423 if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { 1424 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion 
table\n", 1425 length); 1426 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1427 return 0; 1428 } 1429 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 1430 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 1431 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 1432 ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& 1433 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 1434 ) { 1435 mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; 1436 noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); 1437 } else { 1438 udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", 1439 inMBCSHeader->version[0], inMBCSHeader->version[1]); 1440 *pErrorCode=U_UNSUPPORTED_ERROR; 1441 return 0; 1442 } 1443 1444 uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); 1445 mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); 1446 mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); 1447 mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); 1448 mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); 1449 mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); 1450 mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); 1451 mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); 1452 /* mbcsHeader.options have been read above */ 1453 1454 extOffset=(int32_t)(mbcsHeader.flags>>8); 1455 outputType=(uint8_t)mbcsHeader.flags; 1456 if(noFromU && outputType==MBCS_OUTPUT_1) { 1457 udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); 1458 *pErrorCode=U_UNSUPPORTED_ERROR; 1459 return 0; 1460 } 1461 1462 /* make sure that the output type is known */ 1463 switch(outputType) { 1464 case MBCS_OUTPUT_1: 1465 case MBCS_OUTPUT_2: 1466 case MBCS_OUTPUT_3: 1467 case MBCS_OUTPUT_4: 1468 case MBCS_OUTPUT_3_EUC: 1469 case MBCS_OUTPUT_4_EUC: 1470 case MBCS_OUTPUT_2_SISO: 1471 case 
MBCS_OUTPUT_EXT_ONLY: 1472 /* OK */ 1473 break; 1474 default: 1475 udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", 1476 outputType); 1477 *pErrorCode=U_UNSUPPORTED_ERROR; 1478 return 0; 1479 } 1480 1481 /* calculate the length of the MBCS data */ 1482 1483 /* 1484 * utf8Friendly MBCS files (mbcsHeader.version 4.3) 1485 * contain an additional mbcsIndex table: 1486 * uint16_t[(maxFastUChar+1)>>6]; 1487 * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). 1488 */ 1489 maxFastUChar=0; 1490 mbcsIndexLength=0; 1491 if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && 1492 mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 1493 ) { 1494 maxFastUChar=(maxFastUChar<<8)|0xff; 1495 mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ 1496 } 1497 1498 if(extOffset==0) { 1499 size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); 1500 if(!noFromU) { 1501 size+=(int32_t)mbcsHeader.fromUBytesLength; 1502 } 1503 1504 /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ 1505 inExtIndexes=NULL; 1506 } else { 1507 /* there is extension data after the base data, see ucnv_ext.h */ 1508 if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 1509 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 1510 length); 1511 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1512 return 0; 1513 } 1514 1515 inExtIndexes=(const int32_t *)(inBytes+extOffset); 1516 size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); 1517 } 1518 1519 if(length>=0) { 1520 if(length<size) { 1521 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1522 length); 1523 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1524 return 0; 1525 } 1526 1527 /* copy the data for inaccessible bytes */ 1528 if(inBytes!=outBytes) { 1529 uprv_memcpy(outBytes, inBytes, size); 1530 } 1531 
1532 /* swap the MBCSHeader, except for the version field */ 1533 count=mbcsHeaderLength*4; 1534 ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, 1535 &outMBCSHeader->countStates, pErrorCode); 1536 1537 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 1538 /* 1539 * extension-only file, 1540 * contains a base name instead of normal base table data 1541 */ 1542 1543 /* swap the base name, between the header and the extension data */ 1544 const char *inBaseName=(const char *)inBytes+count; 1545 char *outBaseName=(char *)outBytes+count; 1546 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), 1547 outBaseName, pErrorCode); 1548 } else { 1549 /* normal file with base table data */ 1550 1551 /* swap the state table, 1kB per state */ 1552 offset=count; 1553 count=mbcsHeader.countStates*1024; 1554 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1555 outBytes+offset, pErrorCode); 1556 1557 /* swap the toUFallbacks[] */ 1558 offset+=count; 1559 count=mbcsHeader.countToUFallbacks*8; 1560 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1561 outBytes+offset, pErrorCode); 1562 1563 /* swap the unicodeCodeUnits[] */ 1564 offset=mbcsHeader.offsetToUCodeUnits; 1565 count=mbcsHeader.offsetFromUTable-offset; 1566 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1567 outBytes+offset, pErrorCode); 1568 1569 /* offset to the stage 1 table, independent of the outputType */ 1570 offset=mbcsHeader.offsetFromUTable; 1571 1572 if(outputType==MBCS_OUTPUT_1) { 1573 /* SBCS: swap the fromU tables, all 16 bits wide */ 1574 count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; 1575 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1576 outBytes+offset, pErrorCode); 1577 } else { 1578 /* otherwise: swap the stage tables separately */ 1579 1580 /* stage 1 table: uint16_t[0x440 or 0x40] */ 1581 if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 1582 count=0x440*2; /* for all of Unicode */ 1583 } else { 1584 count=0x40*2; /* only BMP */ 1585 } 1586 
ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1587 outBytes+offset, pErrorCode); 1588 1589 /* stage 2 table: uint32_t[] */ 1590 offset+=count; 1591 count=mbcsHeader.offsetFromUBytes-offset; 1592 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1593 outBytes+offset, pErrorCode); 1594 1595 /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ 1596 offset=mbcsHeader.offsetFromUBytes; 1597 count= noFromU ? 0 : mbcsHeader.fromUBytesLength; 1598 switch(outputType) { 1599 case MBCS_OUTPUT_2: 1600 case MBCS_OUTPUT_3_EUC: 1601 case MBCS_OUTPUT_2_SISO: 1602 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1603 outBytes+offset, pErrorCode); 1604 break; 1605 case MBCS_OUTPUT_4: 1606 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1607 outBytes+offset, pErrorCode); 1608 break; 1609 default: 1610 /* just uint8_t[], nothing to swap */ 1611 break; 1612 } 1613 1614 if(mbcsIndexLength!=0) { 1615 offset+=count; 1616 count=mbcsIndexLength; 1617 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1618 outBytes+offset, pErrorCode); 1619 } 1620 } 1621 } 1622 1623 if(extOffset!=0) { 1624 /* swap the extension data */ 1625 inBytes+=extOffset; 1626 outBytes+=extOffset; 1627 1628 /* swap toUTable[] */ 1629 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); 1630 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); 1631 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1632 1633 /* swap toUUChars[] */ 1634 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); 1635 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); 1636 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1637 1638 /* swap fromUTableUChars[] */ 1639 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); 1640 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); 1641 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1642 1643 /* swap 
fromUTableValues[] */ 1644 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); 1645 /* same length as for fromUTableUChars[] */ 1646 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1647 1648 /* no need to swap fromUBytes[] */ 1649 1650 /* swap fromUStage12[] */ 1651 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); 1652 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); 1653 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1654 1655 /* swap fromUStage3[] */ 1656 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); 1657 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); 1658 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1659 1660 /* swap fromUStage3b[] */ 1661 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); 1662 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); 1663 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1664 1665 /* swap indexes[] */ 1666 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); 1667 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); 1668 } 1669 } 1670 } else { 1671 udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", 1672 inStaticData->conversionType); 1673 *pErrorCode=U_UNSUPPORTED_ERROR; 1674 return 0; 1675 } 1676 1677 return headerSize+(int32_t)staticDataSize+size; 1678 } 1679 1680 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1681 1682 #endif 1683