1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************** 5 * COPYRIGHT: 6 * Copyright (c) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************** 9 * 10 * ucnv_bld.cpp: 11 * 12 * Defines functions that are used in the creation/initialization/deletion 13 * of converters and related structures. 14 * uses uconv_io.h routines to access disk information 15 * is used by ucnv.h to implement public API create/delete/flushCache routines 16 * Modification History: 17 * 18 * Date Name Description 19 * 20 * 06/20/2000 helena OS/400 port changes; mostly typecast. 21 * 06/29/2000 helena Major rewrite of the callback interface. 22 */ 23 24 #include "unicode/utypes.h" 25 26 #if !UCONFIG_NO_CONVERSION 27 28 #include "unicode/putil.h" 29 #include "unicode/udata.h" 30 #include "unicode/ucnv.h" 31 #include "unicode/uloc.h" 32 #include "mutex.h" 33 #include "putilimp.h" 34 #include "uassert.h" 35 #include "utracimp.h" 36 #include "ucnv_io.h" 37 #include "ucnv_bld.h" 38 #include "ucnvmbcs.h" 39 #include "ucnv_ext.h" 40 #include "ucnv_cnv.h" 41 #include "ucnv_imp.h" 42 #include "uhash.h" 43 #include "umutex.h" 44 #include "cstring.h" 45 #include "cmemory.h" 46 #include "ucln_cmn.h" 47 #include "ustr_cnv.h" 48 49 50 #if 0 51 #include <stdio.h> 52 extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); 53 #define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) 54 #else 55 # define UCNV_DEBUG_LOG(x,y,z) 56 #endif 57 58 static const UConverterSharedData * const 59 converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ 60 NULL, NULL, 61 62 #if UCONFIG_NO_LEGACY_CONVERSION 63 NULL, 64 #else 65 &_MBCSData, 66 #endif 67 68 &_Latin1Data, 69 &_UTF8Data, &_UTF16BEData, &_UTF16LEData, 70 #if UCONFIG_ONLY_HTML_CONVERSION 71 NULL, NULL, 72 #else 73 &_UTF32BEData, &_UTF32LEData, 74 #endif 75 NULL, 76 77 #if UCONFIG_NO_LEGACY_CONVERSION 78 NULL, 79 #else 80 &_ISO2022Data, 81 #endif 82 83 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 84 NULL, NULL, NULL, NULL, NULL, NULL, 85 NULL, NULL, NULL, NULL, NULL, NULL, 86 NULL, 87 #else 88 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, 89 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, 90 &_HZData, 91 #endif 92 93 #if UCONFIG_ONLY_HTML_CONVERSION 94 NULL, 95 #else 96 &_SCSUData, 97 #endif 98 99 100 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 101 NULL, 102 #else 103 &_ISCIIData, 104 #endif 105 106 &_ASCIIData, 107 #if UCONFIG_ONLY_HTML_CONVERSION 108 NULL, NULL, &_UTF16Data, NULL, NULL, NULL, 109 #else 110 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 111 #endif 112 113 #if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION 114 NULL, 115 #else 116 &_CompoundTextData 117 #endif 118 }; 119 120 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName. 121 Also the name should be in lower case and all spaces, dashes and underscores 122 removed 123 */ 124 static struct { 125 const char *name; 126 const UConverterType type; 127 } const cnvNameType[] = { 128 #if !UCONFIG_ONLY_HTML_CONVERSION 129 { "bocu1", UCNV_BOCU1 }, 130 { "cesu8", UCNV_CESU8 }, 131 #endif 132 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 133 { "hz",UCNV_HZ }, 134 #endif 135 #if !UCONFIG_ONLY_HTML_CONVERSION 136 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 137 #endif 138 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 139 { "iscii", UCNV_ISCII }, 140 #endif 141 #if !UCONFIG_NO_LEGACY_CONVERSION 142 { "iso2022", UCNV_ISO_2022 }, 143 #endif 144 { "iso88591", UCNV_LATIN_1 }, 145 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 146 { "lmbcs1", UCNV_LMBCS_1 }, 147 { "lmbcs11",UCNV_LMBCS_11 }, 148 { "lmbcs16",UCNV_LMBCS_16 }, 149 { "lmbcs17",UCNV_LMBCS_17 }, 150 { "lmbcs18",UCNV_LMBCS_18 }, 151 { "lmbcs19",UCNV_LMBCS_19 }, 152 { "lmbcs2", UCNV_LMBCS_2 }, 153 { "lmbcs3", UCNV_LMBCS_3 }, 154 { "lmbcs4", UCNV_LMBCS_4 }, 155 { "lmbcs5", UCNV_LMBCS_5 }, 156 { "lmbcs6", UCNV_LMBCS_6 }, 157 { "lmbcs8", UCNV_LMBCS_8 }, 158 #endif 159 #if !UCONFIG_ONLY_HTML_CONVERSION 160 { "scsu", UCNV_SCSU }, 161 #endif 162 { "usascii", UCNV_US_ASCII }, 163 { "utf16", UCNV_UTF16 }, 164 { "utf16be", UCNV_UTF16_BigEndian }, 165 { "utf16le", UCNV_UTF16_LittleEndian }, 166 #if U_IS_BIG_ENDIAN 167 { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, 168 { "utf16platformendian", UCNV_UTF16_BigEndian }, 169 #else 170 { "utf16oppositeendian", UCNV_UTF16_BigEndian}, 171 { "utf16platformendian", UCNV_UTF16_LittleEndian }, 172 #endif 173 #if !UCONFIG_ONLY_HTML_CONVERSION 174 { "utf32", UCNV_UTF32 }, 175 { "utf32be", UCNV_UTF32_BigEndian }, 176 { "utf32le", UCNV_UTF32_LittleEndian }, 177 #if U_IS_BIG_ENDIAN 178 { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, 179 { "utf32platformendian", UCNV_UTF32_BigEndian }, 180 #else 181 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, 182 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 183 #endif 184 #endif 185 #if !UCONFIG_ONLY_HTML_CONVERSION 186 { "utf7", UCNV_UTF7 }, 187 #endif 188 { "utf8", UCNV_UTF8 }, 189 #if !UCONFIG_ONLY_HTML_CONVERSION 190 { "x11compoundtext", UCNV_COMPOUND_TEXT} 191 #endif 192 }; 193 194 195 /*initializes some global variables */ 196 static UHashtable *SHARED_DATA_HASHTABLE = NULL; 197 static UMutex cnvCacheMutex = U_MUTEX_INITIALIZER; /* Mutex for synchronizing cnv cache access. */ 198 /* Note: the global mutex is used for */ 199 /* reference count updates. */ 200 201 static const char **gAvailableConverters = NULL; 202 static uint16_t gAvailableConverterCount = 0; 203 static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; 204 205 #if !U_CHARSET_IS_UTF8 206 207 /* This contains the resolved converter name. So no further alias lookup is needed again. */ 208 static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ 209 static const char *gDefaultConverterName = NULL; 210 211 /* 212 If the default converter is an algorithmic converter, this is the cached value. 213 We don't cache a full UConverter and clone it because ucnv_clone doesn't have 214 less overhead than an algorithmic open. We don't cache non-algorithmic converters 215 because ucnv_flushCache must be able to unload the default converter and its table. 216 */ 217 static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; 218 219 /* Does gDefaultConverterName have a converter option and require extra parsing? */ 220 static UBool gDefaultConverterContainsOption; 221 222 #endif /* !U_CHARSET_IS_UTF8 */ 223 224 static const char DATA_TYPE[] = "cnv"; 225 226 /* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). 227 * If it is ever to be called from elsewhere, synchronization 228 * will need to be considered. 229 */ 230 static void 231 ucnv_flushAvailableConverterCache() { 232 gAvailableConverterCount = 0; 233 if (gAvailableConverters) { 234 uprv_free((char **)gAvailableConverters); 235 gAvailableConverters = NULL; 236 } 237 gAvailableConvertersInitOnce.reset(); 238 } 239 240 /* ucnv_cleanup - delete all storage held by the converter cache, except any */ 241 /* in use by open converters. */ 242 /* Not thread safe. */ 243 /* Not supported API. */ 244 static UBool U_CALLCONV ucnv_cleanup(void) { 245 ucnv_flushCache(); 246 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 247 uhash_close(SHARED_DATA_HASHTABLE); 248 SHARED_DATA_HASHTABLE = NULL; 249 } 250 251 /* Isn't called from flushCache because other threads may have preexisting references to the table. */ 252 ucnv_flushAvailableConverterCache(); 253 254 #if !U_CHARSET_IS_UTF8 255 gDefaultConverterName = NULL; 256 gDefaultConverterNameBuffer[0] = 0; 257 gDefaultConverterContainsOption = FALSE; 258 gDefaultAlgorithmicSharedData = NULL; 259 #endif 260 261 return (SHARED_DATA_HASHTABLE == NULL); 262 } 263 264 static UBool U_CALLCONV 265 isCnvAcceptable(void * /*context*/, 266 const char * /*type*/, const char * /*name*/, 267 const UDataInfo *pInfo) { 268 return (UBool)( 269 pInfo->size>=20 && 270 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 271 pInfo->charsetFamily==U_CHARSET_FAMILY && 272 pInfo->sizeofUChar==U_SIZEOF_UCHAR && 273 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 274 pInfo->dataFormat[1]==0x6e && 275 pInfo->dataFormat[2]==0x76 && 276 pInfo->dataFormat[3]==0x74 && 277 pInfo->formatVersion[0]==6); /* Everything will be version 6 */ 278 } 279 280 /** 281 * Un flatten shared data from a UDATA.. 282 */ 283 static UConverterSharedData* 284 ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) 285 { 286 /* UDataInfo info; -- necessary only if some converters have different formatVersion */ 287 const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); 288 const UConverterStaticData *source = (const UConverterStaticData *) raw; 289 UConverterSharedData *data; 290 UConverterType type = (UConverterType)source->conversionType; 291 292 if(U_FAILURE(*status)) 293 return NULL; 294 295 if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || 296 converterData[type] == NULL || 297 !converterData[type]->isReferenceCounted || 298 converterData[type]->referenceCounter != 1 || 299 source->structSize != sizeof(UConverterStaticData)) 300 { 301 *status = U_INVALID_TABLE_FORMAT; 302 return NULL; 303 } 304 305 data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); 306 if(data == NULL) { 307 *status = U_MEMORY_ALLOCATION_ERROR; 308 return NULL; 309 } 310 311 /* copy initial values from the static structure for this type */ 312 uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); 313 314 data->staticData = source; 315 316 data->sharedDataCached = FALSE; 317 318 /* fill in fields from the loaded data */ 319 data->dataMemory = (void*)pData; /* for future use */ 320 321 if(data->impl->load != NULL) { 322 data->impl->load(data, pArgs, raw + source->structSize, status); 323 if(U_FAILURE(*status)) { 324 uprv_free(data); 325 return NULL; 326 } 327 } 328 return data; 329 } 330 331 /*Takes an alias name gets an actual converter file name 332 *goes to disk and opens it. 333 *allocates the memory and returns a new UConverter object 334 */ 335 static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) 336 { 337 UDataMemory *data; 338 UConverterSharedData *sharedData; 339 340 UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); 341 342 if (U_FAILURE (*err)) { 343 UTRACE_EXIT_STATUS(*err); 344 return NULL; 345 } 346 347 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); 348 349 data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); 350 if(U_FAILURE(*err)) 351 { 352 UTRACE_EXIT_STATUS(*err); 353 return NULL; 354 } 355 356 sharedData = ucnv_data_unFlattenClone(pArgs, data, err); 357 if(U_FAILURE(*err)) 358 { 359 udata_close(data); 360 UTRACE_EXIT_STATUS(*err); 361 return NULL; 362 } 363 364 /* 365 * TODO Store pkg in a field in the shared data so that delta-only converters 366 * can load base converters from the same package. 367 * If the pkg name is longer than the field, then either do not load the converter 368 * in the first place, or just set the pkg field to "". 369 */ 370 371 UTRACE_EXIT_PTR_STATUS(sharedData, *err); 372 return sharedData; 373 } 374 375 /*returns a converter type from a string 376 */ 377 static const UConverterSharedData * 378 getAlgorithmicTypeFromName(const char *realName) 379 { 380 uint32_t mid, start, limit; 381 uint32_t lastMid; 382 int result; 383 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 384 385 /* Lower case and remove ignoreable characters. */ 386 ucnv_io_stripForCompare(strippedName, realName); 387 388 /* do a binary search for the alias */ 389 start = 0; 390 limit = UPRV_LENGTHOF(cnvNameType); 391 mid = limit; 392 lastMid = UINT32_MAX; 393 394 for (;;) { 395 mid = (uint32_t)((start + limit) / 2); 396 if (lastMid == mid) { /* Have we moved? */ 397 break; /* We haven't moved, and it wasn't found. */ 398 } 399 lastMid = mid; 400 result = uprv_strcmp(strippedName, cnvNameType[mid].name); 401 402 if (result < 0) { 403 limit = mid; 404 } else if (result > 0) { 405 start = mid; 406 } else { 407 return converterData[cnvNameType[mid].type]; 408 } 409 } 410 411 return NULL; 412 } 413 414 /* 415 * Based on the number of known converters, this determines how many times larger 416 * the shared data hash table should be. When on small platforms, or just a couple 417 * of converters are used, this number should be 2. When memory is plentiful, or 418 * when ucnv_countAvailable is ever used with a lot of available converters, 419 * this should be 4. 420 * Larger numbers reduce the number of hash collisions, but use more memory. 421 */ 422 #define UCNV_CACHE_LOAD_FACTOR 2 423 424 /* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ 425 /* Will always be called with the cnvCacheMutex alrady being held */ 426 /* by the calling function. */ 427 /* Stores the shared data in the SHARED_DATA_HASHTABLE 428 * @param data The shared data 429 */ 430 static void 431 ucnv_shareConverterData(UConverterSharedData * data) 432 { 433 UErrorCode err = U_ZERO_ERROR; 434 /*Lazy evaluates the Hashtable itself */ 435 /*void *sanity = NULL;*/ 436 437 if (SHARED_DATA_HASHTABLE == NULL) 438 { 439 SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, 440 ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, 441 &err); 442 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 443 444 if (U_FAILURE(err)) 445 return; 446 } 447 448 /* ### check to see if the element is not already there! */ 449 450 /* 451 sanity = ucnv_getSharedConverterData (data->staticData->name); 452 if(sanity != NULL) 453 { 454 UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); 455 } 456 UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); 457 */ 458 459 /* Mark it shared */ 460 data->sharedDataCached = TRUE; 461 462 uhash_put(SHARED_DATA_HASHTABLE, 463 (void*) data->staticData->name, /* Okay to cast away const as long as 464 keyDeleter == NULL */ 465 data, 466 &err); 467 UCNV_DEBUG_LOG("put", data->staticData->name,data); 468 469 } 470 471 /* Look up a converter name in the shared data cache. */ 472 /* cnvCacheMutex must be held by the caller to protect the hash table. */ 473 /* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) 474 * @param name The name of the shared data 475 * @return the shared data from the SHARED_DATA_HASHTABLE 476 */ 477 static UConverterSharedData * 478 ucnv_getSharedConverterData(const char *name) 479 { 480 /*special case when no Table has yet been created we return NULL */ 481 if (SHARED_DATA_HASHTABLE == NULL) 482 { 483 return NULL; 484 } 485 else 486 { 487 UConverterSharedData *rc; 488 489 rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); 490 UCNV_DEBUG_LOG("get",name,rc); 491 return rc; 492 } 493 } 494 495 /*frees the string of memory blocks associates with a sharedConverter 496 *if and only if the referenceCounter == 0 497 */ 498 /* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to 499 * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and 500 * returns TRUE, 501 * otherwise returns FALSE 502 * @param sharedConverterData The shared data 503 * @return if not it frees all the memory stemming from sharedConverterData and 504 * returns TRUE, otherwise returns FALSE 505 */ 506 static UBool 507 ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) 508 { 509 UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); 510 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); 511 512 if (deadSharedData->referenceCounter > 0) { 513 UTRACE_EXIT_VALUE((int32_t)FALSE); 514 return FALSE; 515 } 516 517 if (deadSharedData->impl->unload != NULL) { 518 deadSharedData->impl->unload(deadSharedData); 519 } 520 521 if(deadSharedData->dataMemory != NULL) 522 { 523 UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; 524 udata_close(data); 525 } 526 527 uprv_free(deadSharedData); 528 529 UTRACE_EXIT_VALUE((int32_t)TRUE); 530 return TRUE; 531 } 532 533 /** 534 * Load a non-algorithmic converter. 535 * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 536 */ 537 UConverterSharedData * 538 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { 539 UConverterSharedData *mySharedConverterData; 540 541 if(err == NULL || U_FAILURE(*err)) { 542 return NULL; 543 } 544 545 if(pArgs->pkg != NULL && *pArgs->pkg != 0) { 546 /* application-provided converters are not currently cached */ 547 return createConverterFromFile(pArgs, err); 548 } 549 550 mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); 551 if (mySharedConverterData == NULL) 552 { 553 /*Not cached, we need to stream it in from file */ 554 mySharedConverterData = createConverterFromFile(pArgs, err); 555 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 556 { 557 return NULL; 558 } 559 else if (!pArgs->onlyTestIsLoadable) 560 { 561 /* share it with other library clients */ 562 ucnv_shareConverterData(mySharedConverterData); 563 } 564 } 565 else 566 { 567 /* The data for this converter was already in the cache. */ 568 /* Update the reference counter on the shared data: one more client */ 569 mySharedConverterData->referenceCounter++; 570 } 571 572 return mySharedConverterData; 573 } 574 575 /** 576 * Unload a non-algorithmic converter. 577 * It must be sharedData->isReferenceCounted 578 * and this function must be called inside umtx_lock(&cnvCacheMutex). 579 */ 580 U_CAPI void 581 ucnv_unload(UConverterSharedData *sharedData) { 582 if(sharedData != NULL) { 583 if (sharedData->referenceCounter > 0) { 584 sharedData->referenceCounter--; 585 } 586 587 if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { 588 ucnv_deleteSharedConverterData(sharedData); 589 } 590 } 591 } 592 593 U_CFUNC void 594 ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) 595 { 596 if(sharedData != NULL && sharedData->isReferenceCounted) { 597 umtx_lock(&cnvCacheMutex); 598 ucnv_unload(sharedData); 599 umtx_unlock(&cnvCacheMutex); 600 } 601 } 602 603 U_CFUNC void 604 ucnv_incrementRefCount(UConverterSharedData *sharedData) 605 { 606 if(sharedData != NULL && sharedData->isReferenceCounted) { 607 umtx_lock(&cnvCacheMutex); 608 sharedData->referenceCounter++; 609 umtx_unlock(&cnvCacheMutex); 610 } 611 } 612 613 /* 614 * *pPieces must be initialized. 615 * The name without options will be copied to pPieces->cnvName. 616 * The locale and options will be copied to pPieces only if present in inName, 617 * otherwise the existing values in pPieces remain. 618 * *pArgs will be set to the pPieces values. 619 */ 620 static void 621 parseConverterOptions(const char *inName, 622 UConverterNamePieces *pPieces, 623 UConverterLoadArgs *pArgs, 624 UErrorCode *err) 625 { 626 char *cnvName = pPieces->cnvName; 627 char c; 628 int32_t len = 0; 629 630 pArgs->name=inName; 631 pArgs->locale=pPieces->locale; 632 pArgs->options=pPieces->options; 633 634 /* copy the converter name itself to cnvName */ 635 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 636 if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { 637 *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 638 pPieces->cnvName[0]=0; 639 return; 640 } 641 *cnvName++=c; 642 inName++; 643 } 644 *cnvName=0; 645 pArgs->name=pPieces->cnvName; 646 647 /* parse options. No more name copying should occur. */ 648 while((c=*inName)!=0) { 649 if(c==UCNV_OPTION_SEP_CHAR) { 650 ++inName; 651 } 652 653 /* inName is behind an option separator */ 654 if(uprv_strncmp(inName, "locale=", 7)==0) { 655 /* do not modify locale itself in case we have multiple locale options */ 656 char *dest=pPieces->locale; 657 658 /* copy the locale option value */ 659 inName+=7; 660 len=0; 661 while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { 662 ++inName; 663 664 if(++len>=ULOC_FULLNAME_CAPACITY) { 665 *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ 666 pPieces->locale[0]=0; 667 return; 668 } 669 670 *dest++=c; 671 } 672 *dest=0; 673 } else if(uprv_strncmp(inName, "version=", 8)==0) { 674 /* copy the version option value into bits 3..0 of pPieces->options */ 675 inName+=8; 676 c=*inName; 677 if(c==0) { 678 pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); 679 return; 680 } else if((uint8_t)(c-'0')<10) { 681 pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); 682 ++inName; 683 } 684 } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { 685 inName+=8; 686 pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); 687 /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ 688 } else { 689 /* ignore any other options until we define some */ 690 while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { 691 } 692 if(c==0) { 693 return; 694 } 695 } 696 } 697 } 698 699 /*Logic determines if the converter is Algorithmic AND/OR cached 700 *depending on that: 701 * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) 702 * -Get it from a Hashtable (Data=X, Cached=TRUE) 703 * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) 704 * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) 705 */ 706 U_CFUNC UConverterSharedData * 707 ucnv_loadSharedData(const char *converterName, 708 UConverterNamePieces *pPieces, 709 UConverterLoadArgs *pArgs, 710 UErrorCode * err) { 711 UConverterNamePieces stackPieces; 712 UConverterLoadArgs stackArgs; 713 UConverterSharedData *mySharedConverterData = NULL; 714 UErrorCode internalErrorCode = U_ZERO_ERROR; 715 UBool mayContainOption = TRUE; 716 UBool checkForAlgorithmic = TRUE; 717 718 if (U_FAILURE (*err)) { 719 return NULL; 720 } 721 722 if(pPieces == NULL) { 723 if(pArgs != NULL) { 724 /* 725 * Bad: We may set pArgs pointers to stackPieces fields 726 * which will be invalid after this function returns. 727 */ 728 *err = U_INTERNAL_PROGRAM_ERROR; 729 return NULL; 730 } 731 pPieces = &stackPieces; 732 } 733 if(pArgs == NULL) { 734 uprv_memset(&stackArgs, 0, sizeof(stackArgs)); 735 stackArgs.size = (int32_t)sizeof(stackArgs); 736 pArgs = &stackArgs; 737 } 738 739 pPieces->cnvName[0] = 0; 740 pPieces->locale[0] = 0; 741 pPieces->options = 0; 742 743 pArgs->name = converterName; 744 pArgs->locale = pPieces->locale; 745 pArgs->options = pPieces->options; 746 747 /* In case "name" is NULL we want to open the default converter. */ 748 if (converterName == NULL) { 749 #if U_CHARSET_IS_UTF8 750 pArgs->name = "UTF-8"; 751 return (UConverterSharedData *)converterData[UCNV_UTF8]; 752 #else 753 /* Call ucnv_getDefaultName first to query the name from the OS. */ 754 pArgs->name = ucnv_getDefaultName(); 755 if (pArgs->name == NULL) { 756 *err = U_MISSING_RESOURCE_ERROR; 757 return NULL; 758 } 759 mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; 760 checkForAlgorithmic = FALSE; 761 mayContainOption = gDefaultConverterContainsOption; 762 /* the default converter name is already canonical */ 763 #endif 764 } 765 else if(UCNV_FAST_IS_UTF8(converterName)) { 766 /* fastpath for UTF-8 */ 767 pArgs->name = "UTF-8"; 768 return (UConverterSharedData *)converterData[UCNV_UTF8]; 769 } 770 else { 771 /* separate the converter name from the options */ 772 parseConverterOptions(converterName, pPieces, pArgs, err); 773 if (U_FAILURE(*err)) { 774 /* Very bad name used. */ 775 return NULL; 776 } 777 778 /* get the canonical converter name */ 779 pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); 780 if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { 781 /* 782 * set the input name in case the converter was added 783 * without updating the alias table, or when there is no alias table 784 */ 785 pArgs->name = pPieces->cnvName; 786 } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { 787 *err = U_AMBIGUOUS_ALIAS_WARNING; 788 } 789 } 790 791 /* separate the converter name from the options */ 792 if(mayContainOption && pArgs->name != pPieces->cnvName) { 793 parseConverterOptions(pArgs->name, pPieces, pArgs, err); 794 } 795 796 /* get the shared data for an algorithmic converter, if it is one */ 797 if (checkForAlgorithmic) { 798 mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); 799 } 800 if (mySharedConverterData == NULL) 801 { 802 /* it is a data-based converter, get its shared data. */ 803 /* Hold the cnvCacheMutex through the whole process of checking the */ 804 /* converter data cache, and adding new entries to the cache */ 805 /* to prevent other threads from modifying the cache during the */ 806 /* process. */ 807 pArgs->nestedLoads=1; 808 pArgs->pkg=NULL; 809 810 umtx_lock(&cnvCacheMutex); 811 mySharedConverterData = ucnv_load(pArgs, err); 812 umtx_unlock(&cnvCacheMutex); 813 if (U_FAILURE (*err) || (mySharedConverterData == NULL)) 814 { 815 return NULL; 816 } 817 } 818 819 return mySharedConverterData; 820 } 821 822 U_CAPI UConverter * 823 ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) 824 { 825 UConverterNamePieces stackPieces; 826 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 827 UConverterSharedData *mySharedConverterData; 828 829 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 830 831 if(U_SUCCESS(*err)) { 832 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); 833 834 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 835 836 myUConverter = ucnv_createConverterFromSharedData( 837 myUConverter, mySharedConverterData, 838 &stackArgs, 839 err); 840 841 if(U_SUCCESS(*err)) { 842 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 843 return myUConverter; 844 } 845 } 846 847 /* exit with error */ 848 UTRACE_EXIT_STATUS(*err); 849 return NULL; 850 } 851 852 U_CFUNC UBool 853 ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { 854 UConverter myUConverter; 855 UConverterNamePieces stackPieces; 856 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 857 UConverterSharedData *mySharedConverterData; 858 859 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); 860 861 if(U_SUCCESS(*err)) { 862 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); 863 864 stackArgs.onlyTestIsLoadable=TRUE; 865 mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); 866 ucnv_createConverterFromSharedData( 867 &myUConverter, mySharedConverterData, 868 &stackArgs, 869 err); 870 ucnv_unloadSharedDataIfReady(mySharedConverterData); 871 } 872 873 UTRACE_EXIT_STATUS(*err); 874 return U_SUCCESS(*err); 875 } 876 877 UConverter * 878 ucnv_createAlgorithmicConverter(UConverter *myUConverter, 879 UConverterType type, 880 const char *locale, uint32_t options, 881 UErrorCode *err) { 882 UConverter *cnv; 883 const UConverterSharedData *sharedData; 884 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 885 886 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); 887 UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); 888 889 if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { 890 *err = U_ILLEGAL_ARGUMENT_ERROR; 891 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 892 return NULL; 893 } 894 895 sharedData = converterData[type]; 896 if(sharedData == NULL || sharedData->isReferenceCounted) { 897 /* not a valid type, or not an algorithmic converter */ 898 *err = U_ILLEGAL_ARGUMENT_ERROR; 899 UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); 900 return NULL; 901 } 902 903 stackArgs.name = ""; 904 stackArgs.options = options; 905 stackArgs.locale=locale; 906 cnv = ucnv_createConverterFromSharedData( 907 myUConverter, (UConverterSharedData *)sharedData, 908 &stackArgs, err); 909 910 UTRACE_EXIT_PTR_STATUS(cnv, *err); 911 return cnv; 912 } 913 914 U_CFUNC UConverter* 915 ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) 916 { 917 UConverter *myUConverter; 918 UConverterSharedData *mySharedConverterData; 919 UConverterNamePieces stackPieces; 920 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 921 922 UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); 923 924 if(U_FAILURE(*err)) { 925 UTRACE_EXIT_STATUS(*err); 926 return NULL; 927 } 928 929 UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); 930 931 /* first, get the options out of the converterName string */ 932 stackPieces.cnvName[0] = 0; 933 stackPieces.locale[0] = 0; 934 stackPieces.options = 0; 935 parseConverterOptions(converterName, &stackPieces, &stackArgs, err); 936 if (U_FAILURE(*err)) { 937 /* Very bad name used. */ 938 UTRACE_EXIT_STATUS(*err); 939 return NULL; 940 } 941 stackArgs.nestedLoads=1; 942 stackArgs.pkg=packageName; 943 944 /* open the data, unflatten the shared structure */ 945 mySharedConverterData = createConverterFromFile(&stackArgs, err); 946 947 if (U_FAILURE(*err)) { 948 UTRACE_EXIT_STATUS(*err); 949 return NULL; 950 } 951 952 /* create the actual converter */ 953 myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); 954 955 if (U_FAILURE(*err)) { 956 ucnv_close(myUConverter); 957 UTRACE_EXIT_STATUS(*err); 958 return NULL; 959 } 960 961 UTRACE_EXIT_PTR_STATUS(myUConverter, *err); 962 return myUConverter; 963 } 964 965 966 U_CFUNC UConverter* 967 ucnv_createConverterFromSharedData(UConverter *myUConverter, 968 UConverterSharedData *mySharedConverterData, 969 UConverterLoadArgs *pArgs, 970 UErrorCode *err) 971 { 972 UBool isCopyLocal; 973 974 if(U_FAILURE(*err)) { 975 ucnv_unloadSharedDataIfReady(mySharedConverterData); 976 return myUConverter; 977 } 978 if(myUConverter == NULL) 979 { 980 myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); 981 if(myUConverter == NULL) 982 { 983 *err = U_MEMORY_ALLOCATION_ERROR; 984 ucnv_unloadSharedDataIfReady(mySharedConverterData); 985 return NULL; 986 } 987 isCopyLocal = FALSE; 988 } else { 989 isCopyLocal = TRUE; 990 } 991 992 /* initialize the converter */ 993 uprv_memset(myUConverter, 0, sizeof(UConverter)); 994 myUConverter->isCopyLocal = isCopyLocal; 995 /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ 996 myUConverter->sharedData = mySharedConverterData; 997 myUConverter->options = pArgs->options; 998 if(!pArgs->onlyTestIsLoadable) { 999 myUConverter->preFromUFirstCP = U_SENTINEL; 1000 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; 1001 myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; 1002 myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; 1003 myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; 1004 myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; 1005 myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; 1006 myUConverter->subChars = (uint8_t *)myUConverter->subUChars; 1007 uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); 1008 myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ 1009 } 1010 1011 if(mySharedConverterData->impl->open != NULL) { 1012 mySharedConverterData->impl->open(myUConverter, pArgs, err); 1013 if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { 1014 /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ 1015 ucnv_close(myUConverter); 1016 return NULL; 1017 } 1018 } 1019 1020 return myUConverter; 1021 } 1022 1023 /*Frees all shared immutable objects that aren't referred to (reference count = 0) 1024 */ 1025 U_CAPI int32_t U_EXPORT2 1026 ucnv_flushCache () 1027 { 1028 UConverterSharedData *mySharedData = NULL; 1029 int32_t pos; 1030 int32_t tableDeletedNum = 0; 1031 const UHashElement *e; 1032 /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ 1033 int32_t i, remaining; 1034 1035 UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); 1036 1037 /* Close the default converter without creating a new one so that everything will be flushed. */ 1038 u_flushDefaultConverter(); 1039 1040 /*if shared data hasn't even been lazy evaluated yet 1041 * return 0 1042 */ 1043 if (SHARED_DATA_HASHTABLE == NULL) { 1044 UTRACE_EXIT_VALUE((int32_t)0); 1045 return 0; 1046 } 1047 1048 /*creates an enumeration to iterate through every element in the 1049 * table 1050 * 1051 * Synchronization: holding cnvCacheMutex will prevent any other thread from 1052 * accessing or modifying the hash table during the iteration. 1053 * The reference count of an entry may be decremented by 1054 * ucnv_close while the iteration is in process, but this is 1055 * benign. It can't be incremented (in ucnv_createConverter()) 1056 * because the sequence of looking up in the cache + incrementing 1057 * is protected by cnvCacheMutex. 1058 */ 1059 umtx_lock(&cnvCacheMutex); 1060 /* 1061 * double loop: A delta/extension-only converter has a pointer to its base table's 1062 * shared data; the first iteration of the outer loop may see the delta converter 1063 * before the base converter, and unloading the delta converter may get the base 1064 * converter's reference counter down to 0. 1065 */ 1066 i = 0; 1067 do { 1068 remaining = 0; 1069 pos = UHASH_FIRST; 1070 while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) 1071 { 1072 mySharedData = (UConverterSharedData *) e->value.pointer; 1073 /*deletes only if reference counter == 0 */ 1074 if (mySharedData->referenceCounter == 0) 1075 { 1076 tableDeletedNum++; 1077 1078 UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); 1079 1080 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 1081 mySharedData->sharedDataCached = FALSE; 1082 ucnv_deleteSharedConverterData (mySharedData); 1083 } else { 1084 ++remaining; 1085 } 1086 } 1087 } while(++i == 1 && remaining > 0); 1088 umtx_unlock(&cnvCacheMutex); 1089 1090 UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); 1091 1092 UTRACE_EXIT_VALUE(tableDeletedNum); 1093 return tableDeletedNum; 1094 } 1095 1096 /* available converters list --------------------------------------------------- */ 1097 1098 static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { 1099 U_ASSERT(gAvailableConverterCount == 0); 1100 U_ASSERT(gAvailableConverters == NULL); 1101 1102 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1103 UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); 1104 int32_t allConverterCount = uenum_count(allConvEnum, &errCode); 1105 if (U_FAILURE(errCode)) { 1106 return; 1107 } 1108 1109 /* We can't have more than "*converterTable" converters to open */ 1110 gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); 1111 if (!gAvailableConverters) { 1112 errCode = U_MEMORY_ALLOCATION_ERROR; 1113 return; 1114 } 1115 1116 /* Open the default converter to make sure that it has first dibs in the hash table. */ 1117 UErrorCode localStatus = U_ZERO_ERROR; 1118 UConverter tempConverter; 1119 ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); 1120 1121 gAvailableConverterCount = 0; 1122 1123 for (int32_t idx = 0; idx < allConverterCount; idx++) { 1124 localStatus = U_ZERO_ERROR; 1125 const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); 1126 if (ucnv_canCreateConverter(converterName, &localStatus)) { 1127 gAvailableConverters[gAvailableConverterCount++] = converterName; 1128 } 1129 } 1130 1131 uenum_close(allConvEnum); 1132 } 1133 1134 1135 static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { 1136 umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); 1137 return U_SUCCESS(*pErrorCode); 1138 } 1139 1140 U_CFUNC uint16_t 1141 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { 1142 if (haveAvailableConverterList(pErrorCode)) { 1143 return gAvailableConverterCount; 1144 } 1145 return 0; 1146 } 1147 1148 U_CFUNC const char * 1149 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { 1150 if (haveAvailableConverterList(pErrorCode)) { 1151 if (n < gAvailableConverterCount) { 1152 return gAvailableConverters[n]; 1153 } 1154 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 1155 } 1156 return NULL; 1157 } 1158 1159 /* default converter name --------------------------------------------------- */ 1160 1161 #if !U_CHARSET_IS_UTF8 1162 /* 1163 Copy the canonical converter name. 1164 ucnv_getDefaultName must be thread safe, which can call this function. 1165 1166 ucnv_setDefaultName calls this function and it doesn't have to be 1167 thread safe because there is no reliable/safe way to reset the 1168 converter in use in all threads. If you did reset the converter, you 1169 would not be sure that retrieving a default converter for one string 1170 would be the same type of default converter for a successive string. 1171 Since the name is a returned via ucnv_getDefaultName without copying, 1172 you shouldn't be modifying or deleting the string from a separate thread. 1173 */ 1174 static inline void 1175 internalSetName(const char *name, UErrorCode *status) { 1176 UConverterNamePieces stackPieces; 1177 UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; 1178 int32_t length=(int32_t)(uprv_strlen(name)); 1179 UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); 1180 const UConverterSharedData *algorithmicSharedData; 1181 1182 stackArgs.name = name; 1183 if(containsOption) { 1184 stackPieces.cnvName[0] = 0; 1185 stackPieces.locale[0] = 0; 1186 stackPieces.options = 0; 1187 parseConverterOptions(name, &stackPieces, &stackArgs, status); 1188 if(U_FAILURE(*status)) { 1189 return; 1190 } 1191 } 1192 algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); 1193 1194 umtx_lock(&cnvCacheMutex); 1195 1196 gDefaultAlgorithmicSharedData = algorithmicSharedData; 1197 gDefaultConverterContainsOption = containsOption; 1198 uprv_memcpy(gDefaultConverterNameBuffer, name, length); 1199 gDefaultConverterNameBuffer[length]=0; 1200 1201 /* gDefaultConverterName MUST be the last global var set by this function. */ 1202 /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ 1203 // But there is nothing here preventing that from being reordered, either by the compiler 1204 // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. 1205 // -- Andy 1206 gDefaultConverterName = gDefaultConverterNameBuffer; 1207 1208 ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); 1209 1210 umtx_unlock(&cnvCacheMutex); 1211 } 1212 #endif 1213 1214 /* 1215 * In order to be really thread-safe, the get function would have to take 1216 * a buffer parameter and copy the current string inside a mutex block. 1217 * This implementation only tries to be really thread-safe while 1218 * setting the name. 1219 * It assumes that setting a pointer is atomic. 1220 */ 1221 1222 U_CAPI const char* U_EXPORT2 1223 ucnv_getDefaultName() { 1224 #if U_CHARSET_IS_UTF8 1225 return "UTF-8"; 1226 #else 1227 /* local variable to be thread-safe */ 1228 const char *name; 1229 1230 /* 1231 Concurrent calls to ucnv_getDefaultName must be thread safe, 1232 but ucnv_setDefaultName is not thread safe. 1233 */ 1234 { 1235 icu::Mutex lock(&cnvCacheMutex); 1236 name = gDefaultConverterName; 1237 } 1238 if(name==NULL) { 1239 UErrorCode errorCode = U_ZERO_ERROR; 1240 UConverter *cnv = NULL; 1241 1242 name = uprv_getDefaultCodepage(); 1243 1244 /* if the name is there, test it out and get the canonical name with options */ 1245 if(name != NULL) { 1246 cnv = ucnv_open(name, &errorCode); 1247 if(U_SUCCESS(errorCode) && cnv != NULL) { 1248 name = ucnv_getName(cnv, &errorCode); 1249 } 1250 } 1251 1252 if(name == NULL || name[0] == 0 1253 || U_FAILURE(errorCode) || cnv == NULL 1254 || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) 1255 { 1256 /* Panic time, let's use a fallback. */ 1257 #if (U_CHARSET_FAMILY == U_ASCII_FAMILY) 1258 name = "US-ASCII"; 1259 /* there is no 'algorithmic' converter for EBCDIC */ 1260 #elif U_PLATFORM == U_PF_OS390 1261 name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; 1262 #else 1263 name = "ibm-37_P100-1995"; 1264 #endif 1265 } 1266 1267 internalSetName(name, &errorCode); 1268 1269 /* The close may make the current name go away. */ 1270 ucnv_close(cnv); 1271 } 1272 1273 return name; 1274 #endif 1275 } 1276 1277 #if U_CHARSET_IS_UTF8 1278 U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} 1279 #else 1280 /* 1281 This function is not thread safe, and it can't be thread safe. 1282 See internalSetName or the API reference for details. 1283 */ 1284 U_CAPI void U_EXPORT2 1285 ucnv_setDefaultName(const char *converterName) { 1286 if(converterName==NULL) { 1287 /* reset to the default codepage */ 1288 gDefaultConverterName=NULL; 1289 } else { 1290 UErrorCode errorCode = U_ZERO_ERROR; 1291 UConverter *cnv = NULL; 1292 const char *name = NULL; 1293 1294 /* if the name is there, test it out and get the canonical name with options */ 1295 cnv = ucnv_open(converterName, &errorCode); 1296 if(U_SUCCESS(errorCode) && cnv != NULL) { 1297 name = ucnv_getName(cnv, &errorCode); 1298 } 1299 1300 if(U_SUCCESS(errorCode) && name!=NULL) { 1301 internalSetName(name, &errorCode); 1302 } 1303 /* else this converter is bad to use. Don't change it to a bad value. */ 1304 1305 /* The close may make the current name go away. */ 1306 ucnv_close(cnv); 1307 1308 /* reset the converter cache */ 1309 u_flushDefaultConverter(); 1310 } 1311 } 1312 #endif 1313 1314 /* data swapping ------------------------------------------------------------ */ 1315 1316 /* most of this might belong more properly into ucnvmbcs.c, but that is so large */ 1317 1318 #if !UCONFIG_NO_LEGACY_CONVERSION 1319 1320 U_CAPI int32_t U_EXPORT2 1321 ucnv_swap(const UDataSwapper *ds, 1322 const void *inData, int32_t length, void *outData, 1323 UErrorCode *pErrorCode) { 1324 const UDataInfo *pInfo; 1325 int32_t headerSize; 1326 1327 const uint8_t *inBytes; 1328 uint8_t *outBytes; 1329 1330 uint32_t offset, count, staticDataSize; 1331 int32_t size; 1332 1333 const UConverterStaticData *inStaticData; 1334 UConverterStaticData *outStaticData; 1335 1336 const _MBCSHeader *inMBCSHeader; 1337 _MBCSHeader *outMBCSHeader; 1338 _MBCSHeader mbcsHeader; 1339 uint32_t mbcsHeaderLength; 1340 UBool noFromU=FALSE; 1341 1342 uint8_t outputType; 1343 1344 int32_t maxFastUChar, mbcsIndexLength; 1345 1346 const int32_t *inExtIndexes; 1347 int32_t extOffset; 1348 1349 /* udata_swapDataHeader checks the arguments */ 1350 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1351 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1352 return 0; 1353 } 1354 1355 /* check data format and format version */ 1356 pInfo=(const UDataInfo *)((const char *)inData+4); 1357 if(!( 1358 pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ 1359 pInfo->dataFormat[1]==0x6e && 1360 pInfo->dataFormat[2]==0x76 && 1361 pInfo->dataFormat[3]==0x74 && 1362 pInfo->formatVersion[0]==6 && 1363 pInfo->formatVersion[1]>=2 1364 )) { 1365 udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", 1366 pInfo->dataFormat[0], pInfo->dataFormat[1], 1367 pInfo->dataFormat[2], pInfo->dataFormat[3], 1368 pInfo->formatVersion[0], pInfo->formatVersion[1]); 1369 *pErrorCode=U_UNSUPPORTED_ERROR; 1370 return 0; 1371 } 1372 1373 inBytes=(const uint8_t *)inData+headerSize; 1374 outBytes=(uint8_t *)outData+headerSize; 1375 1376 /* read the initial UConverterStaticData structure after the UDataInfo header */ 1377 inStaticData=(const UConverterStaticData *)inBytes; 1378 outStaticData=(UConverterStaticData *)outBytes; 1379 1380 if(length<0) { 1381 staticDataSize=ds->readUInt32(inStaticData->structSize); 1382 } else { 1383 length-=headerSize; 1384 if( length<(int32_t)sizeof(UConverterStaticData) || 1385 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 1386 ) { 1387 udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 1388 length); 1389 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1390 return 0; 1391 } 1392 } 1393 1394 if(length>=0) { 1395 /* swap the static data */ 1396 if(inStaticData!=outStaticData) { 1397 uprv_memcpy(outStaticData, inStaticData, staticDataSize); 1398 } 1399 1400 ds->swapArray32(ds, &inStaticData->structSize, 4, 1401 &outStaticData->structSize, pErrorCode); 1402 ds->swapArray32(ds, &inStaticData->codepage, 4, 1403 &outStaticData->codepage, pErrorCode); 1404 1405 ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), 1406 outStaticData->name, pErrorCode); 1407 if(U_FAILURE(*pErrorCode)) { 1408 udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); 1409 return 0; 1410 } 1411 } 1412 1413 inBytes+=staticDataSize; 1414 outBytes+=staticDataSize; 1415 if(length>=0) { 1416 length-=(int32_t)staticDataSize; 1417 } 1418 1419 /* check for supported conversionType values */ 1420 if(inStaticData->conversionType==UCNV_MBCS) { 1421 /* swap MBCS data */ 1422 inMBCSHeader=(const _MBCSHeader *)inBytes; 1423 outMBCSHeader=(_MBCSHeader *)outBytes; 1424 1425 if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { 1426 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1427 length); 1428 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1429 return 0; 1430 } 1431 if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { 1432 mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; 1433 } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && 1434 ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& 1435 MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 1436 ) { 1437 mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; 1438 noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); 1439 } else { 1440 udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", 1441 inMBCSHeader->version[0], inMBCSHeader->version[1]); 1442 *pErrorCode=U_UNSUPPORTED_ERROR; 1443 return 0; 1444 } 1445 1446 uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); 1447 mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); 1448 mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); 1449 mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); 1450 mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); 1451 mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); 1452 mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); 1453 mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); 1454 /* mbcsHeader.options have been read above */ 1455 1456 extOffset=(int32_t)(mbcsHeader.flags>>8); 1457 outputType=(uint8_t)mbcsHeader.flags; 1458 if(noFromU && outputType==MBCS_OUTPUT_1) { 1459 udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); 1460 *pErrorCode=U_UNSUPPORTED_ERROR; 1461 return 0; 1462 } 1463 1464 /* make sure that the output type is known */ 1465 switch(outputType) { 1466 case MBCS_OUTPUT_1: 1467 case MBCS_OUTPUT_2: 1468 case MBCS_OUTPUT_3: 1469 case MBCS_OUTPUT_4: 1470 case MBCS_OUTPUT_3_EUC: 1471 case MBCS_OUTPUT_4_EUC: 1472 case MBCS_OUTPUT_2_SISO: 1473 case MBCS_OUTPUT_EXT_ONLY: 1474 /* OK */ 1475 break; 1476 default: 1477 udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", 1478 outputType); 1479 *pErrorCode=U_UNSUPPORTED_ERROR; 1480 return 0; 1481 } 1482 1483 /* calculate the length of the MBCS data */ 1484 1485 /* 1486 * utf8Friendly MBCS files (mbcsHeader.version 4.3) 1487 * contain an additional mbcsIndex table: 1488 * uint16_t[(maxFastUChar+1)>>6]; 1489 * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). 1490 */ 1491 maxFastUChar=0; 1492 mbcsIndexLength=0; 1493 if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && 1494 mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 1495 ) { 1496 maxFastUChar=(maxFastUChar<<8)|0xff; 1497 mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ 1498 } 1499 1500 if(extOffset==0) { 1501 size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); 1502 if(!noFromU) { 1503 size+=(int32_t)mbcsHeader.fromUBytesLength; 1504 } 1505 1506 /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ 1507 inExtIndexes=NULL; 1508 } else { 1509 /* there is extension data after the base data, see ucnv_ext.h */ 1510 if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 1511 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 1512 length); 1513 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1514 return 0; 1515 } 1516 1517 inExtIndexes=(const int32_t *)(inBytes+extOffset); 1518 size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); 1519 } 1520 1521 if(length>=0) { 1522 if(length<size) { 1523 udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 1524 length); 1525 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1526 return 0; 1527 } 1528 1529 /* copy the data for inaccessible bytes */ 1530 if(inBytes!=outBytes) { 1531 uprv_memcpy(outBytes, inBytes, size); 1532 } 1533 1534 /* swap the MBCSHeader, except for the version field */ 1535 count=mbcsHeaderLength*4; 1536 ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, 1537 &outMBCSHeader->countStates, pErrorCode); 1538 1539 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 1540 /* 1541 * extension-only file, 1542 * contains a base name instead of normal base table data 1543 */ 1544 1545 /* swap the base name, between the header and the extension data */ 1546 const char *inBaseName=(const char *)inBytes+count; 1547 char *outBaseName=(char *)outBytes+count; 1548 ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), 1549 outBaseName, pErrorCode); 1550 } else { 1551 /* normal file with base table data */ 1552 1553 /* swap the state table, 1kB per state */ 1554 offset=count; 1555 count=mbcsHeader.countStates*1024; 1556 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1557 outBytes+offset, pErrorCode); 1558 1559 /* swap the toUFallbacks[] */ 1560 offset+=count; 1561 count=mbcsHeader.countToUFallbacks*8; 1562 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1563 outBytes+offset, pErrorCode); 1564 1565 /* swap the unicodeCodeUnits[] */ 1566 offset=mbcsHeader.offsetToUCodeUnits; 1567 count=mbcsHeader.offsetFromUTable-offset; 1568 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1569 outBytes+offset, pErrorCode); 1570 1571 /* offset to the stage 1 table, independent of the outputType */ 1572 offset=mbcsHeader.offsetFromUTable; 1573 1574 if(outputType==MBCS_OUTPUT_1) { 1575 /* SBCS: swap the fromU tables, all 16 bits wide */ 1576 count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; 1577 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1578 outBytes+offset, pErrorCode); 1579 } else { 1580 /* otherwise: swap the stage tables separately */ 1581 1582 /* stage 1 table: uint16_t[0x440 or 0x40] */ 1583 if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 1584 count=0x440*2; /* for all of Unicode */ 1585 } else { 1586 count=0x40*2; /* only BMP */ 1587 } 1588 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1589 outBytes+offset, pErrorCode); 1590 1591 /* stage 2 table: uint32_t[] */ 1592 offset+=count; 1593 count=mbcsHeader.offsetFromUBytes-offset; 1594 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1595 outBytes+offset, pErrorCode); 1596 1597 /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ 1598 offset=mbcsHeader.offsetFromUBytes; 1599 count= noFromU ? 0 : mbcsHeader.fromUBytesLength; 1600 switch(outputType) { 1601 case MBCS_OUTPUT_2: 1602 case MBCS_OUTPUT_3_EUC: 1603 case MBCS_OUTPUT_2_SISO: 1604 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1605 outBytes+offset, pErrorCode); 1606 break; 1607 case MBCS_OUTPUT_4: 1608 ds->swapArray32(ds, inBytes+offset, (int32_t)count, 1609 outBytes+offset, pErrorCode); 1610 break; 1611 default: 1612 /* just uint8_t[], nothing to swap */ 1613 break; 1614 } 1615 1616 if(mbcsIndexLength!=0) { 1617 offset+=count; 1618 count=mbcsIndexLength; 1619 ds->swapArray16(ds, inBytes+offset, (int32_t)count, 1620 outBytes+offset, pErrorCode); 1621 } 1622 } 1623 } 1624 1625 if(extOffset!=0) { 1626 /* swap the extension data */ 1627 inBytes+=extOffset; 1628 outBytes+=extOffset; 1629 1630 /* swap toUTable[] */ 1631 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); 1632 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); 1633 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1634 1635 /* swap toUUChars[] */ 1636 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); 1637 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); 1638 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1639 1640 /* swap fromUTableUChars[] */ 1641 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); 1642 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); 1643 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1644 1645 /* swap fromUTableValues[] */ 1646 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); 1647 /* same length as for fromUTableUChars[] */ 1648 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1649 1650 /* no need to swap fromUBytes[] */ 1651 1652 /* swap fromUStage12[] */ 1653 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); 1654 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); 1655 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1656 1657 /* swap fromUStage3[] */ 1658 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); 1659 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); 1660 ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); 1661 1662 /* swap fromUStage3b[] */ 1663 offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); 1664 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); 1665 ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); 1666 1667 /* swap indexes[] */ 1668 length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); 1669 ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); 1670 } 1671 } 1672 } else { 1673 udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", 1674 inStaticData->conversionType); 1675 *pErrorCode=U_UNSUPPORTED_ERROR; 1676 return 0; 1677 } 1678 1679 return headerSize+(int32_t)staticDataSize+size; 1680 } 1681 1682 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1683 1684 #endif 1685