1 /* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1999-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * 10 * ucnv_io.cpp: 11 * initializes global variables and defines functions pertaining to converter 12 * name resolution aspect of the conversion code. 13 * 14 * new implementation: 15 * 16 * created on: 1999nov22 17 * created by: Markus W. Scherer 18 * 19 * Use the binary cnvalias.icu (created from convrtrs.txt) to work 20 * with aliases for converter names. 21 * 22 * Date Name Description 23 * 11/22/1999 markus Created 24 * 06/28/2002 grhoten Major overhaul of the converter alias design. 25 * Now an alias can map to different converters 26 * depending on the specified standard. 27 ******************************************************************************* 28 */ 29 30 #include "unicode/utypes.h" 31 32 #if !UCONFIG_NO_CONVERSION 33 34 #include "unicode/ucnv.h" 35 #include "unicode/udata.h" 36 37 #include "umutex.h" 38 #include "uarrsort.h" 39 #include "udataswp.h" 40 #include "cstring.h" 41 #include "cmemory.h" 42 #include "ucnv_io.h" 43 #include "uenumimp.h" 44 #include "ucln_cmn.h" 45 46 /* Format of cnvalias.icu ----------------------------------------------------- 47 * 48 * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. 49 * This binary form contains several tables. All indexes are to uint16_t 50 * units, and not to the bytes (uint8_t units). Addressing everything on 51 * 16-bit boundaries allows us to store more information with small index 52 * numbers, which are also 16-bit in size. The majority of the table (except 53 * the string table) are 16-bit numbers. 54 * 55 * First there is the size of the Table of Contents (TOC). The TOC 56 * entries contain the size of each section. 
In order to find the offset 57 * you just need to sum up the previous offsets. 58 * The TOC length and entries are an array of uint32_t values. 59 * The first section after the TOC starts immediately after the TOC. 60 * 61 * 1) This section contains a list of converters. This list contains indexes 62 * into the string table for the converter name. The index of this list is 63 * also used by other sections, which are mentioned later on. 64 * This list is not sorted. 65 * 66 * 2) This section contains a list of tags. This list contains indexes 67 * into the string table for the tag name. The index of this list is 68 * also used by other sections, which are mentioned later on. 69 * This list is in priority order of standards. 70 * 71 * 3) This section contains a list of sorted unique aliases. This 72 * list contains indexes into the string table for the alias name. The 73 * index of this list is also used by other sections, like the 4th section. 74 * The index for the 3rd and 4th section is used to get the 75 * alias -> converter name mapping. Section 3 and 4 form a two column table. 76 * Some of the most significant bits of each index may contain other 77 * information (see findConverter for details). 78 * 79 * 4) This section contains a list of mapped converter names. Consider this 80 * as a table that maps the 3rd section to the 1st section. This list contains 81 * indexes into the 1st section. The index of this list is the same index in 82 * the 3rd section. There is also some extra information in the high bits of 83 * each converter index in this table. Currently it's only used to say that 84 * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK 85 * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is 86 * the predigested form of the 5th section so that an alias lookup can be fast. 87 * 88 * 5) This section contains a 2D array with indexes to the 6th section. This 89 * section is the full form of all alias mappings. 
The column index is the
 * index into the converter list (column header). The row index is the index
 * into the tag list (row header). This 2D array is the top part of a 3D array.
 * The third dimension is in the 6th section.
 *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
 * and is followed by indexes to alias names in the string table. This is
 * the third dimension of section 5. No other section should be referencing
 * this section.
 *
 * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
 * presence indicates that a section 9 exists. UConverterAliasOptions specifies
 * what type of string normalization is used among other potential things in the
 * future.
 *
 * 8) This is the string table. All strings are indexed on an even address.
 * There are two reasons for this. First many chip architectures locate strings
 * faster on even address boundaries. Second, since all indexes are 16-bit
 * numbers, this string table can be 128KB in size instead of 64KB when we
 * only have strings starting on an even address.
 *
 * 9) When present this is a set of prenormalized strings from section 8. This
 * table contains normalized strings with the dashes and spaces stripped out,
 * and all strings lowercased. In the future, the options in section 7 may state
 * other types of normalization.
 *
 * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
 * has a unique alias among all converters. That same alias can
 * be mentioned in other standards on different converters,
 * but only one alias per tag can be unique.
 *
 *
 *                Converter Names (Usually in TR22 form)
 *           -------------------------------------------.
123 * T / /| 124 * a / / | 125 * g / / | 126 * s / / | 127 * / / | 128 * ------------------------------------------/ | 129 * A | | | 130 * l | | | 131 * i | | / 132 * a | | / 133 * s | | / 134 * e | | / 135 * s | |/ 136 * ------------------------------------------- 137 * 138 * 139 * 140 * Here is what it really looks like. It's like swiss cheese. 141 * There are holes. Some converters aren't recognized by 142 * a standard, or they are really old converters that the 143 * standard doesn't recognize anymore. 144 * 145 * Converter Names (Usually in TR22 form) 146 * -------------------------------------------. 147 * T /##########################################/| 148 * a / # # /# 149 * g / # ## ## ### # ### ### ### #/ 150 * s / # ##### #### ## ## #/# 151 * / ### # # ## # # # ### # # #/## 152 * ------------------------------------------/# # 153 * A |### # # ## # # # ### # # #|# # 154 * l |# # # # # ## # #|# # 155 * i |# # # # # # #|# 156 * a |# #|# 157 * s | #|# 158 * e 159 * s 160 * 161 */ 162 163 /** 164 * Used by the UEnumeration API 165 */ 166 typedef struct UAliasContext { 167 uint32_t listOffset; 168 uint32_t listIdx; 169 } UAliasContext; 170 171 static const char DATA_NAME[] = "cnvalias"; 172 static const char DATA_TYPE[] = "icu"; 173 174 static UDataMemory *gAliasData=NULL; 175 176 enum { 177 tocLengthIndex=0, 178 converterListIndex=1, 179 tagListIndex=2, 180 aliasListIndex=3, 181 untaggedConvArrayIndex=4, 182 taggedAliasArrayIndex=5, 183 taggedAliasListsIndex=6, 184 tableOptionsIndex=7, 185 stringTableIndex=8, 186 normalizedStringTableIndex=9, 187 offsetsCount, /* length of the swapper's temporary offsets[] */ 188 minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! 
*/ 189 }; 190 191 static const UConverterAliasOptions defaultTableOptions = { 192 UCNV_IO_UNNORMALIZED, 193 0 /* containsCnvOptionInfo */ 194 }; 195 static UConverterAlias gMainTable; 196 197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) 198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) 199 200 static UBool U_CALLCONV 201 isAcceptable(void * /*context*/, 202 const char * /*type*/, const char * /*name*/, 203 const UDataInfo *pInfo) { 204 return (UBool)( 205 pInfo->size>=20 && 206 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 207 pInfo->charsetFamily==U_CHARSET_FAMILY && 208 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ 209 pInfo->dataFormat[1]==0x76 && 210 pInfo->dataFormat[2]==0x41 && 211 pInfo->dataFormat[3]==0x6c && 212 pInfo->formatVersion[0]==3); 213 } 214 215 static UBool U_CALLCONV ucnv_io_cleanup(void) 216 { 217 if (gAliasData) { 218 udata_close(gAliasData); 219 gAliasData = NULL; 220 } 221 222 uprv_memset(&gMainTable, 0, sizeof(gMainTable)); 223 224 return TRUE; /* Everything was cleaned up */ 225 } 226 227 static UBool 228 haveAliasData(UErrorCode *pErrorCode) { 229 int needInit; 230 231 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 232 return FALSE; 233 } 234 235 UMTX_CHECK(NULL, (gAliasData==NULL), needInit); 236 237 /* load converter alias data from file if necessary */ 238 if (needInit) { 239 UDataMemory *data; 240 const uint16_t *table; 241 const uint32_t *sectionSizes; 242 uint32_t tableStart; 243 uint32_t currOffset; 244 245 data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); 246 if(U_FAILURE(*pErrorCode)) { 247 return FALSE; 248 } 249 250 sectionSizes = (const uint32_t *)udata_getMemory(data); 251 table = (const uint16_t *)sectionSizes; 252 253 tableStart = sectionSizes[0]; 254 if (tableStart < minTocLength) { 255 *pErrorCode = U_INVALID_FORMAT_ERROR; 256 udata_close(data); 257 return FALSE; 258 } 259 260 umtx_lock(NULL); 261 
if(gAliasData==NULL) { 262 gMainTable.converterListSize = sectionSizes[1]; 263 gMainTable.tagListSize = sectionSizes[2]; 264 gMainTable.aliasListSize = sectionSizes[3]; 265 gMainTable.untaggedConvArraySize = sectionSizes[4]; 266 gMainTable.taggedAliasArraySize = sectionSizes[5]; 267 gMainTable.taggedAliasListsSize = sectionSizes[6]; 268 gMainTable.optionTableSize = sectionSizes[7]; 269 gMainTable.stringTableSize = sectionSizes[8]; 270 271 if (tableStart > 8) { 272 gMainTable.normalizedStringTableSize = sectionSizes[9]; 273 } 274 275 currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); 276 gMainTable.converterList = table + currOffset; 277 278 currOffset += gMainTable.converterListSize; 279 gMainTable.tagList = table + currOffset; 280 281 currOffset += gMainTable.tagListSize; 282 gMainTable.aliasList = table + currOffset; 283 284 currOffset += gMainTable.aliasListSize; 285 gMainTable.untaggedConvArray = table + currOffset; 286 287 currOffset += gMainTable.untaggedConvArraySize; 288 gMainTable.taggedAliasArray = table + currOffset; 289 290 /* aliasLists is a 1's based array, but it has a padding character */ 291 currOffset += gMainTable.taggedAliasArraySize; 292 gMainTable.taggedAliasLists = table + currOffset; 293 294 currOffset += gMainTable.taggedAliasListsSize; 295 if (gMainTable.optionTableSize > 0 296 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) 297 { 298 /* Faster table */ 299 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset); 300 } 301 else { 302 /* Smaller table, or I can't handle this normalization mode! 303 Use the original slower table lookup. 
*/ 304 gMainTable.optionTable = &defaultTableOptions; 305 } 306 307 currOffset += gMainTable.optionTableSize; 308 gMainTable.stringTable = table + currOffset; 309 310 currOffset += gMainTable.stringTableSize; 311 gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) 312 ? gMainTable.stringTable : (table + currOffset)); 313 314 ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); 315 316 gAliasData = data; 317 data=NULL; 318 } 319 umtx_unlock(NULL); 320 321 /* if a different thread set it first, then close the extra data */ 322 if(data!=NULL) { 323 udata_close(data); /* NULL if it was set correctly */ 324 } 325 } 326 327 return TRUE; 328 } 329 330 static inline UBool 331 isAlias(const char *alias, UErrorCode *pErrorCode) { 332 if(alias==NULL) { 333 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 334 return FALSE; 335 } 336 return (UBool)(*alias!=0); 337 } 338 339 static uint32_t getTagNumber(const char *tagname) { 340 if (gMainTable.tagList) { 341 uint32_t tagNum; 342 for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { 343 if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { 344 return tagNum; 345 } 346 } 347 } 348 349 return UINT32_MAX; 350 } 351 352 /* character types relevant for ucnv_compareNames() */ 353 enum { 354 IGNORE, 355 ZERO, 356 NONZERO, 357 MINLETTER /* any values from here on are lowercase letter mappings */ 358 }; 359 360 /* character types for ASCII 00..7F */ 361 static const uint8_t asciiTypes[128] = { 362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 365 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, 366 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 367 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, 368 0, 0x61, 0x62, 
0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 369 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 370 }; 371 372 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)IGNORE) 373 374 /* character types for EBCDIC 80..FF */ 375 static const uint8_t ebcdicTypes[128] = { 376 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, 377 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, 378 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, 379 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 380 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, 381 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, 382 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, 383 ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 384 }; 385 386 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? 
ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE) 387 388 #if U_CHARSET_FAMILY==U_ASCII_FAMILY 389 # define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c) 390 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 391 # define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c) 392 #else 393 # error U_CHARSET_FAMILY is not valid 394 #endif 395 396 /* @see ucnv_compareNames */ 397 U_CFUNC char * U_EXPORT2 398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) { 399 char *dstItr = dst; 400 uint8_t type, nextType; 401 char c1; 402 UBool afterDigit = FALSE; 403 404 while ((c1 = *name++) != 0) { 405 type = GET_ASCII_TYPE(c1); 406 switch (type) { 407 case IGNORE: 408 afterDigit = FALSE; 409 continue; /* ignore all but letters and digits */ 410 case ZERO: 411 if (!afterDigit) { 412 nextType = GET_ASCII_TYPE(*name); 413 if (nextType == ZERO || nextType == NONZERO) { 414 continue; /* ignore leading zero before another digit */ 415 } 416 } 417 break; 418 case NONZERO: 419 afterDigit = TRUE; 420 break; 421 default: 422 c1 = (char)type; /* lowercased letter */ 423 afterDigit = FALSE; 424 break; 425 } 426 *dstItr++ = c1; 427 } 428 *dstItr = 0; 429 return dst; 430 } 431 432 U_CFUNC char * U_EXPORT2 433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { 434 char *dstItr = dst; 435 uint8_t type, nextType; 436 char c1; 437 UBool afterDigit = FALSE; 438 439 while ((c1 = *name++) != 0) { 440 type = GET_EBCDIC_TYPE(c1); 441 switch (type) { 442 case IGNORE: 443 afterDigit = FALSE; 444 continue; /* ignore all but letters and digits */ 445 case ZERO: 446 if (!afterDigit) { 447 nextType = GET_EBCDIC_TYPE(*name); 448 if (nextType == ZERO || nextType == NONZERO) { 449 continue; /* ignore leading zero before another digit */ 450 } 451 } 452 break; 453 case NONZERO: 454 afterDigit = TRUE; 455 break; 456 default: 457 c1 = (char)type; /* lowercased letter */ 458 afterDigit = FALSE; 459 break; 460 } 461 *dstItr++ = c1; 462 } 463 *dstItr = 0; 464 return dst; 465 } 466 467 /** 468 * Do a fuzzy compare of two converter/alias names. 
469 * The comparison is case-insensitive, ignores leading zeroes if they are not 470 * followed by further digits, and ignores all but letters and digits. 471 * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. 472 * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 473 * at http://www.unicode.org/reports/tr22/ 474 * 475 * This is a symmetrical (commutative) operation; order of arguments 476 * is insignificant. This is an important property for sorting the 477 * list (when the list is preprocessed into binary form) and for 478 * performing binary searches on it at run time. 479 * 480 * @param name1 a converter name or alias, zero-terminated 481 * @param name2 a converter name or alias, zero-terminated 482 * @return 0 if the names match, or a negative value if the name1 483 * lexically precedes name2, or a positive value if the name1 484 * lexically follows name2. 485 * 486 * @see ucnv_io_stripForCompare 487 */ 488 U_CAPI int U_EXPORT2 489 ucnv_compareNames(const char *name1, const char *name2) { 490 int rc; 491 uint8_t type, nextType; 492 char c1, c2; 493 UBool afterDigit1 = FALSE, afterDigit2 = FALSE; 494 495 for (;;) { 496 while ((c1 = *name1++) != 0) { 497 type = GET_CHAR_TYPE(c1); 498 switch (type) { 499 case IGNORE: 500 afterDigit1 = FALSE; 501 continue; /* ignore all but letters and digits */ 502 case ZERO: 503 if (!afterDigit1) { 504 nextType = GET_CHAR_TYPE(*name1); 505 if (nextType == ZERO || nextType == NONZERO) { 506 continue; /* ignore leading zero before another digit */ 507 } 508 } 509 break; 510 case NONZERO: 511 afterDigit1 = TRUE; 512 break; 513 default: 514 c1 = (char)type; /* lowercased letter */ 515 afterDigit1 = FALSE; 516 break; 517 } 518 break; /* deliver c1 */ 519 } 520 while ((c2 = *name2++) != 0) { 521 type = GET_CHAR_TYPE(c2); 522 switch (type) { 523 case IGNORE: 524 afterDigit2 = FALSE; 525 continue; /* ignore all but letters and digits */ 526 case ZERO: 527 if (!afterDigit2) { 528 
nextType = GET_CHAR_TYPE(*name2); 529 if (nextType == ZERO || nextType == NONZERO) { 530 continue; /* ignore leading zero before another digit */ 531 } 532 } 533 break; 534 case NONZERO: 535 afterDigit2 = TRUE; 536 break; 537 default: 538 c2 = (char)type; /* lowercased letter */ 539 afterDigit2 = FALSE; 540 break; 541 } 542 break; /* deliver c2 */ 543 } 544 545 /* If we reach the ends of both strings then they match */ 546 if ((c1|c2)==0) { 547 return 0; 548 } 549 550 /* Case-insensitive comparison */ 551 rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; 552 if (rc != 0) { 553 return rc; 554 } 555 } 556 } 557 558 /* 559 * search for an alias 560 * return the converter number index for gConverterList 561 */ 562 static inline uint32_t 563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { 564 uint32_t mid, start, limit; 565 uint32_t lastMid; 566 int result; 567 int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); 568 char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; 569 570 if (!isUnnormalized) { 571 if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { 572 *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 573 return UINT32_MAX; 574 } 575 576 /* Lower case and remove ignoreable characters. */ 577 ucnv_io_stripForCompare(strippedName, alias); 578 alias = strippedName; 579 } 580 581 /* do a binary search for the alias */ 582 start = 0; 583 limit = gMainTable.untaggedConvArraySize; 584 mid = limit; 585 lastMid = UINT32_MAX; 586 587 for (;;) { 588 mid = (uint32_t)((start + limit) / 2); 589 if (lastMid == mid) { /* Have we moved? */ 590 break; /* We haven't moved, and it wasn't found. 
*/ 591 } 592 lastMid = mid; 593 if (isUnnormalized) { 594 result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); 595 } 596 else { 597 result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); 598 } 599 600 if (result < 0) { 601 limit = mid; 602 } else if (result > 0) { 603 start = mid; 604 } else { 605 /* Since the gencnval tool folds duplicates into one entry, 606 * this alias in gAliasList is unique, but different standards 607 * may map an alias to different converters. 608 */ 609 if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { 610 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; 611 } 612 /* State whether the canonical converter name contains an option. 613 This information is contained in this list in order to maintain backward & forward compatibility. */ 614 if (containsOption) { 615 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; 616 *containsOption = (UBool)((containsCnvOptionInfo 617 && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) 618 || !containsCnvOptionInfo); 619 } 620 return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; 621 } 622 } 623 624 return UINT32_MAX; 625 } 626 627 /* 628 * Is this alias in this list? 629 * alias and listOffset should be non-NULL. 630 */ 631 static inline UBool 632 isAliasInList(const char *alias, uint32_t listOffset) { 633 if (listOffset) { 634 uint32_t currAlias; 635 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 636 /* +1 to skip listCount */ 637 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 638 for (currAlias = 0; currAlias < listCount; currAlias++) { 639 if (currList[currAlias] 640 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) 641 { 642 return TRUE; 643 } 644 } 645 } 646 return FALSE; 647 } 648 649 /* 650 * Search for an standard name of an alias (what is the default name 651 * that this standard uses?) 
652 * return the listOffset for gTaggedAliasLists. If it's 0, 653 * the it couldn't be found, but the parameters are valid. 654 */ 655 static uint32_t 656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { 657 uint32_t idx; 658 uint32_t listOffset; 659 uint32_t convNum; 660 UErrorCode myErr = U_ZERO_ERROR; 661 uint32_t tagNum = getTagNumber(standard); 662 663 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ 664 convNum = findConverter(alias, NULL, &myErr); 665 if (myErr != U_ZERO_ERROR) { 666 *pErrorCode = myErr; 667 } 668 669 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { 670 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; 671 if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { 672 return listOffset; 673 } 674 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { 675 /* Uh Oh! They used an ambiguous alias. 676 We have to search the whole swiss cheese starting 677 at the highest standard affinity. 678 This may take a while. 679 */ 680 for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { 681 listOffset = gMainTable.taggedAliasArray[idx]; 682 if (listOffset && isAliasInList(alias, listOffset)) { 683 uint32_t currTagNum = idx/gMainTable.converterListSize; 684 uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); 685 uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; 686 if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { 687 return tempListOffset; 688 } 689 /* else keep on looking */ 690 /* We could speed this up by starting on the next row 691 because an alias is unique per row, right now. 692 This would change if alias versioning appears. 
*/ 693 } 694 } 695 /* The standard doesn't know about the alias */ 696 } 697 /* else no default name */ 698 return 0; 699 } 700 /* else converter or tag not found */ 701 702 return UINT32_MAX; 703 } 704 705 /* Return the canonical name */ 706 static uint32_t 707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { 708 uint32_t idx; 709 uint32_t listOffset; 710 uint32_t convNum; 711 UErrorCode myErr = U_ZERO_ERROR; 712 uint32_t tagNum = getTagNumber(standard); 713 714 /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ 715 convNum = findConverter(alias, NULL, &myErr); 716 if (myErr != U_ZERO_ERROR) { 717 *pErrorCode = myErr; 718 } 719 720 if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { 721 listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; 722 if (listOffset && isAliasInList(alias, listOffset)) { 723 return convNum; 724 } 725 if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { 726 /* Uh Oh! They used an ambiguous alias. 727 We have to search one slice of the swiss cheese. 728 We search only in the requested tag, not the whole thing. 729 This may take a while. 
730 */ 731 uint32_t convStart = (tagNum)*gMainTable.converterListSize; 732 uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; 733 for (idx = convStart; idx < convLimit; idx++) { 734 listOffset = gMainTable.taggedAliasArray[idx]; 735 if (listOffset && isAliasInList(alias, listOffset)) { 736 return idx-convStart; 737 } 738 } 739 /* The standard doesn't know about the alias */ 740 } 741 /* else no canonical name */ 742 } 743 /* else converter or tag not found */ 744 745 return UINT32_MAX; 746 } 747 748 749 750 U_CFUNC const char * 751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { 752 const char *aliasTmp = alias; 753 int32_t i = 0; 754 for (i = 0; i < 2; i++) { 755 if (i == 1) { 756 /* 757 * After the first unsuccess converter lookup, check to see if 758 * the name begins with 'x-'. If it does, strip it off and try 759 * again. This behaviour is similar to how ICU4J does it. 760 */ 761 if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') { 762 aliasTmp = aliasTmp+2; 763 } else { 764 break; 765 } 766 } 767 if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { 768 uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); 769 if (convNum < gMainTable.converterListSize) { 770 return GET_STRING(gMainTable.converterList[convNum]); 771 } 772 /* else converter not found */ 773 } else { 774 break; 775 } 776 } 777 778 return NULL; 779 } 780 781 static int32_t U_CALLCONV 782 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 783 int32_t value = 0; 784 UAliasContext *myContext = (UAliasContext *)(enumerator->context); 785 uint32_t listOffset = myContext->listOffset; 786 787 if (listOffset) { 788 value = gMainTable.taggedAliasLists[listOffset]; 789 } 790 return value; 791 } 792 793 static const char* U_CALLCONV 794 ucnv_io_nextStandardAliases(UEnumeration *enumerator, 795 int32_t* resultLength, 796 UErrorCode * /*pErrorCode*/) 797 { 798 UAliasContext *myContext = 
(UAliasContext *)(enumerator->context); 799 uint32_t listOffset = myContext->listOffset; 800 801 if (listOffset) { 802 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 803 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 804 805 if (myContext->listIdx < listCount) { 806 const char *myStr = GET_STRING(currList[myContext->listIdx++]); 807 if (resultLength) { 808 *resultLength = (int32_t)uprv_strlen(myStr); 809 } 810 return myStr; 811 } 812 } 813 /* Either we accessed a zero length list, or we enumerated too far. */ 814 if (resultLength) { 815 *resultLength = 0; 816 } 817 return NULL; 818 } 819 820 static void U_CALLCONV 821 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 822 ((UAliasContext *)(enumerator->context))->listIdx = 0; 823 } 824 825 static void U_CALLCONV 826 ucnv_io_closeUEnumeration(UEnumeration *enumerator) { 827 uprv_free(enumerator->context); 828 uprv_free(enumerator); 829 } 830 831 /* Enumerate the aliases for the specified converter and standard tag */ 832 static const UEnumeration gEnumAliases = { 833 NULL, 834 NULL, 835 ucnv_io_closeUEnumeration, 836 ucnv_io_countStandardAliases, 837 uenum_unextDefault, 838 ucnv_io_nextStandardAliases, 839 ucnv_io_resetStandardAliases 840 }; 841 842 U_CAPI UEnumeration * U_EXPORT2 843 ucnv_openStandardNames(const char *convName, 844 const char *standard, 845 UErrorCode *pErrorCode) 846 { 847 UEnumeration *myEnum = NULL; 848 if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { 849 uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); 850 851 /* When listOffset == 0, we want to acknowledge that the 852 converter name and standard are okay, but there 853 is nothing to enumerate. 
*/ 854 if (listOffset < gMainTable.taggedAliasListsSize) { 855 UAliasContext *myContext; 856 857 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); 858 if (myEnum == NULL) { 859 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 860 return NULL; 861 } 862 uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); 863 myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext))); 864 if (myContext == NULL) { 865 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 866 uprv_free(myEnum); 867 return NULL; 868 } 869 myContext->listOffset = listOffset; 870 myContext->listIdx = 0; 871 myEnum->context = myContext; 872 } 873 /* else converter or tag not found */ 874 } 875 return myEnum; 876 } 877 878 static uint16_t 879 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { 880 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 881 uint32_t convNum = findConverter(alias, NULL, pErrorCode); 882 if (convNum < gMainTable.converterListSize) { 883 /* tagListNum - 1 is the ALL tag */ 884 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 885 886 if (listOffset) { 887 return gMainTable.taggedAliasLists[listOffset]; 888 } 889 /* else this shouldn't happen. 
internal program error */ 890 } 891 /* else converter not found */ 892 } 893 return 0; 894 } 895 896 static uint16_t 897 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { 898 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 899 uint32_t currAlias; 900 uint32_t convNum = findConverter(alias, NULL, pErrorCode); 901 if (convNum < gMainTable.converterListSize) { 902 /* tagListNum - 1 is the ALL tag */ 903 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 904 905 if (listOffset) { 906 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 907 /* +1 to skip listCount */ 908 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 909 910 for (currAlias = start; currAlias < listCount; currAlias++) { 911 aliases[currAlias] = GET_STRING(currList[currAlias]); 912 } 913 } 914 /* else this shouldn't happen. internal program error */ 915 } 916 /* else converter not found */ 917 } 918 return 0; 919 } 920 921 static const char * 922 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { 923 if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 924 uint32_t convNum = findConverter(alias, NULL, pErrorCode); 925 if (convNum < gMainTable.converterListSize) { 926 /* tagListNum - 1 is the ALL tag */ 927 int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; 928 929 if (listOffset) { 930 uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; 931 /* +1 to skip listCount */ 932 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 933 934 if (n < listCount) { 935 return GET_STRING(currList[n]); 936 } 937 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 938 } 939 /* else this shouldn't happen. 
internal program error */ 940 } 941 /* else converter not found */ 942 } 943 return NULL; 944 } 945 946 static uint16_t 947 ucnv_io_countStandards(UErrorCode *pErrorCode) { 948 if (haveAliasData(pErrorCode)) { 949 /* Don't include the empty list */ 950 return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); 951 } 952 953 return 0; 954 } 955 956 U_CAPI const char * U_EXPORT2 957 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { 958 if (haveAliasData(pErrorCode)) { 959 if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { 960 return GET_STRING(gMainTable.tagList[n]); 961 } 962 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; 963 } 964 965 return NULL; 966 } 967 968 U_CAPI const char * U_EXPORT2 969 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { 970 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 971 uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); 972 973 if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { 974 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; 975 976 /* Get the preferred name from this list */ 977 if (currList[0]) { 978 return GET_STRING(currList[0]); 979 } 980 /* else someone screwed up the alias table. 
*/ 981 /* *pErrorCode = U_INVALID_FORMAT_ERROR */ 982 } 983 } 984 985 return NULL; 986 } 987 988 U_CAPI uint16_t U_EXPORT2 989 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) 990 { 991 return ucnv_io_countAliases(alias, pErrorCode); 992 } 993 994 995 U_CAPI const char* U_EXPORT2 996 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) 997 { 998 return ucnv_io_getAlias(alias, n, pErrorCode); 999 } 1000 1001 U_CAPI void U_EXPORT2 1002 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) 1003 { 1004 ucnv_io_getAliases(alias, 0, aliases, pErrorCode); 1005 } 1006 1007 U_CAPI uint16_t U_EXPORT2 1008 ucnv_countStandards(void) 1009 { 1010 UErrorCode err = U_ZERO_ERROR; 1011 return ucnv_io_countStandards(&err); 1012 } 1013 1014 U_CAPI const char * U_EXPORT2 1015 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { 1016 if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { 1017 uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); 1018 1019 if (convNum < gMainTable.converterListSize) { 1020 return GET_STRING(gMainTable.converterList[convNum]); 1021 } 1022 } 1023 1024 return NULL; 1025 } 1026 1027 static int32_t U_CALLCONV 1028 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { 1029 return gMainTable.converterListSize; 1030 } 1031 1032 static const char* U_CALLCONV 1033 ucnv_io_nextAllConverters(UEnumeration *enumerator, 1034 int32_t* resultLength, 1035 UErrorCode * /*pErrorCode*/) 1036 { 1037 uint16_t *myContext = (uint16_t *)(enumerator->context); 1038 1039 if (*myContext < gMainTable.converterListSize) { 1040 const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); 1041 if (resultLength) { 1042 *resultLength = (int32_t)uprv_strlen(myStr); 1043 } 1044 return myStr; 1045 } 1046 /* Either we accessed a zero length list, or we enumerated too far. 
*/ 1047 if (resultLength) { 1048 *resultLength = 0; 1049 } 1050 return NULL; 1051 } 1052 1053 static void U_CALLCONV 1054 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { 1055 *((uint16_t *)(enumerator->context)) = 0; 1056 } 1057 1058 static const UEnumeration gEnumAllConverters = { 1059 NULL, 1060 NULL, 1061 ucnv_io_closeUEnumeration, 1062 ucnv_io_countAllConverters, 1063 uenum_unextDefault, 1064 ucnv_io_nextAllConverters, 1065 ucnv_io_resetAllConverters 1066 }; 1067 1068 U_CAPI UEnumeration * U_EXPORT2 1069 ucnv_openAllNames(UErrorCode *pErrorCode) { 1070 UEnumeration *myEnum = NULL; 1071 if (haveAliasData(pErrorCode)) { 1072 uint16_t *myContext; 1073 1074 myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); 1075 if (myEnum == NULL) { 1076 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1077 return NULL; 1078 } 1079 uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); 1080 myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t))); 1081 if (myContext == NULL) { 1082 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1083 uprv_free(myEnum); 1084 return NULL; 1085 } 1086 *myContext = 0; 1087 myEnum->context = myContext; 1088 } 1089 return myEnum; 1090 } 1091 1092 U_CFUNC uint16_t 1093 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { 1094 if (haveAliasData(pErrorCode)) { 1095 return (uint16_t)gMainTable.converterListSize; 1096 } 1097 return 0; 1098 } 1099 1100 /* alias table swapping ----------------------------------------------------- */ 1101 1102 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); 1103 1104 /* 1105 * row of a temporary array 1106 * 1107 * gets platform-endian charset string indexes and sorting indexes; 1108 * after sorting this array by strings, the actual arrays are permutated 1109 * according to the sorting indexes 1110 */ 1111 typedef struct TempRow { 1112 uint16_t strIndex, sortIndex; 1113 } TempRow; 1114 1115 typedef struct TempAliasTable { 1116 const char 
*chars; 1117 TempRow *rows; 1118 uint16_t *resort; 1119 StripForCompareFn *stripForCompare; 1120 } TempAliasTable; 1121 1122 enum { 1123 STACK_ROW_CAPACITY=500 1124 }; 1125 1126 static int32_t 1127 io_compareRows(const void *context, const void *left, const void *right) { 1128 char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], 1129 strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; 1130 1131 TempAliasTable *tempTable=(TempAliasTable *)context; 1132 const char *chars=tempTable->chars; 1133 1134 return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), 1135 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); 1136 } 1137 1138 U_CAPI int32_t U_EXPORT2 1139 ucnv_swapAliases(const UDataSwapper *ds, 1140 const void *inData, int32_t length, void *outData, 1141 UErrorCode *pErrorCode) { 1142 const UDataInfo *pInfo; 1143 int32_t headerSize; 1144 1145 const uint16_t *inTable; 1146 const uint32_t *inSectionSizes; 1147 uint32_t toc[offsetsCount]; 1148 uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ 1149 uint32_t i, count, tocLength, topOffset; 1150 1151 TempRow rows[STACK_ROW_CAPACITY]; 1152 uint16_t resort[STACK_ROW_CAPACITY]; 1153 TempAliasTable tempTable; 1154 1155 /* udata_swapDataHeader checks the arguments */ 1156 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 1157 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1158 return 0; 1159 } 1160 1161 /* check data format and format version */ 1162 pInfo=(const UDataInfo *)((const char *)inData+4); 1163 if(!( 1164 pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ 1165 pInfo->dataFormat[1]==0x76 && 1166 pInfo->dataFormat[2]==0x41 && 1167 pInfo->dataFormat[3]==0x6c && 1168 pInfo->formatVersion[0]==3 1169 )) { 1170 udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", 1171 pInfo->dataFormat[0], 
pInfo->dataFormat[1], 1172 pInfo->dataFormat[2], pInfo->dataFormat[3], 1173 pInfo->formatVersion[0]); 1174 *pErrorCode=U_UNSUPPORTED_ERROR; 1175 return 0; 1176 } 1177 1178 /* an alias table must contain at least the table of contents array */ 1179 if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { 1180 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", 1181 length-headerSize); 1182 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1183 return 0; 1184 } 1185 1186 inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); 1187 inTable=(const uint16_t *)inSectionSizes; 1188 uprv_memset(toc, 0, sizeof(toc)); 1189 toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); 1190 if(tocLength<minTocLength || offsetsCount<=tocLength) { 1191 udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); 1192 *pErrorCode=U_INVALID_FORMAT_ERROR; 1193 return 0; 1194 } 1195 1196 /* read the known part of the table of contents */ 1197 for(i=converterListIndex; i<=tocLength; ++i) { 1198 toc[i]=ds->readUInt32(inSectionSizes[i]); 1199 } 1200 1201 /* compute offsets */ 1202 uprv_memset(offsets, 0, sizeof(offsets)); 1203 offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ 1204 for(i=tagListIndex; i<=tocLength; ++i) { 1205 offsets[i]=offsets[i-1]+toc[i-1]; 1206 } 1207 1208 /* compute the overall size of the after-header data, in numbers of 16-bit units */ 1209 topOffset=offsets[i-1]+toc[i-1]; 1210 1211 if(length>=0) { 1212 uint16_t *outTable; 1213 const uint16_t *p, *p2; 1214 uint16_t *q, *q2; 1215 uint16_t oldIndex; 1216 1217 if((length-headerSize)<(2*(int32_t)topOffset)) { 1218 udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", 1219 length-headerSize); 1220 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1221 return 0; 1222 } 1223 1224 outTable=(uint16_t *)((char 
*)outData+headerSize); 1225 1226 /* swap the entire table of contents */ 1227 ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); 1228 1229 /* swap unormalized strings & normalized strings */ 1230 ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), 1231 outTable+offsets[stringTableIndex], pErrorCode); 1232 if(U_FAILURE(*pErrorCode)) { 1233 udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); 1234 return 0; 1235 } 1236 1237 if(ds->inCharset==ds->outCharset) { 1238 /* no need to sort, just swap all 16-bit values together */ 1239 ds->swapArray16(ds, 1240 inTable+offsets[converterListIndex], 1241 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), 1242 outTable+offsets[converterListIndex], 1243 pErrorCode); 1244 } else { 1245 /* allocate the temporary table for sorting */ 1246 count=toc[aliasListIndex]; 1247 1248 tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ 1249 1250 if(count<=STACK_ROW_CAPACITY) { 1251 tempTable.rows=rows; 1252 tempTable.resort=resort; 1253 } else { 1254 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); 1255 if(tempTable.rows==NULL) { 1256 udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", 1257 count); 1258 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1259 return 0; 1260 } 1261 tempTable.resort=(uint16_t *)(tempTable.rows+count); 1262 } 1263 1264 if(ds->outCharset==U_ASCII_FAMILY) { 1265 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; 1266 } else /* U_EBCDIC_FAMILY */ { 1267 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; 1268 } 1269 1270 /* 1271 * Sort unique aliases+mapped names. 1272 * 1273 * We need to sort the list again by outCharset strings because they 1274 * sort differently for different charset families. 
1275 * First we set up a temporary table with the string indexes and 1276 * sorting indexes and sort that. 1277 * Then we permutate and copy/swap the actual values. 1278 */ 1279 p=inTable+offsets[aliasListIndex]; 1280 q=outTable+offsets[aliasListIndex]; 1281 1282 p2=inTable+offsets[untaggedConvArrayIndex]; 1283 q2=outTable+offsets[untaggedConvArrayIndex]; 1284 1285 for(i=0; i<count; ++i) { 1286 tempTable.rows[i].strIndex=ds->readUInt16(p[i]); 1287 tempTable.rows[i].sortIndex=(uint16_t)i; 1288 } 1289 1290 uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), 1291 io_compareRows, &tempTable, 1292 FALSE, pErrorCode); 1293 1294 if(U_SUCCESS(*pErrorCode)) { 1295 /* copy/swap/permutate items */ 1296 if(p!=q) { 1297 for(i=0; i<count; ++i) { 1298 oldIndex=tempTable.rows[i].sortIndex; 1299 ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); 1300 ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); 1301 } 1302 } else { 1303 /* 1304 * If we swap in-place, then the permutation must use another 1305 * temporary array (tempTable.resort) 1306 * before the results are copied to the outBundle. 
1307 */ 1308 uint16_t *r=tempTable.resort; 1309 1310 for(i=0; i<count; ++i) { 1311 oldIndex=tempTable.rows[i].sortIndex; 1312 ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); 1313 } 1314 uprv_memcpy(q, r, 2*count); 1315 1316 for(i=0; i<count; ++i) { 1317 oldIndex=tempTable.rows[i].sortIndex; 1318 ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); 1319 } 1320 uprv_memcpy(q2, r, 2*count); 1321 } 1322 } 1323 1324 if(tempTable.rows!=rows) { 1325 uprv_free(tempTable.rows); 1326 } 1327 1328 if(U_FAILURE(*pErrorCode)) { 1329 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", 1330 count); 1331 return 0; 1332 } 1333 1334 /* swap remaining 16-bit values */ 1335 ds->swapArray16(ds, 1336 inTable+offsets[converterListIndex], 1337 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), 1338 outTable+offsets[converterListIndex], 1339 pErrorCode); 1340 ds->swapArray16(ds, 1341 inTable+offsets[taggedAliasArrayIndex], 1342 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), 1343 outTable+offsets[taggedAliasArrayIndex], 1344 pErrorCode); 1345 } 1346 } 1347 1348 return headerSize+2*(int32_t)topOffset; 1349 } 1350 1351 #endif 1352 1353 /* 1354 * Hey, Emacs, please set the following: 1355 * 1356 * Local Variables: 1357 * indent-tabs-mode: nil 1358 * End: 1359 * 1360 */ 1361