Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2013, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 *
     10 *  ucnv_io.cpp:
     11 *  initializes global variables and defines functions pertaining to converter
     12 *  name resolution aspect of the conversion code.
     13 *
     14 *   new implementation:
     15 *
     16 *   created on: 1999nov22
     17 *   created by: Markus W. Scherer
     18 *
     19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
     20 *   with aliases for converter names.
     21 *
     22 *   Date        Name        Description
     23 *   11/22/1999  markus      Created
     24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
     25 *                           Now an alias can map to different converters
     26 *                           depending on the specified standard.
     27 *******************************************************************************
     28 */
     29 
     30 #include "unicode/utypes.h"
     31 
     32 #if !UCONFIG_NO_CONVERSION
     33 
     34 #include "unicode/ucnv.h"
     35 #include "unicode/udata.h"
     36 
     37 #include "umutex.h"
     38 #include "uarrsort.h"
     39 #include "uassert.h"
     40 #include "udataswp.h"
     41 #include "cstring.h"
     42 #include "cmemory.h"
     43 #include "ucnv_io.h"
     44 #include "uenumimp.h"
     45 #include "ucln_cmn.h"
     46 
     47 /* Format of cnvalias.icu -----------------------------------------------------
     48  *
     49  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
     50  * This binary form contains several tables. All indexes are to uint16_t
     51  * units, and not to the bytes (uint8_t units). Addressing everything on
     52  * 16-bit boundaries allows us to store more information with small index
     53  * numbers, which are also 16-bit in size. The majority of the table (except
     54  * the string table) are 16-bit numbers.
     55  *
     56  * First there is the size of the Table of Contents (TOC). The TOC
     57  * entries contain the size of each section. In order to find the offset
     58  * you just need to sum up the previous offsets.
     59  * The TOC length and entries are an array of uint32_t values.
     60  * The first section after the TOC starts immediately after the TOC.
     61  *
     62  * 1) This section contains a list of converters. This list contains indexes
     63  * into the string table for the converter name. The index of this list is
     64  * also used by other sections, which are mentioned later on.
     65  * This list is not sorted.
     66  *
     67  * 2) This section contains a list of tags. This list contains indexes
     68  * into the string table for the tag name. The index of this list is
     69  * also used by other sections, which are mentioned later on.
     70  * This list is in priority order of standards.
     71  *
     72  * 3) This section contains a list of sorted unique aliases. This
     73  * list contains indexes into the string table for the alias name. The
     74  * index of this list is also used by other sections, like the 4th section.
     75  * The index for the 3rd and 4th section is used to get the
     76  * alias -> converter name mapping. Section 3 and 4 form a two column table.
     77  * Some of the most significant bits of each index may contain other
     78  * information (see findConverter for details).
     79  *
     80  * 4) This section contains a list of mapped converter names. Consider this
     81  * as a table that maps the 3rd section to the 1st section. This list contains
     82  * indexes into the 1st section. The index of this list is the same index in
     83  * the 3rd section. There is also some extra information in the high bits of
     84  * each converter index in this table. Currently it's only used to say that
     85  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
     86  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
     87  * the predigested form of the 5th section so that an alias lookup can be fast.
     88  *
     89  * 5) This section contains a 2D array with indexes to the 6th section. This
     90  * section is the full form of all alias mappings. The column index is the
     91  * index into the converter list (column header). The row index is the index
 * to tag list (row header). This 2D array is the top part of a 3D array. The
     93  * third dimension is in the 6th section.
     94  *
 * 6) This is a blob of variable length arrays. Each array starts with a size,
     96  * and is followed by indexes to alias names in the string table. This is
     97  * the third dimension to the section 5. No other section should be referencing
     98  * this section.
     99  *
    100  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
    101  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
    102  * what type of string normalization is used among other potential things in the
    103  * future.
    104  *
    105  * 8) This is the string table. All strings are indexed on an even address.
    106  * There are two reasons for this. First many chip architectures locate strings
    107  * faster on even address boundaries. Second, since all indexes are 16-bit
    108  * numbers, this string table can be 128KB in size instead of 64KB when we
    109  * only have strings starting on an even address.
    110  *
    111  * 9) When present this is a set of prenormalized strings from section 8. This
    112  * table contains normalized strings with the dashes and spaces stripped out,
    113  * and all strings lowercased. In the future, the options in section 7 may state
    114  * other types of normalization.
    115  *
    116  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
    117  * has a unique alias among all converters. That same alias can
    118  * be mentioned in other standards on different converters,
    119  * but only one alias per tag can be unique.
    120  *
    121  *
    122  *              Converter Names (Usually in TR22 form)
    123  *           -------------------------------------------.
    124  *     T    /                                          /|
    125  *     a   /                                          / |
    126  *     g  /                                          /  |
    127  *     s /                                          /   |
    128  *      /                                          /    |
    129  *      ------------------------------------------/     |
    130  *    A |                                         |     |
    131  *    l |                                         |     |
    132  *    i |                                         |    /
    133  *    a |                                         |   /
    134  *    s |                                         |  /
    135  *    e |                                         | /
    136  *    s |                                         |/
    137  *      -------------------------------------------
    138  *
    139  *
    140  *
    141  * Here is what it really looks like. It's like swiss cheese.
    142  * There are holes. Some converters aren't recognized by
    143  * a standard, or they are really old converters that the
    144  * standard doesn't recognize anymore.
    145  *
    146  *              Converter Names (Usually in TR22 form)
    147  *           -------------------------------------------.
    148  *     T    /##########################################/|
    149  *     a   /     #            #                       /#
    150  *     g  /  #      ##     ##     ### # ### ### ### #/
    151  *     s / #             #####  ####        ##  ## #/#
    152  *      / ### # # ##  #  #   #          ### # #   #/##
    153  *      ------------------------------------------/# #
    154  *    A |### # # ##  #  #   #          ### # #   #|# #
    155  *    l |# # #    #     #               ## #     #|# #
    156  *    i |# # #    #     #                #       #|#
    157  *    a |#                                       #|#
    158  *    s |                                        #|#
    159  *    e
    160  *    s
    161  *
    162  */
    163 
/**
 * Used by the UEnumeration API.
 * Holds two 32-bit values that together act as an iteration cursor.
 */
typedef struct UAliasContext {
    /* NOTE(review): presumably an offset into gMainTable.taggedAliasLists
       selecting one alias list -- confirm against the enumeration callbacks. */
    uint32_t listOffset;
    /* NOTE(review): presumably the position of the next item within that
       list -- confirm against the enumeration callbacks. */
    uint32_t listIdx;
} UAliasContext;
    171 
/* Item name and type passed to udata_openChoice() by initAliasData(). */
static const char DATA_NAME[] = "cnvalias";
static const char DATA_TYPE[] = "icu";

/* Data handle stored by initAliasData() on success; closed and reset to NULL
   by ucnv_io_cleanup(). */
static UDataMemory *gAliasData=NULL;
/* One-time-initialization state used by haveAliasData() to run
   initAliasData() through umtx_initOnce(); reset by ucnv_io_cleanup(). */
static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
    177 
/*
 * Positions of the uint32_t entries in the table of contents (TOC) at the
 * start of the data. Entry 0 is the TOC length; entries 1..9 are the sizes of
 * the sections in file order (initAliasData() reads them in this order).
 */
enum {
    tocLengthIndex=0,
    converterListIndex=1,
    tagListIndex=2,
    aliasListIndex=3,
    untaggedConvArrayIndex=4,
    taggedAliasArrayIndex=5,
    taggedAliasListsIndex=6,
    tableOptionsIndex=7,
    stringTableIndex=8,
    normalizedStringTableIndex=9,
    offsetsCount,    /* length of the swapper's temporary offsets[] */
    minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
};
    192 
/* Fallback options used by initAliasData() when the data has no option
   section or names a normalization type this code does not know. */
static const UConverterAliasOptions defaultTableOptions = {
    UCNV_IO_UNNORMALIZED,
    0 /* containsCnvOptionInfo */
};
/* Section sizes and section pointers of the loaded alias data. Filled in by
   initAliasData() and zeroed by ucnv_io_cleanup(). */
static UConverterAlias gMainTable;
    198 
    199 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
    200 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
    201 
    202 static UBool U_CALLCONV
    203 isAcceptable(void * /*context*/,
    204              const char * /*type*/, const char * /*name*/,
    205              const UDataInfo *pInfo) {
    206     return (UBool)(
    207         pInfo->size>=20 &&
    208         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    209         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    210         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    211         pInfo->dataFormat[1]==0x76 &&
    212         pInfo->dataFormat[2]==0x41 &&
    213         pInfo->dataFormat[3]==0x6c &&
    214         pInfo->formatVersion[0]==3);
    215 }
    216 
    217 static UBool U_CALLCONV ucnv_io_cleanup(void)
    218 {
    219     if (gAliasData) {
    220         udata_close(gAliasData);
    221         gAliasData = NULL;
    222     }
    223     gAliasDataInitOnce.reset();
    224 
    225     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
    226 
    227     return TRUE;                   /* Everything was cleaned up */
    228 }
    229 
    230 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
    231     UDataMemory *data;
    232     const uint16_t *table;
    233     const uint32_t *sectionSizes;
    234     uint32_t tableStart;
    235     uint32_t currOffset;
    236 
    237     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
    238 
    239     U_ASSERT(gAliasData == NULL);
    240     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
    241     if(U_FAILURE(errCode)) {
    242         return;
    243     }
    244 
    245     sectionSizes = (const uint32_t *)udata_getMemory(data);
    246     table = (const uint16_t *)sectionSizes;
    247 
    248     tableStart      = sectionSizes[0];
    249     if (tableStart < minTocLength) {
    250         errCode = U_INVALID_FORMAT_ERROR;
    251         udata_close(data);
    252         return;
    253     }
    254     gAliasData = data;
    255 
    256     gMainTable.converterListSize      = sectionSizes[1];
    257     gMainTable.tagListSize            = sectionSizes[2];
    258     gMainTable.aliasListSize          = sectionSizes[3];
    259     gMainTable.untaggedConvArraySize  = sectionSizes[4];
    260     gMainTable.taggedAliasArraySize   = sectionSizes[5];
    261     gMainTable.taggedAliasListsSize   = sectionSizes[6];
    262     gMainTable.optionTableSize        = sectionSizes[7];
    263     gMainTable.stringTableSize        = sectionSizes[8];
    264 
    265     if (tableStart > 8) {
    266         gMainTable.normalizedStringTableSize = sectionSizes[9];
    267     }
    268 
    269     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
    270     gMainTable.converterList = table + currOffset;
    271 
    272     currOffset += gMainTable.converterListSize;
    273     gMainTable.tagList = table + currOffset;
    274 
    275     currOffset += gMainTable.tagListSize;
    276     gMainTable.aliasList = table + currOffset;
    277 
    278     currOffset += gMainTable.aliasListSize;
    279     gMainTable.untaggedConvArray = table + currOffset;
    280 
    281     currOffset += gMainTable.untaggedConvArraySize;
    282     gMainTable.taggedAliasArray = table + currOffset;
    283 
    284     /* aliasLists is a 1's based array, but it has a padding character */
    285     currOffset += gMainTable.taggedAliasArraySize;
    286     gMainTable.taggedAliasLists = table + currOffset;
    287 
    288     currOffset += gMainTable.taggedAliasListsSize;
    289     if (gMainTable.optionTableSize > 0
    290         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    291     {
    292         /* Faster table */
    293         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
    294     }
    295     else {
    296         /* Smaller table, or I can't handle this normalization mode!
    297         Use the original slower table lookup. */
    298         gMainTable.optionTable = &defaultTableOptions;
    299     }
    300 
    301     currOffset += gMainTable.optionTableSize;
    302     gMainTable.stringTable = table + currOffset;
    303 
    304     currOffset += gMainTable.stringTableSize;
    305     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
    306         ? gMainTable.stringTable : (table + currOffset));
    307 }
    308 
    309 
    310 static UBool
    311 haveAliasData(UErrorCode *pErrorCode) {
    312     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
    313     return U_SUCCESS(*pErrorCode);
    314 }
    315 
    316 static inline UBool
    317 isAlias(const char *alias, UErrorCode *pErrorCode) {
    318     if(alias==NULL) {
    319         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    320         return FALSE;
    321     }
    322     return (UBool)(*alias!=0);
    323 }
    324 
    325 static uint32_t getTagNumber(const char *tagname) {
    326     if (gMainTable.tagList) {
    327         uint32_t tagNum;
    328         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
    329             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
    330                 return tagNum;
    331             }
    332         }
    333     }
    334 
    335     return UINT32_MAX;
    336 }
    337 
/* character types relevant for ucnv_compareNames() */
/* These are the values stored in asciiTypes[] and ebcdicTypes[]. */
enum {
    UIGNORE,  /* 0: character is skipped by the name comparison */
    ZERO,     /* the digit zero */
    NONZERO,  /* the digits one through nine */
    MINLETTER /* any values from here on are lowercase letter mappings */
};
    345 
/* character types for ASCII 00..7F */
/*
 * Indexed by the ASCII code. Digits map to ZERO/NONZERO, upper- and lowercase
 * letters both map to the lowercase letter's ASCII code, and everything else
 * maps to 0 (UIGNORE).
 */
static const uint8_t asciiTypes[128] = {
    /* 00..2F: controls, space and punctuation */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3F: digits 0-9, then punctuation */
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    /* 40..5F: uppercase letters, stored as their lowercase codes */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    /* 60..7F: lowercase letters, stored as themselves */
    0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
};
    357 
    358 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE)
    359 
/* character types for EBCDIC 80..FF */
/*
 * Indexed by (code & 0x7f), see GET_EBCDIC_TYPE(). Rows C0..EF hold the same
 * values as rows 80..AF, so both ranges fold onto the 8x/9x/Ax codes; row
 * F0..FF holds the digit types. Everything else is 0 (UIGNORE).
 */
static const uint8_t ebcdicTypes[128] = {
    /* 80..AF */
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* B0..BF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* C0..EF: mapped to the corresponding 80..AF values */
    0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    /* F0..FF: digits */
    ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
};

/* Character type of one char of an EBCDIC-encoded name: the ebcdicTypes[]
   entry for 80..FF, UIGNORE for 00..7F. The argument is evaluated twice. */
#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
    373 
/* GET_CHAR_TYPE() classifies a char in the platform's own charset family;
   it is what ucnv_compareNames() uses. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
#else
#   error U_CHARSET_FAMILY is not valid
#endif
    381 
    382 /* @see ucnv_compareNames */
    383 U_CFUNC char * U_EXPORT2
    384 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    385     char *dstItr = dst;
    386     uint8_t type, nextType;
    387     char c1;
    388     UBool afterDigit = FALSE;
    389 
    390     while ((c1 = *name++) != 0) {
    391         type = GET_ASCII_TYPE(c1);
    392         switch (type) {
    393         case UIGNORE:
    394             afterDigit = FALSE;
    395             continue; /* ignore all but letters and digits */
    396         case ZERO:
    397             if (!afterDigit) {
    398                 nextType = GET_ASCII_TYPE(*name);
    399                 if (nextType == ZERO || nextType == NONZERO) {
    400                     continue; /* ignore leading zero before another digit */
    401                 }
    402             }
    403             break;
    404         case NONZERO:
    405             afterDigit = TRUE;
    406             break;
    407         default:
    408             c1 = (char)type; /* lowercased letter */
    409             afterDigit = FALSE;
    410             break;
    411         }
    412         *dstItr++ = c1;
    413     }
    414     *dstItr = 0;
    415     return dst;
    416 }
    417 
    418 U_CFUNC char * U_EXPORT2
    419 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    420     char *dstItr = dst;
    421     uint8_t type, nextType;
    422     char c1;
    423     UBool afterDigit = FALSE;
    424 
    425     while ((c1 = *name++) != 0) {
    426         type = GET_EBCDIC_TYPE(c1);
    427         switch (type) {
    428         case UIGNORE:
    429             afterDigit = FALSE;
    430             continue; /* ignore all but letters and digits */
    431         case ZERO:
    432             if (!afterDigit) {
    433                 nextType = GET_EBCDIC_TYPE(*name);
    434                 if (nextType == ZERO || nextType == NONZERO) {
    435                     continue; /* ignore leading zero before another digit */
    436                 }
    437             }
    438             break;
    439         case NONZERO:
    440             afterDigit = TRUE;
    441             break;
    442         default:
    443             c1 = (char)type; /* lowercased letter */
    444             afterDigit = FALSE;
    445             break;
    446         }
    447         *dstItr++ = c1;
    448     }
    449     *dstItr = 0;
    450     return dst;
    451 }
    452 
    453 /**
    454  * Do a fuzzy compare of two converter/alias names.
    455  * The comparison is case-insensitive, ignores leading zeroes if they are not
    456  * followed by further digits, and ignores all but letters and digits.
    457  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
    458  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
    459  * at http://www.unicode.org/reports/tr22/
    460  *
    461  * This is a symmetrical (commutative) operation; order of arguments
    462  * is insignificant.  This is an important property for sorting the
    463  * list (when the list is preprocessed into binary form) and for
    464  * performing binary searches on it at run time.
    465  *
    466  * @param name1 a converter name or alias, zero-terminated
    467  * @param name2 a converter name or alias, zero-terminated
    468  * @return 0 if the names match, or a negative value if the name1
    469  * lexically precedes name2, or a positive value if the name1
    470  * lexically follows name2.
    471  *
    472  * @see ucnv_io_stripForCompare
    473  */
    474 U_CAPI int U_EXPORT2
    475 ucnv_compareNames(const char *name1, const char *name2) {
    476     int rc;
    477     uint8_t type, nextType;
    478     char c1, c2;
    479     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
    480 
    481     for (;;) {
    482         while ((c1 = *name1++) != 0) {
    483             type = GET_CHAR_TYPE(c1);
    484             switch (type) {
    485             case UIGNORE:
    486                 afterDigit1 = FALSE;
    487                 continue; /* ignore all but letters and digits */
    488             case ZERO:
    489                 if (!afterDigit1) {
    490                     nextType = GET_CHAR_TYPE(*name1);
    491                     if (nextType == ZERO || nextType == NONZERO) {
    492                         continue; /* ignore leading zero before another digit */
    493                     }
    494                 }
    495                 break;
    496             case NONZERO:
    497                 afterDigit1 = TRUE;
    498                 break;
    499             default:
    500                 c1 = (char)type; /* lowercased letter */
    501                 afterDigit1 = FALSE;
    502                 break;
    503             }
    504             break; /* deliver c1 */
    505         }
    506         while ((c2 = *name2++) != 0) {
    507             type = GET_CHAR_TYPE(c2);
    508             switch (type) {
    509             case UIGNORE:
    510                 afterDigit2 = FALSE;
    511                 continue; /* ignore all but letters and digits */
    512             case ZERO:
    513                 if (!afterDigit2) {
    514                     nextType = GET_CHAR_TYPE(*name2);
    515                     if (nextType == ZERO || nextType == NONZERO) {
    516                         continue; /* ignore leading zero before another digit */
    517                     }
    518                 }
    519                 break;
    520             case NONZERO:
    521                 afterDigit2 = TRUE;
    522                 break;
    523             default:
    524                 c2 = (char)type; /* lowercased letter */
    525                 afterDigit2 = FALSE;
    526                 break;
    527             }
    528             break; /* deliver c2 */
    529         }
    530 
    531         /* If we reach the ends of both strings then they match */
    532         if ((c1|c2)==0) {
    533             return 0;
    534         }
    535 
    536         /* Case-insensitive comparison */
    537         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
    538         if (rc != 0) {
    539             return rc;
    540         }
    541     }
    542 }
    543 
    544 /*
    545  * search for an alias
    546  * return the converter number index for gConverterList
    547  */
    548 static inline uint32_t
    549 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    550     uint32_t mid, start, limit;
    551     uint32_t lastMid;
    552     int result;
    553     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
    554     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    555 
    556     if (!isUnnormalized) {
    557         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
    558             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    559             return UINT32_MAX;
    560         }
    561 
    562         /* Lower case and remove ignoreable characters. */
    563         ucnv_io_stripForCompare(strippedName, alias);
    564         alias = strippedName;
    565     }
    566 
    567     /* do a binary search for the alias */
    568     start = 0;
    569     limit = gMainTable.untaggedConvArraySize;
    570     mid = limit;
    571     lastMid = UINT32_MAX;
    572 
    573     for (;;) {
    574         mid = (uint32_t)((start + limit) / 2);
    575         if (lastMid == mid) {   /* Have we moved? */
    576             break;  /* We haven't moved, and it wasn't found. */
    577         }
    578         lastMid = mid;
    579         if (isUnnormalized) {
    580             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
    581         }
    582         else {
    583             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
    584         }
    585 
    586         if (result < 0) {
    587             limit = mid;
    588         } else if (result > 0) {
    589             start = mid;
    590         } else {
    591             /* Since the gencnval tool folds duplicates into one entry,
    592              * this alias in gAliasList is unique, but different standards
    593              * may map an alias to different converters.
    594              */
    595             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
    596                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
    597             }
    598             /* State whether the canonical converter name contains an option.
    599             This information is contained in this list in order to maintain backward & forward compatibility. */
    600             if (containsOption) {
    601                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
    602                 *containsOption = (UBool)((containsCnvOptionInfo
    603                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
    604                     || !containsCnvOptionInfo);
    605             }
    606             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
    607         }
    608     }
    609 
    610     return UINT32_MAX;
    611 }
    612 
    613 /*
    614  * Is this alias in this list?
    615  * alias and listOffset should be non-NULL.
    616  */
    617 static inline UBool
    618 isAliasInList(const char *alias, uint32_t listOffset) {
    619     if (listOffset) {
    620         uint32_t currAlias;
    621         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    622         /* +1 to skip listCount */
    623         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    624         for (currAlias = 0; currAlias < listCount; currAlias++) {
    625             if (currList[currAlias]
    626                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
    627             {
    628                 return TRUE;
    629             }
    630         }
    631     }
    632     return FALSE;
    633 }
    634 
    635 /*
    636  * Search for an standard name of an alias (what is the default name
    637  * that this standard uses?)
    638  * return the listOffset for gTaggedAliasLists. If it's 0,
    639  * the it couldn't be found, but the parameters are valid.
    640  */
    641 static uint32_t
    642 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    643     uint32_t idx;
    644     uint32_t listOffset;
    645     uint32_t convNum;
    646     UErrorCode myErr = U_ZERO_ERROR;
    647     uint32_t tagNum = getTagNumber(standard);
    648 
    649     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    650     convNum = findConverter(alias, NULL, &myErr);
    651     if (myErr != U_ZERO_ERROR) {
    652         *pErrorCode = myErr;
    653     }
    654 
    655     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    656         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    657         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
    658             return listOffset;
    659         }
    660         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    661             /* Uh Oh! They used an ambiguous alias.
    662                We have to search the whole swiss cheese starting
    663                at the highest standard affinity.
    664                This may take a while.
    665             */
    666             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
    667                 listOffset = gMainTable.taggedAliasArray[idx];
    668                 if (listOffset && isAliasInList(alias, listOffset)) {
    669                     uint32_t currTagNum = idx/gMainTable.converterListSize;
    670                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
    671                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
    672                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
    673                         return tempListOffset;
    674                     }
    675                     /* else keep on looking */
    676                     /* We could speed this up by starting on the next row
    677                        because an alias is unique per row, right now.
    678                        This would change if alias versioning appears. */
    679                 }
    680             }
    681             /* The standard doesn't know about the alias */
    682         }
    683         /* else no default name */
    684         return 0;
    685     }
    686     /* else converter or tag not found */
    687 
    688     return UINT32_MAX;
    689 }
    690 
    691 /* Return the canonical name */
    692 static uint32_t
    693 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    694     uint32_t idx;
    695     uint32_t listOffset;
    696     uint32_t convNum;
    697     UErrorCode myErr = U_ZERO_ERROR;
    698     uint32_t tagNum = getTagNumber(standard);
    699 
    700     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    701     convNum = findConverter(alias, NULL, &myErr);
    702     if (myErr != U_ZERO_ERROR) {
    703         *pErrorCode = myErr;
    704     }
    705 
    706     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    707         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    708         if (listOffset && isAliasInList(alias, listOffset)) {
    709             return convNum;
    710         }
    711         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    712             /* Uh Oh! They used an ambiguous alias.
    713                We have to search one slice of the swiss cheese.
    714                We search only in the requested tag, not the whole thing.
    715                This may take a while.
    716             */
    717             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
    718             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
    719             for (idx = convStart; idx < convLimit; idx++) {
    720                 listOffset = gMainTable.taggedAliasArray[idx];
    721                 if (listOffset && isAliasInList(alias, listOffset)) {
    722                     return idx-convStart;
    723                 }
    724             }
    725             /* The standard doesn't know about the alias */
    726         }
    727         /* else no canonical name */
    728     }
    729     /* else converter or tag not found */
    730 
    731     return UINT32_MAX;
    732 }
    733 
    734 
    735 
    736 U_CFUNC const char *
    737 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    738     const char *aliasTmp = alias;
    739     int32_t i = 0;
    740     for (i = 0; i < 2; i++) {
    741         if (i == 1) {
    742             /*
    743              * After the first unsuccess converter lookup, check to see if
    744              * the name begins with 'x-'. If it does, strip it off and try
    745              * again.  This behaviour is similar to how ICU4J does it.
    746              */
    747             if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') {
    748                 aliasTmp = aliasTmp+2;
    749             } else {
    750                 break;
    751             }
    752         }
    753         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
    754             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
    755             if (convNum < gMainTable.converterListSize) {
    756                 return GET_STRING(gMainTable.converterList[convNum]);
    757             }
    758             /* else converter not found */
    759         } else {
    760             break;
    761         }
    762     }
    763 
    764     return NULL;
    765 }
    766 
    767 static int32_t U_CALLCONV
    768 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    769     int32_t value = 0;
    770     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    771     uint32_t listOffset = myContext->listOffset;
    772 
    773     if (listOffset) {
    774         value = gMainTable.taggedAliasLists[listOffset];
    775     }
    776     return value;
    777 }
    778 
    779 static const char* U_CALLCONV
    780 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
    781                             int32_t* resultLength,
    782                             UErrorCode * /*pErrorCode*/)
    783 {
    784     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    785     uint32_t listOffset = myContext->listOffset;
    786 
    787     if (listOffset) {
    788         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    789         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    790 
    791         if (myContext->listIdx < listCount) {
    792             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
    793             if (resultLength) {
    794                 *resultLength = (int32_t)uprv_strlen(myStr);
    795             }
    796             return myStr;
    797         }
    798     }
    799     /* Either we accessed a zero length list, or we enumerated too far. */
    800     if (resultLength) {
    801         *resultLength = 0;
    802     }
    803     return NULL;
    804 }
    805 
    806 static void U_CALLCONV
    807 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    808     ((UAliasContext *)(enumerator->context))->listIdx = 0;
    809 }
    810 
    811 static void U_CALLCONV
    812 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
    813     uprv_free(enumerator->context);
    814     uprv_free(enumerator);
    815 }
    816 
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a template: ucnv_openStandardNames() copies it into each
 * enumeration it allocates and then sets that copy's context pointer.
 * The two leading NULLs are pointer slots left empty in the template;
 * the remaining entries are the close, count, next and reset callbacks
 * defined in this file plus the default uenum_unextDefault.
 */
static const UEnumeration gEnumAliases = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countStandardAliases,
    uenum_unextDefault,
    ucnv_io_nextStandardAliases,
    ucnv_io_resetStandardAliases
};
    827 
    828 U_CAPI UEnumeration * U_EXPORT2
    829 ucnv_openStandardNames(const char *convName,
    830                        const char *standard,
    831                        UErrorCode *pErrorCode)
    832 {
    833     UEnumeration *myEnum = NULL;
    834     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
    835         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    836 
    837         /* When listOffset == 0, we want to acknowledge that the
    838            converter name and standard are okay, but there
    839            is nothing to enumerate. */
    840         if (listOffset < gMainTable.taggedAliasListsSize) {
    841             UAliasContext *myContext;
    842 
    843             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    844             if (myEnum == NULL) {
    845                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    846                 return NULL;
    847             }
    848             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
    849             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    850             if (myContext == NULL) {
    851                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    852                 uprv_free(myEnum);
    853                 return NULL;
    854             }
    855             myContext->listOffset = listOffset;
    856             myContext->listIdx = 0;
    857             myEnum->context = myContext;
    858         }
    859         /* else converter or tag not found */
    860     }
    861     return myEnum;
    862 }
    863 
    864 static uint16_t
    865 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    866     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    867         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    868         if (convNum < gMainTable.converterListSize) {
    869             /* tagListNum - 1 is the ALL tag */
    870             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    871 
    872             if (listOffset) {
    873                 return gMainTable.taggedAliasLists[listOffset];
    874             }
    875             /* else this shouldn't happen. internal program error */
    876         }
    877         /* else converter not found */
    878     }
    879     return 0;
    880 }
    881 
    882 static uint16_t
    883 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    884     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    885         uint32_t currAlias;
    886         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    887         if (convNum < gMainTable.converterListSize) {
    888             /* tagListNum - 1 is the ALL tag */
    889             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    890 
    891             if (listOffset) {
    892                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    893                 /* +1 to skip listCount */
    894                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    895 
    896                 for (currAlias = start; currAlias < listCount; currAlias++) {
    897                     aliases[currAlias] = GET_STRING(currList[currAlias]);
    898                 }
    899             }
    900             /* else this shouldn't happen. internal program error */
    901         }
    902         /* else converter not found */
    903     }
    904     return 0;
    905 }
    906 
    907 static const char *
    908 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
    909     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    910         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    911         if (convNum < gMainTable.converterListSize) {
    912             /* tagListNum - 1 is the ALL tag */
    913             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    914 
    915             if (listOffset) {
    916                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    917                 /* +1 to skip listCount */
    918                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    919 
    920                 if (n < listCount)  {
    921                     return GET_STRING(currList[n]);
    922                 }
    923                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    924             }
    925             /* else this shouldn't happen. internal program error */
    926         }
    927         /* else converter not found */
    928     }
    929     return NULL;
    930 }
    931 
    932 static uint16_t
    933 ucnv_io_countStandards(UErrorCode *pErrorCode) {
    934     if (haveAliasData(pErrorCode)) {
    935         /* Don't include the empty list */
    936         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
    937     }
    938 
    939     return 0;
    940 }
    941 
    942 U_CAPI const char * U_EXPORT2
    943 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    944     if (haveAliasData(pErrorCode)) {
    945         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
    946             return GET_STRING(gMainTable.tagList[n]);
    947         }
    948         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    949     }
    950 
    951     return NULL;
    952 }
    953 
    954 U_CAPI const char * U_EXPORT2
    955 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    956     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    957         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    958 
    959         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
    960             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    961 
    962             /* Get the preferred name from this list */
    963             if (currList[0]) {
    964                 return GET_STRING(currList[0]);
    965             }
    966             /* else someone screwed up the alias table. */
    967             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    968         }
    969     }
    970 
    971     return NULL;
    972 }
    973 
    974 U_CAPI uint16_t U_EXPORT2
    975 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
    976 {
    977     return ucnv_io_countAliases(alias, pErrorCode);
    978 }
    979 
    980 
    981 U_CAPI const char* U_EXPORT2
    982 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
    983 {
    984     return ucnv_io_getAlias(alias, n, pErrorCode);
    985 }
    986 
    987 U_CAPI void U_EXPORT2
    988 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
    989 {
    990     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
    991 }
    992 
    993 U_CAPI uint16_t U_EXPORT2
    994 ucnv_countStandards(void)
    995 {
    996     UErrorCode err = U_ZERO_ERROR;
    997     return ucnv_io_countStandards(&err);
    998 }
    999 
   1000 U_CAPI const char * U_EXPORT2
   1001 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   1002     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   1003         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
   1004 
   1005         if (convNum < gMainTable.converterListSize) {
   1006             return GET_STRING(gMainTable.converterList[convNum]);
   1007         }
   1008     }
   1009 
   1010     return NULL;
   1011 }
   1012 
   1013 static int32_t U_CALLCONV
   1014 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
   1015     return gMainTable.converterListSize;
   1016 }
   1017 
   1018 static const char* U_CALLCONV
   1019 ucnv_io_nextAllConverters(UEnumeration *enumerator,
   1020                             int32_t* resultLength,
   1021                             UErrorCode * /*pErrorCode*/)
   1022 {
   1023     uint16_t *myContext = (uint16_t *)(enumerator->context);
   1024 
   1025     if (*myContext < gMainTable.converterListSize) {
   1026         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
   1027         if (resultLength) {
   1028             *resultLength = (int32_t)uprv_strlen(myStr);
   1029         }
   1030         return myStr;
   1031     }
   1032     /* Either we accessed a zero length list, or we enumerated too far. */
   1033     if (resultLength) {
   1034         *resultLength = 0;
   1035     }
   1036     return NULL;
   1037 }
   1038 
   1039 static void U_CALLCONV
   1040 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   1041     *((uint16_t *)(enumerator->context)) = 0;
   1042 }
   1043 
/*
 * Template enumeration over all converter names.
 *
 * ucnv_openAllNames() copies it into each enumeration it allocates and then
 * sets that copy's context pointer to a uint16_t cursor.
 * The two leading NULLs are pointer slots left empty in the template;
 * the remaining entries are the close, count, next and reset callbacks
 * defined in this file plus the default uenum_unextDefault.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,
    ucnv_io_countAllConverters,
    uenum_unextDefault,
    ucnv_io_nextAllConverters,
    ucnv_io_resetAllConverters
};
   1053 
   1054 U_CAPI UEnumeration * U_EXPORT2
   1055 ucnv_openAllNames(UErrorCode *pErrorCode) {
   1056     UEnumeration *myEnum = NULL;
   1057     if (haveAliasData(pErrorCode)) {
   1058         uint16_t *myContext;
   1059 
   1060         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
   1061         if (myEnum == NULL) {
   1062             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1063             return NULL;
   1064         }
   1065         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
   1066         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
   1067         if (myContext == NULL) {
   1068             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1069             uprv_free(myEnum);
   1070             return NULL;
   1071         }
   1072         *myContext = 0;
   1073         myEnum->context = myContext;
   1074     }
   1075     return myEnum;
   1076 }
   1077 
   1078 U_CFUNC uint16_t
   1079 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
   1080     if (haveAliasData(pErrorCode)) {
   1081         return (uint16_t)gMainTable.converterListSize;
   1082     }
   1083     return 0;
   1084 }
   1085 
   1086 /* alias table swapping ----------------------------------------------------- */
   1087 
/*
 * Signature of the name-stripping functions used while re-sorting aliases:
 * takes a destination buffer and a name and returns a char pointer that
 * io_compareRows() feeds to uprv_strcmp(). ucnv_swapAliases() selects the
 * ASCII or the EBCDIC implementation according to the output charset family.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
   1089 
/*
 * row of a temporary array
 *
 * gets platform-endian charset string indexes and sorting indexes;
 * after sorting this array by strings, the actual arrays are permuted
 * according to the sorting indexes
 *
 * strIndex:  string index read from the input alias list
 * sortIndex: position of the row before sorting
 */
typedef struct TempRow {
    uint16_t strIndex, sortIndex;
} TempRow;
   1100 
/*
 * Working state for re-sorting the alias list in ucnv_swapAliases();
 * also passed as the comparison context to io_compareRows().
 */
typedef struct TempAliasTable {
    const char *chars;                  /* start of the string table in the output (outCharset) data */
    TempRow *rows;                      /* one row per alias; sorted by stripped string */
    uint16_t *resort;                   /* scratch array used for the permutation when swapping in place */
    StripForCompareFn *stripForCompare; /* name-stripping function matching the output charset family */
} TempAliasTable;
   1107 
/*
 * Size of the stack-allocated rows/resort arrays in ucnv_swapAliases();
 * alias lists with more entries than this use a heap allocation instead.
 */
enum {
    STACK_ROW_CAPACITY=500
};
   1111 
   1112 static int32_t
   1113 io_compareRows(const void *context, const void *left, const void *right) {
   1114     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
   1115          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1116 
   1117     TempAliasTable *tempTable=(TempAliasTable *)context;
   1118     const char *chars=tempTable->chars;
   1119 
   1120     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
   1121                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
   1122 }
   1123 
   1124 U_CAPI int32_t U_EXPORT2
   1125 ucnv_swapAliases(const UDataSwapper *ds,
   1126                  const void *inData, int32_t length, void *outData,
   1127                  UErrorCode *pErrorCode) {
   1128     const UDataInfo *pInfo;
   1129     int32_t headerSize;
   1130 
   1131     const uint16_t *inTable;
   1132     const uint32_t *inSectionSizes;
   1133     uint32_t toc[offsetsCount];
   1134     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
   1135     uint32_t i, count, tocLength, topOffset;
   1136 
   1137     TempRow rows[STACK_ROW_CAPACITY];
   1138     uint16_t resort[STACK_ROW_CAPACITY];
   1139     TempAliasTable tempTable;
   1140 
   1141     /* udata_swapDataHeader checks the arguments */
   1142     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1143     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1144         return 0;
   1145     }
   1146 
   1147     /* check data format and format version */
   1148     pInfo=(const UDataInfo *)((const char *)inData+4);
   1149     if(!(
   1150         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
   1151         pInfo->dataFormat[1]==0x76 &&
   1152         pInfo->dataFormat[2]==0x41 &&
   1153         pInfo->dataFormat[3]==0x6c &&
   1154         pInfo->formatVersion[0]==3
   1155     )) {
   1156         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
   1157                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1158                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1159                          pInfo->formatVersion[0]);
   1160         *pErrorCode=U_UNSUPPORTED_ERROR;
   1161         return 0;
   1162     }
   1163 
   1164     /* an alias table must contain at least the table of contents array */
   1165     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
   1166         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1167                          length-headerSize);
   1168         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1169         return 0;
   1170     }
   1171 
   1172     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
   1173     inTable=(const uint16_t *)inSectionSizes;
   1174     uprv_memset(toc, 0, sizeof(toc));
   1175     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
   1176     if(tocLength<minTocLength || offsetsCount<=tocLength) {
   1177         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
   1178         *pErrorCode=U_INVALID_FORMAT_ERROR;
   1179         return 0;
   1180     }
   1181 
   1182     /* read the known part of the table of contents */
   1183     for(i=converterListIndex; i<=tocLength; ++i) {
   1184         toc[i]=ds->readUInt32(inSectionSizes[i]);
   1185     }
   1186 
   1187     /* compute offsets */
   1188     uprv_memset(offsets, 0, sizeof(offsets));
   1189     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
   1190     for(i=tagListIndex; i<=tocLength; ++i) {
   1191         offsets[i]=offsets[i-1]+toc[i-1];
   1192     }
   1193 
   1194     /* compute the overall size of the after-header data, in numbers of 16-bit units */
   1195     topOffset=offsets[i-1]+toc[i-1];
   1196 
   1197     if(length>=0) {
   1198         uint16_t *outTable;
   1199         const uint16_t *p, *p2;
   1200         uint16_t *q, *q2;
   1201         uint16_t oldIndex;
   1202 
   1203         if((length-headerSize)<(2*(int32_t)topOffset)) {
   1204             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1205                              length-headerSize);
   1206             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1207             return 0;
   1208         }
   1209 
   1210         outTable=(uint16_t *)((char *)outData+headerSize);
   1211 
   1212         /* swap the entire table of contents */
   1213         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
   1214 
   1215         /* swap unormalized strings & normalized strings */
   1216         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
   1217                              outTable+offsets[stringTableIndex], pErrorCode);
   1218         if(U_FAILURE(*pErrorCode)) {
   1219             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
   1220             return 0;
   1221         }
   1222 
   1223         if(ds->inCharset==ds->outCharset) {
   1224             /* no need to sort, just swap all 16-bit values together */
   1225             ds->swapArray16(ds,
   1226                             inTable+offsets[converterListIndex],
   1227                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
   1228                             outTable+offsets[converterListIndex],
   1229                             pErrorCode);
   1230         } else {
   1231             /* allocate the temporary table for sorting */
   1232             count=toc[aliasListIndex];
   1233 
   1234             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
   1235 
   1236             if(count<=STACK_ROW_CAPACITY) {
   1237                 tempTable.rows=rows;
   1238                 tempTable.resort=resort;
   1239             } else {
   1240                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
   1241                 if(tempTable.rows==NULL) {
   1242                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
   1243                                      count);
   1244                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1245                     return 0;
   1246                 }
   1247                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
   1248             }
   1249 
   1250             if(ds->outCharset==U_ASCII_FAMILY) {
   1251                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
   1252             } else /* U_EBCDIC_FAMILY */ {
   1253                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
   1254             }
   1255 
   1256             /*
   1257              * Sort unique aliases+mapped names.
   1258              *
   1259              * We need to sort the list again by outCharset strings because they
   1260              * sort differently for different charset families.
   1261              * First we set up a temporary table with the string indexes and
   1262              * sorting indexes and sort that.
   1263              * Then we permutate and copy/swap the actual values.
   1264              */
   1265             p=inTable+offsets[aliasListIndex];
   1266             q=outTable+offsets[aliasListIndex];
   1267 
   1268             p2=inTable+offsets[untaggedConvArrayIndex];
   1269             q2=outTable+offsets[untaggedConvArrayIndex];
   1270 
   1271             for(i=0; i<count; ++i) {
   1272                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
   1273                 tempTable.rows[i].sortIndex=(uint16_t)i;
   1274             }
   1275 
   1276             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
   1277                            io_compareRows, &tempTable,
   1278                            FALSE, pErrorCode);
   1279 
   1280             if(U_SUCCESS(*pErrorCode)) {
   1281                 /* copy/swap/permutate items */
   1282                 if(p!=q) {
   1283                     for(i=0; i<count; ++i) {
   1284                         oldIndex=tempTable.rows[i].sortIndex;
   1285                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
   1286                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
   1287                     }
   1288                 } else {
   1289                     /*
   1290                      * If we swap in-place, then the permutation must use another
   1291                      * temporary array (tempTable.resort)
   1292                      * before the results are copied to the outBundle.
   1293                      */
   1294                     uint16_t *r=tempTable.resort;
   1295 
   1296                     for(i=0; i<count; ++i) {
   1297                         oldIndex=tempTable.rows[i].sortIndex;
   1298                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
   1299                     }
   1300                     uprv_memcpy(q, r, 2*count);
   1301 
   1302                     for(i=0; i<count; ++i) {
   1303                         oldIndex=tempTable.rows[i].sortIndex;
   1304                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
   1305                     }
   1306                     uprv_memcpy(q2, r, 2*count);
   1307                 }
   1308             }
   1309 
   1310             if(tempTable.rows!=rows) {
   1311                 uprv_free(tempTable.rows);
   1312             }
   1313 
   1314             if(U_FAILURE(*pErrorCode)) {
   1315                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
   1316                                  count);
   1317                 return 0;
   1318             }
   1319 
   1320             /* swap remaining 16-bit values */
   1321             ds->swapArray16(ds,
   1322                             inTable+offsets[converterListIndex],
   1323                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
   1324                             outTable+offsets[converterListIndex],
   1325                             pErrorCode);
   1326             ds->swapArray16(ds,
   1327                             inTable+offsets[taggedAliasArrayIndex],
   1328                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
   1329                             outTable+offsets[taggedAliasArrayIndex],
   1330                             pErrorCode);
   1331         }
   1332     }
   1333 
   1334     return headerSize+2*(int32_t)topOffset;
   1335 }
   1336 
   1337 #endif
   1338 
   1339 
   1340 /*
   1341  * Hey, Emacs, please set the following:
   1342  *
   1343  * Local Variables:
   1344  * indent-tabs-mode: nil
   1345  * End:
   1346  *
   1347  */
   1348