Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 *
     10 *  ucnv_io.c:
     11 *  initializes global variables and defines functions pertaining to converter
     12 *  name resolution aspect of the conversion code.
     13 *
     14 *   new implementation:
     15 *
     16 *   created on: 1999nov22
     17 *   created by: Markus W. Scherer
     18 *
     19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
     20 *   with aliases for converter names.
     21 *
     22 *   Date        Name        Description
     23 *   11/22/1999  markus      Created
     24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
     25 *                           Now an alias can map to different converters
     26 *                           depending on the specified standard.
     27 *******************************************************************************
     28 */
     29 
     30 #include "unicode/utypes.h"
     31 
     32 #if !UCONFIG_NO_CONVERSION
     33 
     34 #include "unicode/ucnv.h"
     35 #include "unicode/udata.h"
     36 
     37 #include "umutex.h"
     38 #include "uarrsort.h"
     39 #include "udataswp.h"
     40 #include "cstring.h"
     41 #include "cmemory.h"
     42 #include "ucnv_io.h"
     43 #include "uenumimp.h"
     44 #include "ucln_cmn.h"
     45 
     46 /* Format of cnvalias.icu -----------------------------------------------------
     47  *
     48  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
     49  * This binary form contains several tables. All indexes are to uint16_t
     50  * units, and not to the bytes (uint8_t units). Addressing everything on
     51  * 16-bit boundaries allows us to store more information with small index
     52  * numbers, which are also 16-bit in size. The majority of the table (except
     53  * the string table) are 16-bit numbers.
     54  *
     55  * First there is the size of the Table of Contents (TOC). The TOC
     56  * entries contain the size of each section. In order to find the offset of a
     57  * section you just need to sum up the sizes of the previous sections.
     58  * The TOC length and entries are an array of uint32_t values.
     59  * The first section after the TOC starts immediately after the TOC.
     60  *
     61  * 1) This section contains a list of converters. This list contains indexes
     62  * into the string table for the converter name. The index of this list is
     63  * also used by other sections, which are mentioned later on.
     64  * This list is not sorted.
     65  *
     66  * 2) This section contains a list of tags. This list contains indexes
     67  * into the string table for the tag name. The index of this list is
     68  * also used by other sections, which are mentioned later on.
     69  * This list is in priority order of standards.
     70  *
     71  * 3) This section contains a list of sorted unique aliases. This
     72  * list contains indexes into the string table for the alias name. The
     73  * index of this list is also used by other sections, like the 4th section.
     74  * The index for the 3rd and 4th section is used to get the
     75  * alias -> converter name mapping. Section 3 and 4 form a two column table.
     76  * Some of the most significant bits of each index may contain other
     77  * information (see findConverter for details).
     78  *
     79  * 4) This section contains a list of mapped converter names. Consider this
     80  * as a table that maps the 3rd section to the 1st section. This list contains
     81  * indexes into the 1st section. The index of this list is the same index in
     82  * the 3rd section. There is also some extra information in the high bits of
     83  * each converter index in this table. Currently it's only used to say that
     84  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
     85  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
     86  * the predigested form of the 5th section so that an alias lookup can be fast.
     87  *
     88  * 5) This section contains a 2D array with indexes to the 6th section. This
     89  * section is the full form of all alias mappings. The column index is the
     90  * index into the converter list (column header). The row index is the index
     91  * to tag list (row header). This 2D array is the top part of a 3D array. The
     92  * third dimension is in the 6th section.
     93  *
     94  * 6) This is a blob of variable length arrays. Each array starts with a size,
     95  * and is followed by indexes to alias names in the string table. This is
     96  * the third dimension to the section 5. No other section should be referencing
     97  * this section.
     98  *
     99  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
    100  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
    101  * what type of string normalization is used among other potential things in the
    102  * future.
    103  *
    104  * 8) This is the string table. All strings are indexed on an even address.
    105  * There are two reasons for this. First many chip architectures locate strings
    106  * faster on even address boundaries. Second, since all indexes are 16-bit
    107  * numbers, this string table can be 128KB in size instead of 64KB when we
    108  * only have strings starting on an even address.
    109  *
    110  * 9) When present this is a set of prenormalized strings from section 8. This
    111  * table contains normalized strings with the dashes and spaces stripped out,
    112  * and all strings lowercased. In the future, the options in section 7 may state
    113  * other types of normalization.
    114  *
    115  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
    116  * has a unique alias among all converters. That same alias can
    117  * be mentioned in other standards on different converters,
    118  * but only one alias per tag can be unique.
    119  *
    120  *
    121  *              Converter Names (Usually in TR22 form)
    122  *           -------------------------------------------.
    123  *     T    /                                          /|
    124  *     a   /                                          / |
    125  *     g  /                                          /  |
    126  *     s /                                          /   |
    127  *      /                                          /    |
    128  *      ------------------------------------------/     |
    129  *    A |                                         |     |
    130  *    l |                                         |     |
    131  *    i |                                         |    /
    132  *    a |                                         |   /
    133  *    s |                                         |  /
    134  *    e |                                         | /
    135  *    s |                                         |/
    136  *      -------------------------------------------
    137  *
    138  *
    139  *
    140  * Here is what it really looks like. It's like swiss cheese.
    141  * There are holes. Some converters aren't recognized by
    142  * a standard, or they are really old converters that the
    143  * standard doesn't recognize anymore.
    144  *
    145  *              Converter Names (Usually in TR22 form)
    146  *           -------------------------------------------.
    147  *     T    /##########################################/|
    148  *     a   /     #            #                       /#
    149  *     g  /  #      ##     ##     ### # ### ### ### #/
    150  *     s / #             #####  ####        ##  ## #/#
    151  *      / ### # # ##  #  #   #          ### # #   #/##
    152  *      ------------------------------------------/# #
    153  *    A |### # # ##  #  #   #          ### # #   #|# #
    154  *    l |# # #    #     #               ## #     #|# #
    155  *    i |# # #    #     #                #       #|#
    156  *    a |#                                       #|#
    157  *    s |                                        #|#
    158  *    e
    159  *    s
    160  *
    161  */
    162 
    163 /**
    164  * Used by the UEnumeration API
    165  */
    166 typedef struct UAliasContext {
    167     uint32_t listOffset;
    168     uint32_t listIdx;
    169 } UAliasContext;
    170 
    171 static const char DATA_NAME[] = "cnvalias";
    172 static const char DATA_TYPE[] = "icu";
    173 
    174 static UDataMemory *gAliasData=NULL;
    175 
    176 enum {
    177     tocLengthIndex=0,
    178     converterListIndex=1,
    179     tagListIndex=2,
    180     aliasListIndex=3,
    181     untaggedConvArrayIndex=4,
    182     taggedAliasArrayIndex=5,
    183     taggedAliasListsIndex=6,
    184     tableOptionsIndex=7,
    185     stringTableIndex=8,
    186     normalizedStringTableIndex=9,
    187     offsetsCount,    /* length of the swapper's temporary offsets[] */
    188     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
    189 };
    190 
    191 static const UConverterAliasOptions defaultTableOptions = {
    192     UCNV_IO_UNNORMALIZED,
    193     0 /* containsCnvOptionInfo */
    194 };
    195 static UConverterAlias gMainTable;
    196 
    197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
    198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
    199 
    200 static UBool U_CALLCONV
    201 isAcceptable(void *context,
    202              const char *type, const char *name,
    203              const UDataInfo *pInfo) {
    204     return (UBool)(
    205         pInfo->size>=20 &&
    206         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    207         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    208         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    209         pInfo->dataFormat[1]==0x76 &&
    210         pInfo->dataFormat[2]==0x41 &&
    211         pInfo->dataFormat[3]==0x6c &&
    212         pInfo->formatVersion[0]==3);
    213 }
    214 
    215 static UBool U_CALLCONV ucnv_io_cleanup(void)
    216 {
    217     if (gAliasData) {
    218         udata_close(gAliasData);
    219         gAliasData = NULL;
    220     }
    221 
    222     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
    223 
    224     return TRUE;                   /* Everything was cleaned up */
    225 }
    226 
    227 static UBool
    228 haveAliasData(UErrorCode *pErrorCode) {
    229     int needInit;
    230 
    231     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    232         return FALSE;
    233     }
    234 
    235     UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
    236 
    237     /* load converter alias data from file if necessary */
    238     if (needInit) {
    239         UDataMemory *data;
    240         const uint16_t *table;
    241         const uint32_t *sectionSizes;
    242         uint32_t tableStart;
    243         uint32_t currOffset;
    244 
    245         data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    246         if(U_FAILURE(*pErrorCode)) {
    247             return FALSE;
    248         }
    249 
    250         sectionSizes = (const uint32_t *)udata_getMemory(data);
    251         table = (const uint16_t *)sectionSizes;
    252 
    253         tableStart      = sectionSizes[0];
    254         if (tableStart < minTocLength) {
    255             *pErrorCode = U_INVALID_FORMAT_ERROR;
    256             udata_close(data);
    257             return FALSE;
    258         }
    259 
    260         umtx_lock(NULL);
    261         if(gAliasData==NULL) {
    262             gMainTable.converterListSize      = sectionSizes[1];
    263             gMainTable.tagListSize            = sectionSizes[2];
    264             gMainTable.aliasListSize          = sectionSizes[3];
    265             gMainTable.untaggedConvArraySize  = sectionSizes[4];
    266             gMainTable.taggedAliasArraySize   = sectionSizes[5];
    267             gMainTable.taggedAliasListsSize   = sectionSizes[6];
    268             gMainTable.optionTableSize        = sectionSizes[7];
    269             gMainTable.stringTableSize        = sectionSizes[8];
    270 
    271             if (tableStart > 8) {
    272                 gMainTable.normalizedStringTableSize = sectionSizes[9];
    273             }
    274 
    275             currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
    276             gMainTable.converterList = table + currOffset;
    277 
    278             currOffset += gMainTable.converterListSize;
    279             gMainTable.tagList = table + currOffset;
    280 
    281             currOffset += gMainTable.tagListSize;
    282             gMainTable.aliasList = table + currOffset;
    283 
    284             currOffset += gMainTable.aliasListSize;
    285             gMainTable.untaggedConvArray = table + currOffset;
    286 
    287             currOffset += gMainTable.untaggedConvArraySize;
    288             gMainTable.taggedAliasArray = table + currOffset;
    289 
    290             /* aliasLists is a 1's based array, but it has a padding character */
    291             currOffset += gMainTable.taggedAliasArraySize;
    292             gMainTable.taggedAliasLists = table + currOffset;
    293 
    294             currOffset += gMainTable.taggedAliasListsSize;
    295             if (gMainTable.optionTableSize > 0
    296                 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    297             {
    298                 /* Faster table */
    299                 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
    300             }
    301             else {
    302                 /* Smaller table, or I can't handle this normalization mode!
    303                 Use the original slower table lookup. */
    304                 gMainTable.optionTable = &defaultTableOptions;
    305             }
    306 
    307             currOffset += gMainTable.optionTableSize;
    308             gMainTable.stringTable = table + currOffset;
    309 
    310             currOffset += gMainTable.stringTableSize;
    311             gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
    312                 ? gMainTable.stringTable : (table + currOffset));
    313 
    314             ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
    315 
    316             gAliasData = data;
    317             data=NULL;
    318         }
    319         umtx_unlock(NULL);
    320 
    321         /* if a different thread set it first, then close the extra data */
    322         if(data!=NULL) {
    323             udata_close(data); /* NULL if it was set correctly */
    324         }
    325     }
    326 
    327     return TRUE;
    328 }
    329 
    330 static U_INLINE UBool
    331 isAlias(const char *alias, UErrorCode *pErrorCode) {
    332     if(alias==NULL) {
    333         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    334         return FALSE;
    335     }
    336     return (UBool)(*alias!=0);
    337 }
    338 
    339 static uint32_t getTagNumber(const char *tagname) {
    340     if (gMainTable.tagList) {
    341         uint32_t tagNum;
    342         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
    343             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
    344                 return tagNum;
    345             }
    346         }
    347     }
    348 
    349     return UINT32_MAX;
    350 }
    351 
    352 /* character types relevant for ucnv_compareNames() */
    353 enum {
    354     IGNORE,
    355     ZERO,
    356     NONZERO,
    357     MINLETTER /* any values from here on are lowercase letter mappings */
    358 };
    359 
    360 /* character types for ASCII 00..7F */
    361 static const uint8_t asciiTypes[128] = {
    362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    363     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    364     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    365     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    366     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    367     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    368     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    369     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
    370 };
    371 
    372 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)IGNORE)
    373 
    374 /* character types for EBCDIC 80..FF */
    375 static const uint8_t ebcdicTypes[128] = {
    376     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    377     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    378     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    380     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    381     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    382     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    383     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
    384 };
    385 
    386 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
    387 
    388 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    389 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
    390 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    391 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
    392 #else
    393 #   error U_CHARSET_FAMILY is not valid
    394 #endif
    395 
    396 /* @see ucnv_compareNames */
    397 U_CFUNC char * U_EXPORT2
    398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    399     char *dstItr = dst;
    400     uint8_t type, nextType;
    401     char c1;
    402     UBool afterDigit = FALSE;
    403 
    404     while ((c1 = *name++) != 0) {
    405         type = GET_ASCII_TYPE(c1);
    406         switch (type) {
    407         case IGNORE:
    408             afterDigit = FALSE;
    409             continue; /* ignore all but letters and digits */
    410         case ZERO:
    411             if (!afterDigit) {
    412                 nextType = GET_ASCII_TYPE(*name);
    413                 if (nextType == ZERO || nextType == NONZERO) {
    414                     continue; /* ignore leading zero before another digit */
    415                 }
    416             }
    417             break;
    418         case NONZERO:
    419             afterDigit = TRUE;
    420             break;
    421         default:
    422             c1 = (char)type; /* lowercased letter */
    423             afterDigit = FALSE;
    424             break;
    425         }
    426         *dstItr++ = c1;
    427     }
    428     *dstItr = 0;
    429     return dst;
    430 }
    431 
    432 U_CFUNC char * U_EXPORT2
    433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    434     char *dstItr = dst;
    435     uint8_t type, nextType;
    436     char c1;
    437     UBool afterDigit = FALSE;
    438 
    439     while ((c1 = *name++) != 0) {
    440         type = GET_EBCDIC_TYPE(c1);
    441         switch (type) {
    442         case IGNORE:
    443             afterDigit = FALSE;
    444             continue; /* ignore all but letters and digits */
    445         case ZERO:
    446             if (!afterDigit) {
    447                 nextType = GET_EBCDIC_TYPE(*name);
    448                 if (nextType == ZERO || nextType == NONZERO) {
    449                     continue; /* ignore leading zero before another digit */
    450                 }
    451             }
    452             break;
    453         case NONZERO:
    454             afterDigit = TRUE;
    455             break;
    456         default:
    457             c1 = (char)type; /* lowercased letter */
    458             afterDigit = FALSE;
    459             break;
    460         }
    461         *dstItr++ = c1;
    462     }
    463     *dstItr = 0;
    464     return dst;
    465 }
    466 
    467 /**
    468  * Do a fuzzy compare of two converter/alias names.
    469  * The comparison is case-insensitive, ignores leading zeroes if they are not
    470  * followed by further digits, and ignores all but letters and digits.
    471  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
    472  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
    473  * at http://www.unicode.org/reports/tr22/
    474  *
    475  * This is a symmetrical (commutative) operation; order of arguments
    476  * is insignificant.  This is an important property for sorting the
    477  * list (when the list is preprocessed into binary form) and for
    478  * performing binary searches on it at run time.
    479  *
    480  * @param name1 a converter name or alias, zero-terminated
    481  * @param name2 a converter name or alias, zero-terminated
    482  * @return 0 if the names match, or a negative value if the name1
    483  * lexically precedes name2, or a positive value if the name1
    484  * lexically follows name2.
    485  *
    486  * @see ucnv_io_stripForCompare
    487  */
    488 U_CAPI int U_EXPORT2
    489 ucnv_compareNames(const char *name1, const char *name2) {
    490     int rc;
    491     uint8_t type, nextType;
    492     char c1, c2;
    493     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
    494 
    495     for (;;) {
    496         while ((c1 = *name1++) != 0) {
    497             type = GET_CHAR_TYPE(c1);
    498             switch (type) {
    499             case IGNORE:
    500                 afterDigit1 = FALSE;
    501                 continue; /* ignore all but letters and digits */
    502             case ZERO:
    503                 if (!afterDigit1) {
    504                     nextType = GET_CHAR_TYPE(*name1);
    505                     if (nextType == ZERO || nextType == NONZERO) {
    506                         continue; /* ignore leading zero before another digit */
    507                     }
    508                 }
    509                 break;
    510             case NONZERO:
    511                 afterDigit1 = TRUE;
    512                 break;
    513             default:
    514                 c1 = (char)type; /* lowercased letter */
    515                 afterDigit1 = FALSE;
    516                 break;
    517             }
    518             break; /* deliver c1 */
    519         }
    520         while ((c2 = *name2++) != 0) {
    521             type = GET_CHAR_TYPE(c2);
    522             switch (type) {
    523             case IGNORE:
    524                 afterDigit2 = FALSE;
    525                 continue; /* ignore all but letters and digits */
    526             case ZERO:
    527                 if (!afterDigit2) {
    528                     nextType = GET_CHAR_TYPE(*name2);
    529                     if (nextType == ZERO || nextType == NONZERO) {
    530                         continue; /* ignore leading zero before another digit */
    531                     }
    532                 }
    533                 break;
    534             case NONZERO:
    535                 afterDigit2 = TRUE;
    536                 break;
    537             default:
    538                 c2 = (char)type; /* lowercased letter */
    539                 afterDigit2 = FALSE;
    540                 break;
    541             }
    542             break; /* deliver c2 */
    543         }
    544 
    545         /* If we reach the ends of both strings then they match */
    546         if ((c1|c2)==0) {
    547             return 0;
    548         }
    549 
    550         /* Case-insensitive comparison */
    551         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
    552         if (rc != 0) {
    553             return rc;
    554         }
    555     }
    556 }
    557 
    558 /*
    559  * search for an alias
    560  * return the converter number index for gConverterList
    561  */
    562 static U_INLINE uint32_t
    563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    564     uint32_t mid, start, limit;
    565     uint32_t lastMid;
    566     int result;
    567     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
    568     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    569 
    570     if (!isUnnormalized) {
    571         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
    572             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    573             return UINT32_MAX;
    574         }
    575 
    576         /* Lower case and remove ignoreable characters. */
    577         ucnv_io_stripForCompare(strippedName, alias);
    578         alias = strippedName;
    579     }
    580 
    581     /* do a binary search for the alias */
    582     start = 0;
    583     limit = gMainTable.untaggedConvArraySize;
    584     mid = limit;
    585     lastMid = UINT32_MAX;
    586 
    587     for (;;) {
    588         mid = (uint32_t)((start + limit) / 2);
    589         if (lastMid == mid) {   /* Have we moved? */
    590             break;  /* We haven't moved, and it wasn't found. */
    591         }
    592         lastMid = mid;
    593         if (isUnnormalized) {
    594             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
    595         }
    596         else {
    597             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
    598         }
    599 
    600         if (result < 0) {
    601             limit = mid;
    602         } else if (result > 0) {
    603             start = mid;
    604         } else {
    605             /* Since the gencnval tool folds duplicates into one entry,
    606              * this alias in gAliasList is unique, but different standards
    607              * may map an alias to different converters.
    608              */
    609             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
    610                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
    611             }
    612             /* State whether the canonical converter name contains an option.
    613             This information is contained in this list in order to maintain backward & forward compatibility. */
    614             if (containsOption) {
    615                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
    616                 *containsOption = (UBool)((containsCnvOptionInfo
    617                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
    618                     || !containsCnvOptionInfo);
    619             }
    620             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
    621         }
    622     }
    623 
    624     return UINT32_MAX;
    625 }
    626 
    627 /*
    628  * Is this alias in this list?
    629  * alias and listOffset should be non-NULL.
    630  */
    631 static U_INLINE UBool
    632 isAliasInList(const char *alias, uint32_t listOffset) {
    633     if (listOffset) {
    634         uint32_t currAlias;
    635         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    636         /* +1 to skip listCount */
    637         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    638         for (currAlias = 0; currAlias < listCount; currAlias++) {
    639             if (currList[currAlias]
    640                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
    641             {
    642                 return TRUE;
    643             }
    644         }
    645     }
    646     return FALSE;
    647 }
    648 
    649 /*
    650  * Search for an standard name of an alias (what is the default name
    651  * that this standard uses?)
    652  * return the listOffset for gTaggedAliasLists. If it's 0,
    653  * the it couldn't be found, but the parameters are valid.
    654  */
    655 static uint32_t
    656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    657     uint32_t idx;
    658     uint32_t listOffset;
    659     uint32_t convNum;
    660     UErrorCode myErr = U_ZERO_ERROR;
    661     uint32_t tagNum = getTagNumber(standard);
    662 
    663     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    664     convNum = findConverter(alias, NULL, &myErr);
    665     if (myErr != U_ZERO_ERROR) {
    666         *pErrorCode = myErr;
    667     }
    668 
    669     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    670         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    671         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
    672             return listOffset;
    673         }
    674         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    675             /* Uh Oh! They used an ambiguous alias.
    676                We have to search the whole swiss cheese starting
    677                at the highest standard affinity.
    678                This may take a while.
    679             */
    680             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
    681                 listOffset = gMainTable.taggedAliasArray[idx];
    682                 if (listOffset && isAliasInList(alias, listOffset)) {
    683                     uint32_t currTagNum = idx/gMainTable.converterListSize;
    684                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
    685                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
    686                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
    687                         return tempListOffset;
    688                     }
    689                     /* else keep on looking */
    690                     /* We could speed this up by starting on the next row
    691                        because an alias is unique per row, right now.
    692                        This would change if alias versioning appears. */
    693                 }
    694             }
    695             /* The standard doesn't know about the alias */
    696         }
    697         /* else no default name */
    698         return 0;
    699     }
    700     /* else converter or tag not found */
    701 
    702     return UINT32_MAX;
    703 }
    704 
    705 /* Return the canonical name */
    706 static uint32_t
    707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    708     uint32_t idx;
    709     uint32_t listOffset;
    710     uint32_t convNum;
    711     UErrorCode myErr = U_ZERO_ERROR;
    712     uint32_t tagNum = getTagNumber(standard);
    713 
    714     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    715     convNum = findConverter(alias, NULL, &myErr);
    716     if (myErr != U_ZERO_ERROR) {
    717         *pErrorCode = myErr;
    718     }
    719 
    720     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    721         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    722         if (listOffset && isAliasInList(alias, listOffset)) {
    723             return convNum;
    724         }
    725         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    726             /* Uh Oh! They used an ambiguous alias.
    727                We have to search one slice of the swiss cheese.
    728                We search only in the requested tag, not the whole thing.
    729                This may take a while.
    730             */
    731             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
    732             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
    733             for (idx = convStart; idx < convLimit; idx++) {
    734                 listOffset = gMainTable.taggedAliasArray[idx];
    735                 if (listOffset && isAliasInList(alias, listOffset)) {
    736                     return idx-convStart;
    737                 }
    738             }
    739             /* The standard doesn't know about the alias */
    740         }
    741         /* else no canonical name */
    742     }
    743     /* else converter or tag not found */
    744 
    745     return UINT32_MAX;
    746 }
    747 
    748 
    749 
    750 U_CFUNC const char *
    751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    752     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    753         uint32_t convNum = findConverter(alias, containsOption, pErrorCode);
    754         if (convNum < gMainTable.converterListSize) {
    755             return GET_STRING(gMainTable.converterList[convNum]);
    756         }
    757         /* else converter not found */
    758     }
    759     return NULL;
    760 }
    761 
    762 static int32_t U_CALLCONV
    763 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
    764     int32_t value = 0;
    765     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    766     uint32_t listOffset = myContext->listOffset;
    767 
    768     if (listOffset) {
    769         value = gMainTable.taggedAliasLists[listOffset];
    770     }
    771     return value;
    772 }
    773 
    774 static const char* U_CALLCONV
    775 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
    776                             int32_t* resultLength,
    777                             UErrorCode *pErrorCode)
    778 {
    779     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    780     uint32_t listOffset = myContext->listOffset;
    781 
    782     if (listOffset) {
    783         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    784         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    785 
    786         if (myContext->listIdx < listCount) {
    787             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
    788             if (resultLength) {
    789                 *resultLength = (int32_t)uprv_strlen(myStr);
    790             }
    791             return myStr;
    792         }
    793     }
    794     /* Either we accessed a zero length list, or we enumerated too far. */
    795     if (resultLength) {
    796         *resultLength = 0;
    797     }
    798     return NULL;
    799 }
    800 
    801 static void U_CALLCONV
    802 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode *pErrorCode) {
    803     ((UAliasContext *)(enumerator->context))->listIdx = 0;
    804 }
    805 
    806 static void U_CALLCONV
    807 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
    808     uprv_free(enumerator->context);
    809     uprv_free(enumerator);
    810 }
    811 
    812 /* Enumerate the aliases for the specified converter and standard tag */
    813 static const UEnumeration gEnumAliases = {
    814     NULL,
    815     NULL,
    816     ucnv_io_closeUEnumeration,
    817     ucnv_io_countStandardAliases,
    818     uenum_unextDefault,
    819     ucnv_io_nextStandardAliases,
    820     ucnv_io_resetStandardAliases
    821 };
    822 
    823 U_CAPI UEnumeration * U_EXPORT2
    824 ucnv_openStandardNames(const char *convName,
    825                        const char *standard,
    826                        UErrorCode *pErrorCode)
    827 {
    828     UEnumeration *myEnum = NULL;
    829     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
    830         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    831 
    832         /* When listOffset == 0, we want to acknowledge that the
    833            converter name and standard are okay, but there
    834            is nothing to enumerate. */
    835         if (listOffset < gMainTable.taggedAliasListsSize) {
    836             UAliasContext *myContext;
    837 
    838             myEnum = uprv_malloc(sizeof(UEnumeration));
    839             if (myEnum == NULL) {
    840                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    841                 return NULL;
    842             }
    843             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
    844             myContext = uprv_malloc(sizeof(UAliasContext));
    845             if (myContext == NULL) {
    846                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    847                 uprv_free(myEnum);
    848                 return NULL;
    849             }
    850             myContext->listOffset = listOffset;
    851             myContext->listIdx = 0;
    852             myEnum->context = myContext;
    853         }
    854         /* else converter or tag not found */
    855     }
    856     return myEnum;
    857 }
    858 
    859 static uint16_t
    860 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    861     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    862         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    863         if (convNum < gMainTable.converterListSize) {
    864             /* tagListNum - 1 is the ALL tag */
    865             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    866 
    867             if (listOffset) {
    868                 return gMainTable.taggedAliasLists[listOffset];
    869             }
    870             /* else this shouldn't happen. internal program error */
    871         }
    872         /* else converter not found */
    873     }
    874     return 0;
    875 }
    876 
    877 static uint16_t
    878 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    879     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    880         uint32_t currAlias;
    881         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    882         if (convNum < gMainTable.converterListSize) {
    883             /* tagListNum - 1 is the ALL tag */
    884             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    885 
    886             if (listOffset) {
    887                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    888                 /* +1 to skip listCount */
    889                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    890 
    891                 for (currAlias = start; currAlias < listCount; currAlias++) {
    892                     aliases[currAlias] = GET_STRING(currList[currAlias]);
    893                 }
    894             }
    895             /* else this shouldn't happen. internal program error */
    896         }
    897         /* else converter not found */
    898     }
    899     return 0;
    900 }
    901 
    902 static const char *
    903 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
    904     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    905         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    906         if (convNum < gMainTable.converterListSize) {
    907             /* tagListNum - 1 is the ALL tag */
    908             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    909 
    910             if (listOffset) {
    911                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    912                 /* +1 to skip listCount */
    913                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    914 
    915                 if (n < listCount)  {
    916                     return GET_STRING(currList[n]);
    917                 }
    918                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    919             }
    920             /* else this shouldn't happen. internal program error */
    921         }
    922         /* else converter not found */
    923     }
    924     return NULL;
    925 }
    926 
    927 static uint16_t
    928 ucnv_io_countStandards(UErrorCode *pErrorCode) {
    929     if (haveAliasData(pErrorCode)) {
    930         /* Don't include the empty list */
    931         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
    932     }
    933 
    934     return 0;
    935 }
    936 
    937 U_CAPI const char * U_EXPORT2
    938 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    939     if (haveAliasData(pErrorCode)) {
    940         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
    941             return GET_STRING(gMainTable.tagList[n]);
    942         }
    943         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    944     }
    945 
    946     return NULL;
    947 }
    948 
    949 U_CAPI const char * U_EXPORT2
    950 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    951     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    952         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    953 
    954         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
    955             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    956 
    957             /* Get the preferred name from this list */
    958             if (currList[0]) {
    959                 return GET_STRING(currList[0]);
    960             }
    961             /* else someone screwed up the alias table. */
    962             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    963         }
    964     }
    965 
    966     return NULL;
    967 }
    968 
    969 U_CAPI uint16_t U_EXPORT2
    970 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
    971 {
    972     return ucnv_io_countAliases(alias, pErrorCode);
    973 }
    974 
    975 
    976 U_CAPI const char* U_EXPORT2
    977 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
    978 {
    979     return ucnv_io_getAlias(alias, n, pErrorCode);
    980 }
    981 
    982 U_CAPI void U_EXPORT2
    983 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
    984 {
    985     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
    986 }
    987 
    988 U_CAPI uint16_t U_EXPORT2
    989 ucnv_countStandards(void)
    990 {
    991     UErrorCode err = U_ZERO_ERROR;
    992     return ucnv_io_countStandards(&err);
    993 }
    994 
    995 U_CAPI const char * U_EXPORT2
    996 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    997     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    998         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
    999 
   1000         if (convNum < gMainTable.converterListSize) {
   1001             return GET_STRING(gMainTable.converterList[convNum]);
   1002         }
   1003     }
   1004 
   1005     return NULL;
   1006 }
   1007 
   1008 static int32_t U_CALLCONV
   1009 ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
   1010     return gMainTable.converterListSize;
   1011 }
   1012 
   1013 static const char* U_CALLCONV
   1014 ucnv_io_nextAllConverters(UEnumeration *enumerator,
   1015                             int32_t* resultLength,
   1016                             UErrorCode *pErrorCode)
   1017 {
   1018     uint16_t *myContext = (uint16_t *)(enumerator->context);
   1019 
   1020     if (*myContext < gMainTable.converterListSize) {
   1021         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
   1022         if (resultLength) {
   1023             *resultLength = (int32_t)uprv_strlen(myStr);
   1024         }
   1025         return myStr;
   1026     }
   1027     /* Either we accessed a zero length list, or we enumerated too far. */
   1028     if (resultLength) {
   1029         *resultLength = 0;
   1030     }
   1031     return NULL;
   1032 }
   1033 
   1034 static void U_CALLCONV
   1035 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) {
   1036     *((uint16_t *)(enumerator->context)) = 0;
   1037 }
   1038 
   1039 static const UEnumeration gEnumAllConverters = {
   1040     NULL,
   1041     NULL,
   1042     ucnv_io_closeUEnumeration,
   1043     ucnv_io_countAllConverters,
   1044     uenum_unextDefault,
   1045     ucnv_io_nextAllConverters,
   1046     ucnv_io_resetAllConverters
   1047 };
   1048 
   1049 U_CAPI UEnumeration * U_EXPORT2
   1050 ucnv_openAllNames(UErrorCode *pErrorCode) {
   1051     UEnumeration *myEnum = NULL;
   1052     if (haveAliasData(pErrorCode)) {
   1053         uint16_t *myContext;
   1054 
   1055         myEnum = uprv_malloc(sizeof(UEnumeration));
   1056         if (myEnum == NULL) {
   1057             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1058             return NULL;
   1059         }
   1060         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
   1061         myContext = uprv_malloc(sizeof(uint16_t));
   1062         if (myContext == NULL) {
   1063             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1064             uprv_free(myEnum);
   1065             return NULL;
   1066         }
   1067         *myContext = 0;
   1068         myEnum->context = myContext;
   1069     }
   1070     return myEnum;
   1071 }
   1072 
   1073 U_CFUNC uint16_t
   1074 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
   1075     if (haveAliasData(pErrorCode)) {
   1076         return (uint16_t)gMainTable.converterListSize;
   1077     }
   1078     return 0;
   1079 }
   1080 
   1081 /* alias table swapping ----------------------------------------------------- */
   1082 
   1083 typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
   1084 
   1085 /*
   1086  * row of a temporary array
   1087  *
   1088  * gets platform-endian charset string indexes and sorting indexes;
   1089  * after sorting this array by strings, the actual arrays are permutated
   1090  * according to the sorting indexes
   1091  */
   1092 typedef struct TempRow {
   1093     uint16_t strIndex, sortIndex;
   1094 } TempRow;
   1095 
   1096 typedef struct TempAliasTable {
   1097     const char *chars;
   1098     TempRow *rows;
   1099     uint16_t *resort;
   1100     StripForCompareFn *stripForCompare;
   1101 } TempAliasTable;
   1102 
   1103 enum {
   1104     STACK_ROW_CAPACITY=500
   1105 };
   1106 
   1107 static int32_t
   1108 io_compareRows(const void *context, const void *left, const void *right) {
   1109     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
   1110          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1111 
   1112     TempAliasTable *tempTable=(TempAliasTable *)context;
   1113     const char *chars=tempTable->chars;
   1114 
   1115     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
   1116                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
   1117 }
   1118 
   1119 U_CAPI int32_t U_EXPORT2
   1120 ucnv_swapAliases(const UDataSwapper *ds,
   1121                  const void *inData, int32_t length, void *outData,
   1122                  UErrorCode *pErrorCode) {
   1123     const UDataInfo *pInfo;
   1124     int32_t headerSize;
   1125 
   1126     const uint16_t *inTable;
   1127     const uint32_t *inSectionSizes;
   1128     uint32_t toc[offsetsCount];
   1129     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
   1130     uint32_t i, count, tocLength, topOffset;
   1131 
   1132     TempRow rows[STACK_ROW_CAPACITY];
   1133     uint16_t resort[STACK_ROW_CAPACITY];
   1134     TempAliasTable tempTable;
   1135 
   1136     /* udata_swapDataHeader checks the arguments */
   1137     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1138     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1139         return 0;
   1140     }
   1141 
   1142     /* check data format and format version */
   1143     pInfo=(const UDataInfo *)((const char *)inData+4);
   1144     if(!(
   1145         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
   1146         pInfo->dataFormat[1]==0x76 &&
   1147         pInfo->dataFormat[2]==0x41 &&
   1148         pInfo->dataFormat[3]==0x6c &&
   1149         pInfo->formatVersion[0]==3
   1150     )) {
   1151         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
   1152                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1153                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1154                          pInfo->formatVersion[0]);
   1155         *pErrorCode=U_UNSUPPORTED_ERROR;
   1156         return 0;
   1157     }
   1158 
   1159     /* an alias table must contain at least the table of contents array */
   1160     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
   1161         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1162                          length-headerSize);
   1163         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1164         return 0;
   1165     }
   1166 
   1167     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
   1168     inTable=(const uint16_t *)inSectionSizes;
   1169     uprv_memset(toc, 0, sizeof(toc));
   1170     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
   1171     if(tocLength<minTocLength || offsetsCount<=tocLength) {
   1172         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
   1173         *pErrorCode=U_INVALID_FORMAT_ERROR;
   1174         return 0;
   1175     }
   1176 
   1177     /* read the known part of the table of contents */
   1178     for(i=converterListIndex; i<=tocLength; ++i) {
   1179         toc[i]=ds->readUInt32(inSectionSizes[i]);
   1180     }
   1181 
   1182     /* compute offsets */
   1183     uprv_memset(offsets, 0, sizeof(offsets));
   1184     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
   1185     for(i=tagListIndex; i<=tocLength; ++i) {
   1186         offsets[i]=offsets[i-1]+toc[i-1];
   1187     }
   1188 
   1189     /* compute the overall size of the after-header data, in numbers of 16-bit units */
   1190     topOffset=offsets[i-1]+toc[i-1];
   1191 
   1192     if(length>=0) {
   1193         uint16_t *outTable;
   1194         const uint16_t *p, *p2;
   1195         uint16_t *q, *q2;
   1196         uint16_t oldIndex;
   1197 
   1198         if((length-headerSize)<(2*(int32_t)topOffset)) {
   1199             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1200                              length-headerSize);
   1201             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1202             return 0;
   1203         }
   1204 
   1205         outTable=(uint16_t *)((char *)outData+headerSize);
   1206 
   1207         /* swap the entire table of contents */
   1208         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
   1209 
   1210         /* swap unormalized strings & normalized strings */
   1211         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
   1212                              outTable+offsets[stringTableIndex], pErrorCode);
   1213         if(U_FAILURE(*pErrorCode)) {
   1214             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
   1215             return 0;
   1216         }
   1217 
   1218         if(ds->inCharset==ds->outCharset) {
   1219             /* no need to sort, just swap all 16-bit values together */
   1220             ds->swapArray16(ds,
   1221                             inTable+offsets[converterListIndex],
   1222                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
   1223                             outTable+offsets[converterListIndex],
   1224                             pErrorCode);
   1225         } else {
   1226             /* allocate the temporary table for sorting */
   1227             count=toc[aliasListIndex];
   1228 
   1229             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
   1230 
   1231             if(count<=STACK_ROW_CAPACITY) {
   1232                 tempTable.rows=rows;
   1233                 tempTable.resort=resort;
   1234             } else {
   1235                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
   1236                 if(tempTable.rows==NULL) {
   1237                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
   1238                                      count);
   1239                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1240                     return 0;
   1241                 }
   1242                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
   1243             }
   1244 
   1245             if(ds->outCharset==U_ASCII_FAMILY) {
   1246                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
   1247             } else /* U_EBCDIC_FAMILY */ {
   1248                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
   1249             }
   1250 
   1251             /*
   1252              * Sort unique aliases+mapped names.
   1253              *
   1254              * We need to sort the list again by outCharset strings because they
   1255              * sort differently for different charset families.
   1256              * First we set up a temporary table with the string indexes and
   1257              * sorting indexes and sort that.
   1258              * Then we permutate and copy/swap the actual values.
   1259              */
   1260             p=inTable+offsets[aliasListIndex];
   1261             q=outTable+offsets[aliasListIndex];
   1262 
   1263             p2=inTable+offsets[untaggedConvArrayIndex];
   1264             q2=outTable+offsets[untaggedConvArrayIndex];
   1265 
   1266             for(i=0; i<count; ++i) {
   1267                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
   1268                 tempTable.rows[i].sortIndex=(uint16_t)i;
   1269             }
   1270 
   1271             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
   1272                            io_compareRows, &tempTable,
   1273                            FALSE, pErrorCode);
   1274 
   1275             if(U_SUCCESS(*pErrorCode)) {
   1276                 /* copy/swap/permutate items */
   1277                 if(p!=q) {
   1278                     for(i=0; i<count; ++i) {
   1279                         oldIndex=tempTable.rows[i].sortIndex;
   1280                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
   1281                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
   1282                     }
   1283                 } else {
   1284                     /*
   1285                      * If we swap in-place, then the permutation must use another
   1286                      * temporary array (tempTable.resort)
   1287                      * before the results are copied to the outBundle.
   1288                      */
   1289                     uint16_t *r=tempTable.resort;
   1290 
   1291                     for(i=0; i<count; ++i) {
   1292                         oldIndex=tempTable.rows[i].sortIndex;
   1293                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
   1294                     }
   1295                     uprv_memcpy(q, r, 2*count);
   1296 
   1297                     for(i=0; i<count; ++i) {
   1298                         oldIndex=tempTable.rows[i].sortIndex;
   1299                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
   1300                     }
   1301                     uprv_memcpy(q2, r, 2*count);
   1302                 }
   1303             }
   1304 
   1305             if(tempTable.rows!=rows) {
   1306                 uprv_free(tempTable.rows);
   1307             }
   1308 
   1309             if(U_FAILURE(*pErrorCode)) {
   1310                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
   1311                                  count);
   1312                 return 0;
   1313             }
   1314 
   1315             /* swap remaining 16-bit values */
   1316             ds->swapArray16(ds,
   1317                             inTable+offsets[converterListIndex],
   1318                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
   1319                             outTable+offsets[converterListIndex],
   1320                             pErrorCode);
   1321             ds->swapArray16(ds,
   1322                             inTable+offsets[taggedAliasArrayIndex],
   1323                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
   1324                             outTable+offsets[taggedAliasArrayIndex],
   1325                             pErrorCode);
   1326         }
   1327     }
   1328 
   1329     return headerSize+2*(int32_t)topOffset;
   1330 }
   1331 
   1332 #endif
   1333 
   1334 /*
   1335  * Hey, Emacs, please set the following:
   1336  *
   1337  * Local Variables:
   1338  * indent-tabs-mode: nil
   1339  * End:
   1340  *
   1341  */
   1342