Home | History | Annotate | Download | only in common
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *
     11 *
     12 *  ucnv_io.cpp:
     13 *  initializes global variables and defines functions pertaining to converter
     14 *  name resolution aspect of the conversion code.
     15 *
     16 *   new implementation:
     17 *
     18 *   created on: 1999nov22
     19 *   created by: Markus W. Scherer
     20 *
     21 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
     22 *   with aliases for converter names.
     23 *
     24 *   Date        Name        Description
     25 *   11/22/1999  markus      Created
     26 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
     27 *                           Now an alias can map to different converters
     28 *                           depending on the specified standard.
     29 *******************************************************************************
     30 */
     31 
     32 #include "unicode/utypes.h"
     33 
     34 #if !UCONFIG_NO_CONVERSION
     35 
     36 #include "unicode/ucnv.h"
     37 #include "unicode/udata.h"
     38 
     39 #include "umutex.h"
     40 #include "uarrsort.h"
     41 #include "uassert.h"
     42 #include "udataswp.h"
     43 #include "cstring.h"
     44 #include "cmemory.h"
     45 #include "ucnv_io.h"
     46 #include "uenumimp.h"
     47 #include "ucln_cmn.h"
     48 
     49 /* Format of cnvalias.icu -----------------------------------------------------
     50  *
     51  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
     52  * This binary form contains several tables. All indexes are to uint16_t
     53  * units, and not to the bytes (uint8_t units). Addressing everything on
     54  * 16-bit boundaries allows us to store more information with small index
     55  * numbers, which are also 16-bit in size. The majority of the table (except
     56  * the string table) are 16-bit numbers.
     57  *
     58  * First there is the size of the Table of Contents (TOC). The TOC
     59  * entries contain the size of each section. In order to find the offset
     60  * you just need to sum up the previous offsets.
     61  * The TOC length and entries are an array of uint32_t values.
     62  * The first section after the TOC starts immediately after the TOC.
     63  *
     64  * 1) This section contains a list of converters. This list contains indexes
     65  * into the string table for the converter name. The index of this list is
     66  * also used by other sections, which are mentioned later on.
     67  * This list is not sorted.
     68  *
     69  * 2) This section contains a list of tags. This list contains indexes
     70  * into the string table for the tag name. The index of this list is
     71  * also used by other sections, which are mentioned later on.
     72  * This list is in priority order of standards.
     73  *
     74  * 3) This section contains a list of sorted unique aliases. This
     75  * list contains indexes into the string table for the alias name. The
     76  * index of this list is also used by other sections, like the 4th section.
     77  * The index for the 3rd and 4th section is used to get the
     78  * alias -> converter name mapping. Section 3 and 4 form a two column table.
     79  * Some of the most significant bits of each index may contain other
     80  * information (see findConverter for details).
     81  *
     82  * 4) This section contains a list of mapped converter names. Consider this
     83  * as a table that maps the 3rd section to the 1st section. This list contains
     84  * indexes into the 1st section. The index of this list is the same index in
     85  * the 3rd section. There is also some extra information in the high bits of
     86  * each converter index in this table. Currently it's only used to say that
     87  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
     88  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
     89  * the predigested form of the 5th section so that an alias lookup can be fast.
     90  *
     91  * 5) This section contains a 2D array with indexes to the 6th section. This
     92  * section is the full form of all alias mappings. The column index is the
     93  * index into the converter list (column header). The row index is the index
     94  * to tag list (row header). This 2D array is the top part of a 3D array. The
     95  * third dimension is in the 6th section.
     96  *
     97  * 6) This is a blob of variable length arrays. Each array starts with a size,
     98  * and is followed by indexes to alias names in the string table. This is
     99  * the third dimension of section 5. No other section should be referencing
    100  * this section.
    101  *
    102  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
    103  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
    104  * what type of string normalization is used among other potential things in the
    105  * future.
    106  *
    107  * 8) This is the string table. All strings are indexed on an even address.
    108  * There are two reasons for this. First many chip architectures locate strings
    109  * faster on even address boundaries. Second, since all indexes are 16-bit
    110  * numbers, this string table can be 128KB in size instead of 64KB when we
    111  * only have strings starting on an even address.
    112  *
    113  * 9) When present this is a set of prenormalized strings from section 8. This
    114  * table contains normalized strings with the dashes and spaces stripped out,
    115  * and all strings lowercased. In the future, the options in section 7 may state
    116  * other types of normalization.
    117  *
    118  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
    119  * has a unique alias among all converters. That same alias can
    120  * be mentioned in other standards on different converters,
    121  * but only one alias per tag can be unique.
    122  *
    123  *
    124  *              Converter Names (Usually in TR22 form)
    125  *           -------------------------------------------.
    126  *     T    /                                          /|
    127  *     a   /                                          / |
    128  *     g  /                                          /  |
    129  *     s /                                          /   |
    130  *      /                                          /    |
    131  *      ------------------------------------------/     |
    132  *    A |                                         |     |
    133  *    l |                                         |     |
    134  *    i |                                         |    /
    135  *    a |                                         |   /
    136  *    s |                                         |  /
    137  *    e |                                         | /
    138  *    s |                                         |/
    139  *      -------------------------------------------
    140  *
    141  *
    142  *
    143  * Here is what it really looks like. It's like swiss cheese.
    144  * There are holes. Some converters aren't recognized by
    145  * a standard, or they are really old converters that the
    146  * standard doesn't recognize anymore.
    147  *
    148  *              Converter Names (Usually in TR22 form)
    149  *           -------------------------------------------.
    150  *     T    /##########################################/|
    151  *     a   /     #            #                       /#
    152  *     g  /  #      ##     ##     ### # ### ### ### #/
    153  *     s / #             #####  ####        ##  ## #/#
    154  *      / ### # # ##  #  #   #          ### # #   #/##
    155  *      ------------------------------------------/# #
    156  *    A |### # # ##  #  #   #          ### # #   #|# #
    157  *    l |# # #    #     #               ## #     #|# #
    158  *    i |# # #    #     #                #       #|#
    159  *    a |#                                       #|#
    160  *    s |                                        #|#
    161  *    e
    162  *    s
    163  *
    164  */
    165 
    166 /**
    167  * Used by the UEnumeration API
    168  */
    169 typedef struct UAliasContext {
    170     uint32_t listOffset;
    171     uint32_t listIdx;
    172 } UAliasContext;
    173 
    174 static const char DATA_NAME[] = "cnvalias";
    175 static const char DATA_TYPE[] = "icu";
    176 
    177 static UDataMemory *gAliasData=NULL;
    178 static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER;
    179 
    180 enum {
    181     tocLengthIndex=0,
    182     converterListIndex=1,
    183     tagListIndex=2,
    184     aliasListIndex=3,
    185     untaggedConvArrayIndex=4,
    186     taggedAliasArrayIndex=5,
    187     taggedAliasListsIndex=6,
    188     tableOptionsIndex=7,
    189     stringTableIndex=8,
    190     normalizedStringTableIndex=9,
    191     offsetsCount,    /* length of the swapper's temporary offsets[] */
    192     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
    193 };
    194 
    195 static const UConverterAliasOptions defaultTableOptions = {
    196     UCNV_IO_UNNORMALIZED,
    197     0 /* containsCnvOptionInfo */
    198 };
    199 static UConverterAlias gMainTable;
    200 
    201 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
    202 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
    203 
    204 static UBool U_CALLCONV
    205 isAcceptable(void * /*context*/,
    206              const char * /*type*/, const char * /*name*/,
    207              const UDataInfo *pInfo) {
    208     return (UBool)(
    209         pInfo->size>=20 &&
    210         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    211         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    212         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    213         pInfo->dataFormat[1]==0x76 &&
    214         pInfo->dataFormat[2]==0x41 &&
    215         pInfo->dataFormat[3]==0x6c &&
    216         pInfo->formatVersion[0]==3);
    217 }
    218 
    219 static UBool U_CALLCONV ucnv_io_cleanup(void)
    220 {
    221     if (gAliasData) {
    222         udata_close(gAliasData);
    223         gAliasData = NULL;
    224     }
    225     gAliasDataInitOnce.reset();
    226 
    227     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
    228 
    229     return TRUE;                   /* Everything was cleaned up */
    230 }
    231 
    232 static void U_CALLCONV initAliasData(UErrorCode &errCode) {
    233     UDataMemory *data;
    234     const uint16_t *table;
    235     const uint32_t *sectionSizes;
    236     uint32_t tableStart;
    237     uint32_t currOffset;
    238 
    239     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
    240 
    241     U_ASSERT(gAliasData == NULL);
    242     data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode);
    243     if(U_FAILURE(errCode)) {
    244         return;
    245     }
    246 
    247     sectionSizes = (const uint32_t *)udata_getMemory(data);
    248     table = (const uint16_t *)sectionSizes;
    249 
    250     tableStart      = sectionSizes[0];
    251     if (tableStart < minTocLength) {
    252         errCode = U_INVALID_FORMAT_ERROR;
    253         udata_close(data);
    254         return;
    255     }
    256     gAliasData = data;
    257 
    258     gMainTable.converterListSize      = sectionSizes[1];
    259     gMainTable.tagListSize            = sectionSizes[2];
    260     gMainTable.aliasListSize          = sectionSizes[3];
    261     gMainTable.untaggedConvArraySize  = sectionSizes[4];
    262     gMainTable.taggedAliasArraySize   = sectionSizes[5];
    263     gMainTable.taggedAliasListsSize   = sectionSizes[6];
    264     gMainTable.optionTableSize        = sectionSizes[7];
    265     gMainTable.stringTableSize        = sectionSizes[8];
    266 
    267     if (tableStart > 8) {
    268         gMainTable.normalizedStringTableSize = sectionSizes[9];
    269     }
    270 
    271     currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
    272     gMainTable.converterList = table + currOffset;
    273 
    274     currOffset += gMainTable.converterListSize;
    275     gMainTable.tagList = table + currOffset;
    276 
    277     currOffset += gMainTable.tagListSize;
    278     gMainTable.aliasList = table + currOffset;
    279 
    280     currOffset += gMainTable.aliasListSize;
    281     gMainTable.untaggedConvArray = table + currOffset;
    282 
    283     currOffset += gMainTable.untaggedConvArraySize;
    284     gMainTable.taggedAliasArray = table + currOffset;
    285 
    286     /* aliasLists is a 1's based array, but it has a padding character */
    287     currOffset += gMainTable.taggedAliasArraySize;
    288     gMainTable.taggedAliasLists = table + currOffset;
    289 
    290     currOffset += gMainTable.taggedAliasListsSize;
    291     if (gMainTable.optionTableSize > 0
    292         && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    293     {
    294         /* Faster table */
    295         gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
    296     }
    297     else {
    298         /* Smaller table, or I can't handle this normalization mode!
    299         Use the original slower table lookup. */
    300         gMainTable.optionTable = &defaultTableOptions;
    301     }
    302 
    303     currOffset += gMainTable.optionTableSize;
    304     gMainTable.stringTable = table + currOffset;
    305 
    306     currOffset += gMainTable.stringTableSize;
    307     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
    308         ? gMainTable.stringTable : (table + currOffset));
    309 }
    310 
    311 
    312 static UBool
    313 haveAliasData(UErrorCode *pErrorCode) {
    314     umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode);
    315     return U_SUCCESS(*pErrorCode);
    316 }
    317 
    318 static inline UBool
    319 isAlias(const char *alias, UErrorCode *pErrorCode) {
    320     if(alias==NULL) {
    321         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    322         return FALSE;
    323     }
    324     return (UBool)(*alias!=0);
    325 }
    326 
    327 static uint32_t getTagNumber(const char *tagname) {
    328     if (gMainTable.tagList) {
    329         uint32_t tagNum;
    330         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
    331             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
    332                 return tagNum;
    333             }
    334         }
    335     }
    336 
    337     return UINT32_MAX;
    338 }
    339 
    340 /* character types relevant for ucnv_compareNames() */
    341 enum {
    342     UIGNORE,
    343     ZERO,
    344     NONZERO,
    345     MINLETTER /* any values from here on are lowercase letter mappings */
    346 };
    347 
    348 /* character types for ASCII 00..7F */
    349 static const uint8_t asciiTypes[128] = {
    350     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    351     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    352     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    353     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    354     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    355     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    356     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    357     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
    358 };
    359 
    360 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE)
    361 
    362 /* character types for EBCDIC 80..FF */
    363 static const uint8_t ebcdicTypes[128] = {
    364     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    365     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    366     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    367     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    368     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    369     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    370     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    371     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
    372 };
    373 
    374 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE)
    375 
    376 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    377 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
    378 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    379 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
    380 #else
    381 #   error U_CHARSET_FAMILY is not valid
    382 #endif
    383 
    384 
    385 /* @see ucnv_compareNames */
    386 U_CAPI char * U_CALLCONV
    387 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    388     char *dstItr = dst;
    389     uint8_t type, nextType;
    390     char c1;
    391     UBool afterDigit = FALSE;
    392 
    393     while ((c1 = *name++) != 0) {
    394         type = GET_ASCII_TYPE(c1);
    395         switch (type) {
    396         case UIGNORE:
    397             afterDigit = FALSE;
    398             continue; /* ignore all but letters and digits */
    399         case ZERO:
    400             if (!afterDigit) {
    401                 nextType = GET_ASCII_TYPE(*name);
    402                 if (nextType == ZERO || nextType == NONZERO) {
    403                     continue; /* ignore leading zero before another digit */
    404                 }
    405             }
    406             break;
    407         case NONZERO:
    408             afterDigit = TRUE;
    409             break;
    410         default:
    411             c1 = (char)type; /* lowercased letter */
    412             afterDigit = FALSE;
    413             break;
    414         }
    415         *dstItr++ = c1;
    416     }
    417     *dstItr = 0;
    418     return dst;
    419 }
    420 
    421 U_CAPI char * U_CALLCONV
    422 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    423     char *dstItr = dst;
    424     uint8_t type, nextType;
    425     char c1;
    426     UBool afterDigit = FALSE;
    427 
    428     while ((c1 = *name++) != 0) {
    429         type = GET_EBCDIC_TYPE(c1);
    430         switch (type) {
    431         case UIGNORE:
    432             afterDigit = FALSE;
    433             continue; /* ignore all but letters and digits */
    434         case ZERO:
    435             if (!afterDigit) {
    436                 nextType = GET_EBCDIC_TYPE(*name);
    437                 if (nextType == ZERO || nextType == NONZERO) {
    438                     continue; /* ignore leading zero before another digit */
    439                 }
    440             }
    441             break;
    442         case NONZERO:
    443             afterDigit = TRUE;
    444             break;
    445         default:
    446             c1 = (char)type; /* lowercased letter */
    447             afterDigit = FALSE;
    448             break;
    449         }
    450         *dstItr++ = c1;
    451     }
    452     *dstItr = 0;
    453     return dst;
    454 }
    455 
    456 /**
    457  * Do a fuzzy compare of two converter/alias names.
    458  * The comparison is case-insensitive, ignores leading zeroes if they are not
    459  * followed by further digits, and ignores all but letters and digits.
    460  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
    461  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
    462  * at http://www.unicode.org/reports/tr22/
    463  *
    464  * This is a symmetrical (commutative) operation; order of arguments
    465  * is insignificant.  This is an important property for sorting the
    466  * list (when the list is preprocessed into binary form) and for
    467  * performing binary searches on it at run time.
    468  *
    469  * @param name1 a converter name or alias, zero-terminated
    470  * @param name2 a converter name or alias, zero-terminated
    471  * @return 0 if the names match, or a negative value if the name1
    472  * lexically precedes name2, or a positive value if the name1
    473  * lexically follows name2.
    474  *
    475  * @see ucnv_io_stripForCompare
    476  */
    477 U_CAPI int U_EXPORT2
    478 ucnv_compareNames(const char *name1, const char *name2) {
    479     int rc;
    480     uint8_t type, nextType;
    481     char c1, c2;
    482     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
    483 
    484     for (;;) {
    485         while ((c1 = *name1++) != 0) {
    486             type = GET_CHAR_TYPE(c1);
    487             switch (type) {
    488             case UIGNORE:
    489                 afterDigit1 = FALSE;
    490                 continue; /* ignore all but letters and digits */
    491             case ZERO:
    492                 if (!afterDigit1) {
    493                     nextType = GET_CHAR_TYPE(*name1);
    494                     if (nextType == ZERO || nextType == NONZERO) {
    495                         continue; /* ignore leading zero before another digit */
    496                     }
    497                 }
    498                 break;
    499             case NONZERO:
    500                 afterDigit1 = TRUE;
    501                 break;
    502             default:
    503                 c1 = (char)type; /* lowercased letter */
    504                 afterDigit1 = FALSE;
    505                 break;
    506             }
    507             break; /* deliver c1 */
    508         }
    509         while ((c2 = *name2++) != 0) {
    510             type = GET_CHAR_TYPE(c2);
    511             switch (type) {
    512             case UIGNORE:
    513                 afterDigit2 = FALSE;
    514                 continue; /* ignore all but letters and digits */
    515             case ZERO:
    516                 if (!afterDigit2) {
    517                     nextType = GET_CHAR_TYPE(*name2);
    518                     if (nextType == ZERO || nextType == NONZERO) {
    519                         continue; /* ignore leading zero before another digit */
    520                     }
    521                 }
    522                 break;
    523             case NONZERO:
    524                 afterDigit2 = TRUE;
    525                 break;
    526             default:
    527                 c2 = (char)type; /* lowercased letter */
    528                 afterDigit2 = FALSE;
    529                 break;
    530             }
    531             break; /* deliver c2 */
    532         }
    533 
    534         /* If we reach the ends of both strings then they match */
    535         if ((c1|c2)==0) {
    536             return 0;
    537         }
    538 
    539         /* Case-insensitive comparison */
    540         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
    541         if (rc != 0) {
    542             return rc;
    543         }
    544     }
    545 }
    546 
    547 /*
    548  * search for an alias
    549  * return the converter number index for gConverterList
    550  */
    551 static inline uint32_t
    552 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    553     uint32_t mid, start, limit;
    554     uint32_t lastMid;
    555     int result;
    556     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
    557     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    558 
    559     if (!isUnnormalized) {
    560         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
    561             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    562             return UINT32_MAX;
    563         }
    564 
    565         /* Lower case and remove ignoreable characters. */
    566         ucnv_io_stripForCompare(strippedName, alias);
    567         alias = strippedName;
    568     }
    569 
    570     /* do a binary search for the alias */
    571     start = 0;
    572     limit = gMainTable.untaggedConvArraySize;
    573     mid = limit;
    574     lastMid = UINT32_MAX;
    575 
    576     for (;;) {
    577         mid = (uint32_t)((start + limit) / 2);
    578         if (lastMid == mid) {   /* Have we moved? */
    579             break;  /* We haven't moved, and it wasn't found. */
    580         }
    581         lastMid = mid;
    582         if (isUnnormalized) {
    583             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
    584         }
    585         else {
    586             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
    587         }
    588 
    589         if (result < 0) {
    590             limit = mid;
    591         } else if (result > 0) {
    592             start = mid;
    593         } else {
    594             /* Since the gencnval tool folds duplicates into one entry,
    595              * this alias in gAliasList is unique, but different standards
    596              * may map an alias to different converters.
    597              */
    598             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
    599                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
    600             }
    601             /* State whether the canonical converter name contains an option.
    602             This information is contained in this list in order to maintain backward & forward compatibility. */
    603             if (containsOption) {
    604                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
    605                 *containsOption = (UBool)((containsCnvOptionInfo
    606                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
    607                     || !containsCnvOptionInfo);
    608             }
    609             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
    610         }
    611     }
    612 
    613     return UINT32_MAX;
    614 }
    615 
    616 /*
    617  * Is this alias in this list?
    618  * alias and listOffset should be non-NULL.
    619  */
    620 static inline UBool
    621 isAliasInList(const char *alias, uint32_t listOffset) {
    622     if (listOffset) {
    623         uint32_t currAlias;
    624         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    625         /* +1 to skip listCount */
    626         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    627         for (currAlias = 0; currAlias < listCount; currAlias++) {
    628             if (currList[currAlias]
    629                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
    630             {
    631                 return TRUE;
    632             }
    633         }
    634     }
    635     return FALSE;
    636 }
    637 
    638 /*
    639  * Search for an standard name of an alias (what is the default name
    640  * that this standard uses?)
    641  * return the listOffset for gTaggedAliasLists. If it's 0,
    642  * the it couldn't be found, but the parameters are valid.
    643  */
    644 static uint32_t
    645 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    646     uint32_t idx;
    647     uint32_t listOffset;
    648     uint32_t convNum;
    649     UErrorCode myErr = U_ZERO_ERROR;
    650     uint32_t tagNum = getTagNumber(standard);
    651 
    652     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    653     convNum = findConverter(alias, NULL, &myErr);
    654     if (myErr != U_ZERO_ERROR) {
    655         *pErrorCode = myErr;
    656     }
    657 
    658     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    659         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    660         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
    661             return listOffset;
    662         }
    663         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    664             /* Uh Oh! They used an ambiguous alias.
    665                We have to search the whole swiss cheese starting
    666                at the highest standard affinity.
    667                This may take a while.
    668             */
    669             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
    670                 listOffset = gMainTable.taggedAliasArray[idx];
    671                 if (listOffset && isAliasInList(alias, listOffset)) {
    672                     uint32_t currTagNum = idx/gMainTable.converterListSize;
    673                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
    674                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
    675                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
    676                         return tempListOffset;
    677                     }
    678                     /* else keep on looking */
    679                     /* We could speed this up by starting on the next row
    680                        because an alias is unique per row, right now.
    681                        This would change if alias versioning appears. */
    682                 }
    683             }
    684             /* The standard doesn't know about the alias */
    685         }
    686         /* else no default name */
    687         return 0;
    688     }
    689     /* else converter or tag not found */
    690 
    691     return UINT32_MAX;
    692 }
    693 
    694 /* Return the canonical name */
    695 static uint32_t
    696 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    697     uint32_t idx;
    698     uint32_t listOffset;
    699     uint32_t convNum;
    700     UErrorCode myErr = U_ZERO_ERROR;
    701     uint32_t tagNum = getTagNumber(standard);
    702 
    703     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    704     convNum = findConverter(alias, NULL, &myErr);
    705     if (myErr != U_ZERO_ERROR) {
    706         *pErrorCode = myErr;
    707     }
    708 
    709     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    710         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    711         if (listOffset && isAliasInList(alias, listOffset)) {
    712             return convNum;
    713         }
    714         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    715             /* Uh Oh! They used an ambiguous alias.
    716                We have to search one slice of the swiss cheese.
    717                We search only in the requested tag, not the whole thing.
    718                This may take a while.
    719             */
    720             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
    721             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
    722             for (idx = convStart; idx < convLimit; idx++) {
    723                 listOffset = gMainTable.taggedAliasArray[idx];
    724                 if (listOffset && isAliasInList(alias, listOffset)) {
    725                     return idx-convStart;
    726                 }
    727             }
    728             /* The standard doesn't know about the alias */
    729         }
    730         /* else no canonical name */
    731     }
    732     /* else converter or tag not found */
    733 
    734     return UINT32_MAX;
    735 }
    736 
    737 U_CAPI const char *
    738 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    739     const char *aliasTmp = alias;
    740     int32_t i = 0;
    741     for (i = 0; i < 2; i++) {
    742         if (i == 1) {
    743             /*
    744              * After the first unsuccess converter lookup, check to see if
    745              * the name begins with 'x-'. If it does, strip it off and try
    746              * again.  This behaviour is similar to how ICU4J does it.
    747              */
    748             if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') {
    749                 aliasTmp = aliasTmp+2;
    750             } else {
    751                 break;
    752             }
    753         }
    754         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
    755             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
    756             if (convNum < gMainTable.converterListSize) {
    757                 return GET_STRING(gMainTable.converterList[convNum]);
    758             }
    759             /* else converter not found */
    760         } else {
    761             break;
    762         }
    763     }
    764 
    765     return NULL;
    766 }
    767 
    768 U_CDECL_BEGIN
    769 
    770 
    771 static int32_t U_CALLCONV
    772 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    773     int32_t value = 0;
    774     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    775     uint32_t listOffset = myContext->listOffset;
    776 
    777     if (listOffset) {
    778         value = gMainTable.taggedAliasLists[listOffset];
    779     }
    780     return value;
    781 }
    782 
    783 static const char * U_CALLCONV
    784 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
    785                             int32_t* resultLength,
    786                             UErrorCode * /*pErrorCode*/)
    787 {
    788     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    789     uint32_t listOffset = myContext->listOffset;
    790 
    791     if (listOffset) {
    792         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    793         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    794 
    795         if (myContext->listIdx < listCount) {
    796             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
    797             if (resultLength) {
    798                 *resultLength = (int32_t)uprv_strlen(myStr);
    799             }
    800             return myStr;
    801         }
    802     }
    803     /* Either we accessed a zero length list, or we enumerated too far. */
    804     if (resultLength) {
    805         *resultLength = 0;
    806     }
    807     return NULL;
    808 }
    809 
    810 static void U_CALLCONV
    811 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    812     ((UAliasContext *)(enumerator->context))->listIdx = 0;
    813 }
    814 
    815 static void U_CALLCONV
    816 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
    817     uprv_free(enumerator->context);
    818     uprv_free(enumerator);
    819 }
    820 
    821 U_CDECL_END
    822 
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a read-only template: ucnv_openStandardNames() copies it into a
 * freshly allocated UEnumeration and then sets that copy's context pointer.
 * The two leading NULLs are the fields left unset here; the remaining
 * entries are the callbacks (the meaning of each slot comes from the
 * UEnumeration declaration, which is not visible in this file section).
 */
static const UEnumeration gEnumAliases = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,      /* frees the context and the enumeration */
    ucnv_io_countStandardAliases,   /* number of aliases in the list */
    uenum_unextDefault,
    ucnv_io_nextStandardAliases,    /* next alias string */
    ucnv_io_resetStandardAliases    /* rewind to the first alias */
};
    833 
    834 U_CAPI UEnumeration * U_EXPORT2
    835 ucnv_openStandardNames(const char *convName,
    836                        const char *standard,
    837                        UErrorCode *pErrorCode)
    838 {
    839     UEnumeration *myEnum = NULL;
    840     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
    841         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    842 
    843         /* When listOffset == 0, we want to acknowledge that the
    844            converter name and standard are okay, but there
    845            is nothing to enumerate. */
    846         if (listOffset < gMainTable.taggedAliasListsSize) {
    847             UAliasContext *myContext;
    848 
    849             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    850             if (myEnum == NULL) {
    851                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    852                 return NULL;
    853             }
    854             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
    855             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    856             if (myContext == NULL) {
    857                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    858                 uprv_free(myEnum);
    859                 return NULL;
    860             }
    861             myContext->listOffset = listOffset;
    862             myContext->listIdx = 0;
    863             myEnum->context = myContext;
    864         }
    865         /* else converter or tag not found */
    866     }
    867     return myEnum;
    868 }
    869 
    870 static uint16_t
    871 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    872     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    873         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    874         if (convNum < gMainTable.converterListSize) {
    875             /* tagListNum - 1 is the ALL tag */
    876             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    877 
    878             if (listOffset) {
    879                 return gMainTable.taggedAliasLists[listOffset];
    880             }
    881             /* else this shouldn't happen. internal program error */
    882         }
    883         /* else converter not found */
    884     }
    885     return 0;
    886 }
    887 
    888 static uint16_t
    889 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    890     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    891         uint32_t currAlias;
    892         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    893         if (convNum < gMainTable.converterListSize) {
    894             /* tagListNum - 1 is the ALL tag */
    895             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    896 
    897             if (listOffset) {
    898                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    899                 /* +1 to skip listCount */
    900                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    901 
    902                 for (currAlias = start; currAlias < listCount; currAlias++) {
    903                     aliases[currAlias] = GET_STRING(currList[currAlias]);
    904                 }
    905             }
    906             /* else this shouldn't happen. internal program error */
    907         }
    908         /* else converter not found */
    909     }
    910     return 0;
    911 }
    912 
    913 static const char *
    914 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
    915     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    916         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    917         if (convNum < gMainTable.converterListSize) {
    918             /* tagListNum - 1 is the ALL tag */
    919             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    920 
    921             if (listOffset) {
    922                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    923                 /* +1 to skip listCount */
    924                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    925 
    926                 if (n < listCount)  {
    927                     return GET_STRING(currList[n]);
    928                 }
    929                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    930             }
    931             /* else this shouldn't happen. internal program error */
    932         }
    933         /* else converter not found */
    934     }
    935     return NULL;
    936 }
    937 
    938 static uint16_t
    939 ucnv_io_countStandards(UErrorCode *pErrorCode) {
    940     if (haveAliasData(pErrorCode)) {
    941         /* Don't include the empty list */
    942         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
    943     }
    944 
    945     return 0;
    946 }
    947 
    948 U_CAPI const char * U_EXPORT2
    949 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    950     if (haveAliasData(pErrorCode)) {
    951         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
    952             return GET_STRING(gMainTable.tagList[n]);
    953         }
    954         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    955     }
    956 
    957     return NULL;
    958 }
    959 
    960 U_CAPI const char * U_EXPORT2
    961 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    962     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    963         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    964 
    965         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
    966             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    967 
    968             /* Get the preferred name from this list */
    969             if (currList[0]) {
    970                 return GET_STRING(currList[0]);
    971             }
    972             /* else someone screwed up the alias table. */
    973             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    974         }
    975     }
    976 
    977     return NULL;
    978 }
    979 
    980 U_CAPI uint16_t U_EXPORT2
    981 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
    982 {
    983     return ucnv_io_countAliases(alias, pErrorCode);
    984 }
    985 
    986 
    987 U_CAPI const char* U_EXPORT2
    988 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
    989 {
    990     return ucnv_io_getAlias(alias, n, pErrorCode);
    991 }
    992 
    993 U_CAPI void U_EXPORT2
    994 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
    995 {
    996     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
    997 }
    998 
    999 U_CAPI uint16_t U_EXPORT2
   1000 ucnv_countStandards(void)
   1001 {
   1002     UErrorCode err = U_ZERO_ERROR;
   1003     return ucnv_io_countStandards(&err);
   1004 }
   1005 
   1006 U_CAPI const char * U_EXPORT2
   1007 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   1008     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   1009         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
   1010 
   1011         if (convNum < gMainTable.converterListSize) {
   1012             return GET_STRING(gMainTable.converterList[convNum]);
   1013         }
   1014     }
   1015 
   1016     return NULL;
   1017 }
   1018 
   1019 U_CDECL_BEGIN
   1020 
   1021 
   1022 static int32_t U_CALLCONV
   1023 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
   1024     return gMainTable.converterListSize;
   1025 }
   1026 
   1027 static const char * U_CALLCONV
   1028 ucnv_io_nextAllConverters(UEnumeration *enumerator,
   1029                             int32_t* resultLength,
   1030                             UErrorCode * /*pErrorCode*/)
   1031 {
   1032     uint16_t *myContext = (uint16_t *)(enumerator->context);
   1033 
   1034     if (*myContext < gMainTable.converterListSize) {
   1035         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
   1036         if (resultLength) {
   1037             *resultLength = (int32_t)uprv_strlen(myStr);
   1038         }
   1039         return myStr;
   1040     }
   1041     /* Either we accessed a zero length list, or we enumerated too far. */
   1042     if (resultLength) {
   1043         *resultLength = 0;
   1044     }
   1045     return NULL;
   1046 }
   1047 
   1048 static void U_CALLCONV
   1049 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   1050     *((uint16_t *)(enumerator->context)) = 0;
   1051 }
   1052 U_CDECL_END
/*
 * Template for the enumeration over all converter names.
 * ucnv_openAllNames() copies it into a freshly allocated UEnumeration and
 * then sets that copy's context pointer to a uint16_t index. The two
 * leading NULLs are the fields left unset here; the remaining entries are
 * the callbacks (the meaning of each slot comes from the UEnumeration
 * declaration, which is not visible in this file section).
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,      /* frees the context and the enumeration */
    ucnv_io_countAllConverters,     /* size of the converter list */
    uenum_unextDefault,
    ucnv_io_nextAllConverters,      /* next converter name */
    ucnv_io_resetAllConverters      /* rewind to the first converter */
};
   1062 
   1063 U_CAPI UEnumeration * U_EXPORT2
   1064 ucnv_openAllNames(UErrorCode *pErrorCode) {
   1065     UEnumeration *myEnum = NULL;
   1066     if (haveAliasData(pErrorCode)) {
   1067         uint16_t *myContext;
   1068 
   1069         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
   1070         if (myEnum == NULL) {
   1071             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1072             return NULL;
   1073         }
   1074         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
   1075         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
   1076         if (myContext == NULL) {
   1077             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1078             uprv_free(myEnum);
   1079             return NULL;
   1080         }
   1081         *myContext = 0;
   1082         myEnum->context = myContext;
   1083     }
   1084     return myEnum;
   1085 }
   1086 
   1087 U_CAPI uint16_t
   1088 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
   1089     if (haveAliasData(pErrorCode)) {
   1090         return (uint16_t)gMainTable.converterListSize;
   1091     }
   1092     return 0;
   1093 }
   1094 
   1095 /* alias table swapping ----------------------------------------------------- */
   1096 
   1097 U_CDECL_BEGIN
   1098 
/*
 * Signature of the name-stripping function used while re-sorting aliases.
 * io_compareRows() calls it with a scratch buffer (dst) and an alias string
 * (name) and compares the returned strings; ucnv_swapAliases() selects the
 * ASCII or EBCDIC implementation according to the output charset family.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
   1100 U_CDECL_END
   1101 
   1102 
   1103 /*
   1104  * row of a temporary array
   1105  *
   1106  * gets platform-endian charset string indexes and sorting indexes;
   1107  * after sorting this array by strings, the actual arrays are permutated
   1108  * according to the sorting indexes
   1109  */
   1110 typedef struct TempRow {
   1111     uint16_t strIndex, sortIndex;
   1112 } TempRow;
   1113 
/*
 * Sorting context handed to io_compareRows() through uprv_sortArray().
 */
typedef struct TempAliasTable {
    const char *chars;                  /* base of the string table; a row's string is at chars + 2*strIndex */
    TempRow *rows;                      /* rows being sorted (stack array or heap block) */
    uint16_t *resort;                   /* scratch array used when permuting in place */
    StripForCompareFn *stripForCompare; /* ASCII or EBCDIC name stripper, per output charset */
} TempAliasTable;
   1120 
   1121 enum {
   1122     STACK_ROW_CAPACITY=500
   1123 };
   1124 
   1125 static int32_t U_CALLCONV
   1126 io_compareRows(const void *context, const void *left, const void *right) {
   1127     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
   1128          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1129 
   1130     TempAliasTable *tempTable=(TempAliasTable *)context;
   1131     const char *chars=tempTable->chars;
   1132 
   1133     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
   1134                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
   1135 }
   1136 
   1137 U_CAPI int32_t U_EXPORT2
   1138 ucnv_swapAliases(const UDataSwapper *ds,
   1139                  const void *inData, int32_t length, void *outData,
   1140                  UErrorCode *pErrorCode) {
   1141     const UDataInfo *pInfo;
   1142     int32_t headerSize;
   1143 
   1144     const uint16_t *inTable;
   1145     const uint32_t *inSectionSizes;
   1146     uint32_t toc[offsetsCount];
   1147     uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
   1148     uint32_t i, count, tocLength, topOffset;
   1149 
   1150     TempRow rows[STACK_ROW_CAPACITY];
   1151     uint16_t resort[STACK_ROW_CAPACITY];
   1152     TempAliasTable tempTable;
   1153 
   1154     /* udata_swapDataHeader checks the arguments */
   1155     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
   1156     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
   1157         return 0;
   1158     }
   1159 
   1160     /* check data format and format version */
   1161     pInfo=(const UDataInfo *)((const char *)inData+4);
   1162     if(!(
   1163         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
   1164         pInfo->dataFormat[1]==0x76 &&
   1165         pInfo->dataFormat[2]==0x41 &&
   1166         pInfo->dataFormat[3]==0x6c &&
   1167         pInfo->formatVersion[0]==3
   1168     )) {
   1169         udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
   1170                          pInfo->dataFormat[0], pInfo->dataFormat[1],
   1171                          pInfo->dataFormat[2], pInfo->dataFormat[3],
   1172                          pInfo->formatVersion[0]);
   1173         *pErrorCode=U_UNSUPPORTED_ERROR;
   1174         return 0;
   1175     }
   1176 
   1177     /* an alias table must contain at least the table of contents array */
   1178     if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
   1179         udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1180                          length-headerSize);
   1181         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1182         return 0;
   1183     }
   1184 
   1185     inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
   1186     inTable=(const uint16_t *)inSectionSizes;
   1187     uprv_memset(toc, 0, sizeof(toc));
   1188     toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
   1189     if(tocLength<minTocLength || offsetsCount<=tocLength) {
   1190         udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
   1191         *pErrorCode=U_INVALID_FORMAT_ERROR;
   1192         return 0;
   1193     }
   1194 
   1195     /* read the known part of the table of contents */
   1196     for(i=converterListIndex; i<=tocLength; ++i) {
   1197         toc[i]=ds->readUInt32(inSectionSizes[i]);
   1198     }
   1199 
   1200     /* compute offsets */
   1201     uprv_memset(offsets, 0, sizeof(offsets));
   1202     offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
   1203     for(i=tagListIndex; i<=tocLength; ++i) {
   1204         offsets[i]=offsets[i-1]+toc[i-1];
   1205     }
   1206 
   1207     /* compute the overall size of the after-header data, in numbers of 16-bit units */
   1208     topOffset=offsets[i-1]+toc[i-1];
   1209 
   1210     if(length>=0) {
   1211         uint16_t *outTable;
   1212         const uint16_t *p, *p2;
   1213         uint16_t *q, *q2;
   1214         uint16_t oldIndex;
   1215 
   1216         if((length-headerSize)<(2*(int32_t)topOffset)) {
   1217             udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
   1218                              length-headerSize);
   1219             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   1220             return 0;
   1221         }
   1222 
   1223         outTable=(uint16_t *)((char *)outData+headerSize);
   1224 
   1225         /* swap the entire table of contents */
   1226         ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);
   1227 
   1228         /* swap unormalized strings & normalized strings */
   1229         ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
   1230                              outTable+offsets[stringTableIndex], pErrorCode);
   1231         if(U_FAILURE(*pErrorCode)) {
   1232             udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
   1233             return 0;
   1234         }
   1235 
   1236         if(ds->inCharset==ds->outCharset) {
   1237             /* no need to sort, just swap all 16-bit values together */
   1238             ds->swapArray16(ds,
   1239                             inTable+offsets[converterListIndex],
   1240                             2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
   1241                             outTable+offsets[converterListIndex],
   1242                             pErrorCode);
   1243         } else {
   1244             /* allocate the temporary table for sorting */
   1245             count=toc[aliasListIndex];
   1246 
   1247             tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */
   1248 
   1249             if(count<=STACK_ROW_CAPACITY) {
   1250                 tempTable.rows=rows;
   1251                 tempTable.resort=resort;
   1252             } else {
   1253                 tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
   1254                 if(tempTable.rows==NULL) {
   1255                     udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
   1256                                      count);
   1257                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1258                     return 0;
   1259                 }
   1260                 tempTable.resort=(uint16_t *)(tempTable.rows+count);
   1261             }
   1262 
   1263             if(ds->outCharset==U_ASCII_FAMILY) {
   1264                 tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
   1265             } else /* U_EBCDIC_FAMILY */ {
   1266                 tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
   1267             }
   1268 
   1269             /*
   1270              * Sort unique aliases+mapped names.
   1271              *
   1272              * We need to sort the list again by outCharset strings because they
   1273              * sort differently for different charset families.
   1274              * First we set up a temporary table with the string indexes and
   1275              * sorting indexes and sort that.
   1276              * Then we permutate and copy/swap the actual values.
   1277              */
   1278             p=inTable+offsets[aliasListIndex];
   1279             q=outTable+offsets[aliasListIndex];
   1280 
   1281             p2=inTable+offsets[untaggedConvArrayIndex];
   1282             q2=outTable+offsets[untaggedConvArrayIndex];
   1283 
   1284             for(i=0; i<count; ++i) {
   1285                 tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
   1286                 tempTable.rows[i].sortIndex=(uint16_t)i;
   1287             }
   1288 
   1289             uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
   1290                            io_compareRows, &tempTable,
   1291                            FALSE, pErrorCode);
   1292 
   1293             if(U_SUCCESS(*pErrorCode)) {
   1294                 /* copy/swap/permutate items */
   1295                 if(p!=q) {
   1296                     for(i=0; i<count; ++i) {
   1297                         oldIndex=tempTable.rows[i].sortIndex;
   1298                         ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
   1299                         ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
   1300                     }
   1301                 } else {
   1302                     /*
   1303                      * If we swap in-place, then the permutation must use another
   1304                      * temporary array (tempTable.resort)
   1305                      * before the results are copied to the outBundle.
   1306                      */
   1307                     uint16_t *r=tempTable.resort;
   1308 
   1309                     for(i=0; i<count; ++i) {
   1310                         oldIndex=tempTable.rows[i].sortIndex;
   1311                         ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
   1312                     }
   1313                     uprv_memcpy(q, r, 2*(size_t)count);
   1314 
   1315                     for(i=0; i<count; ++i) {
   1316                         oldIndex=tempTable.rows[i].sortIndex;
   1317                         ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
   1318                     }
   1319                     uprv_memcpy(q2, r, 2*(size_t)count);
   1320                 }
   1321             }
   1322 
   1323             if(tempTable.rows!=rows) {
   1324                 uprv_free(tempTable.rows);
   1325             }
   1326 
   1327             if(U_FAILURE(*pErrorCode)) {
   1328                 udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
   1329                                  count);
   1330                 return 0;
   1331             }
   1332 
   1333             /* swap remaining 16-bit values */
   1334             ds->swapArray16(ds,
   1335                             inTable+offsets[converterListIndex],
   1336                             2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
   1337                             outTable+offsets[converterListIndex],
   1338                             pErrorCode);
   1339             ds->swapArray16(ds,
   1340                             inTable+offsets[taggedAliasArrayIndex],
   1341                             2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
   1342                             outTable+offsets[taggedAliasArrayIndex],
   1343                             pErrorCode);
   1344         }
   1345     }
   1346 
   1347     return headerSize+2*(int32_t)topOffset;
   1348 }
   1349 
   1350 #endif
   1351 
   1352 
   1353 /*
   1354  * Hey, Emacs, please set the following:
   1355  *
   1356  * Local Variables:
   1357  * indent-tabs-mode: nil
   1358  * End:
   1359  *
   1360  */
   1361