Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2012, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 ******************************************************************************
      8 *
      9 *
     10 *  ucnv_io.cpp:
     11 *  initializes global variables and defines functions pertaining to converter
     12 *  name resolution aspect of the conversion code.
     13 *
     14 *   new implementation:
     15 *
     16 *   created on: 1999nov22
     17 *   created by: Markus W. Scherer
     18 *
     19 *   Use the binary cnvalias.icu (created from convrtrs.txt) to work
     20 *   with aliases for converter names.
     21 *
     22 *   Date        Name        Description
     23 *   11/22/1999  markus      Created
     24 *   06/28/2002  grhoten     Major overhaul of the converter alias design.
     25 *                           Now an alias can map to different converters
     26 *                           depending on the specified standard.
     27 *******************************************************************************
     28 */
     29 
     30 #include "unicode/utypes.h"
     31 
     32 #if !UCONFIG_NO_CONVERSION
     33 
     34 #include "unicode/ucnv.h"
     35 #include "unicode/udata.h"
     36 
     37 #include "umutex.h"
     38 #include "uarrsort.h"
     39 #include "udataswp.h"
     40 #include "cstring.h"
     41 #include "cmemory.h"
     42 #include "ucnv_io.h"
     43 #include "uenumimp.h"
     44 #include "ucln_cmn.h"
     45 
     46 /* Format of cnvalias.icu -----------------------------------------------------
     47  *
     48  * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt.
     49  * This binary form contains several tables. All indexes are to uint16_t
     50  * units, and not to the bytes (uint8_t units). Addressing everything on
     51  * 16-bit boundaries allows us to store more information with small index
     52  * numbers, which are also 16-bit in size. The majority of the table (except
     53  * the string table) are 16-bit numbers.
     54  *
     55  * First there is the size of the Table of Contents (TOC). The TOC
     56  * entries contain the size of each section. In order to find the offset
     57  * of a section you just need to sum up the sizes of the previous sections.
     58  * The TOC length and entries are an array of uint32_t values.
     59  * The first section after the TOC starts immediately after the TOC.
     60  *
     61  * 1) This section contains a list of converters. This list contains indexes
     62  * into the string table for the converter name. The index of this list is
     63  * also used by other sections, which are mentioned later on.
     64  * This list is not sorted.
     65  *
     66  * 2) This section contains a list of tags. This list contains indexes
     67  * into the string table for the tag name. The index of this list is
     68  * also used by other sections, which are mentioned later on.
     69  * This list is in priority order of standards.
     70  *
     71  * 3) This section contains a list of sorted unique aliases. This
     72  * list contains indexes into the string table for the alias name. The
     73  * index of this list is also used by other sections, like the 4th section.
     74  * The index for the 3rd and 4th section is used to get the
     75  * alias -> converter name mapping. Section 3 and 4 form a two column table.
     76  * Some of the most significant bits of each index may contain other
     77  * information (see findConverter for details).
     78  *
     79  * 4) This section contains a list of mapped converter names. Consider this
     80  * as a table that maps the 3rd section to the 1st section. This list contains
     81  * indexes into the 1st section. The index of this list is the same index in
     82  * the 3rd section. There is also some extra information in the high bits of
     83  * each converter index in this table. Currently it's only used to say that
     84  * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK
     85  * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is
     86  * the predigested form of the 5th section so that an alias lookup can be fast.
     87  *
     88  * 5) This section contains a 2D array with indexes to the 6th section. This
     89  * section is the full form of all alias mappings. The column index is the
     90  * index into the converter list (column header). The row index is the index
     91  * to the tag list (row header). This 2D array is the top part of a 3D array. The
     92  * third dimension is in the 6th section.
     93  *
     94  * 6) This is a blob of variable-length arrays. Each array starts with a size,
     95  * and is followed by indexes to alias names in the string table. This is
     96  * the third dimension to the section 5. No other section should be referencing
     97  * this section.
     98  *
     99  * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its
    100  * presence indicates that a section 9 exists. UConverterAliasOptions specifies
    101  * what type of string normalization is used among other potential things in the
    102  * future.
    103  *
    104  * 8) This is the string table. All strings are indexed on an even address.
    105  * There are two reasons for this. First many chip architectures locate strings
    106  * faster on even address boundaries. Second, since all indexes are 16-bit
    107  * numbers, this string table can be 128KB in size instead of 64KB when we
    108  * only have strings starting on an even address.
    109  *
    110  * 9) When present this is a set of prenormalized strings from section 8. This
    111  * table contains normalized strings with the dashes and spaces stripped out,
    112  * and all strings lowercased. In the future, the options in section 7 may state
    113  * other types of normalization.
    114  *
    115  * Here is the concept of section 5 and 6. It's a 3D cube. Each tag
    116  * has a unique alias among all converters. That same alias can
    117  * be mentioned in other standards on different converters,
    118  * but only one alias per tag can be unique.
    119  *
    120  *
    121  *              Converter Names (Usually in TR22 form)
    122  *           -------------------------------------------.
    123  *     T    /                                          /|
    124  *     a   /                                          / |
    125  *     g  /                                          /  |
    126  *     s /                                          /   |
    127  *      /                                          /    |
    128  *      ------------------------------------------/     |
    129  *    A |                                         |     |
    130  *    l |                                         |     |
    131  *    i |                                         |    /
    132  *    a |                                         |   /
    133  *    s |                                         |  /
    134  *    e |                                         | /
    135  *    s |                                         |/
    136  *      -------------------------------------------
    137  *
    138  *
    139  *
    140  * Here is what it really looks like. It's like swiss cheese.
    141  * There are holes. Some converters aren't recognized by
    142  * a standard, or they are really old converters that the
    143  * standard doesn't recognize anymore.
    144  *
    145  *              Converter Names (Usually in TR22 form)
    146  *           -------------------------------------------.
    147  *     T    /##########################################/|
    148  *     a   /     #            #                       /#
    149  *     g  /  #      ##     ##     ### # ### ### ### #/
    150  *     s / #             #####  ####        ##  ## #/#
    151  *      / ### # # ##  #  #   #          ### # #   #/##
    152  *      ------------------------------------------/# #
    153  *    A |### # # ##  #  #   #          ### # #   #|# #
    154  *    l |# # #    #     #               ## #     #|# #
    155  *    i |# # #    #     #                #       #|#
    156  *    a |#                                       #|#
    157  *    s |                                        #|#
    158  *    e
    159  *    s
    160  *
    161  */
    162 
    163 /**
    164  * Used by the UEnumeration API
    165  */
    166 typedef struct UAliasContext {
    167     uint32_t listOffset;
    168     uint32_t listIdx;
    169 } UAliasContext;
    170 
    171 static const char DATA_NAME[] = "cnvalias";
    172 static const char DATA_TYPE[] = "icu";
    173 
    174 static UDataMemory *gAliasData=NULL;
    175 
    176 enum {
    177     tocLengthIndex=0,
    178     converterListIndex=1,
    179     tagListIndex=2,
    180     aliasListIndex=3,
    181     untaggedConvArrayIndex=4,
    182     taggedAliasArrayIndex=5,
    183     taggedAliasListsIndex=6,
    184     tableOptionsIndex=7,
    185     stringTableIndex=8,
    186     normalizedStringTableIndex=9,
    187     offsetsCount,    /* length of the swapper's temporary offsets[] */
    188     minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */
    189 };
    190 
    191 static const UConverterAliasOptions defaultTableOptions = {
    192     UCNV_IO_UNNORMALIZED,
    193     0 /* containsCnvOptionInfo */
    194 };
    195 static UConverterAlias gMainTable;
    196 
    197 #define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx))
    198 #define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx))
    199 
    200 static UBool U_CALLCONV
    201 isAcceptable(void * /*context*/,
    202              const char * /*type*/, const char * /*name*/,
    203              const UDataInfo *pInfo) {
    204     return (UBool)(
    205         pInfo->size>=20 &&
    206         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
    207         pInfo->charsetFamily==U_CHARSET_FAMILY &&
    208         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
    209         pInfo->dataFormat[1]==0x76 &&
    210         pInfo->dataFormat[2]==0x41 &&
    211         pInfo->dataFormat[3]==0x6c &&
    212         pInfo->formatVersion[0]==3);
    213 }
    214 
    215 static UBool U_CALLCONV ucnv_io_cleanup(void)
    216 {
    217     if (gAliasData) {
    218         udata_close(gAliasData);
    219         gAliasData = NULL;
    220     }
    221 
    222     uprv_memset(&gMainTable, 0, sizeof(gMainTable));
    223 
    224     return TRUE;                   /* Everything was cleaned up */
    225 }
    226 
    227 static UBool
    228 haveAliasData(UErrorCode *pErrorCode) {
    229     int needInit;
    230 
    231     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    232         return FALSE;
    233     }
    234 
    235     UMTX_CHECK(NULL, (gAliasData==NULL), needInit);
    236 
    237     /* load converter alias data from file if necessary */
    238     if (needInit) {
    239         UDataMemory *data;
    240         const uint16_t *table;
    241         const uint32_t *sectionSizes;
    242         uint32_t tableStart;
    243         uint32_t currOffset;
    244 
    245         data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    246         if(U_FAILURE(*pErrorCode)) {
    247             return FALSE;
    248         }
    249 
    250         sectionSizes = (const uint32_t *)udata_getMemory(data);
    251         table = (const uint16_t *)sectionSizes;
    252 
    253         tableStart      = sectionSizes[0];
    254         if (tableStart < minTocLength) {
    255             *pErrorCode = U_INVALID_FORMAT_ERROR;
    256             udata_close(data);
    257             return FALSE;
    258         }
    259 
    260         umtx_lock(NULL);
    261         if(gAliasData==NULL) {
    262             gMainTable.converterListSize      = sectionSizes[1];
    263             gMainTable.tagListSize            = sectionSizes[2];
    264             gMainTable.aliasListSize          = sectionSizes[3];
    265             gMainTable.untaggedConvArraySize  = sectionSizes[4];
    266             gMainTable.taggedAliasArraySize   = sectionSizes[5];
    267             gMainTable.taggedAliasListsSize   = sectionSizes[6];
    268             gMainTable.optionTableSize        = sectionSizes[7];
    269             gMainTable.stringTableSize        = sectionSizes[8];
    270 
    271             if (tableStart > 8) {
    272                 gMainTable.normalizedStringTableSize = sectionSizes[9];
    273             }
    274 
    275             currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
    276             gMainTable.converterList = table + currOffset;
    277 
    278             currOffset += gMainTable.converterListSize;
    279             gMainTable.tagList = table + currOffset;
    280 
    281             currOffset += gMainTable.tagListSize;
    282             gMainTable.aliasList = table + currOffset;
    283 
    284             currOffset += gMainTable.aliasListSize;
    285             gMainTable.untaggedConvArray = table + currOffset;
    286 
    287             currOffset += gMainTable.untaggedConvArraySize;
    288             gMainTable.taggedAliasArray = table + currOffset;
    289 
    290             /* aliasLists is a 1's based array, but it has a padding character */
    291             currOffset += gMainTable.taggedAliasArraySize;
    292             gMainTable.taggedAliasLists = table + currOffset;
    293 
    294             currOffset += gMainTable.taggedAliasListsSize;
    295             if (gMainTable.optionTableSize > 0
    296                 && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT)
    297             {
    298                 /* Faster table */
    299                 gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset);
    300             }
    301             else {
    302                 /* Smaller table, or I can't handle this normalization mode!
    303                 Use the original slower table lookup. */
    304                 gMainTable.optionTable = &defaultTableOptions;
    305             }
    306 
    307             currOffset += gMainTable.optionTableSize;
    308             gMainTable.stringTable = table + currOffset;
    309 
    310             currOffset += gMainTable.stringTableSize;
    311             gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
    312                 ? gMainTable.stringTable : (table + currOffset));
    313 
    314             ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
    315 
    316             gAliasData = data;
    317             data=NULL;
    318         }
    319         umtx_unlock(NULL);
    320 
    321         /* if a different thread set it first, then close the extra data */
    322         if(data!=NULL) {
    323             udata_close(data); /* NULL if it was set correctly */
    324         }
    325     }
    326 
    327     return TRUE;
    328 }
    329 
    330 static inline UBool
    331 isAlias(const char *alias, UErrorCode *pErrorCode) {
    332     if(alias==NULL) {
    333         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    334         return FALSE;
    335     }
    336     return (UBool)(*alias!=0);
    337 }
    338 
    339 static uint32_t getTagNumber(const char *tagname) {
    340     if (gMainTable.tagList) {
    341         uint32_t tagNum;
    342         for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) {
    343             if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) {
    344                 return tagNum;
    345             }
    346         }
    347     }
    348 
    349     return UINT32_MAX;
    350 }
    351 
    352 /* character types relevant for ucnv_compareNames() */
    353 enum {
    354     IGNORE,
    355     ZERO,
    356     NONZERO,
    357     MINLETTER /* any values from here on are lowercase letter mappings */
    358 };
    359 
    360 /* character types for ASCII 00..7F */
    361 static const uint8_t asciiTypes[128] = {
    362     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    363     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    364     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    365     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0,
    366     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    367     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0,
    368     0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    369     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0
    370 };
    371 
    372 #define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)IGNORE)
    373 
    374 /* character types for EBCDIC 80..FF */
    375 static const uint8_t ebcdicTypes[128] = {
    376     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    377     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    378     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    379     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    380     0,    0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0,
    381     0,    0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0,
    382     0,    0,    0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0,
    383     ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0
    384 };
    385 
    386 #define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)IGNORE)
    387 
    388 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
    389 #   define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c)
    390 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    391 #   define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c)
    392 #else
    393 #   error U_CHARSET_FAMILY is not valid
    394 #endif
    395 
    396 /* @see ucnv_compareNames */
    397 U_CFUNC char * U_EXPORT2
    398 ucnv_io_stripASCIIForCompare(char *dst, const char *name) {
    399     char *dstItr = dst;
    400     uint8_t type, nextType;
    401     char c1;
    402     UBool afterDigit = FALSE;
    403 
    404     while ((c1 = *name++) != 0) {
    405         type = GET_ASCII_TYPE(c1);
    406         switch (type) {
    407         case IGNORE:
    408             afterDigit = FALSE;
    409             continue; /* ignore all but letters and digits */
    410         case ZERO:
    411             if (!afterDigit) {
    412                 nextType = GET_ASCII_TYPE(*name);
    413                 if (nextType == ZERO || nextType == NONZERO) {
    414                     continue; /* ignore leading zero before another digit */
    415                 }
    416             }
    417             break;
    418         case NONZERO:
    419             afterDigit = TRUE;
    420             break;
    421         default:
    422             c1 = (char)type; /* lowercased letter */
    423             afterDigit = FALSE;
    424             break;
    425         }
    426         *dstItr++ = c1;
    427     }
    428     *dstItr = 0;
    429     return dst;
    430 }
    431 
    432 U_CFUNC char * U_EXPORT2
    433 ucnv_io_stripEBCDICForCompare(char *dst, const char *name) {
    434     char *dstItr = dst;
    435     uint8_t type, nextType;
    436     char c1;
    437     UBool afterDigit = FALSE;
    438 
    439     while ((c1 = *name++) != 0) {
    440         type = GET_EBCDIC_TYPE(c1);
    441         switch (type) {
    442         case IGNORE:
    443             afterDigit = FALSE;
    444             continue; /* ignore all but letters and digits */
    445         case ZERO:
    446             if (!afterDigit) {
    447                 nextType = GET_EBCDIC_TYPE(*name);
    448                 if (nextType == ZERO || nextType == NONZERO) {
    449                     continue; /* ignore leading zero before another digit */
    450                 }
    451             }
    452             break;
    453         case NONZERO:
    454             afterDigit = TRUE;
    455             break;
    456         default:
    457             c1 = (char)type; /* lowercased letter */
    458             afterDigit = FALSE;
    459             break;
    460         }
    461         *dstItr++ = c1;
    462     }
    463     *dstItr = 0;
    464     return dst;
    465 }
    466 
    467 /**
    468  * Do a fuzzy compare of two converter/alias names.
    469  * The comparison is case-insensitive, ignores leading zeroes if they are not
    470  * followed by further digits, and ignores all but letters and digits.
    471  * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
    472  * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
    473  * at http://www.unicode.org/reports/tr22/
    474  *
    475  * This is a symmetrical (commutative) operation; order of arguments
    476  * is insignificant.  This is an important property for sorting the
    477  * list (when the list is preprocessed into binary form) and for
    478  * performing binary searches on it at run time.
    479  *
    480  * @param name1 a converter name or alias, zero-terminated
    481  * @param name2 a converter name or alias, zero-terminated
    482  * @return 0 if the names match, or a negative value if the name1
    483  * lexically precedes name2, or a positive value if the name1
    484  * lexically follows name2.
    485  *
    486  * @see ucnv_io_stripForCompare
    487  */
    488 U_CAPI int U_EXPORT2
    489 ucnv_compareNames(const char *name1, const char *name2) {
    490     int rc;
    491     uint8_t type, nextType;
    492     char c1, c2;
    493     UBool afterDigit1 = FALSE, afterDigit2 = FALSE;
    494 
    495     for (;;) {
    496         while ((c1 = *name1++) != 0) {
    497             type = GET_CHAR_TYPE(c1);
    498             switch (type) {
    499             case IGNORE:
    500                 afterDigit1 = FALSE;
    501                 continue; /* ignore all but letters and digits */
    502             case ZERO:
    503                 if (!afterDigit1) {
    504                     nextType = GET_CHAR_TYPE(*name1);
    505                     if (nextType == ZERO || nextType == NONZERO) {
    506                         continue; /* ignore leading zero before another digit */
    507                     }
    508                 }
    509                 break;
    510             case NONZERO:
    511                 afterDigit1 = TRUE;
    512                 break;
    513             default:
    514                 c1 = (char)type; /* lowercased letter */
    515                 afterDigit1 = FALSE;
    516                 break;
    517             }
    518             break; /* deliver c1 */
    519         }
    520         while ((c2 = *name2++) != 0) {
    521             type = GET_CHAR_TYPE(c2);
    522             switch (type) {
    523             case IGNORE:
    524                 afterDigit2 = FALSE;
    525                 continue; /* ignore all but letters and digits */
    526             case ZERO:
    527                 if (!afterDigit2) {
    528                     nextType = GET_CHAR_TYPE(*name2);
    529                     if (nextType == ZERO || nextType == NONZERO) {
    530                         continue; /* ignore leading zero before another digit */
    531                     }
    532                 }
    533                 break;
    534             case NONZERO:
    535                 afterDigit2 = TRUE;
    536                 break;
    537             default:
    538                 c2 = (char)type; /* lowercased letter */
    539                 afterDigit2 = FALSE;
    540                 break;
    541             }
    542             break; /* deliver c2 */
    543         }
    544 
    545         /* If we reach the ends of both strings then they match */
    546         if ((c1|c2)==0) {
    547             return 0;
    548         }
    549 
    550         /* Case-insensitive comparison */
    551         rc = (int)(unsigned char)c1 - (int)(unsigned char)c2;
    552         if (rc != 0) {
    553             return rc;
    554         }
    555     }
    556 }
    557 
    558 /*
    559  * search for an alias
    560  * return the converter number index for gConverterList
    561  */
    562 static inline uint32_t
    563 findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    564     uint32_t mid, start, limit;
    565     uint32_t lastMid;
    566     int result;
    567     int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED);
    568     char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH];
    569 
    570     if (!isUnnormalized) {
    571         if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) {
    572             *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    573             return UINT32_MAX;
    574         }
    575 
    576         /* Lower case and remove ignoreable characters. */
    577         ucnv_io_stripForCompare(strippedName, alias);
    578         alias = strippedName;
    579     }
    580 
    581     /* do a binary search for the alias */
    582     start = 0;
    583     limit = gMainTable.untaggedConvArraySize;
    584     mid = limit;
    585     lastMid = UINT32_MAX;
    586 
    587     for (;;) {
    588         mid = (uint32_t)((start + limit) / 2);
    589         if (lastMid == mid) {   /* Have we moved? */
    590             break;  /* We haven't moved, and it wasn't found. */
    591         }
    592         lastMid = mid;
    593         if (isUnnormalized) {
    594             result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid]));
    595         }
    596         else {
    597             result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid]));
    598         }
    599 
    600         if (result < 0) {
    601             limit = mid;
    602         } else if (result > 0) {
    603             start = mid;
    604         } else {
    605             /* Since the gencnval tool folds duplicates into one entry,
    606              * this alias in gAliasList is unique, but different standards
    607              * may map an alias to different converters.
    608              */
    609             if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) {
    610                 *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING;
    611             }
    612             /* State whether the canonical converter name contains an option.
    613             This information is contained in this list in order to maintain backward & forward compatibility. */
    614             if (containsOption) {
    615                 UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo;
    616                 *containsOption = (UBool)((containsCnvOptionInfo
    617                     && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0))
    618                     || !containsCnvOptionInfo);
    619             }
    620             return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK;
    621         }
    622     }
    623 
    624     return UINT32_MAX;
    625 }
    626 
    627 /*
    628  * Is this alias in this list?
    629  * alias and listOffset should be non-NULL.
    630  */
    631 static inline UBool
    632 isAliasInList(const char *alias, uint32_t listOffset) {
    633     if (listOffset) {
    634         uint32_t currAlias;
    635         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    636         /* +1 to skip listCount */
    637         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    638         for (currAlias = 0; currAlias < listCount; currAlias++) {
    639             if (currList[currAlias]
    640                 && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0)
    641             {
    642                 return TRUE;
    643             }
    644         }
    645     }
    646     return FALSE;
    647 }
    648 
    649 /*
    650  * Search for an standard name of an alias (what is the default name
    651  * that this standard uses?)
    652  * return the listOffset for gTaggedAliasLists. If it's 0,
    653  * the it couldn't be found, but the parameters are valid.
    654  */
    655 static uint32_t
    656 findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    657     uint32_t idx;
    658     uint32_t listOffset;
    659     uint32_t convNum;
    660     UErrorCode myErr = U_ZERO_ERROR;
    661     uint32_t tagNum = getTagNumber(standard);
    662 
    663     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    664     convNum = findConverter(alias, NULL, &myErr);
    665     if (myErr != U_ZERO_ERROR) {
    666         *pErrorCode = myErr;
    667     }
    668 
    669     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    670         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    671         if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) {
    672             return listOffset;
    673         }
    674         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    675             /* Uh Oh! They used an ambiguous alias.
    676                We have to search the whole swiss cheese starting
    677                at the highest standard affinity.
    678                This may take a while.
    679             */
    680             for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) {
    681                 listOffset = gMainTable.taggedAliasArray[idx];
    682                 if (listOffset && isAliasInList(alias, listOffset)) {
    683                     uint32_t currTagNum = idx/gMainTable.converterListSize;
    684                     uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize);
    685                     uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum];
    686                     if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) {
    687                         return tempListOffset;
    688                     }
    689                     /* else keep on looking */
    690                     /* We could speed this up by starting on the next row
    691                        because an alias is unique per row, right now.
    692                        This would change if alias versioning appears. */
    693                 }
    694             }
    695             /* The standard doesn't know about the alias */
    696         }
    697         /* else no default name */
    698         return 0;
    699     }
    700     /* else converter or tag not found */
    701 
    702     return UINT32_MAX;
    703 }
    704 
    705 /* Return the canonical name */
    706 static uint32_t
    707 findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    708     uint32_t idx;
    709     uint32_t listOffset;
    710     uint32_t convNum;
    711     UErrorCode myErr = U_ZERO_ERROR;
    712     uint32_t tagNum = getTagNumber(standard);
    713 
    714     /* Make a quick guess. Hopefully they used a TR22 canonical alias. */
    715     convNum = findConverter(alias, NULL, &myErr);
    716     if (myErr != U_ZERO_ERROR) {
    717         *pErrorCode = myErr;
    718     }
    719 
    720     if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) {
    721         listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum];
    722         if (listOffset && isAliasInList(alias, listOffset)) {
    723             return convNum;
    724         }
    725         if (myErr == U_AMBIGUOUS_ALIAS_WARNING) {
    726             /* Uh Oh! They used an ambiguous alias.
    727                We have to search one slice of the swiss cheese.
    728                We search only in the requested tag, not the whole thing.
    729                This may take a while.
    730             */
    731             uint32_t convStart = (tagNum)*gMainTable.converterListSize;
    732             uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize;
    733             for (idx = convStart; idx < convLimit; idx++) {
    734                 listOffset = gMainTable.taggedAliasArray[idx];
    735                 if (listOffset && isAliasInList(alias, listOffset)) {
    736                     return idx-convStart;
    737                 }
    738             }
    739             /* The standard doesn't know about the alias */
    740         }
    741         /* else no canonical name */
    742     }
    743     /* else converter or tag not found */
    744 
    745     return UINT32_MAX;
    746 }
    747 
    748 
    749 
    750 U_CFUNC const char *
    751 ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) {
    752     const char *aliasTmp = alias;
    753     int32_t i = 0;
    754     for (i = 0; i < 2; i++) {
    755         if (i == 1) {
    756             /*
    757              * After the first unsuccess converter lookup, check to see if
    758              * the name begins with 'x-'. If it does, strip it off and try
    759              * again.  This behaviour is similar to how ICU4J does it.
    760              */
    761             if (aliasTmp[0] == 'x' || aliasTmp[1] == '-') {
    762                 aliasTmp = aliasTmp+2;
    763             } else {
    764                 break;
    765             }
    766         }
    767         if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) {
    768             uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode);
    769             if (convNum < gMainTable.converterListSize) {
    770                 return GET_STRING(gMainTable.converterList[convNum]);
    771             }
    772             /* else converter not found */
    773         } else {
    774             break;
    775         }
    776     }
    777 
    778     return NULL;
    779 }
    780 
    781 static int32_t U_CALLCONV
    782 ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    783     int32_t value = 0;
    784     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    785     uint32_t listOffset = myContext->listOffset;
    786 
    787     if (listOffset) {
    788         value = gMainTable.taggedAliasLists[listOffset];
    789     }
    790     return value;
    791 }
    792 
    793 static const char* U_CALLCONV
    794 ucnv_io_nextStandardAliases(UEnumeration *enumerator,
    795                             int32_t* resultLength,
    796                             UErrorCode * /*pErrorCode*/)
    797 {
    798     UAliasContext *myContext = (UAliasContext *)(enumerator->context);
    799     uint32_t listOffset = myContext->listOffset;
    800 
    801     if (listOffset) {
    802         uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    803         const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    804 
    805         if (myContext->listIdx < listCount) {
    806             const char *myStr = GET_STRING(currList[myContext->listIdx++]);
    807             if (resultLength) {
    808                 *resultLength = (int32_t)uprv_strlen(myStr);
    809             }
    810             return myStr;
    811         }
    812     }
    813     /* Either we accessed a zero length list, or we enumerated too far. */
    814     if (resultLength) {
    815         *resultLength = 0;
    816     }
    817     return NULL;
    818 }
    819 
    820 static void U_CALLCONV
    821 ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
    822     ((UAliasContext *)(enumerator->context))->listIdx = 0;
    823 }
    824 
    825 static void U_CALLCONV
    826 ucnv_io_closeUEnumeration(UEnumeration *enumerator) {
    827     uprv_free(enumerator->context);
    828     uprv_free(enumerator);
    829 }
    830 
/*
 * Enumerate the aliases for the specified converter and standard tag.
 *
 * This is a template: ucnv_openStandardNames() copies it into each
 * enumeration it creates and then sets the copy's context pointer.
 * NOTE(review): the initializer order follows the UEnumeration declaration,
 * which is not visible in this file - confirm before reordering.
 */
static const UEnumeration gEnumAliases = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,    /* frees the context and the enumeration */
    ucnv_io_countStandardAliases, /* number of aliases in the list */
    uenum_unextDefault,           /* library-provided default */
    ucnv_io_nextStandardAliases,  /* next alias as a char string */
    ucnv_io_resetStandardAliases  /* rewind to the first alias */
};
    841 
    842 U_CAPI UEnumeration * U_EXPORT2
    843 ucnv_openStandardNames(const char *convName,
    844                        const char *standard,
    845                        UErrorCode *pErrorCode)
    846 {
    847     UEnumeration *myEnum = NULL;
    848     if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) {
    849         uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode);
    850 
    851         /* When listOffset == 0, we want to acknowledge that the
    852            converter name and standard are okay, but there
    853            is nothing to enumerate. */
    854         if (listOffset < gMainTable.taggedAliasListsSize) {
    855             UAliasContext *myContext;
    856 
    857             myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
    858             if (myEnum == NULL) {
    859                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    860                 return NULL;
    861             }
    862             uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration));
    863             myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext)));
    864             if (myContext == NULL) {
    865                 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
    866                 uprv_free(myEnum);
    867                 return NULL;
    868             }
    869             myContext->listOffset = listOffset;
    870             myContext->listIdx = 0;
    871             myEnum->context = myContext;
    872         }
    873         /* else converter or tag not found */
    874     }
    875     return myEnum;
    876 }
    877 
    878 static uint16_t
    879 ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) {
    880     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    881         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    882         if (convNum < gMainTable.converterListSize) {
    883             /* tagListNum - 1 is the ALL tag */
    884             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    885 
    886             if (listOffset) {
    887                 return gMainTable.taggedAliasLists[listOffset];
    888             }
    889             /* else this shouldn't happen. internal program error */
    890         }
    891         /* else converter not found */
    892     }
    893     return 0;
    894 }
    895 
    896 static uint16_t
    897 ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) {
    898     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    899         uint32_t currAlias;
    900         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    901         if (convNum < gMainTable.converterListSize) {
    902             /* tagListNum - 1 is the ALL tag */
    903             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    904 
    905             if (listOffset) {
    906                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    907                 /* +1 to skip listCount */
    908                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    909 
    910                 for (currAlias = start; currAlias < listCount; currAlias++) {
    911                     aliases[currAlias] = GET_STRING(currList[currAlias]);
    912                 }
    913             }
    914             /* else this shouldn't happen. internal program error */
    915         }
    916         /* else converter not found */
    917     }
    918     return 0;
    919 }
    920 
    921 static const char *
    922 ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) {
    923     if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    924         uint32_t convNum = findConverter(alias, NULL, pErrorCode);
    925         if (convNum < gMainTable.converterListSize) {
    926             /* tagListNum - 1 is the ALL tag */
    927             int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum];
    928 
    929             if (listOffset) {
    930                 uint32_t listCount = gMainTable.taggedAliasLists[listOffset];
    931                 /* +1 to skip listCount */
    932                 const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    933 
    934                 if (n < listCount)  {
    935                     return GET_STRING(currList[n]);
    936                 }
    937                 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    938             }
    939             /* else this shouldn't happen. internal program error */
    940         }
    941         /* else converter not found */
    942     }
    943     return NULL;
    944 }
    945 
    946 static uint16_t
    947 ucnv_io_countStandards(UErrorCode *pErrorCode) {
    948     if (haveAliasData(pErrorCode)) {
    949         /* Don't include the empty list */
    950         return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS);
    951     }
    952 
    953     return 0;
    954 }
    955 
    956 U_CAPI const char * U_EXPORT2
    957 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) {
    958     if (haveAliasData(pErrorCode)) {
    959         if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) {
    960             return GET_STRING(gMainTable.tagList[n]);
    961         }
    962         *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    963     }
    964 
    965     return NULL;
    966 }
    967 
    968 U_CAPI const char * U_EXPORT2
    969 ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
    970     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
    971         uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode);
    972 
    973         if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) {
    974             const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1;
    975 
    976             /* Get the preferred name from this list */
    977             if (currList[0]) {
    978                 return GET_STRING(currList[0]);
    979             }
    980             /* else someone screwed up the alias table. */
    981             /* *pErrorCode = U_INVALID_FORMAT_ERROR */
    982         }
    983     }
    984 
    985     return NULL;
    986 }
    987 
    988 U_CAPI uint16_t U_EXPORT2
    989 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
    990 {
    991     return ucnv_io_countAliases(alias, pErrorCode);
    992 }
    993 
    994 
    995 U_CAPI const char* U_EXPORT2
    996 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
    997 {
    998     return ucnv_io_getAlias(alias, n, pErrorCode);
    999 }
   1000 
   1001 U_CAPI void U_EXPORT2
   1002 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
   1003 {
   1004     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
   1005 }
   1006 
   1007 U_CAPI uint16_t U_EXPORT2
   1008 ucnv_countStandards(void)
   1009 {
   1010     UErrorCode err = U_ZERO_ERROR;
   1011     return ucnv_io_countStandards(&err);
   1012 }
   1013 
   1014 U_CAPI const char * U_EXPORT2
   1015 ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) {
   1016     if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) {
   1017         uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode);
   1018 
   1019         if (convNum < gMainTable.converterListSize) {
   1020             return GET_STRING(gMainTable.converterList[convNum]);
   1021         }
   1022     }
   1023 
   1024     return NULL;
   1025 }
   1026 
   1027 static int32_t U_CALLCONV
   1028 ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) {
   1029     return gMainTable.converterListSize;
   1030 }
   1031 
   1032 static const char* U_CALLCONV
   1033 ucnv_io_nextAllConverters(UEnumeration *enumerator,
   1034                             int32_t* resultLength,
   1035                             UErrorCode * /*pErrorCode*/)
   1036 {
   1037     uint16_t *myContext = (uint16_t *)(enumerator->context);
   1038 
   1039     if (*myContext < gMainTable.converterListSize) {
   1040         const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]);
   1041         if (resultLength) {
   1042             *resultLength = (int32_t)uprv_strlen(myStr);
   1043         }
   1044         return myStr;
   1045     }
   1046     /* Either we accessed a zero length list, or we enumerated too far. */
   1047     if (resultLength) {
   1048         *resultLength = 0;
   1049     }
   1050     return NULL;
   1051 }
   1052 
   1053 static void U_CALLCONV
   1054 ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) {
   1055     *((uint16_t *)(enumerator->context)) = 0;
   1056 }
   1057 
/*
 * Template for enumerating every name in the converter list.
 * ucnv_openAllNames() copies it into each enumeration it creates and then
 * sets the copy's context pointer to a uint16_t cursor.
 * NOTE(review): the initializer order follows the UEnumeration declaration,
 * which is not visible in this file - confirm before reordering.
 */
static const UEnumeration gEnumAllConverters = {
    NULL,
    NULL,
    ucnv_io_closeUEnumeration,   /* frees the context and the enumeration */
    ucnv_io_countAllConverters,  /* size of the converter list */
    uenum_unextDefault,          /* library-provided default */
    ucnv_io_nextAllConverters,   /* next converter name as a char string */
    ucnv_io_resetAllConverters   /* rewind to the first converter */
};
   1067 
   1068 U_CAPI UEnumeration * U_EXPORT2
   1069 ucnv_openAllNames(UErrorCode *pErrorCode) {
   1070     UEnumeration *myEnum = NULL;
   1071     if (haveAliasData(pErrorCode)) {
   1072         uint16_t *myContext;
   1073 
   1074         myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)));
   1075         if (myEnum == NULL) {
   1076             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1077             return NULL;
   1078         }
   1079         uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration));
   1080         myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t)));
   1081         if (myContext == NULL) {
   1082             *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   1083             uprv_free(myEnum);
   1084             return NULL;
   1085         }
   1086         *myContext = 0;
   1087         myEnum->context = myContext;
   1088     }
   1089     return myEnum;
   1090 }
   1091 
   1092 U_CFUNC uint16_t
   1093 ucnv_io_countKnownConverters(UErrorCode *pErrorCode) {
   1094     if (haveAliasData(pErrorCode)) {
   1095         return (uint16_t)gMainTable.converterListSize;
   1096     }
   1097     return 0;
   1098 }
   1099 
   1100 /* alias table swapping ----------------------------------------------------- */
   1101 
/*
 * Signature of the name-stripping functions used as sort keys while swapping:
 * takes a destination buffer and a name, and returns a char string that
 * io_compareRows() compares with uprv_strcmp().
 * ucnv_swapAliases() selects the ASCII or the EBCDIC implementation.
 */
typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name);
   1103 
/*
 * row of a temporary array
 *
 * gets platform-endian charset string indexes and sorting indexes;
 * after sorting this array by strings, the actual arrays are permuted
 * according to the sorting indexes
 *
 * strIndex:  string index read from the alias list; the string starts at
 *            byte offset 2*strIndex in the string table
 * sortIndex: the row's position before sorting
 */
typedef struct TempRow {
    uint16_t strIndex, sortIndex;
} TempRow;
   1114 
/*
 * Working state for re-sorting the alias list in ucnv_swapAliases();
 * also passed to io_compareRows() as the sort context.
 */
typedef struct TempAliasTable {
    const char *chars;                  /* start of the string table that TempRow.strIndex refers to */
    TempRow *rows;                      /* one row per alias; this is the array that gets sorted */
    uint16_t *resort;                   /* scratch array used when the permutation is done in place */
    StripForCompareFn *stripForCompare; /* produces the comparison key for a name */
} TempAliasTable;
   1121 
/*
 * Largest alias count for which ucnv_swapAliases() uses its stack arrays;
 * for larger counts it allocates the temporary arrays on the heap.
 */
enum {
    STACK_ROW_CAPACITY=500
};
   1125 
   1126 static int32_t
   1127 io_compareRows(const void *context, const void *left, const void *right) {
   1128     char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH],
   1129          strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH];
   1130 
   1131     TempAliasTable *tempTable=(TempAliasTable *)context;
   1132     const char *chars=tempTable->chars;
   1133 
   1134     return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex),
   1135                                 tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex));
   1136 }
   1137 
/*
 * Swap a converter alias table (data format "CvAl", format version 3)
 * using the conversions provided by a UDataSwapper.
 *
 * ds         - swapper supplying the read/swap functions and the input and
 *              output charset families
 * inData     - start of the input data, including its data header
 * length     - number of input bytes; if negative, nothing after the header
 *              is written and only the size is computed and returned
 * outData    - destination; may be the same memory as inData (in-place swap)
 * pErrorCode - ICU error code
 *
 * Returns the header size plus the size of the table in bytes, or 0 on error.
 *
 * When the input and output charset families differ, the alias list is
 * re-sorted by the output-charset strings, and the untagged converter array
 * is permuted in parallel with it.
 */
U_CAPI int32_t U_EXPORT2
ucnv_swapAliases(const UDataSwapper *ds,
                 const void *inData, int32_t length, void *outData,
                 UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint16_t *inTable;
    const uint32_t *inSectionSizes;
    uint32_t toc[offsetsCount];
    uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */
    uint32_t i, count, tocLength, topOffset;

    /* stack storage for the sort; used when there are at most STACK_ROW_CAPACITY aliases */
    TempRow rows[STACK_ROW_CAPACITY];
    uint16_t resort[STACK_ROW_CAPACITY];
    TempAliasTable tempTable;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CvAl" */
        pInfo->dataFormat[1]==0x76 &&
        pInfo->dataFormat[2]==0x41 &&
        pInfo->dataFormat[3]==0x6c &&
        pInfo->formatVersion[0]==3
    )) {
        udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* an alias table must contain at least the table of contents array */
    if(length>=0 && (length-headerSize)<4*(1+minTocLength)) {
        udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                         length-headerSize);
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    /* the table of contents is an array of 32-bit values at the start of the table */
    inSectionSizes=(const uint32_t *)((const char *)inData+headerSize);
    inTable=(const uint16_t *)inSectionSizes;
    uprv_memset(toc, 0, sizeof(toc));
    toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]);
    /* tocLength is used as the highest index into toc[] and offsets[] below */
    if(tocLength<minTocLength || offsetsCount<=tocLength) {
        udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength);
        *pErrorCode=U_INVALID_FORMAT_ERROR;
        return 0;
    }

    /* read the known part of the table of contents */
    for(i=converterListIndex; i<=tocLength; ++i) {
        toc[i]=ds->readUInt32(inSectionSizes[i]);
    }

    /* compute offsets: each section starts where the previous one ends */
    uprv_memset(offsets, 0, sizeof(offsets));
    offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */
    for(i=tagListIndex; i<=tocLength; ++i) {
        offsets[i]=offsets[i-1]+toc[i-1];
    }

    /* compute the overall size of the after-header data, in numbers of 16-bit units */
    topOffset=offsets[i-1]+toc[i-1];

    if(length>=0) {
        uint16_t *outTable;
        const uint16_t *p, *p2;
        uint16_t *q, *q2;
        uint16_t oldIndex;

        if((length-headerSize)<(2*(int32_t)topOffset)) {
            udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n",
                             length-headerSize);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        outTable=(uint16_t *)((char *)outData+headerSize);

        /* swap the entire table of contents */
        ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode);

        /* swap unnormalized strings & normalized strings */
        ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]),
                             outTable+offsets[stringTableIndex], pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n");
            return 0;
        }

        if(ds->inCharset==ds->outCharset) {
            /* no need to sort, just swap all 16-bit values together */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
        } else {
            /* allocate the temporary table for sorting */
            count=toc[aliasListIndex];

            tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */

            if(count<=STACK_ROW_CAPACITY) {
                tempTable.rows=rows;
                tempTable.resort=resort;
            } else {
                /* one allocation holds count TempRows followed by count 16-bit resort entries */
                tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2);
                if(tempTable.rows==NULL) {
                    udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n",
                                     count);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                tempTable.resort=(uint16_t *)(tempTable.rows+count);
            }

            if(ds->outCharset==U_ASCII_FAMILY) {
                tempTable.stripForCompare=ucnv_io_stripASCIIForCompare;
            } else /* U_EBCDIC_FAMILY */ {
                tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare;
            }

            /*
             * Sort unique aliases+mapped names.
             *
             * We need to sort the list again by outCharset strings because they
             * sort differently for different charset families.
             * First we set up a temporary table with the string indexes and
             * sorting indexes and sort that.
             * Then we permute and copy/swap the actual values.
             */
            p=inTable+offsets[aliasListIndex];
            q=outTable+offsets[aliasListIndex];

            p2=inTable+offsets[untaggedConvArrayIndex];
            q2=outTable+offsets[untaggedConvArrayIndex];

            for(i=0; i<count; ++i) {
                tempTable.rows[i].strIndex=ds->readUInt16(p[i]);
                tempTable.rows[i].sortIndex=(uint16_t)i;
            }

            uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow),
                           io_compareRows, &tempTable,
                           FALSE, pErrorCode);

            if(U_SUCCESS(*pErrorCode)) {
                /* copy/swap/permute items */
                if(p!=q) {
                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode);
                        ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode);
                    }
                } else {
                    /*
                     * If we swap in-place, then the permutation must use another
                     * temporary array (tempTable.resort)
                     * before the results are copied to the outBundle.
                     */
                    uint16_t *r=tempTable.resort;

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q, r, 2*count);

                    for(i=0; i<count; ++i) {
                        oldIndex=tempTable.rows[i].sortIndex;
                        ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode);
                    }
                    uprv_memcpy(q2, r, 2*count);
                }
            }

            /* release the heap block before the error check below so it is freed on failure too */
            if(tempTable.rows!=rows) {
                uprv_free(tempTable.rows);
            }

            if(U_FAILURE(*pErrorCode)) {
                udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n",
                                 count);
                return 0;
            }

            /* swap remaining 16-bit values */
            ds->swapArray16(ds,
                            inTable+offsets[converterListIndex],
                            2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]),
                            outTable+offsets[converterListIndex],
                            pErrorCode);
            ds->swapArray16(ds,
                            inTable+offsets[taggedAliasArrayIndex],
                            2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]),
                            outTable+offsets[taggedAliasArrayIndex],
                            pErrorCode);
        }
    }

    return headerSize+2*(int32_t)topOffset;
}
   1350 
   1351 #endif
   1352 
   1353 /*
   1354  * Hey, Emacs, please set the following:
   1355  *
   1356  * Local Variables:
   1357  * indent-tabs-mode: nil
   1358  * End:
   1359  *
   1360  */
   1361