Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2005-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  swapimpl.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2005may05
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Data file swapping functions moved here from the common library
     17 *   because some data is hardcoded in ICU4C and needs not be swapped any more.
     18 *   Moving the functions here simplifies testing (for code coverage) because
     19 *   we need not jump through hoops (like adding snapshots of these files
     20 *   to testdata).
     21 *
     22 *   The declarations for these functions remain in the internal header files
     23 *   in icu/source/common/
     24 */
     25 
     26 #include "unicode/utypes.h"
     27 #include "unicode/putil.h"
     28 #include "unicode/udata.h"
     29 
     30 /* Explicit include statement for std_string.h is needed
     31  * for compilation on certain platforms. (e.g. AIX/VACPP)
     32  */
     33 #include "unicode/std_string.h"
     34 
     35 #include "cmemory.h"
     36 #include "cstring.h"
     37 #include "uinvchar.h"
     38 #include "uassert.h"
     39 #include "uarrsort.h"
     40 #include "ucmndata.h"
     41 #include "udataswp.h"
     42 
     43 /* swapping implementations in common */
     44 
     45 #include "uresdata.h"
     46 #include "ucnv_io.h"
     47 #include "uprops.h"
     48 #include "ucase.h"
     49 #include "ubidi_props.h"
     50 #include "ucol_swp.h"
     51 #include "ucnv_bld.h"
     52 #include "unormimp.h"
     53 #include "normalizer2impl.h"
     54 #include "sprpimpl.h"
     55 #include "propname.h"
     56 #include "rbbidata.h"
     57 #include "triedict.h"
     58 #include "utrie2.h"
     59 
     60 /* swapping implementations in i18n */
     61 
     62 #if !UCONFIG_NO_NORMALIZATION
     63 #include "uspoof_impl.h"
     64 #endif
     65 
     66 
     67 /* definitions */
     68 
     69 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     70 
     71 /* Unicode property (value) aliases data swapping --------------------------- */
     72 
     73 static int32_t U_CALLCONV
     74 upname_swap(const UDataSwapper *ds,
     75             const void *inData, int32_t length, void *outData,
     76             UErrorCode *pErrorCode) {
     77     /* udata_swapDataHeader checks the arguments */
     78     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
     79     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     80         return 0;
     81     }
     82 
     83     /* check data format and format version */
     84     const UDataInfo *pInfo=
     85         reinterpret_cast<const UDataInfo *>(
     86             reinterpret_cast<const char *>(inData)+4);
     87     if(!(
     88         pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
     89         pInfo->dataFormat[1]==0x6e &&
     90         pInfo->dataFormat[2]==0x61 &&
     91         pInfo->dataFormat[3]==0x6d &&
     92         pInfo->formatVersion[0]==2
     93     )) {
     94         udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
     95                          pInfo->dataFormat[0], pInfo->dataFormat[1],
     96                          pInfo->dataFormat[2], pInfo->dataFormat[3],
     97                          pInfo->formatVersion[0]);
     98         *pErrorCode=U_UNSUPPORTED_ERROR;
     99         return 0;
    100     }
    101 
    102     const uint8_t *inBytes=reinterpret_cast<const uint8_t *>(inData)+headerSize;
    103     uint8_t *outBytes=reinterpret_cast<uint8_t *>(outData)+headerSize;
    104 
    105     if(length>=0) {
    106         length-=headerSize;
    107         // formatVersion 2 initially has indexes[8], 32 bytes.
    108         if(length<32) {
    109             udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
    110                              (int)length);
    111             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    112             return 0;
    113         }
    114     }
    115 
    116     const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
    117     int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
    118     if(length>=0) {
    119         if(length<totalSize) {
    120             udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
    121                              "for pnames.icu\n",
    122                              (int)length, (int)totalSize);
    123             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    124             return 0;
    125         }
    126 
    127         int32_t numBytesIndexesAndValueMaps=
    128             udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
    129 
    130         // Swap the indexes[] and the valueMaps[].
    131         ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
    132 
    133         // Copy the rest of the data.
    134         if(inBytes!=outBytes) {
    135             uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
    136                         inBytes+numBytesIndexesAndValueMaps,
    137                         totalSize-numBytesIndexesAndValueMaps);
    138         }
    139 
    140         // We need not swap anything else:
    141         //
    142         // The ByteTries are already byte-serialized, and are fixed on ASCII.
    143         // (On an EBCDIC machine, the input string is converted to lowercase ASCII
    144         // while matching.)
    145         //
    146         // The name groups are mostly invariant characters, but since we only
    147         // generate, and keep in subversion, ASCII versions of pnames.icu,
    148         // and since only ICU4J uses the pnames.icu data file
    149         // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
    150         // we just copy those bytes too.
    151     }
    152 
    153     return headerSize+totalSize;
    154 }
    155 
    156 /* Unicode properties data swapping ----------------------------------------- */
    157 
    158 static int32_t U_CALLCONV
    159 uprops_swap(const UDataSwapper *ds,
    160             const void *inData, int32_t length, void *outData,
    161             UErrorCode *pErrorCode) {
    162     const UDataInfo *pInfo;
    163     int32_t headerSize, i;
    164 
    165     int32_t dataIndexes[UPROPS_INDEX_COUNT];
    166     const int32_t *inData32;
    167 
    168     /* udata_swapDataHeader checks the arguments */
    169     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    170     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    171         return 0;
    172     }
    173 
    174     /* check data format and format version */
    175     pInfo=(const UDataInfo *)((const char *)inData+4);
    176     if(!(
    177         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
    178         pInfo->dataFormat[1]==0x50 &&
    179         pInfo->dataFormat[2]==0x72 &&
    180         pInfo->dataFormat[3]==0x6f &&
    181         (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
    182         (pInfo->formatVersion[0]>=7 ||
    183             (pInfo->formatVersion[2]==UTRIE_SHIFT &&
    184              pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
    185     )) {
    186         udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
    187                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    188                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    189                          pInfo->formatVersion[0]);
    190         *pErrorCode=U_UNSUPPORTED_ERROR;
    191         return 0;
    192     }
    193 
    194     /* the properties file must contain at least the indexes array */
    195     if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
    196         udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
    197                          length-headerSize);
    198         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    199         return 0;
    200     }
    201 
    202     /* read the indexes */
    203     inData32=(const int32_t *)((const char *)inData+headerSize);
    204     for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
    205         dataIndexes[i]=udata_readInt32(ds, inData32[i]);
    206     }
    207 
    208     /*
    209      * comments are copied from the data format description in genprops/store.c
    210      * indexes[] constants are in uprops.h
    211      */
    212     int32_t dataTop;
    213     if(length>=0) {
    214         int32_t *outData32;
    215 
    216         /*
    217          * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
    218          * In earlier formatVersions, it is 0 and a lower dataIndexes entry
    219          * has the top of the last item.
    220          */
    221         for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
    222 
    223         if((length-headerSize)<(4*dataTop)) {
    224             udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
    225                              length-headerSize);
    226             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    227             return 0;
    228         }
    229 
    230         outData32=(int32_t *)((char *)outData+headerSize);
    231 
    232         /* copy everything for inaccessible data (padding) */
    233         if(inData32!=outData32) {
    234             uprv_memcpy(outData32, inData32, 4*dataTop);
    235         }
    236 
    237         /* swap the indexes[16] */
    238         ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
    239 
    240         /*
    241          * swap the main properties UTrie
    242          * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
    243          */
    244         utrie2_swapAnyVersion(ds,
    245             inData32+UPROPS_INDEX_COUNT,
    246             4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
    247             outData32+UPROPS_INDEX_COUNT,
    248             pErrorCode);
    249 
    250         /*
    251          * swap the properties and exceptions words
    252          * P  const uint32_t props32[i1-i0];
    253          * E  const uint32_t exceptions[i2-i1];
    254          */
    255         ds->swapArray32(ds,
    256             inData32+dataIndexes[UPROPS_PROPS32_INDEX],
    257             4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
    258             outData32+dataIndexes[UPROPS_PROPS32_INDEX],
    259             pErrorCode);
    260 
    261         /*
    262          * swap the UChars
    263          * U  const UChar uchars[2*(i3-i2)];
    264          */
    265         ds->swapArray16(ds,
    266             inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
    267             4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
    268             outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
    269             pErrorCode);
    270 
    271         /*
    272          * swap the additional UTrie
    273          * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
    274          */
    275         utrie2_swapAnyVersion(ds,
    276             inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
    277             4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
    278             outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
    279             pErrorCode);
    280 
    281         /*
    282          * swap the properties vectors
    283          * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
    284          */
    285         ds->swapArray32(ds,
    286             inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
    287             4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
    288             outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
    289             pErrorCode);
    290 
    291         // swap the Script_Extensions data
    292         // SCX const uint16_t scriptExtensions[2*(i7-i6)];
    293         ds->swapArray16(ds,
    294             inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
    295             4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
    296             outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
    297             pErrorCode);
    298     }
    299 
    300     /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
    301     return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
    302 }
    303 
    304 /* Unicode case mapping data swapping --------------------------------------- */
    305 
    306 static int32_t U_CALLCONV
    307 ucase_swap(const UDataSwapper *ds,
    308            const void *inData, int32_t length, void *outData,
    309            UErrorCode *pErrorCode) {
    310     const UDataInfo *pInfo;
    311     int32_t headerSize;
    312 
    313     const uint8_t *inBytes;
    314     uint8_t *outBytes;
    315 
    316     const int32_t *inIndexes;
    317     int32_t indexes[16];
    318 
    319     int32_t i, offset, count, size;
    320 
    321     /* udata_swapDataHeader checks the arguments */
    322     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    323     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    324         return 0;
    325     }
    326 
    327     /* check data format and format version */
    328     pInfo=(const UDataInfo *)((const char *)inData+4);
    329     if(!(
    330         pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
    331         pInfo->dataFormat[1]==UCASE_FMT_1 &&
    332         pInfo->dataFormat[2]==UCASE_FMT_2 &&
    333         pInfo->dataFormat[3]==UCASE_FMT_3 &&
    334         ((pInfo->formatVersion[0]==1 &&
    335           pInfo->formatVersion[2]==UTRIE_SHIFT &&
    336           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
    337          pInfo->formatVersion[0]==2)
    338     )) {
    339         udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
    340                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    341                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    342                          pInfo->formatVersion[0]);
    343         *pErrorCode=U_UNSUPPORTED_ERROR;
    344         return 0;
    345     }
    346 
    347     inBytes=(const uint8_t *)inData+headerSize;
    348     outBytes=(uint8_t *)outData+headerSize;
    349 
    350     inIndexes=(const int32_t *)inBytes;
    351 
    352     if(length>=0) {
    353         length-=headerSize;
    354         if(length<16*4) {
    355             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
    356                              length);
    357             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    358             return 0;
    359         }
    360     }
    361 
    362     /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
    363     for(i=0; i<16; ++i) {
    364         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    365     }
    366 
    367     /* get the total length of the data */
    368     size=indexes[UCASE_IX_LENGTH];
    369 
    370     if(length>=0) {
    371         if(length<size) {
    372             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
    373                              length);
    374             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    375             return 0;
    376         }
    377 
    378         /* copy the data for inaccessible bytes */
    379         if(inBytes!=outBytes) {
    380             uprv_memcpy(outBytes, inBytes, size);
    381         }
    382 
    383         offset=0;
    384 
    385         /* swap the int32_t indexes[] */
    386         count=indexes[UCASE_IX_INDEX_TOP]*4;
    387         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    388         offset+=count;
    389 
    390         /* swap the UTrie */
    391         count=indexes[UCASE_IX_TRIE_SIZE];
    392         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    393         offset+=count;
    394 
    395         /* swap the uint16_t exceptions[] and unfold[] */
    396         count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
    397         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    398         offset+=count;
    399 
    400         U_ASSERT(offset==size);
    401     }
    402 
    403     return headerSize+size;
    404 }
    405 
    406 /* Unicode bidi/shaping data swapping --------------------------------------- */
    407 
    408 static int32_t U_CALLCONV
    409 ubidi_swap(const UDataSwapper *ds,
    410            const void *inData, int32_t length, void *outData,
    411            UErrorCode *pErrorCode) {
    412     const UDataInfo *pInfo;
    413     int32_t headerSize;
    414 
    415     const uint8_t *inBytes;
    416     uint8_t *outBytes;
    417 
    418     const int32_t *inIndexes;
    419     int32_t indexes[16];
    420 
    421     int32_t i, offset, count, size;
    422 
    423     /* udata_swapDataHeader checks the arguments */
    424     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    425     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    426         return 0;
    427     }
    428 
    429     /* check data format and format version */
    430     pInfo=(const UDataInfo *)((const char *)inData+4);
    431     if(!(
    432         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
    433         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
    434         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
    435         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
    436         ((pInfo->formatVersion[0]==1 &&
    437           pInfo->formatVersion[2]==UTRIE_SHIFT &&
    438           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
    439          pInfo->formatVersion[0]==2)
    440     )) {
    441         udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
    442                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    443                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    444                          pInfo->formatVersion[0]);
    445         *pErrorCode=U_UNSUPPORTED_ERROR;
    446         return 0;
    447     }
    448 
    449     inBytes=(const uint8_t *)inData+headerSize;
    450     outBytes=(uint8_t *)outData+headerSize;
    451 
    452     inIndexes=(const int32_t *)inBytes;
    453 
    454     if(length>=0) {
    455         length-=headerSize;
    456         if(length<16*4) {
    457             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
    458                              length);
    459             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    460             return 0;
    461         }
    462     }
    463 
    464     /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    465     for(i=0; i<16; ++i) {
    466         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    467     }
    468 
    469     /* get the total length of the data */
    470     size=indexes[UBIDI_IX_LENGTH];
    471 
    472     if(length>=0) {
    473         if(length<size) {
    474             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
    475                              length);
    476             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    477             return 0;
    478         }
    479 
    480         /* copy the data for inaccessible bytes */
    481         if(inBytes!=outBytes) {
    482             uprv_memcpy(outBytes, inBytes, size);
    483         }
    484 
    485         offset=0;
    486 
    487         /* swap the int32_t indexes[] */
    488         count=indexes[UBIDI_IX_INDEX_TOP]*4;
    489         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    490         offset+=count;
    491 
    492         /* swap the UTrie */
    493         count=indexes[UBIDI_IX_TRIE_SIZE];
    494         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    495         offset+=count;
    496 
    497         /* swap the uint32_t mirrors[] */
    498         count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    499         ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    500         offset+=count;
    501 
    502         /* just skip the uint8_t jgArray[] */
    503         count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    504         offset+=count;
    505 
    506         U_ASSERT(offset==size);
    507     }
    508 
    509     return headerSize+size;
    510 }
    511 
    512 /* Unicode normalization data swapping -------------------------------------- */
    513 
    514 #if !UCONFIG_NO_NORMALIZATION
    515 
    516 static int32_t U_CALLCONV
    517 unorm_swap(const UDataSwapper *ds,
    518            const void *inData, int32_t length, void *outData,
    519            UErrorCode *pErrorCode) {
    520     const UDataInfo *pInfo;
    521     int32_t headerSize;
    522 
    523     const uint8_t *inBytes;
    524     uint8_t *outBytes;
    525 
    526     const int32_t *inIndexes;
    527     int32_t indexes[32];
    528 
    529     int32_t i, offset, count, size;
    530 
    531     /* udata_swapDataHeader checks the arguments */
    532     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    533     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    534         return 0;
    535     }
    536 
    537     /* check data format and format version */
    538     pInfo=(const UDataInfo *)((const char *)inData+4);
    539     if(!(
    540         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
    541         pInfo->dataFormat[1]==0x6f &&
    542         pInfo->dataFormat[2]==0x72 &&
    543         pInfo->dataFormat[3]==0x6d &&
    544         pInfo->formatVersion[0]==2
    545     )) {
    546         udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
    547                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    548                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    549                          pInfo->formatVersion[0]);
    550         *pErrorCode=U_UNSUPPORTED_ERROR;
    551         return 0;
    552     }
    553 
    554     inBytes=(const uint8_t *)inData+headerSize;
    555     outBytes=(uint8_t *)outData+headerSize;
    556 
    557     inIndexes=(const int32_t *)inBytes;
    558 
    559     if(length>=0) {
    560         length-=headerSize;
    561         if(length<32*4) {
    562             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
    563                              length);
    564             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    565             return 0;
    566         }
    567     }
    568 
    569     /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
    570     for(i=0; i<32; ++i) {
    571         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    572     }
    573 
    574     /* calculate the total length of the data */
    575     size=
    576         32*4+ /* size of indexes[] */
    577         indexes[_NORM_INDEX_TRIE_SIZE]+
    578         indexes[_NORM_INDEX_UCHAR_COUNT]*2+
    579         indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
    580         indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
    581         indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
    582         indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    583 
    584     if(length>=0) {
    585         if(length<size) {
    586             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
    587                              length);
    588             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    589             return 0;
    590         }
    591 
    592         /* copy the data for inaccessible bytes */
    593         if(inBytes!=outBytes) {
    594             uprv_memcpy(outBytes, inBytes, size);
    595         }
    596 
    597         offset=0;
    598 
    599         /* swap the indexes[] */
    600         count=32*4;
    601         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    602         offset+=count;
    603 
    604         /* swap the main UTrie */
    605         count=indexes[_NORM_INDEX_TRIE_SIZE];
    606         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    607         offset+=count;
    608 
    609         /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
    610         count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    611         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    612         offset+=count;
    613 
    614         /* swap the FCD UTrie */
    615         count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    616         if(count!=0) {
    617             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    618             offset+=count;
    619         }
    620 
    621         /* swap the aux UTrie */
    622         count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    623         if(count!=0) {
    624             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    625             offset+=count;
    626         }
    627 
    628         /* swap the uint16_t combiningTable[] */
    629         count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    630         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    631         offset+=count;
    632     }
    633 
    634     return headerSize+size;
    635 }
    636 
    637 #endif
    638 
    639 /* Swap 'Test' data from gentest */
    640 static int32_t U_CALLCONV
    641 test_swap(const UDataSwapper *ds,
    642            const void *inData, int32_t length, void *outData,
    643            UErrorCode *pErrorCode) {
    644     const UDataInfo *pInfo;
    645     int32_t headerSize;
    646 
    647     const uint8_t *inBytes;
    648     uint8_t *outBytes;
    649 
    650     int32_t offset;
    651 
    652     /* udata_swapDataHeader checks the arguments */
    653     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    654     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    655         udata_printError(ds, "test_swap(): data header swap failed %s\n", u_errorName(*pErrorCode));
    656         return 0;
    657     }
    658 
    659     /* check data format and format version */
    660     pInfo=(const UDataInfo *)((const char *)inData+4);
    661     if(!(
    662         pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
    663         pInfo->dataFormat[1]==0x65 &&
    664         pInfo->dataFormat[2]==0x73 &&
    665         pInfo->dataFormat[3]==0x74 &&
    666         pInfo->formatVersion[0]==1
    667     )) {
    668         udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
    669                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    670                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    671                          pInfo->formatVersion[0]);
    672         *pErrorCode=U_UNSUPPORTED_ERROR;
    673         return 0;
    674     }
    675 
    676     inBytes=(const uint8_t *)inData+headerSize;
    677     outBytes=(uint8_t *)outData+headerSize;
    678 
    679     int32_t size16 = 2; // 16bit plus padding
    680     int32_t sizeStr = 5; // 4 char inv-str plus null
    681     int32_t size = size16 + sizeStr;
    682 
    683     if(length>=0) {
    684         if(length<size) {
    685             udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
    686                              length, size);
    687             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    688             return 0;
    689         }
    690 
    691 	offset =0;
    692 	/* swap a 1 entry array */
    693         ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
    694 	offset+=size16;
    695 	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
    696     }
    697 
    698     return headerSize+size;
    699 }
    700 
    701 /* swap any data (except a .dat package) ------------------------------------ */
    702 
    703 static const struct {
    704     uint8_t dataFormat[4];
    705     UDataSwapFn *swapFn;
    706 } swapFns[]={
    707     { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
    708 #if !UCONFIG_NO_LEGACY_CONVERSION
    709     { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
    710 #endif
    711 #if !UCONFIG_NO_CONVERSION
    712     { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
    713 #endif
    714 #if !UCONFIG_NO_IDNA
    715     { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
    716 #endif
    717     /* insert data formats here, descending by expected frequency of occurrence */
    718     { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
    719 
    720     { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
    721                                   ucase_swap },         /* dataFormat="cAsE" */
    722 
    723     { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
    724                                   ubidi_swap },         /* dataFormat="BiDi" */
    725 
    726 #if !UCONFIG_NO_NORMALIZATION
    727     { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
    728     { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
    729 #endif
    730 #if !UCONFIG_NO_COLLATION
    731     { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
    732     { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
    733 #endif
    734 #if !UCONFIG_NO_BREAK_ITERATION
    735     { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
    736     { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap },      /* dataFormat="TrDc " */
    737 #endif
    738     { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
    739     { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
    740 #if !UCONFIG_NO_NORMALIZATION
    741     { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
    742 #endif
    743     { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
    744 };
    745 
    746 U_CAPI int32_t U_EXPORT2
    747 udata_swap(const UDataSwapper *ds,
    748            const void *inData, int32_t length, void *outData,
    749            UErrorCode *pErrorCode) {
    750     char dataFormatChars[4];
    751     const UDataInfo *pInfo;
    752     int32_t headerSize, i, swappedLength;
    753 
    754     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    755         return 0;
    756     }
    757 
    758     /*
    759      * Preflight the header first; checks for illegal arguments, too.
    760      * Do not swap the header right away because the format-specific swapper
    761      * will swap it, get the headerSize again, and also use the header
    762      * information. Otherwise we would have to pass some of the information
    763      * and not be able to use the UDataSwapFn signature.
    764      */
    765     headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
    766 
    767     /*
    768      * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
    769      * then we could check here for further known magic values and structures.
    770      */
    771     if(U_FAILURE(*pErrorCode)) {
    772         return 0; /* the data format was not recognized */
    773     }
    774 
    775     pInfo=(const UDataInfo *)((const char *)inData+4);
    776 
    777     {
    778         /* convert the data format from ASCII to Unicode to the system charset */
    779         UChar u[4]={
    780              pInfo->dataFormat[0], pInfo->dataFormat[1],
    781              pInfo->dataFormat[2], pInfo->dataFormat[3]
    782         };
    783 
    784         if(uprv_isInvariantUString(u, 4)) {
    785             u_UCharsToChars(u, dataFormatChars, 4);
    786         } else {
    787             dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
    788         }
    789     }
    790 
    791     /* dispatch to the swap function for the dataFormat */
    792     for(i=0; i<LENGTHOF(swapFns); ++i) {
    793         if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
    794             swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
    795 
    796             if(U_FAILURE(*pErrorCode)) {
    797                 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
    798                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
    799                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
    800                                  dataFormatChars[0], dataFormatChars[1],
    801                                  dataFormatChars[2], dataFormatChars[3],
    802                                  u_errorName(*pErrorCode));
    803             } else if(swappedLength<(length-15)) {
    804                 /* swapped less than expected */
    805                 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
    806                                  swappedLength, length,
    807                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
    808                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
    809                                  dataFormatChars[0], dataFormatChars[1],
    810                                  dataFormatChars[2], dataFormatChars[3],
    811                                  u_errorName(*pErrorCode));
    812             }
    813 
    814             return swappedLength;
    815         }
    816     }
    817 
    818     /* the dataFormat was not recognized */
    819     udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
    820                      pInfo->dataFormat[0], pInfo->dataFormat[1],
    821                      pInfo->dataFormat[2], pInfo->dataFormat[3],
    822                      dataFormatChars[0], dataFormatChars[1],
    823                      dataFormatChars[2], dataFormatChars[3]);
    824 
    825     *pErrorCode=U_UNSUPPORTED_ERROR;
    826     return 0;
    827 }
    828