Home | History | Annotate | Download | only in toolutil
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2005-2014, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  swapimpl.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2005may05
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Data file swapping functions moved here from the common library
     19 *   because some data is hardcoded in ICU4C and need not be swapped any more.
     20 *   Moving the functions here simplifies testing (for code coverage) because
     21 *   we need not jump through hoops (like adding snapshots of these files
     22 *   to testdata).
     23 *
     24 *   The declarations for these functions remain in the internal header files
     25 *   in icu/source/common/
     26 */
     27 
     28 #include "unicode/utypes.h"
     29 #include "unicode/putil.h"
     30 #include "unicode/udata.h"
     31 
     32 /* Explicit include statement for std_string.h is needed
     33  * for compilation on certain platforms. (e.g. AIX/VACPP)
     34  */
     35 #include "unicode/std_string.h"
     36 
     37 #include "cmemory.h"
     38 #include "cstring.h"
     39 #include "uinvchar.h"
     40 #include "uassert.h"
     41 #include "uarrsort.h"
     42 #include "ucmndata.h"
     43 #include "udataswp.h"
     44 
     45 /* swapping implementations in common */
     46 
     47 #include "uresdata.h"
     48 #include "ucnv_io.h"
     49 #include "uprops.h"
     50 #include "ucase.h"
     51 #include "ubidi_props.h"
     52 #include "ucol_swp.h"
     53 #include "ucnv_bld.h"
     54 #include "unormimp.h"
     55 #include "normalizer2impl.h"
     56 #include "sprpimpl.h"
     57 #include "propname.h"
     58 #include "rbbidata.h"
     59 #include "utrie.h"
     60 #include "utrie2.h"
     61 #include "dictionarydata.h"
     62 
     63 /* swapping implementations in i18n */
     64 
     65 #if !UCONFIG_NO_NORMALIZATION
     66 #include "uspoof_impl.h"
     67 #endif
     68 
     69 U_NAMESPACE_USE
     70 
     71 /* definitions */
     72 
     73 /* Unicode property (value) aliases data swapping --------------------------- */
     74 
     75 static int32_t U_CALLCONV
     76 upname_swap(const UDataSwapper *ds,
     77             const void *inData, int32_t length, void *outData,
     78             UErrorCode *pErrorCode) {
     79     /* udata_swapDataHeader checks the arguments */
     80     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
     81     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     82         return 0;
     83     }
     84 
     85     /* check data format and format version */
     86     const UDataInfo *pInfo=
     87         reinterpret_cast<const UDataInfo *>(
     88             static_cast<const char *>(inData)+4);
     89     if(!(
     90         pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
     91         pInfo->dataFormat[1]==0x6e &&
     92         pInfo->dataFormat[2]==0x61 &&
     93         pInfo->dataFormat[3]==0x6d &&
     94         pInfo->formatVersion[0]==2
     95     )) {
     96         udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
     97                          pInfo->dataFormat[0], pInfo->dataFormat[1],
     98                          pInfo->dataFormat[2], pInfo->dataFormat[3],
     99                          pInfo->formatVersion[0]);
    100         *pErrorCode=U_UNSUPPORTED_ERROR;
    101         return 0;
    102     }
    103 
    104     const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
    105     uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
    106 
    107     if(length>=0) {
    108         length-=headerSize;
    109         // formatVersion 2 initially has indexes[8], 32 bytes.
    110         if(length<32) {
    111             udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
    112                              (int)length);
    113             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    114             return 0;
    115         }
    116     }
    117 
    118     const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
    119     int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
    120     if(length>=0) {
    121         if(length<totalSize) {
    122             udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
    123                              "for pnames.icu\n",
    124                              (int)length, (int)totalSize);
    125             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    126             return 0;
    127         }
    128 
    129         int32_t numBytesIndexesAndValueMaps=
    130             udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
    131 
    132         // Swap the indexes[] and the valueMaps[].
    133         ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
    134 
    135         // Copy the rest of the data.
    136         if(inBytes!=outBytes) {
    137             uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
    138                         inBytes+numBytesIndexesAndValueMaps,
    139                         totalSize-numBytesIndexesAndValueMaps);
    140         }
    141 
    142         // We need not swap anything else:
    143         //
    144         // The ByteTries are already byte-serialized, and are fixed on ASCII.
    145         // (On an EBCDIC machine, the input string is converted to lowercase ASCII
    146         // while matching.)
    147         //
    148         // The name groups are mostly invariant characters, but since we only
    149         // generate, and keep in subversion, ASCII versions of pnames.icu,
    150         // and since only ICU4J uses the pnames.icu data file
    151         // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
    152         // we just copy those bytes too.
    153     }
    154 
    155     return headerSize+totalSize;
    156 }
    157 
    158 /* Unicode properties data swapping ----------------------------------------- */
    159 
    160 static int32_t U_CALLCONV
    161 uprops_swap(const UDataSwapper *ds,
    162             const void *inData, int32_t length, void *outData,
    163             UErrorCode *pErrorCode) {
    164     const UDataInfo *pInfo;
    165     int32_t headerSize, i;
    166 
    167     int32_t dataIndexes[UPROPS_INDEX_COUNT];
    168     const int32_t *inData32;
    169 
    170     /* udata_swapDataHeader checks the arguments */
    171     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    172     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    173         return 0;
    174     }
    175 
    176     /* check data format and format version */
    177     pInfo=(const UDataInfo *)((const char *)inData+4);
    178     if(!(
    179         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
    180         pInfo->dataFormat[1]==0x50 &&
    181         pInfo->dataFormat[2]==0x72 &&
    182         pInfo->dataFormat[3]==0x6f &&
    183         (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
    184         (pInfo->formatVersion[0]>=7 ||
    185             (pInfo->formatVersion[2]==UTRIE_SHIFT &&
    186              pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
    187     )) {
    188         udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
    189                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    190                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    191                          pInfo->formatVersion[0]);
    192         *pErrorCode=U_UNSUPPORTED_ERROR;
    193         return 0;
    194     }
    195 
    196     /* the properties file must contain at least the indexes array */
    197     if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
    198         udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
    199                          length-headerSize);
    200         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    201         return 0;
    202     }
    203 
    204     /* read the indexes */
    205     inData32=(const int32_t *)((const char *)inData+headerSize);
    206     for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
    207         dataIndexes[i]=udata_readInt32(ds, inData32[i]);
    208     }
    209 
    210     /*
    211      * comments are copied from the data format description in genprops/store.c
    212      * indexes[] constants are in uprops.h
    213      */
    214     int32_t dataTop;
    215     if(length>=0) {
    216         int32_t *outData32;
    217 
    218         /*
    219          * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
    220          * In earlier formatVersions, it is 0 and a lower dataIndexes entry
    221          * has the top of the last item.
    222          */
    223         for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
    224 
    225         if((length-headerSize)<(4*dataTop)) {
    226             udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
    227                              length-headerSize);
    228             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    229             return 0;
    230         }
    231 
    232         outData32=(int32_t *)((char *)outData+headerSize);
    233 
    234         /* copy everything for inaccessible data (padding) */
    235         if(inData32!=outData32) {
    236             uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
    237         }
    238 
    239         /* swap the indexes[16] */
    240         ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
    241 
    242         /*
    243          * swap the main properties UTrie
    244          * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
    245          */
    246         utrie2_swapAnyVersion(ds,
    247             inData32+UPROPS_INDEX_COUNT,
    248             4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
    249             outData32+UPROPS_INDEX_COUNT,
    250             pErrorCode);
    251 
    252         /*
    253          * swap the properties and exceptions words
    254          * P  const uint32_t props32[i1-i0];
    255          * E  const uint32_t exceptions[i2-i1];
    256          */
    257         ds->swapArray32(ds,
    258             inData32+dataIndexes[UPROPS_PROPS32_INDEX],
    259             4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
    260             outData32+dataIndexes[UPROPS_PROPS32_INDEX],
    261             pErrorCode);
    262 
    263         /*
    264          * swap the UChars
    265          * U  const UChar uchars[2*(i3-i2)];
    266          */
    267         ds->swapArray16(ds,
    268             inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
    269             4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
    270             outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
    271             pErrorCode);
    272 
    273         /*
    274          * swap the additional UTrie
    275          * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
    276          */
    277         utrie2_swapAnyVersion(ds,
    278             inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
    279             4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
    280             outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
    281             pErrorCode);
    282 
    283         /*
    284          * swap the properties vectors
    285          * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
    286          */
    287         ds->swapArray32(ds,
    288             inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
    289             4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
    290             outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
    291             pErrorCode);
    292 
    293         // swap the Script_Extensions data
    294         // SCX const uint16_t scriptExtensions[2*(i7-i6)];
    295         ds->swapArray16(ds,
    296             inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
    297             4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
    298             outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
    299             pErrorCode);
    300     }
    301 
    302     /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
    303     return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
    304 }
    305 
    306 /* Unicode case mapping data swapping --------------------------------------- */
    307 
    308 static int32_t U_CALLCONV
    309 ucase_swap(const UDataSwapper *ds,
    310            const void *inData, int32_t length, void *outData,
    311            UErrorCode *pErrorCode) {
    312     const UDataInfo *pInfo;
    313     int32_t headerSize;
    314 
    315     const uint8_t *inBytes;
    316     uint8_t *outBytes;
    317 
    318     const int32_t *inIndexes;
    319     int32_t indexes[16];
    320 
    321     int32_t i, offset, count, size;
    322 
    323     /* udata_swapDataHeader checks the arguments */
    324     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    325     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    326         return 0;
    327     }
    328 
    329     /* check data format and format version */
    330     pInfo=(const UDataInfo *)((const char *)inData+4);
    331     if(!(
    332         pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
    333         pInfo->dataFormat[1]==UCASE_FMT_1 &&
    334         pInfo->dataFormat[2]==UCASE_FMT_2 &&
    335         pInfo->dataFormat[3]==UCASE_FMT_3 &&
    336         ((pInfo->formatVersion[0]==1 &&
    337           pInfo->formatVersion[2]==UTRIE_SHIFT &&
    338           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
    339          pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3)
    340     )) {
    341         udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
    342                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    343                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    344                          pInfo->formatVersion[0]);
    345         *pErrorCode=U_UNSUPPORTED_ERROR;
    346         return 0;
    347     }
    348 
    349     inBytes=(const uint8_t *)inData+headerSize;
    350     outBytes=(uint8_t *)outData+headerSize;
    351 
    352     inIndexes=(const int32_t *)inBytes;
    353 
    354     if(length>=0) {
    355         length-=headerSize;
    356         if(length<16*4) {
    357             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
    358                              length);
    359             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    360             return 0;
    361         }
    362     }
    363 
    364     /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
    365     for(i=0; i<16; ++i) {
    366         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    367     }
    368 
    369     /* get the total length of the data */
    370     size=indexes[UCASE_IX_LENGTH];
    371 
    372     if(length>=0) {
    373         if(length<size) {
    374             udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
    375                              length);
    376             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    377             return 0;
    378         }
    379 
    380         /* copy the data for inaccessible bytes */
    381         if(inBytes!=outBytes) {
    382             uprv_memcpy(outBytes, inBytes, size);
    383         }
    384 
    385         offset=0;
    386 
    387         /* swap the int32_t indexes[] */
    388         count=indexes[UCASE_IX_INDEX_TOP]*4;
    389         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    390         offset+=count;
    391 
    392         /* swap the UTrie */
    393         count=indexes[UCASE_IX_TRIE_SIZE];
    394         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    395         offset+=count;
    396 
    397         /* swap the uint16_t exceptions[] and unfold[] */
    398         count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
    399         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    400         offset+=count;
    401 
    402         U_ASSERT(offset==size);
    403     }
    404 
    405     return headerSize+size;
    406 }
    407 
    408 /* Unicode bidi/shaping data swapping --------------------------------------- */
    409 
    410 static int32_t U_CALLCONV
    411 ubidi_swap(const UDataSwapper *ds,
    412            const void *inData, int32_t length, void *outData,
    413            UErrorCode *pErrorCode) {
    414     const UDataInfo *pInfo;
    415     int32_t headerSize;
    416 
    417     const uint8_t *inBytes;
    418     uint8_t *outBytes;
    419 
    420     const int32_t *inIndexes;
    421     int32_t indexes[16];
    422 
    423     int32_t i, offset, count, size;
    424 
    425     /* udata_swapDataHeader checks the arguments */
    426     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    427     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    428         return 0;
    429     }
    430 
    431     /* check data format and format version */
    432     pInfo=(const UDataInfo *)((const char *)inData+4);
    433     if(!(
    434         pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
    435         pInfo->dataFormat[1]==UBIDI_FMT_1 &&
    436         pInfo->dataFormat[2]==UBIDI_FMT_2 &&
    437         pInfo->dataFormat[3]==UBIDI_FMT_3 &&
    438         ((pInfo->formatVersion[0]==1 &&
    439           pInfo->formatVersion[2]==UTRIE_SHIFT &&
    440           pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
    441          pInfo->formatVersion[0]==2)
    442     )) {
    443         udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
    444                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    445                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    446                          pInfo->formatVersion[0]);
    447         *pErrorCode=U_UNSUPPORTED_ERROR;
    448         return 0;
    449     }
    450 
    451     inBytes=(const uint8_t *)inData+headerSize;
    452     outBytes=(uint8_t *)outData+headerSize;
    453 
    454     inIndexes=(const int32_t *)inBytes;
    455 
    456     if(length>=0) {
    457         length-=headerSize;
    458         if(length<16*4) {
    459             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
    460                              length);
    461             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    462             return 0;
    463         }
    464     }
    465 
    466     /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
    467     for(i=0; i<16; ++i) {
    468         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    469     }
    470 
    471     /* get the total length of the data */
    472     size=indexes[UBIDI_IX_LENGTH];
    473 
    474     if(length>=0) {
    475         if(length<size) {
    476             udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
    477                              length);
    478             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    479             return 0;
    480         }
    481 
    482         /* copy the data for inaccessible bytes */
    483         if(inBytes!=outBytes) {
    484             uprv_memcpy(outBytes, inBytes, size);
    485         }
    486 
    487         offset=0;
    488 
    489         /* swap the int32_t indexes[] */
    490         count=indexes[UBIDI_IX_INDEX_TOP]*4;
    491         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    492         offset+=count;
    493 
    494         /* swap the UTrie */
    495         count=indexes[UBIDI_IX_TRIE_SIZE];
    496         utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    497         offset+=count;
    498 
    499         /* swap the uint32_t mirrors[] */
    500         count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
    501         ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    502         offset+=count;
    503 
    504         /* just skip the uint8_t jgArray[] and jgArray2[] */
    505         count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
    506         offset+=count;
    507         count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
    508         offset+=count;
    509 
    510         U_ASSERT(offset==size);
    511     }
    512 
    513     return headerSize+size;
    514 }
    515 
    516 /* Unicode normalization data swapping -------------------------------------- */
    517 
    518 #if !UCONFIG_NO_NORMALIZATION
    519 
    520 static int32_t U_CALLCONV
    521 unorm_swap(const UDataSwapper *ds,
    522            const void *inData, int32_t length, void *outData,
    523            UErrorCode *pErrorCode) {
    524     const UDataInfo *pInfo;
    525     int32_t headerSize;
    526 
    527     const uint8_t *inBytes;
    528     uint8_t *outBytes;
    529 
    530     const int32_t *inIndexes;
    531     int32_t indexes[32];
    532 
    533     int32_t i, offset, count, size;
    534 
    535     /* udata_swapDataHeader checks the arguments */
    536     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    537     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    538         return 0;
    539     }
    540 
    541     /* check data format and format version */
    542     pInfo=(const UDataInfo *)((const char *)inData+4);
    543     if(!(
    544         pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
    545         pInfo->dataFormat[1]==0x6f &&
    546         pInfo->dataFormat[2]==0x72 &&
    547         pInfo->dataFormat[3]==0x6d &&
    548         pInfo->formatVersion[0]==2
    549     )) {
    550         udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
    551                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    552                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    553                          pInfo->formatVersion[0]);
    554         *pErrorCode=U_UNSUPPORTED_ERROR;
    555         return 0;
    556     }
    557 
    558     inBytes=(const uint8_t *)inData+headerSize;
    559     outBytes=(uint8_t *)outData+headerSize;
    560 
    561     inIndexes=(const int32_t *)inBytes;
    562 
    563     if(length>=0) {
    564         length-=headerSize;
    565         if(length<32*4) {
    566             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
    567                              length);
    568             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    569             return 0;
    570         }
    571     }
    572 
    573     /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
    574     for(i=0; i<32; ++i) {
    575         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    576     }
    577 
    578     /* calculate the total length of the data */
    579     size=
    580         32*4+ /* size of indexes[] */
    581         indexes[_NORM_INDEX_TRIE_SIZE]+
    582         indexes[_NORM_INDEX_UCHAR_COUNT]*2+
    583         indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
    584         indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
    585         indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
    586         indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    587 
    588     if(length>=0) {
    589         if(length<size) {
    590             udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
    591                              length);
    592             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    593             return 0;
    594         }
    595 
    596         /* copy the data for inaccessible bytes */
    597         if(inBytes!=outBytes) {
    598             uprv_memcpy(outBytes, inBytes, size);
    599         }
    600 
    601         offset=0;
    602 
    603         /* swap the indexes[] */
    604         count=32*4;
    605         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
    606         offset+=count;
    607 
    608         /* swap the main UTrie */
    609         count=indexes[_NORM_INDEX_TRIE_SIZE];
    610         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    611         offset+=count;
    612 
    613         /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
    614         count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
    615         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    616         offset+=count;
    617 
    618         /* swap the FCD UTrie */
    619         count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
    620         if(count!=0) {
    621             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    622             offset+=count;
    623         }
    624 
    625         /* swap the aux UTrie */
    626         count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
    627         if(count!=0) {
    628             utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    629             offset+=count;
    630         }
    631 
    632         /* swap the uint16_t combiningTable[] */
    633         count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
    634         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
    635         offset+=count;
    636     }
    637 
    638     return headerSize+size;
    639 }
    640 
    641 #endif
    642 
    643 /* Swap 'Test' data from gentest */
    644 static int32_t U_CALLCONV
    645 test_swap(const UDataSwapper *ds,
    646            const void *inData, int32_t length, void *outData,
    647            UErrorCode *pErrorCode) {
    648     const UDataInfo *pInfo;
    649     int32_t headerSize;
    650 
    651     const uint8_t *inBytes;
    652     uint8_t *outBytes;
    653 
    654     int32_t offset;
    655 
    656     /* udata_swapDataHeader checks the arguments */
    657     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    658     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    659         udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL");
    660         return 0;
    661     }
    662 
    663     /* check data format and format version */
    664     pInfo=(const UDataInfo *)((const char *)inData+4);
    665     if(!(
    666         pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Norm" */
    667         pInfo->dataFormat[1]==0x65 &&
    668         pInfo->dataFormat[2]==0x73 &&
    669         pInfo->dataFormat[3]==0x74 &&
    670         pInfo->formatVersion[0]==1
    671     )) {
    672         udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
    673                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    674                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    675                          pInfo->formatVersion[0]);
    676         *pErrorCode=U_UNSUPPORTED_ERROR;
    677         return 0;
    678     }
    679 
    680     inBytes=(const uint8_t *)inData+headerSize;
    681     outBytes=(uint8_t *)outData+headerSize;
    682 
    683     int32_t size16 = 2; // 16bit plus padding
    684     int32_t sizeStr = 5; // 4 char inv-str plus null
    685     int32_t size = size16 + sizeStr;
    686 
    687     if(length>=0) {
    688         if(length<size) {
    689             udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
    690                              length, size);
    691             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    692             return 0;
    693         }
    694 
    695 	offset =0;
    696 	/* swap a 1 entry array */
    697         ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
    698 	offset+=size16;
    699 	ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
    700     }
    701 
    702     return headerSize+size;
    703 }
    704 
    705 /* swap any data (except a .dat package) ------------------------------------ */
    706 
    707 static const struct {
    708     uint8_t dataFormat[4];
    709     UDataSwapFn *swapFn;
    710 } swapFns[]={
    711     { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
    712 #if !UCONFIG_NO_LEGACY_CONVERSION
    713     { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
    714 #endif
    715 #if !UCONFIG_NO_CONVERSION
    716     { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
    717 #endif
    718 #if !UCONFIG_NO_IDNA
    719     { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
    720 #endif
    721     /* insert data formats here, descending by expected frequency of occurrence */
    722     { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
    723 
    724     { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
    725                                   ucase_swap },         /* dataFormat="cAsE" */
    726 
    727     { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
    728                                   ubidi_swap },         /* dataFormat="BiDi" */
    729 
    730 #if !UCONFIG_NO_NORMALIZATION
    731     { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
    732     { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
    733 #endif
    734 #if !UCONFIG_NO_COLLATION
    735     { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
    736     { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
    737 #endif
    738 #if !UCONFIG_NO_BREAK_ITERATION
    739     { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
    740     { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
    741 #endif
    742     { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
    743     { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
    744 #if !UCONFIG_NO_NORMALIZATION
    745     { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
    746 #endif
    747     { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
    748 };
    749 
    750 U_CAPI int32_t U_EXPORT2
    751 udata_swap(const UDataSwapper *ds,
    752            const void *inData, int32_t length, void *outData,
    753            UErrorCode *pErrorCode) {
    754     char dataFormatChars[4];
    755     const UDataInfo *pInfo;
    756     int32_t i, swappedLength;
    757 
    758     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    759         return 0;
    760     }
    761 
    762     /*
    763      * Preflight the header first; checks for illegal arguments, too.
    764      * Do not swap the header right away because the format-specific swapper
    765      * will swap it, get the headerSize again, and also use the header
    766      * information. Otherwise we would have to pass some of the information
    767      * and not be able to use the UDataSwapFn signature.
    768      */
    769     udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode);
    770 
    771     /*
    772      * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
    773      * then we could check here for further known magic values and structures.
    774      */
    775     if(U_FAILURE(*pErrorCode)) {
    776         return 0; /* the data format was not recognized */
    777     }
    778 
    779     pInfo=(const UDataInfo *)((const char *)inData+4);
    780 
    781     {
    782         /* convert the data format from ASCII to Unicode to the system charset */
    783         UChar u[4]={
    784              pInfo->dataFormat[0], pInfo->dataFormat[1],
    785              pInfo->dataFormat[2], pInfo->dataFormat[3]
    786         };
    787 
    788         if(uprv_isInvariantUString(u, 4)) {
    789             u_UCharsToChars(u, dataFormatChars, 4);
    790         } else {
    791             dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
    792         }
    793     }
    794 
    795     /* dispatch to the swap function for the dataFormat */
    796     for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
    797         if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
    798             swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
    799 
    800             if(U_FAILURE(*pErrorCode)) {
    801                 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
    802                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
    803                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
    804                                  dataFormatChars[0], dataFormatChars[1],
    805                                  dataFormatChars[2], dataFormatChars[3],
    806                                  u_errorName(*pErrorCode));
    807             } else if(swappedLength<(length-15)) {
    808                 /* swapped less than expected */
    809                 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
    810                                  swappedLength, length,
    811                                  pInfo->dataFormat[0], pInfo->dataFormat[1],
    812                                  pInfo->dataFormat[2], pInfo->dataFormat[3],
    813                                  dataFormatChars[0], dataFormatChars[1],
    814                                  dataFormatChars[2], dataFormatChars[3],
    815                                  u_errorName(*pErrorCode));
    816             }
    817 
    818             return swappedLength;
    819         }
    820     }
    821 
    822     /* the dataFormat was not recognized */
    823     udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
    824                      pInfo->dataFormat[0], pInfo->dataFormat[1],
    825                      pInfo->dataFormat[2], pInfo->dataFormat[3],
    826                      dataFormatChars[0], dataFormatChars[1],
    827                      dataFormatChars[2], dataFormatChars[3]);
    828 
    829     *pErrorCode=U_UNSUPPORTED_ERROR;
    830     return 0;
    831 }
    832