Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2003-2014, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  ucol_swp.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003sep10
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Swap collation binaries.
     17 */
     18 
     19 #include "unicode/udata.h" /* UDataInfo */
     20 #include "utrie.h"
     21 #include "utrie2.h"
     22 #include "udataswp.h"
     23 #include "cmemory.h"
     24 #include "ucol_data.h"
     25 #include "ucol_swp.h"
     26 
     27 /* swapping ----------------------------------------------------------------- */
     28 
     29 /*
     30  * This performs data swapping for a folded trie (see utrie.c for details).
     31  */
     32 
     33 U_CAPI int32_t U_EXPORT2
     34 utrie_swap(const UDataSwapper *ds,
     35            const void *inData, int32_t length, void *outData,
     36            UErrorCode *pErrorCode) {
     37     const UTrieHeader *inTrie;
     38     UTrieHeader trie;
     39     int32_t size;
     40     UBool dataIs32;
     41 
     42     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
     43         return 0;
     44     }
     45     if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
     46         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
     47         return 0;
     48     }
     49 
     50     /* setup and swapping */
     51     if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
     52         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     53         return 0;
     54     }
     55 
     56     inTrie=(const UTrieHeader *)inData;
     57     trie.signature=ds->readUInt32(inTrie->signature);
     58     trie.options=ds->readUInt32(inTrie->options);
     59     trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
     60     trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
     61 
     62     if( trie.signature!=0x54726965 ||
     63         (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
     64         ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
     65         trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
     66         (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
     67         trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
     68         (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
     69         ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
     70     ) {
     71         *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
     72         return 0;
     73     }
     74 
     75     dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
     76     size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
     77 
     78     if(length>=0) {
     79         UTrieHeader *outTrie;
     80 
     81         if(length<size) {
     82             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
     83             return 0;
     84         }
     85 
     86         outTrie=(UTrieHeader *)outData;
     87 
     88         /* swap the header */
     89         ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
     90 
     91         /* swap the index and the data */
     92         if(dataIs32) {
     93             ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
     94             ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
     95                                      (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
     96         } else {
     97             ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
     98         }
     99     }
    100 
    101     return size;
    102 }
    103 
    104 #if !UCONFIG_NO_COLLATION
    105 
    106 U_CAPI UBool U_EXPORT2
    107 ucol_looksLikeCollationBinary(const UDataSwapper *ds,
    108                               const void *inData, int32_t length) {
    109     if(ds==NULL || inData==NULL || length<-1) {
    110         return FALSE;
    111     }
    112 
    113     // First check for format version 4+ which has a standard data header.
    114     UErrorCode errorCode=U_ZERO_ERROR;
    115     (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);
    116     if(U_SUCCESS(errorCode)) {
    117         const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
    118         if(info.dataFormat[0]==0x55 &&   // dataFormat="UCol"
    119                 info.dataFormat[1]==0x43 &&
    120                 info.dataFormat[2]==0x6f &&
    121                 info.dataFormat[3]==0x6c) {
    122             return TRUE;
    123         }
    124     }
    125 
    126     // Else check for format version 3.
    127     const UCATableHeader *inHeader=(const UCATableHeader *)inData;
    128 
    129     /*
    130      * The collation binary must contain at least the UCATableHeader,
    131      * starting with its size field.
    132      * sizeof(UCATableHeader)==42*4 in ICU 2.8
    133      * check the length against the header size before reading the size field
    134      */
    135     UCATableHeader header;
    136     uprv_memset(&header, 0, sizeof(header));
    137     if(length<0) {
    138         header.size=udata_readInt32(ds, inHeader->size);
    139     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
    140         return FALSE;
    141     }
    142 
    143     header.magic=ds->readUInt32(inHeader->magic);
    144     if(!(
    145         header.magic==UCOL_HEADER_MAGIC &&
    146         inHeader->formatVersion[0]==3 /*&&
    147         inHeader->formatVersion[1]>=0*/
    148     )) {
    149         return FALSE;
    150     }
    151 
    152     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
    153         return FALSE;
    154     }
    155 
    156     return TRUE;
    157 }
    158 
    159 namespace {
    160 
    161 /* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
    162 int32_t
    163 swapFormatVersion3(const UDataSwapper *ds,
    164                    const void *inData, int32_t length, void *outData,
    165                    UErrorCode *pErrorCode) {
    166     const uint8_t *inBytes;
    167     uint8_t *outBytes;
    168 
    169     const UCATableHeader *inHeader;
    170     UCATableHeader *outHeader;
    171     UCATableHeader header;
    172 
    173     uint32_t count;
    174 
    175     /* argument checking in case we were not called from ucol_swap() */
    176     if(U_FAILURE(*pErrorCode)) {
    177         return 0;
    178     }
    179     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
    180         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    181         return 0;
    182     }
    183 
    184     inBytes=(const uint8_t *)inData;
    185     outBytes=(uint8_t *)outData;
    186 
    187     inHeader=(const UCATableHeader *)inData;
    188     outHeader=(UCATableHeader *)outData;
    189 
    190     /*
    191      * The collation binary must contain at least the UCATableHeader,
    192      * starting with its size field.
    193      * sizeof(UCATableHeader)==42*4 in ICU 2.8
    194      * check the length against the header size before reading the size field
    195      */
    196     uprv_memset(&header, 0, sizeof(header));
    197     if(length<0) {
    198         header.size=udata_readInt32(ds, inHeader->size);
    199     } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
    200         udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
    201                          length);
    202         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    203         return 0;
    204     }
    205 
    206     header.magic=ds->readUInt32(inHeader->magic);
    207     if(!(
    208         header.magic==UCOL_HEADER_MAGIC &&
    209         inHeader->formatVersion[0]==3 /*&&
    210         inHeader->formatVersion[1]>=0*/
    211     )) {
    212         udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
    213                          header.magic,
    214                          inHeader->formatVersion[0], inHeader->formatVersion[1]);
    215         *pErrorCode=U_UNSUPPORTED_ERROR;
    216         return 0;
    217     }
    218 
    219     if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
    220         udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
    221                          inHeader->isBigEndian, inHeader->charSetFamily);
    222         *pErrorCode=U_INVALID_FORMAT_ERROR;
    223         return 0;
    224     }
    225 
    226     if(length>=0) {
    227         /* copy everything, takes care of data that needs no swapping */
    228         if(inBytes!=outBytes) {
    229             uprv_memcpy(outBytes, inBytes, header.size);
    230         }
    231 
    232         /* swap the necessary pieces in the order of their occurrence in the data */
    233 
    234         /* read more of the UCATableHeader (the size field was read above) */
    235         header.options=                 ds->readUInt32(inHeader->options);
    236         header.UCAConsts=               ds->readUInt32(inHeader->UCAConsts);
    237         header.contractionUCACombos=    ds->readUInt32(inHeader->contractionUCACombos);
    238         header.mappingPosition=         ds->readUInt32(inHeader->mappingPosition);
    239         header.expansion=               ds->readUInt32(inHeader->expansion);
    240         header.contractionIndex=        ds->readUInt32(inHeader->contractionIndex);
    241         header.contractionCEs=          ds->readUInt32(inHeader->contractionCEs);
    242         header.contractionSize=         ds->readUInt32(inHeader->contractionSize);
    243         header.endExpansionCE=          ds->readUInt32(inHeader->endExpansionCE);
    244         header.expansionCESize=         ds->readUInt32(inHeader->expansionCESize);
    245         header.endExpansionCECount=     udata_readInt32(ds, inHeader->endExpansionCECount);
    246         header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);
    247         header.scriptToLeadByte=        ds->readUInt32(inHeader->scriptToLeadByte);
    248         header.leadByteToScript=        ds->readUInt32(inHeader->leadByteToScript);
    249 
    250         /* swap the 32-bit integers in the header */
    251         ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),
    252                            outHeader, pErrorCode);
    253         ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),
    254                            &(outHeader->scriptToLeadByte), pErrorCode);
    255         /* set the output platform properties */
    256         outHeader->isBigEndian=ds->outIsBigEndian;
    257         outHeader->charSetFamily=ds->outCharset;
    258 
    259         /* swap the options */
    260         if(header.options!=0) {
    261             ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,
    262                                outBytes+header.options, pErrorCode);
    263         }
    264 
    265         /* swap the expansions */
    266         if(header.mappingPosition!=0 && header.expansion!=0) {
    267             if(header.contractionIndex!=0) {
    268                 /* expansions bounded by contractions */
    269                 count=header.contractionIndex-header.expansion;
    270             } else {
    271                 /* no contractions: expansions bounded by the main trie */
    272                 count=header.mappingPosition-header.expansion;
    273             }
    274             ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,
    275                                outBytes+header.expansion, pErrorCode);
    276         }
    277 
    278         /* swap the contractions */
    279         if(header.contractionSize!=0) {
    280             /* contractionIndex: UChar[] */
    281             ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,
    282                                outBytes+header.contractionIndex, pErrorCode);
    283 
    284             /* contractionCEs: CEs[] */
    285             ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,
    286                                outBytes+header.contractionCEs, pErrorCode);
    287         }
    288 
    289         /* swap the main trie */
    290         if(header.mappingPosition!=0) {
    291             count=header.endExpansionCE-header.mappingPosition;
    292             utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,
    293                           outBytes+header.mappingPosition, pErrorCode);
    294         }
    295 
    296         /* swap the max expansion table */
    297         if(header.endExpansionCECount!=0) {
    298             ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,
    299                                outBytes+header.endExpansionCE, pErrorCode);
    300         }
    301 
    302         /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */
    303 
    304         /* swap UCA constants */
    305         if(header.UCAConsts!=0) {
    306             /*
    307              * if UCAConsts!=0 then contractionUCACombos because we are swapping
    308              * the UCA data file, and we know that the UCA contains contractions
    309              */
    310             count=header.contractionUCACombos-header.UCAConsts;
    311             ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
    312                                outBytes+header.UCAConsts, pErrorCode);
    313         }
    314 
    315         /* swap UCA contractions */
    316         if(header.contractionUCACombosSize!=0) {
    317             count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;
    318             ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,
    319                                outBytes+header.contractionUCACombos, pErrorCode);
    320         }
    321 
    322         /* swap the script to lead bytes */
    323         if(header.scriptToLeadByte!=0) {
    324             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16
    325             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16
    326             ds->swapArray16(ds, inBytes+header.scriptToLeadByte,
    327                                 4 + (4 * indexCount) + (2 * dataCount),
    328                                 outBytes+header.scriptToLeadByte, pErrorCode);
    329         }
    330 
    331         /* swap the lead byte to scripts */
    332         if(header.leadByteToScript!=0) {
    333             int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16
    334             int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16
    335             ds->swapArray16(ds, inBytes+header.leadByteToScript,
    336                                 4 + (2 * indexCount) + (2 * dataCount),
    337                                 outBytes+header.leadByteToScript, pErrorCode);
    338         }
    339     }
    340 
    341     return header.size;
    342 }
    343 
    344 // swap formatVersion 4 ---------------------------------------------------- ***
    345 
    346 // The following are copied from CollationDataReader, trading an awkward copy of constants
    347 // for an awkward relocation of the i18n collationdatareader.h file into the common library.
    348 // Keep them in sync!
    349 
    350 enum {
    351     IX_INDEXES_LENGTH,  // 0
    352     IX_OPTIONS,
    353     IX_RESERVED2,
    354     IX_RESERVED3,
    355 
    356     IX_JAMO_CE32S_START,  // 4
    357     IX_REORDER_CODES_OFFSET,
    358     IX_REORDER_TABLE_OFFSET,
    359     IX_TRIE_OFFSET,
    360 
    361     IX_RESERVED8_OFFSET,  // 8
    362     IX_CES_OFFSET,
    363     IX_RESERVED10_OFFSET,
    364     IX_CE32S_OFFSET,
    365 
    366     IX_ROOT_ELEMENTS_OFFSET,  // 12
    367     IX_CONTEXTS_OFFSET,
    368     IX_UNSAFE_BWD_OFFSET,
    369     IX_FAST_LATIN_TABLE_OFFSET,
    370 
    371     IX_SCRIPTS_OFFSET,  // 16
    372     IX_COMPRESSIBLE_BYTES_OFFSET,
    373     IX_RESERVED18_OFFSET,
    374     IX_TOTAL_SIZE
    375 };
    376 
    377 int32_t
    378 swapFormatVersion4(const UDataSwapper *ds,
    379                    const void *inData, int32_t length, void *outData,
    380                    UErrorCode &errorCode) {
    381     if(U_FAILURE(errorCode)) { return 0; }
    382 
    383     const uint8_t *inBytes=(const uint8_t *)inData;
    384     uint8_t *outBytes=(uint8_t *)outData;
    385 
    386     const int32_t *inIndexes=(const int32_t *)inBytes;
    387     int32_t indexes[IX_TOTAL_SIZE+1];
    388 
    389     // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
    390     if(0<=length && length<8) {
    391         udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
    392                          "(%d after header) for collation data\n",
    393                          length);
    394         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    395         return 0;
    396     }
    397 
    398     int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
    399     if(0<=length && length<(indexesLength*4)) {
    400         udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
    401                          "(%d after header) for collation data\n",
    402                          length);
    403         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    404         return 0;
    405     }
    406 
    407     for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
    408         indexes[i]=udata_readInt32(ds, inIndexes[i]);
    409     }
    410     for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
    411         indexes[i]=-1;
    412     }
    413     inIndexes=NULL;  // Make sure we do not accidentally use these instead of indexes[].
    414 
    415     // Get the total length of the data.
    416     int32_t size;
    417     if(indexesLength>IX_TOTAL_SIZE) {
    418         size=indexes[IX_TOTAL_SIZE];
    419     } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
    420         size=indexes[indexesLength-1];
    421     } else {
    422         size=indexesLength*4;
    423     }
    424     if(length<0) { return size; }
    425 
    426     if(length<size) {
    427         udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
    428                          "(%d after header) for collation data\n",
    429                          length);
    430         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    431         return 0;
    432     }
    433 
    434     // Copy the data for inaccessible bytes and arrays of bytes.
    435     if(inBytes!=outBytes) {
    436         uprv_memcpy(outBytes, inBytes, size);
    437     }
    438 
    439     // Swap the int32_t indexes[].
    440     ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
    441 
    442     // The following is a modified version of CollationDataReader::read().
    443     // Here we use indexes[] not inIndexes[] because
    444     // the inIndexes[] may not be in this machine's endianness.
    445     int32_t index;  // one of the indexes[] slots
    446     int32_t offset;  // byte offset for the index part
    447     // int32_t length;  // number of bytes in the index part
    448 
    449     index = IX_REORDER_CODES_OFFSET;
    450     offset = indexes[index];
    451     length = indexes[index + 1] - offset;
    452     if(length > 0) {
    453         ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    454     }
    455 
    456     // Skip the IX_REORDER_TABLE_OFFSET byte array.
    457 
    458     index = IX_TRIE_OFFSET;
    459     offset = indexes[index];
    460     length = indexes[index + 1] - offset;
    461     if(length > 0) {
    462         utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    463     }
    464 
    465     index = IX_RESERVED8_OFFSET;
    466     offset = indexes[index];
    467     length = indexes[index + 1] - offset;
    468     if(length > 0) {
    469         udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length);
    470         errorCode = U_UNSUPPORTED_ERROR;
    471         return 0;
    472     }
    473 
    474     index = IX_CES_OFFSET;
    475     offset = indexes[index];
    476     length = indexes[index + 1] - offset;
    477     if(length > 0) {
    478         ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    479     }
    480 
    481     index = IX_RESERVED10_OFFSET;
    482     offset = indexes[index];
    483     length = indexes[index + 1] - offset;
    484     if(length > 0) {
    485         udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length);
    486         errorCode = U_UNSUPPORTED_ERROR;
    487         return 0;
    488     }
    489 
    490     index = IX_CE32S_OFFSET;
    491     offset = indexes[index];
    492     length = indexes[index + 1] - offset;
    493     if(length > 0) {
    494         ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    495     }
    496 
    497     index = IX_ROOT_ELEMENTS_OFFSET;
    498     offset = indexes[index];
    499     length = indexes[index + 1] - offset;
    500     if(length > 0) {
    501         ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    502     }
    503 
    504     index = IX_CONTEXTS_OFFSET;
    505     offset = indexes[index];
    506     length = indexes[index + 1] - offset;
    507     if(length > 0) {
    508         ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    509     }
    510 
    511     index = IX_UNSAFE_BWD_OFFSET;
    512     offset = indexes[index];
    513     length = indexes[index + 1] - offset;
    514     if(length > 0) {
    515         ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    516     }
    517 
    518     index = IX_FAST_LATIN_TABLE_OFFSET;
    519     offset = indexes[index];
    520     length = indexes[index + 1] - offset;
    521     if(length > 0) {
    522         ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    523     }
    524 
    525     index = IX_SCRIPTS_OFFSET;
    526     offset = indexes[index];
    527     length = indexes[index + 1] - offset;
    528     if(length > 0) {
    529         ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
    530     }
    531 
    532     // Skip the  IX_COMPRESSIBLE_BYTES_OFFSET byte array.
    533 
    534     index = IX_RESERVED18_OFFSET;
    535     offset = indexes[index];
    536     length = indexes[index + 1] - offset;
    537     if(length > 0) {
    538         udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length);
    539         errorCode = U_UNSUPPORTED_ERROR;
    540         return 0;
    541     }
    542 
    543     return size;
    544 }
    545 
    546 }  // namespace
    547 
    548 /* swap ICU collation data like ucadata.icu */
    549 U_CAPI int32_t U_EXPORT2
    550 ucol_swap(const UDataSwapper *ds,
    551           const void *inData, int32_t length, void *outData,
    552           UErrorCode *pErrorCode) {
    553     if(U_FAILURE(*pErrorCode)) { return 0; }
    554 
    555     /* udata_swapDataHeader checks the arguments */
    556     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    557     if(U_FAILURE(*pErrorCode)) {
    558         // Try to swap the old format version which did not have a standard data header.
    559         *pErrorCode=U_ZERO_ERROR;
    560         return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
    561     }
    562 
    563     /* check data format and format version */
    564     const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
    565     if(!(
    566         info.dataFormat[0]==0x55 &&   // dataFormat="UCol"
    567         info.dataFormat[1]==0x43 &&
    568         info.dataFormat[2]==0x6f &&
    569         info.dataFormat[3]==0x6c &&
    570         (info.formatVersion[0]==3 || info.formatVersion[0]==4)
    571     )) {
    572         udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
    573                          "(format version %02x.%02x) is not recognized as collation data\n",
    574                          info.dataFormat[0], info.dataFormat[1],
    575                          info.dataFormat[2], info.dataFormat[3],
    576                          info.formatVersion[0], info.formatVersion[1]);
    577         *pErrorCode=U_UNSUPPORTED_ERROR;
    578         return 0;
    579     }
    580 
    581     inData=(const char *)inData+headerSize;
    582     if(length>=0) { length-=headerSize; }
    583     outData=(char *)outData+headerSize;
    584     int32_t collationSize;
    585     if(info.formatVersion[0]>=4) {
    586         collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
    587     } else {
    588         collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
    589     }
    590     if(U_SUCCESS(*pErrorCode)) {
    591         return headerSize+collationSize;
    592     } else {
    593         return 0;
    594     }
    595 }
    596 
    597 /* swap inverse UCA collation data (invuca.icu) */
    598 U_CAPI int32_t U_EXPORT2
    599 ucol_swapInverseUCA(const UDataSwapper *ds,
    600                     const void *inData, int32_t length, void *outData,
    601                     UErrorCode *pErrorCode) {
    602     const UDataInfo *pInfo;
    603     int32_t headerSize;
    604 
    605     const uint8_t *inBytes;
    606     uint8_t *outBytes;
    607 
    608     const InverseUCATableHeader *inHeader;
    609     InverseUCATableHeader *outHeader;
    610     InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };
    611 
    612     /* udata_swapDataHeader checks the arguments */
    613     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    614     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    615         return 0;
    616     }
    617 
    618     /* check data format and format version */
    619     pInfo=(const UDataInfo *)((const char *)inData+4);
    620     if(!(
    621         pInfo->dataFormat[0]==0x49 &&   /* dataFormat="InvC" */
    622         pInfo->dataFormat[1]==0x6e &&
    623         pInfo->dataFormat[2]==0x76 &&
    624         pInfo->dataFormat[3]==0x43 &&
    625         pInfo->formatVersion[0]==2 &&
    626         pInfo->formatVersion[1]>=1
    627     )) {
    628         udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",
    629                          pInfo->dataFormat[0], pInfo->dataFormat[1],
    630                          pInfo->dataFormat[2], pInfo->dataFormat[3],
    631                          pInfo->formatVersion[0], pInfo->formatVersion[1]);
    632         *pErrorCode=U_UNSUPPORTED_ERROR;
    633         return 0;
    634     }
    635 
    636     inBytes=(const uint8_t *)inData+headerSize;
    637     outBytes=(uint8_t *)outData+headerSize;
    638 
    639     inHeader=(const InverseUCATableHeader *)inBytes;
    640     outHeader=(InverseUCATableHeader *)outBytes;
    641 
    642     /*
    643      * The inverse UCA collation binary must contain at least the InverseUCATableHeader,
    644      * starting with its size field.
    645      * sizeof(UCATableHeader)==8*4 in ICU 2.8
    646      * check the length against the header size before reading the size field
    647      */
    648     if(length<0) {
    649         header.byteSize=udata_readInt32(ds, inHeader->byteSize);
    650     } else if(
    651         ((length-headerSize)<(8*4) ||
    652          (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))
    653     ) {
    654         udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",
    655                          length);
    656         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    657         return 0;
    658     }
    659 
    660     if(length>=0) {
    661         /* copy everything, takes care of data that needs no swapping */
    662         if(inBytes!=outBytes) {
    663             uprv_memcpy(outBytes, inBytes, header.byteSize);
    664         }
    665 
    666         /* swap the necessary pieces in the order of their occurrence in the data */
    667 
    668         /* read more of the InverseUCATableHeader (the byteSize field was read above) */
    669         header.tableSize=   ds->readUInt32(inHeader->tableSize);
    670         header.contsSize=   ds->readUInt32(inHeader->contsSize);
    671         header.table=       ds->readUInt32(inHeader->table);
    672         header.conts=       ds->readUInt32(inHeader->conts);
    673 
    674         /* swap the 32-bit integers in the header */
    675         ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);
    676 
    677         /* swap the inverse table; tableSize counts uint32_t[3] rows */
    678         ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,
    679                            outBytes+header.table, pErrorCode);
    680 
    681         /* swap the continuation table; contsSize counts UChars */
    682         ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,
    683                            outBytes+header.conts, pErrorCode);
    684     }
    685 
    686     return headerSize+header.byteSize;
    687 }
    688 
    689 #endif /* #if !UCONFIG_NO_COLLATION */
    690