1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: ucol_swp.cpp 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003sep10 16 * created by: Markus W. Scherer 17 * 18 * Swap collation binaries. 19 */ 20 21 #include "unicode/udata.h" /* UDataInfo */ 22 #include "utrie.h" 23 #include "utrie2.h" 24 #include "udataswp.h" 25 #include "cmemory.h" 26 #include "ucol_data.h" 27 #include "ucol_swp.h" 28 29 /* swapping ----------------------------------------------------------------- */ 30 31 /* 32 * This performs data swapping for a folded trie (see utrie.c for details). 33 */ 34 35 U_CAPI int32_t U_EXPORT2 36 utrie_swap(const UDataSwapper *ds, 37 const void *inData, int32_t length, void *outData, 38 UErrorCode *pErrorCode) { 39 const UTrieHeader *inTrie; 40 UTrieHeader trie; 41 int32_t size; 42 UBool dataIs32; 43 44 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 45 return 0; 46 } 47 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { 48 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 49 return 0; 50 } 51 52 /* setup and swapping */ 53 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { 54 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 55 return 0; 56 } 57 58 inTrie=(const UTrieHeader *)inData; 59 trie.signature=ds->readUInt32(inTrie->signature); 60 trie.options=ds->readUInt32(inTrie->options); 61 trie.indexLength=udata_readInt32(ds, inTrie->indexLength); 62 trie.dataLength=udata_readInt32(ds, inTrie->dataLength); 63 64 if( trie.signature!=0x54726965 || 65 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || 66 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || 67 trie.indexLength<UTRIE_BMP_INDEX_LENGTH || 68 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || 69 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || 70 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || 71 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) 72 ) { 73 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ 74 return 0; 75 } 76 77 dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); 78 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); 79 80 if(length>=0) { 81 UTrieHeader *outTrie; 82 83 if(length<size) { 84 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 85 return 0; 86 } 87 88 outTrie=(UTrieHeader *)outData; 89 90 /* swap the header */ 91 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); 92 93 /* swap the index and the data */ 94 if(dataIs32) { 95 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); 96 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, 97 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); 98 } else { 99 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); 100 } 101 } 102 103 return size; 104 } 105 106 #if !UCONFIG_NO_COLLATION 107 108 U_CAPI UBool U_EXPORT2 109 ucol_looksLikeCollationBinary(const UDataSwapper *ds, 110 const void *inData, int32_t length) { 111 if(ds==NULL || inData==NULL || length<-1) { 112 return FALSE; 113 } 114 115 // First check for format version 4+ which has a standard data header. 116 UErrorCode errorCode=U_ZERO_ERROR; 117 (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode); 118 if(U_SUCCESS(errorCode)) { 119 const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); 120 if(info.dataFormat[0]==0x55 && // dataFormat="UCol" 121 info.dataFormat[1]==0x43 && 122 info.dataFormat[2]==0x6f && 123 info.dataFormat[3]==0x6c) { 124 return TRUE; 125 } 126 } 127 128 // Else check for format version 3. 129 const UCATableHeader *inHeader=(const UCATableHeader *)inData; 130 131 /* 132 * The collation binary must contain at least the UCATableHeader, 133 * starting with its size field. 134 * sizeof(UCATableHeader)==42*4 in ICU 2.8 135 * check the length against the header size before reading the size field 136 */ 137 UCATableHeader header; 138 uprv_memset(&header, 0, sizeof(header)); 139 if(length<0) { 140 header.size=udata_readInt32(ds, inHeader->size); 141 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { 142 return FALSE; 143 } 144 145 header.magic=ds->readUInt32(inHeader->magic); 146 if(!( 147 header.magic==UCOL_HEADER_MAGIC && 148 inHeader->formatVersion[0]==3 /*&& 149 inHeader->formatVersion[1]>=0*/ 150 )) { 151 return FALSE; 152 } 153 154 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { 155 return FALSE; 156 } 157 158 return TRUE; 159 } 160 161 namespace { 162 163 /* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */ 164 int32_t 165 swapFormatVersion3(const UDataSwapper *ds, 166 const void *inData, int32_t length, void *outData, 167 UErrorCode *pErrorCode) { 168 const uint8_t *inBytes; 169 uint8_t *outBytes; 170 171 const UCATableHeader *inHeader; 172 UCATableHeader *outHeader; 173 UCATableHeader header; 174 175 uint32_t count; 176 177 /* argument checking in case we were not called from ucol_swap() */ 178 if(U_FAILURE(*pErrorCode)) { 179 return 0; 180 } 181 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { 182 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 183 return 0; 184 } 185 186 inBytes=(const uint8_t *)inData; 187 outBytes=(uint8_t *)outData; 188 189 inHeader=(const UCATableHeader *)inData; 190 outHeader=(UCATableHeader *)outData; 191 192 /* 193 * The collation binary must contain at least the UCATableHeader, 194 * starting with its size field. 195 * sizeof(UCATableHeader)==42*4 in ICU 2.8 196 * check the length against the header size before reading the size field 197 */ 198 uprv_memset(&header, 0, sizeof(header)); 199 if(length<0) { 200 header.size=udata_readInt32(ds, inHeader->size); 201 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { 202 udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n", 203 length); 204 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 205 return 0; 206 } 207 208 header.magic=ds->readUInt32(inHeader->magic); 209 if(!( 210 header.magic==UCOL_HEADER_MAGIC && 211 inHeader->formatVersion[0]==3 /*&& 212 inHeader->formatVersion[1]>=0*/ 213 )) { 214 udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n", 215 header.magic, 216 inHeader->formatVersion[0], inHeader->formatVersion[1]); 217 *pErrorCode=U_UNSUPPORTED_ERROR; 218 return 0; 219 } 220 221 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { 222 udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n", 223 inHeader->isBigEndian, inHeader->charSetFamily); 224 *pErrorCode=U_INVALID_FORMAT_ERROR; 225 return 0; 226 } 227 228 if(length>=0) { 229 /* copy everything, takes care of data that needs no swapping */ 230 if(inBytes!=outBytes) { 231 uprv_memcpy(outBytes, inBytes, header.size); 232 } 233 234 /* swap the necessary pieces in the order of their occurrence in the data */ 235 236 /* read more of the UCATableHeader (the size field was read above) */ 237 header.options= ds->readUInt32(inHeader->options); 238 header.UCAConsts= ds->readUInt32(inHeader->UCAConsts); 239 header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos); 240 header.mappingPosition= ds->readUInt32(inHeader->mappingPosition); 241 header.expansion= ds->readUInt32(inHeader->expansion); 242 header.contractionIndex= ds->readUInt32(inHeader->contractionIndex); 243 header.contractionCEs= ds->readUInt32(inHeader->contractionCEs); 244 header.contractionSize= ds->readUInt32(inHeader->contractionSize); 245 header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE); 246 header.expansionCESize= ds->readUInt32(inHeader->expansionCESize); 247 header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount); 248 header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize); 249 header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte); 250 header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript); 251 252 /* swap the 32-bit integers in the header */ 253 ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader), 254 outHeader, pErrorCode); 255 ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript), 256 &(outHeader->scriptToLeadByte), pErrorCode); 257 /* set the output platform properties */ 258 outHeader->isBigEndian=ds->outIsBigEndian; 259 outHeader->charSetFamily=ds->outCharset; 260 261 /* swap the options */ 262 if(header.options!=0) { 263 ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options, 264 outBytes+header.options, pErrorCode); 265 } 266 267 /* swap the expansions */ 268 if(header.mappingPosition!=0 && header.expansion!=0) { 269 if(header.contractionIndex!=0) { 270 /* expansions bounded by contractions */ 271 count=header.contractionIndex-header.expansion; 272 } else { 273 /* no contractions: expansions bounded by the main trie */ 274 count=header.mappingPosition-header.expansion; 275 } 276 ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count, 277 outBytes+header.expansion, pErrorCode); 278 } 279 280 /* swap the contractions */ 281 if(header.contractionSize!=0) { 282 /* contractionIndex: UChar[] */ 283 ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2, 284 outBytes+header.contractionIndex, pErrorCode); 285 286 /* contractionCEs: CEs[] */ 287 ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4, 288 outBytes+header.contractionCEs, pErrorCode); 289 } 290 291 /* swap the main trie */ 292 if(header.mappingPosition!=0) { 293 count=header.endExpansionCE-header.mappingPosition; 294 utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count, 295 outBytes+header.mappingPosition, pErrorCode); 296 } 297 298 /* swap the max expansion table */ 299 if(header.endExpansionCECount!=0) { 300 ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4, 301 outBytes+header.endExpansionCE, pErrorCode); 302 } 303 304 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */ 305 306 /* swap UCA constants */ 307 if(header.UCAConsts!=0) { 308 /* 309 * if UCAConsts!=0 then contractionUCACombos because we are swapping 310 * the UCA data file, and we know that the UCA contains contractions 311 */ 312 ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts, 313 outBytes+header.UCAConsts, pErrorCode); 314 } 315 316 /* swap UCA contractions */ 317 if(header.contractionUCACombosSize!=0) { 318 count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR; 319 ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count, 320 outBytes+header.contractionUCACombos, pErrorCode); 321 } 322 323 /* swap the script to lead bytes */ 324 if(header.scriptToLeadByte!=0) { 325 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16 326 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16 327 ds->swapArray16(ds, inBytes+header.scriptToLeadByte, 328 4 + (4 * indexCount) + (2 * dataCount), 329 outBytes+header.scriptToLeadByte, pErrorCode); 330 } 331 332 /* swap the lead byte to scripts */ 333 if(header.leadByteToScript!=0) { 334 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16 335 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16 336 ds->swapArray16(ds, inBytes+header.leadByteToScript, 337 4 + (2 * indexCount) + (2 * dataCount), 338 outBytes+header.leadByteToScript, pErrorCode); 339 } 340 } 341 342 return header.size; 343 } 344 345 // swap formatVersion 4 or 5 ----------------------------------------------- *** 346 347 // The following are copied from CollationDataReader, trading an awkward copy of constants 348 // for an awkward relocation of the i18n collationdatareader.h file into the common library. 349 // Keep them in sync! 350 351 enum { 352 IX_INDEXES_LENGTH, // 0 353 IX_OPTIONS, 354 IX_RESERVED2, 355 IX_RESERVED3, 356 357 IX_JAMO_CE32S_START, // 4 358 IX_REORDER_CODES_OFFSET, 359 IX_REORDER_TABLE_OFFSET, 360 IX_TRIE_OFFSET, 361 362 IX_RESERVED8_OFFSET, // 8 363 IX_CES_OFFSET, 364 IX_RESERVED10_OFFSET, 365 IX_CE32S_OFFSET, 366 367 IX_ROOT_ELEMENTS_OFFSET, // 12 368 IX_CONTEXTS_OFFSET, 369 IX_UNSAFE_BWD_OFFSET, 370 IX_FAST_LATIN_TABLE_OFFSET, 371 372 IX_SCRIPTS_OFFSET, // 16 373 IX_COMPRESSIBLE_BYTES_OFFSET, 374 IX_RESERVED18_OFFSET, 375 IX_TOTAL_SIZE 376 }; 377 378 int32_t 379 swapFormatVersion4(const UDataSwapper *ds, 380 const void *inData, int32_t length, void *outData, 381 UErrorCode &errorCode) { 382 if(U_FAILURE(errorCode)) { return 0; } 383 384 const uint8_t *inBytes=(const uint8_t *)inData; 385 uint8_t *outBytes=(uint8_t *)outData; 386 387 const int32_t *inIndexes=(const int32_t *)inBytes; 388 int32_t indexes[IX_TOTAL_SIZE+1]; 389 390 // Need at least IX_INDEXES_LENGTH and IX_OPTIONS. 391 if(0<=length && length<8) { 392 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " 393 "(%d after header) for collation data\n", 394 length); 395 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 396 return 0; 397 } 398 399 int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]); 400 if(0<=length && length<(indexesLength*4)) { 401 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " 402 "(%d after header) for collation data\n", 403 length); 404 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 405 return 0; 406 } 407 408 for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) { 409 indexes[i]=udata_readInt32(ds, inIndexes[i]); 410 } 411 for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) { 412 indexes[i]=-1; 413 } 414 inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[]. 415 416 // Get the total length of the data. 417 int32_t size; 418 if(indexesLength>IX_TOTAL_SIZE) { 419 size=indexes[IX_TOTAL_SIZE]; 420 } else if(indexesLength>IX_REORDER_CODES_OFFSET) { 421 size=indexes[indexesLength-1]; 422 } else { 423 size=indexesLength*4; 424 } 425 if(length<0) { return size; } 426 427 if(length<size) { 428 udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " 429 "(%d after header) for collation data\n", 430 length); 431 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 432 return 0; 433 } 434 435 // Copy the data for inaccessible bytes and arrays of bytes. 436 if(inBytes!=outBytes) { 437 uprv_memcpy(outBytes, inBytes, size); 438 } 439 440 // Swap the int32_t indexes[]. 441 ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode); 442 443 // The following is a modified version of CollationDataReader::read(). 444 // Here we use indexes[] not inIndexes[] because 445 // the inIndexes[] may not be in this machine's endianness. 446 int32_t index; // one of the indexes[] slots 447 int32_t offset; // byte offset for the index part 448 // int32_t length; // number of bytes in the index part 449 450 index = IX_REORDER_CODES_OFFSET; 451 offset = indexes[index]; 452 length = indexes[index + 1] - offset; 453 if(length > 0) { 454 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); 455 } 456 457 // Skip the IX_REORDER_TABLE_OFFSET byte array. 458 459 index = IX_TRIE_OFFSET; 460 offset = indexes[index]; 461 length = indexes[index + 1] - offset; 462 if(length > 0) { 463 utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode); 464 } 465 466 index = IX_RESERVED8_OFFSET; 467 offset = indexes[index]; 468 length = indexes[index + 1] - offset; 469 if(length > 0) { 470 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length); 471 errorCode = U_UNSUPPORTED_ERROR; 472 return 0; 473 } 474 475 index = IX_CES_OFFSET; 476 offset = indexes[index]; 477 length = indexes[index + 1] - offset; 478 if(length > 0) { 479 ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode); 480 } 481 482 index = IX_RESERVED10_OFFSET; 483 offset = indexes[index]; 484 length = indexes[index + 1] - offset; 485 if(length > 0) { 486 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length); 487 errorCode = U_UNSUPPORTED_ERROR; 488 return 0; 489 } 490 491 index = IX_CE32S_OFFSET; 492 offset = indexes[index]; 493 length = indexes[index + 1] - offset; 494 if(length > 0) { 495 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); 496 } 497 498 index = IX_ROOT_ELEMENTS_OFFSET; 499 offset = indexes[index]; 500 length = indexes[index + 1] - offset; 501 if(length > 0) { 502 ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); 503 } 504 505 index = IX_CONTEXTS_OFFSET; 506 offset = indexes[index]; 507 length = indexes[index + 1] - offset; 508 if(length > 0) { 509 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); 510 } 511 512 index = IX_UNSAFE_BWD_OFFSET; 513 offset = indexes[index]; 514 length = indexes[index + 1] - offset; 515 if(length > 0) { 516 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); 517 } 518 519 index = IX_FAST_LATIN_TABLE_OFFSET; 520 offset = indexes[index]; 521 length = indexes[index + 1] - offset; 522 if(length > 0) { 523 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); 524 } 525 526 index = IX_SCRIPTS_OFFSET; 527 offset = indexes[index]; 528 length = indexes[index + 1] - offset; 529 if(length > 0) { 530 ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); 531 } 532 533 // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array. 534 535 index = IX_RESERVED18_OFFSET; 536 offset = indexes[index]; 537 length = indexes[index + 1] - offset; 538 if(length > 0) { 539 udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length); 540 errorCode = U_UNSUPPORTED_ERROR; 541 return 0; 542 } 543 544 return size; 545 } 546 547 } // namespace 548 549 /* swap ICU collation data like ucadata.icu */ 550 U_CAPI int32_t U_EXPORT2 551 ucol_swap(const UDataSwapper *ds, 552 const void *inData, int32_t length, void *outData, 553 UErrorCode *pErrorCode) { 554 if(U_FAILURE(*pErrorCode)) { return 0; } 555 556 /* udata_swapDataHeader checks the arguments */ 557 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 558 if(U_FAILURE(*pErrorCode)) { 559 // Try to swap the old format version which did not have a standard data header. 560 *pErrorCode=U_ZERO_ERROR; 561 return swapFormatVersion3(ds, inData, length, outData, pErrorCode); 562 } 563 564 /* check data format and format version */ 565 const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); 566 if(!( 567 info.dataFormat[0]==0x55 && // dataFormat="UCol" 568 info.dataFormat[1]==0x43 && 569 info.dataFormat[2]==0x6f && 570 info.dataFormat[3]==0x6c && 571 (3<=info.formatVersion[0] && info.formatVersion[0]<=5) 572 )) { 573 udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x " 574 "(format version %02x.%02x) is not recognized as collation data\n", 575 info.dataFormat[0], info.dataFormat[1], 576 info.dataFormat[2], info.dataFormat[3], 577 info.formatVersion[0], info.formatVersion[1]); 578 *pErrorCode=U_UNSUPPORTED_ERROR; 579 return 0; 580 } 581 582 inData=(const char *)inData+headerSize; 583 if(length>=0) { length-=headerSize; } 584 outData=(char *)outData+headerSize; 585 int32_t collationSize; 586 if(info.formatVersion[0]>=4) { 587 collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode); 588 } else { 589 collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode); 590 } 591 if(U_SUCCESS(*pErrorCode)) { 592 return headerSize+collationSize; 593 } else { 594 return 0; 595 } 596 } 597 598 /* swap inverse UCA collation data (invuca.icu) */ 599 U_CAPI int32_t U_EXPORT2 600 ucol_swapInverseUCA(const UDataSwapper *ds, 601 const void *inData, int32_t length, void *outData, 602 UErrorCode *pErrorCode) { 603 const UDataInfo *pInfo; 604 int32_t headerSize; 605 606 const uint8_t *inBytes; 607 uint8_t *outBytes; 608 609 const InverseUCATableHeader *inHeader; 610 InverseUCATableHeader *outHeader; 611 InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} }; 612 613 /* udata_swapDataHeader checks the arguments */ 614 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 615 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 616 return 0; 617 } 618 619 /* check data format and format version */ 620 pInfo=(const UDataInfo *)((const char *)inData+4); 621 if(!( 622 pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */ 623 pInfo->dataFormat[1]==0x6e && 624 pInfo->dataFormat[2]==0x76 && 625 pInfo->dataFormat[3]==0x43 && 626 pInfo->formatVersion[0]==2 && 627 pInfo->formatVersion[1]>=1 628 )) { 629 udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n", 630 pInfo->dataFormat[0], pInfo->dataFormat[1], 631 pInfo->dataFormat[2], pInfo->dataFormat[3], 632 pInfo->formatVersion[0], pInfo->formatVersion[1]); 633 *pErrorCode=U_UNSUPPORTED_ERROR; 634 return 0; 635 } 636 637 inBytes=(const uint8_t *)inData+headerSize; 638 outBytes=(uint8_t *)outData+headerSize; 639 640 inHeader=(const InverseUCATableHeader *)inBytes; 641 outHeader=(InverseUCATableHeader *)outBytes; 642 643 /* 644 * The inverse UCA collation binary must contain at least the InverseUCATableHeader, 645 * starting with its size field. 646 * sizeof(UCATableHeader)==8*4 in ICU 2.8 647 * check the length against the header size before reading the size field 648 */ 649 if(length<0) { 650 header.byteSize=udata_readInt32(ds, inHeader->byteSize); 651 } else if( 652 ((length-headerSize)<(8*4) || 653 (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))) 654 ) { 655 udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n", 656 length); 657 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 658 return 0; 659 } 660 661 if(length>=0) { 662 /* copy everything, takes care of data that needs no swapping */ 663 if(inBytes!=outBytes) { 664 uprv_memcpy(outBytes, inBytes, header.byteSize); 665 } 666 667 /* swap the necessary pieces in the order of their occurrence in the data */ 668 669 /* read more of the InverseUCATableHeader (the byteSize field was read above) */ 670 header.tableSize= ds->readUInt32(inHeader->tableSize); 671 header.contsSize= ds->readUInt32(inHeader->contsSize); 672 header.table= ds->readUInt32(inHeader->table); 673 header.conts= ds->readUInt32(inHeader->conts); 674 675 /* swap the 32-bit integers in the header */ 676 ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode); 677 678 /* swap the inverse table; tableSize counts uint32_t[3] rows */ 679 ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4, 680 outBytes+header.table, pErrorCode); 681 682 /* swap the continuation table; contsSize counts UChars */ 683 ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR, 684 outBytes+header.conts, pErrorCode); 685 } 686 687 return headerSize+header.byteSize; 688 } 689 690 #endif /* #if !UCONFIG_NO_COLLATION */ 691