1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2005-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: swapimpl.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005may05 14 * created by: Markus W. Scherer 15 * 16 * Data file swapping functions moved here from the common library 17 * because some data is hardcoded in ICU4C and needs not be swapped any more. 18 * Moving the functions here simplifies testing (for code coverage) because 19 * we need not jump through hoops (like adding snapshots of these files 20 * to testdata). 21 * 22 * The declarations for these functions remain in the internal header files 23 * in icu/source/common/ 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/putil.h" 28 #include "unicode/udata.h" 29 30 /* Explicit include statement for std_string.h is needed 31 * for compilation on certain platforms. (e.g. AIX/VACPP) 32 */ 33 #include "unicode/std_string.h" 34 35 #include "cmemory.h" 36 #include "cstring.h" 37 #include "uinvchar.h" 38 #include "uassert.h" 39 #include "uarrsort.h" 40 #include "ucmndata.h" 41 #include "udataswp.h" 42 43 /* swapping implementations in common */ 44 45 #include "uresdata.h" 46 #include "ucnv_io.h" 47 #include "uprops.h" 48 #include "ucase.h" 49 #include "ubidi_props.h" 50 #include "ucol_swp.h" 51 #include "ucnv_bld.h" 52 #include "unormimp.h" 53 #include "normalizer2impl.h" 54 #include "sprpimpl.h" 55 #include "propname.h" 56 #include "rbbidata.h" 57 #include "triedict.h" 58 #include "utrie2.h" 59 60 /* swapping implementations in i18n */ 61 62 #if !UCONFIG_NO_NORMALIZATION 63 #include "uspoof_impl.h" 64 #endif 65 66 67 /* definitions */ 68 69 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 70 71 /* Unicode property (value) aliases data swapping --------------------------- */ 72 73 static int32_t U_CALLCONV 74 upname_swap(const UDataSwapper *ds, 75 const void *inData, int32_t length, void *outData, 76 UErrorCode *pErrorCode) { 77 /* udata_swapDataHeader checks the arguments */ 78 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 79 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 80 return 0; 81 } 82 83 /* check data format and format version */ 84 const UDataInfo *pInfo= 85 reinterpret_cast<const UDataInfo *>( 86 reinterpret_cast<const char *>(inData)+4); 87 if(!( 88 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ 89 pInfo->dataFormat[1]==0x6e && 90 pInfo->dataFormat[2]==0x61 && 91 pInfo->dataFormat[3]==0x6d && 92 pInfo->formatVersion[0]==2 93 )) { 94 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", 95 pInfo->dataFormat[0], pInfo->dataFormat[1], 96 pInfo->dataFormat[2], pInfo->dataFormat[3], 97 pInfo->formatVersion[0]); 98 *pErrorCode=U_UNSUPPORTED_ERROR; 99 return 0; 100 } 101 102 const uint8_t *inBytes=reinterpret_cast<const uint8_t *>(inData)+headerSize; 103 uint8_t *outBytes=reinterpret_cast<uint8_t *>(outData)+headerSize; 104 105 if(length>=0) { 106 length-=headerSize; 107 // formatVersion 2 initially has indexes[8], 32 bytes. 108 if(length<32) { 109 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", 110 (int)length); 111 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 112 return 0; 113 } 114 } 115 116 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); 117 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); 118 if(length>=0) { 119 if(length<totalSize) { 120 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " 121 "for pnames.icu\n", 122 (int)length, (int)totalSize); 123 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 124 return 0; 125 } 126 127 int32_t numBytesIndexesAndValueMaps= 128 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); 129 130 // Swap the indexes[] and the valueMaps[]. 131 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); 132 133 // Copy the rest of the data. 134 if(inBytes!=outBytes) { 135 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, 136 inBytes+numBytesIndexesAndValueMaps, 137 totalSize-numBytesIndexesAndValueMaps); 138 } 139 140 // We need not swap anything else: 141 // 142 // The ByteTries are already byte-serialized, and are fixed on ASCII. 143 // (On an EBCDIC machine, the input string is converted to lowercase ASCII 144 // while matching.) 145 // 146 // The name groups are mostly invariant characters, but since we only 147 // generate, and keep in subversion, ASCII versions of pnames.icu, 148 // and since only ICU4J uses the pnames.icu data file 149 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, 150 // we just copy those bytes too. 151 } 152 153 return headerSize+totalSize; 154 } 155 156 /* Unicode properties data swapping ----------------------------------------- */ 157 158 static int32_t U_CALLCONV 159 uprops_swap(const UDataSwapper *ds, 160 const void *inData, int32_t length, void *outData, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t headerSize, i; 164 165 int32_t dataIndexes[UPROPS_INDEX_COUNT]; 166 const int32_t *inData32; 167 168 /* udata_swapDataHeader checks the arguments */ 169 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 170 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 171 return 0; 172 } 173 174 /* check data format and format version */ 175 pInfo=(const UDataInfo *)((const char *)inData+4); 176 if(!( 177 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ 178 pInfo->dataFormat[1]==0x50 && 179 pInfo->dataFormat[2]==0x72 && 180 pInfo->dataFormat[3]==0x6f && 181 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && 182 (pInfo->formatVersion[0]>=7 || 183 (pInfo->formatVersion[2]==UTRIE_SHIFT && 184 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) 185 )) { 186 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", 187 pInfo->dataFormat[0], pInfo->dataFormat[1], 188 pInfo->dataFormat[2], pInfo->dataFormat[3], 189 pInfo->formatVersion[0]); 190 *pErrorCode=U_UNSUPPORTED_ERROR; 191 return 0; 192 } 193 194 /* the properties file must contain at least the indexes array */ 195 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { 196 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 197 length-headerSize); 198 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 199 return 0; 200 } 201 202 /* read the indexes */ 203 inData32=(const int32_t *)((const char *)inData+headerSize); 204 for(i=0; i<UPROPS_INDEX_COUNT; ++i) { 205 dataIndexes[i]=udata_readInt32(ds, inData32[i]); 206 } 207 208 /* 209 * comments are copied from the data format description in genprops/store.c 210 * indexes[] constants are in uprops.h 211 */ 212 int32_t dataTop; 213 if(length>=0) { 214 int32_t *outData32; 215 216 /* 217 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. 218 * In earlier formatVersions, it is 0 and a lower dataIndexes entry 219 * has the top of the last item. 220 */ 221 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} 222 223 if((length-headerSize)<(4*dataTop)) { 224 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 225 length-headerSize); 226 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 227 return 0; 228 } 229 230 outData32=(int32_t *)((char *)outData+headerSize); 231 232 /* copy everything for inaccessible data (padding) */ 233 if(inData32!=outData32) { 234 uprv_memcpy(outData32, inData32, 4*dataTop); 235 } 236 237 /* swap the indexes[16] */ 238 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); 239 240 /* 241 * swap the main properties UTrie 242 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) 243 */ 244 utrie2_swapAnyVersion(ds, 245 inData32+UPROPS_INDEX_COUNT, 246 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), 247 outData32+UPROPS_INDEX_COUNT, 248 pErrorCode); 249 250 /* 251 * swap the properties and exceptions words 252 * P const uint32_t props32[i1-i0]; 253 * E const uint32_t exceptions[i2-i1]; 254 */ 255 ds->swapArray32(ds, 256 inData32+dataIndexes[UPROPS_PROPS32_INDEX], 257 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), 258 outData32+dataIndexes[UPROPS_PROPS32_INDEX], 259 pErrorCode); 260 261 /* 262 * swap the UChars 263 * U const UChar uchars[2*(i3-i2)]; 264 */ 265 ds->swapArray16(ds, 266 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 267 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), 268 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 269 pErrorCode); 270 271 /* 272 * swap the additional UTrie 273 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties 274 */ 275 utrie2_swapAnyVersion(ds, 276 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 277 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 278 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 279 pErrorCode); 280 281 /* 282 * swap the properties vectors 283 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; 284 */ 285 ds->swapArray32(ds, 286 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 287 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), 288 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 289 pErrorCode); 290 291 // swap the Script_Extensions data 292 // SCX const uint16_t scriptExtensions[2*(i7-i6)]; 293 ds->swapArray16(ds, 294 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 295 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), 296 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 297 pErrorCode); 298 } 299 300 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ 301 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; 302 } 303 304 /* Unicode case mapping data swapping --------------------------------------- */ 305 306 static int32_t U_CALLCONV 307 ucase_swap(const UDataSwapper *ds, 308 const void *inData, int32_t length, void *outData, 309 UErrorCode *pErrorCode) { 310 const UDataInfo *pInfo; 311 int32_t headerSize; 312 313 const uint8_t *inBytes; 314 uint8_t *outBytes; 315 316 const int32_t *inIndexes; 317 int32_t indexes[16]; 318 319 int32_t i, offset, count, size; 320 321 /* udata_swapDataHeader checks the arguments */ 322 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 323 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 324 return 0; 325 } 326 327 /* check data format and format version */ 328 pInfo=(const UDataInfo *)((const char *)inData+4); 329 if(!( 330 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ 331 pInfo->dataFormat[1]==UCASE_FMT_1 && 332 pInfo->dataFormat[2]==UCASE_FMT_2 && 333 pInfo->dataFormat[3]==UCASE_FMT_3 && 334 ((pInfo->formatVersion[0]==1 && 335 pInfo->formatVersion[2]==UTRIE_SHIFT && 336 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 337 pInfo->formatVersion[0]==2) 338 )) { 339 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", 340 pInfo->dataFormat[0], pInfo->dataFormat[1], 341 pInfo->dataFormat[2], pInfo->dataFormat[3], 342 pInfo->formatVersion[0]); 343 *pErrorCode=U_UNSUPPORTED_ERROR; 344 return 0; 345 } 346 347 inBytes=(const uint8_t *)inData+headerSize; 348 outBytes=(uint8_t *)outData+headerSize; 349 350 inIndexes=(const int32_t *)inBytes; 351 352 if(length>=0) { 353 length-=headerSize; 354 if(length<16*4) { 355 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", 356 length); 357 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 358 return 0; 359 } 360 } 361 362 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ 363 for(i=0; i<16; ++i) { 364 indexes[i]=udata_readInt32(ds, inIndexes[i]); 365 } 366 367 /* get the total length of the data */ 368 size=indexes[UCASE_IX_LENGTH]; 369 370 if(length>=0) { 371 if(length<size) { 372 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", 373 length); 374 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 375 return 0; 376 } 377 378 /* copy the data for inaccessible bytes */ 379 if(inBytes!=outBytes) { 380 uprv_memcpy(outBytes, inBytes, size); 381 } 382 383 offset=0; 384 385 /* swap the int32_t indexes[] */ 386 count=indexes[UCASE_IX_INDEX_TOP]*4; 387 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 388 offset+=count; 389 390 /* swap the UTrie */ 391 count=indexes[UCASE_IX_TRIE_SIZE]; 392 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 393 offset+=count; 394 395 /* swap the uint16_t exceptions[] and unfold[] */ 396 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; 397 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 398 offset+=count; 399 400 U_ASSERT(offset==size); 401 } 402 403 return headerSize+size; 404 } 405 406 /* Unicode bidi/shaping data swapping --------------------------------------- */ 407 408 static int32_t U_CALLCONV 409 ubidi_swap(const UDataSwapper *ds, 410 const void *inData, int32_t length, void *outData, 411 UErrorCode *pErrorCode) { 412 const UDataInfo *pInfo; 413 int32_t headerSize; 414 415 const uint8_t *inBytes; 416 uint8_t *outBytes; 417 418 const int32_t *inIndexes; 419 int32_t indexes[16]; 420 421 int32_t i, offset, count, size; 422 423 /* udata_swapDataHeader checks the arguments */ 424 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 425 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 426 return 0; 427 } 428 429 /* check data format and format version */ 430 pInfo=(const UDataInfo *)((const char *)inData+4); 431 if(!( 432 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 433 pInfo->dataFormat[1]==UBIDI_FMT_1 && 434 pInfo->dataFormat[2]==UBIDI_FMT_2 && 435 pInfo->dataFormat[3]==UBIDI_FMT_3 && 436 ((pInfo->formatVersion[0]==1 && 437 pInfo->formatVersion[2]==UTRIE_SHIFT && 438 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 439 pInfo->formatVersion[0]==2) 440 )) { 441 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", 442 pInfo->dataFormat[0], pInfo->dataFormat[1], 443 pInfo->dataFormat[2], pInfo->dataFormat[3], 444 pInfo->formatVersion[0]); 445 *pErrorCode=U_UNSUPPORTED_ERROR; 446 return 0; 447 } 448 449 inBytes=(const uint8_t *)inData+headerSize; 450 outBytes=(uint8_t *)outData+headerSize; 451 452 inIndexes=(const int32_t *)inBytes; 453 454 if(length>=0) { 455 length-=headerSize; 456 if(length<16*4) { 457 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", 458 length); 459 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 460 return 0; 461 } 462 } 463 464 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ 465 for(i=0; i<16; ++i) { 466 indexes[i]=udata_readInt32(ds, inIndexes[i]); 467 } 468 469 /* get the total length of the data */ 470 size=indexes[UBIDI_IX_LENGTH]; 471 472 if(length>=0) { 473 if(length<size) { 474 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", 475 length); 476 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 477 return 0; 478 } 479 480 /* copy the data for inaccessible bytes */ 481 if(inBytes!=outBytes) { 482 uprv_memcpy(outBytes, inBytes, size); 483 } 484 485 offset=0; 486 487 /* swap the int32_t indexes[] */ 488 count=indexes[UBIDI_IX_INDEX_TOP]*4; 489 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 490 offset+=count; 491 492 /* swap the UTrie */ 493 count=indexes[UBIDI_IX_TRIE_SIZE]; 494 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 495 offset+=count; 496 497 /* swap the uint32_t mirrors[] */ 498 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; 499 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 500 offset+=count; 501 502 /* just skip the uint8_t jgArray[] */ 503 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; 504 offset+=count; 505 506 U_ASSERT(offset==size); 507 } 508 509 return headerSize+size; 510 } 511 512 /* Unicode normalization data swapping -------------------------------------- */ 513 514 #if !UCONFIG_NO_NORMALIZATION 515 516 static int32_t U_CALLCONV 517 unorm_swap(const UDataSwapper *ds, 518 const void *inData, int32_t length, void *outData, 519 UErrorCode *pErrorCode) { 520 const UDataInfo *pInfo; 521 int32_t headerSize; 522 523 const uint8_t *inBytes; 524 uint8_t *outBytes; 525 526 const int32_t *inIndexes; 527 int32_t indexes[32]; 528 529 int32_t i, offset, count, size; 530 531 /* udata_swapDataHeader checks the arguments */ 532 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 533 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 534 return 0; 535 } 536 537 /* check data format and format version */ 538 pInfo=(const UDataInfo *)((const char *)inData+4); 539 if(!( 540 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ 541 pInfo->dataFormat[1]==0x6f && 542 pInfo->dataFormat[2]==0x72 && 543 pInfo->dataFormat[3]==0x6d && 544 pInfo->formatVersion[0]==2 545 )) { 546 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", 547 pInfo->dataFormat[0], pInfo->dataFormat[1], 548 pInfo->dataFormat[2], pInfo->dataFormat[3], 549 pInfo->formatVersion[0]); 550 *pErrorCode=U_UNSUPPORTED_ERROR; 551 return 0; 552 } 553 554 inBytes=(const uint8_t *)inData+headerSize; 555 outBytes=(uint8_t *)outData+headerSize; 556 557 inIndexes=(const int32_t *)inBytes; 558 559 if(length>=0) { 560 length-=headerSize; 561 if(length<32*4) { 562 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", 563 length); 564 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 565 return 0; 566 } 567 } 568 569 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ 570 for(i=0; i<32; ++i) { 571 indexes[i]=udata_readInt32(ds, inIndexes[i]); 572 } 573 574 /* calculate the total length of the data */ 575 size= 576 32*4+ /* size of indexes[] */ 577 indexes[_NORM_INDEX_TRIE_SIZE]+ 578 indexes[_NORM_INDEX_UCHAR_COUNT]*2+ 579 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ 580 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ 581 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ 582 indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 583 584 if(length>=0) { 585 if(length<size) { 586 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", 587 length); 588 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 589 return 0; 590 } 591 592 /* copy the data for inaccessible bytes */ 593 if(inBytes!=outBytes) { 594 uprv_memcpy(outBytes, inBytes, size); 595 } 596 597 offset=0; 598 599 /* swap the indexes[] */ 600 count=32*4; 601 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 602 offset+=count; 603 604 /* swap the main UTrie */ 605 count=indexes[_NORM_INDEX_TRIE_SIZE]; 606 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 607 offset+=count; 608 609 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ 610 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; 611 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 612 offset+=count; 613 614 /* swap the FCD UTrie */ 615 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; 616 if(count!=0) { 617 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 618 offset+=count; 619 } 620 621 /* swap the aux UTrie */ 622 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; 623 if(count!=0) { 624 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 625 offset+=count; 626 } 627 628 /* swap the uint16_t combiningTable[] */ 629 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 630 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 631 offset+=count; 632 } 633 634 return headerSize+size; 635 } 636 637 #endif 638 639 /* Swap 'Test' data from gentest */ 640 static int32_t U_CALLCONV 641 test_swap(const UDataSwapper *ds, 642 const void *inData, int32_t length, void *outData, 643 UErrorCode *pErrorCode) { 644 const UDataInfo *pInfo; 645 int32_t headerSize; 646 647 const uint8_t *inBytes; 648 uint8_t *outBytes; 649 650 int32_t offset; 651 652 /* udata_swapDataHeader checks the arguments */ 653 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 654 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 655 udata_printError(ds, "test_swap(): data header swap failed %s\n", u_errorName(*pErrorCode)); 656 return 0; 657 } 658 659 /* check data format and format version */ 660 pInfo=(const UDataInfo *)((const char *)inData+4); 661 if(!( 662 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ 663 pInfo->dataFormat[1]==0x65 && 664 pInfo->dataFormat[2]==0x73 && 665 pInfo->dataFormat[3]==0x74 && 666 pInfo->formatVersion[0]==1 667 )) { 668 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", 669 pInfo->dataFormat[0], pInfo->dataFormat[1], 670 pInfo->dataFormat[2], pInfo->dataFormat[3], 671 pInfo->formatVersion[0]); 672 *pErrorCode=U_UNSUPPORTED_ERROR; 673 return 0; 674 } 675 676 inBytes=(const uint8_t *)inData+headerSize; 677 outBytes=(uint8_t *)outData+headerSize; 678 679 int32_t size16 = 2; // 16bit plus padding 680 int32_t sizeStr = 5; // 4 char inv-str plus null 681 int32_t size = size16 + sizeStr; 682 683 if(length>=0) { 684 if(length<size) { 685 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", 686 length, size); 687 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 688 return 0; 689 } 690 691 offset =0; 692 /* swap a 1 entry array */ 693 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); 694 offset+=size16; 695 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); 696 } 697 698 return headerSize+size; 699 } 700 701 /* swap any data (except a .dat package) ------------------------------------ */ 702 703 static const struct { 704 uint8_t dataFormat[4]; 705 UDataSwapFn *swapFn; 706 } swapFns[]={ 707 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ 708 #if !UCONFIG_NO_LEGACY_CONVERSION 709 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ 710 #endif 711 #if !UCONFIG_NO_CONVERSION 712 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ 713 #endif 714 #if !UCONFIG_NO_IDNA 715 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 716 #endif 717 /* insert data formats here, descending by expected frequency of occurrence */ 718 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ 719 720 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, 721 ucase_swap }, /* dataFormat="cAsE" */ 722 723 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, 724 ubidi_swap }, /* dataFormat="BiDi" */ 725 726 #if !UCONFIG_NO_NORMALIZATION 727 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ 728 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ 729 #endif 730 #if !UCONFIG_NO_COLLATION 731 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ 732 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ 733 #endif 734 #if !UCONFIG_NO_BREAK_ITERATION 735 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 736 { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap }, /* dataFormat="TrDc " */ 737 #endif 738 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ 739 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ 740 #if !UCONFIG_NO_NORMALIZATION 741 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ 742 #endif 743 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ 744 }; 745 746 U_CAPI int32_t U_EXPORT2 747 udata_swap(const UDataSwapper *ds, 748 const void *inData, int32_t length, void *outData, 749 UErrorCode *pErrorCode) { 750 char dataFormatChars[4]; 751 const UDataInfo *pInfo; 752 int32_t headerSize, i, swappedLength; 753 754 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 755 return 0; 756 } 757 758 /* 759 * Preflight the header first; checks for illegal arguments, too. 760 * Do not swap the header right away because the format-specific swapper 761 * will swap it, get the headerSize again, and also use the header 762 * information. Otherwise we would have to pass some of the information 763 * and not be able to use the UDataSwapFn signature. 764 */ 765 headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); 766 767 /* 768 * If we wanted udata_swap() to also handle non-loadable data like a UTrie, 769 * then we could check here for further known magic values and structures. 770 */ 771 if(U_FAILURE(*pErrorCode)) { 772 return 0; /* the data format was not recognized */ 773 } 774 775 pInfo=(const UDataInfo *)((const char *)inData+4); 776 777 { 778 /* convert the data format from ASCII to Unicode to the system charset */ 779 UChar u[4]={ 780 pInfo->dataFormat[0], pInfo->dataFormat[1], 781 pInfo->dataFormat[2], pInfo->dataFormat[3] 782 }; 783 784 if(uprv_isInvariantUString(u, 4)) { 785 u_UCharsToChars(u, dataFormatChars, 4); 786 } else { 787 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; 788 } 789 } 790 791 /* dispatch to the swap function for the dataFormat */ 792 for(i=0; i<LENGTHOF(swapFns); ++i) { 793 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { 794 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); 795 796 if(U_FAILURE(*pErrorCode)) { 797 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", 798 pInfo->dataFormat[0], pInfo->dataFormat[1], 799 pInfo->dataFormat[2], pInfo->dataFormat[3], 800 dataFormatChars[0], dataFormatChars[1], 801 dataFormatChars[2], dataFormatChars[3], 802 u_errorName(*pErrorCode)); 803 } else if(swappedLength<(length-15)) { 804 /* swapped less than expected */ 805 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 806 swappedLength, length, 807 pInfo->dataFormat[0], pInfo->dataFormat[1], 808 pInfo->dataFormat[2], pInfo->dataFormat[3], 809 dataFormatChars[0], dataFormatChars[1], 810 dataFormatChars[2], dataFormatChars[3], 811 u_errorName(*pErrorCode)); 812 } 813 814 return swappedLength; 815 } 816 } 817 818 /* the dataFormat was not recognized */ 819 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 820 pInfo->dataFormat[0], pInfo->dataFormat[1], 821 pInfo->dataFormat[2], pInfo->dataFormat[3], 822 dataFormatChars[0], dataFormatChars[1], 823 dataFormatChars[2], dataFormatChars[3]); 824 825 *pErrorCode=U_UNSUPPORTED_ERROR; 826 return 0; 827 } 828