1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2005-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: swapimpl.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005may05 14 * created by: Markus W. Scherer 15 * 16 * Data file swapping functions moved here from the common library 17 * because some data is hardcoded in ICU4C and needs not be swapped any more. 18 * Moving the functions here simplifies testing (for code coverage) because 19 * we need not jump through hoops (like adding snapshots of these files 20 * to testdata). 21 * 22 * The declarations for these functions remain in the internal header files 23 * in icu/source/common/ 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/putil.h" 28 #include "unicode/udata.h" 29 30 /* Explicit include statement for std_string.h is needed 31 * for compilation on certain platforms. (e.g. AIX/VACPP) 32 */ 33 #include "unicode/std_string.h" 34 35 #include "cmemory.h" 36 #include "cstring.h" 37 #include "uinvchar.h" 38 #include "uassert.h" 39 #include "uarrsort.h" 40 #include "ucmndata.h" 41 #include "udataswp.h" 42 43 /* swapping implementations in common */ 44 45 #include "uresdata.h" 46 #include "ucnv_io.h" 47 #include "uprops.h" 48 #include "ucase.h" 49 #include "ubidi_props.h" 50 #include "ucol_swp.h" 51 #include "ucnv_bld.h" 52 #include "unormimp.h" 53 #include "normalizer2impl.h" 54 #include "sprpimpl.h" 55 #include "propname.h" 56 #include "rbbidata.h" 57 #include "utrie2.h" 58 #include "dictionarydata.h" 59 60 /* swapping implementations in i18n */ 61 62 #if !UCONFIG_NO_NORMALIZATION 63 #include "uspoof_impl.h" 64 #endif 65 66 U_NAMESPACE_USE 67 68 /* definitions */ 69 70 /* Unicode property (value) aliases data swapping --------------------------- */ 71 72 static int32_t U_CALLCONV 73 upname_swap(const UDataSwapper *ds, 74 const void *inData, int32_t length, void *outData, 75 UErrorCode *pErrorCode) { 76 /* udata_swapDataHeader checks the arguments */ 77 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 78 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 79 return 0; 80 } 81 82 /* check data format and format version */ 83 const UDataInfo *pInfo= 84 reinterpret_cast<const UDataInfo *>( 85 static_cast<const char *>(inData)+4); 86 if(!( 87 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ 88 pInfo->dataFormat[1]==0x6e && 89 pInfo->dataFormat[2]==0x61 && 90 pInfo->dataFormat[3]==0x6d && 91 pInfo->formatVersion[0]==2 92 )) { 93 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", 94 pInfo->dataFormat[0], pInfo->dataFormat[1], 95 pInfo->dataFormat[2], pInfo->dataFormat[3], 96 pInfo->formatVersion[0]); 97 *pErrorCode=U_UNSUPPORTED_ERROR; 98 return 0; 99 } 100 101 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; 102 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; 103 104 if(length>=0) { 105 length-=headerSize; 106 // formatVersion 2 initially has indexes[8], 32 bytes. 107 if(length<32) { 108 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", 109 (int)length); 110 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 111 return 0; 112 } 113 } 114 115 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); 116 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); 117 if(length>=0) { 118 if(length<totalSize) { 119 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " 120 "for pnames.icu\n", 121 (int)length, (int)totalSize); 122 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 123 return 0; 124 } 125 126 int32_t numBytesIndexesAndValueMaps= 127 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); 128 129 // Swap the indexes[] and the valueMaps[]. 130 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); 131 132 // Copy the rest of the data. 133 if(inBytes!=outBytes) { 134 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, 135 inBytes+numBytesIndexesAndValueMaps, 136 totalSize-numBytesIndexesAndValueMaps); 137 } 138 139 // We need not swap anything else: 140 // 141 // The ByteTries are already byte-serialized, and are fixed on ASCII. 142 // (On an EBCDIC machine, the input string is converted to lowercase ASCII 143 // while matching.) 144 // 145 // The name groups are mostly invariant characters, but since we only 146 // generate, and keep in subversion, ASCII versions of pnames.icu, 147 // and since only ICU4J uses the pnames.icu data file 148 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, 149 // we just copy those bytes too. 150 } 151 152 return headerSize+totalSize; 153 } 154 155 /* Unicode properties data swapping ----------------------------------------- */ 156 157 static int32_t U_CALLCONV 158 uprops_swap(const UDataSwapper *ds, 159 const void *inData, int32_t length, void *outData, 160 UErrorCode *pErrorCode) { 161 const UDataInfo *pInfo; 162 int32_t headerSize, i; 163 164 int32_t dataIndexes[UPROPS_INDEX_COUNT]; 165 const int32_t *inData32; 166 167 /* udata_swapDataHeader checks the arguments */ 168 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 169 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 170 return 0; 171 } 172 173 /* check data format and format version */ 174 pInfo=(const UDataInfo *)((const char *)inData+4); 175 if(!( 176 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ 177 pInfo->dataFormat[1]==0x50 && 178 pInfo->dataFormat[2]==0x72 && 179 pInfo->dataFormat[3]==0x6f && 180 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && 181 (pInfo->formatVersion[0]>=7 || 182 (pInfo->formatVersion[2]==UTRIE_SHIFT && 183 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) 184 )) { 185 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", 186 pInfo->dataFormat[0], pInfo->dataFormat[1], 187 pInfo->dataFormat[2], pInfo->dataFormat[3], 188 pInfo->formatVersion[0]); 189 *pErrorCode=U_UNSUPPORTED_ERROR; 190 return 0; 191 } 192 193 /* the properties file must contain at least the indexes array */ 194 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { 195 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 196 length-headerSize); 197 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 198 return 0; 199 } 200 201 /* read the indexes */ 202 inData32=(const int32_t *)((const char *)inData+headerSize); 203 for(i=0; i<UPROPS_INDEX_COUNT; ++i) { 204 dataIndexes[i]=udata_readInt32(ds, inData32[i]); 205 } 206 207 /* 208 * comments are copied from the data format description in genprops/store.c 209 * indexes[] constants are in uprops.h 210 */ 211 int32_t dataTop; 212 if(length>=0) { 213 int32_t *outData32; 214 215 /* 216 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. 217 * In earlier formatVersions, it is 0 and a lower dataIndexes entry 218 * has the top of the last item. 219 */ 220 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} 221 222 if((length-headerSize)<(4*dataTop)) { 223 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 224 length-headerSize); 225 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 226 return 0; 227 } 228 229 outData32=(int32_t *)((char *)outData+headerSize); 230 231 /* copy everything for inaccessible data (padding) */ 232 if(inData32!=outData32) { 233 uprv_memcpy(outData32, inData32, 4*dataTop); 234 } 235 236 /* swap the indexes[16] */ 237 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); 238 239 /* 240 * swap the main properties UTrie 241 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) 242 */ 243 utrie2_swapAnyVersion(ds, 244 inData32+UPROPS_INDEX_COUNT, 245 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), 246 outData32+UPROPS_INDEX_COUNT, 247 pErrorCode); 248 249 /* 250 * swap the properties and exceptions words 251 * P const uint32_t props32[i1-i0]; 252 * E const uint32_t exceptions[i2-i1]; 253 */ 254 ds->swapArray32(ds, 255 inData32+dataIndexes[UPROPS_PROPS32_INDEX], 256 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), 257 outData32+dataIndexes[UPROPS_PROPS32_INDEX], 258 pErrorCode); 259 260 /* 261 * swap the UChars 262 * U const UChar uchars[2*(i3-i2)]; 263 */ 264 ds->swapArray16(ds, 265 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 266 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), 267 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 268 pErrorCode); 269 270 /* 271 * swap the additional UTrie 272 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties 273 */ 274 utrie2_swapAnyVersion(ds, 275 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 276 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 277 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 278 pErrorCode); 279 280 /* 281 * swap the properties vectors 282 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; 283 */ 284 ds->swapArray32(ds, 285 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 286 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), 287 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 288 pErrorCode); 289 290 // swap the Script_Extensions data 291 // SCX const uint16_t scriptExtensions[2*(i7-i6)]; 292 ds->swapArray16(ds, 293 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 294 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), 295 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 296 pErrorCode); 297 } 298 299 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ 300 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; 301 } 302 303 /* Unicode case mapping data swapping --------------------------------------- */ 304 305 static int32_t U_CALLCONV 306 ucase_swap(const UDataSwapper *ds, 307 const void *inData, int32_t length, void *outData, 308 UErrorCode *pErrorCode) { 309 const UDataInfo *pInfo; 310 int32_t headerSize; 311 312 const uint8_t *inBytes; 313 uint8_t *outBytes; 314 315 const int32_t *inIndexes; 316 int32_t indexes[16]; 317 318 int32_t i, offset, count, size; 319 320 /* udata_swapDataHeader checks the arguments */ 321 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 322 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 323 return 0; 324 } 325 326 /* check data format and format version */ 327 pInfo=(const UDataInfo *)((const char *)inData+4); 328 if(!( 329 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ 330 pInfo->dataFormat[1]==UCASE_FMT_1 && 331 pInfo->dataFormat[2]==UCASE_FMT_2 && 332 pInfo->dataFormat[3]==UCASE_FMT_3 && 333 ((pInfo->formatVersion[0]==1 && 334 pInfo->formatVersion[2]==UTRIE_SHIFT && 335 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 336 pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) 337 )) { 338 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", 339 pInfo->dataFormat[0], pInfo->dataFormat[1], 340 pInfo->dataFormat[2], pInfo->dataFormat[3], 341 pInfo->formatVersion[0]); 342 *pErrorCode=U_UNSUPPORTED_ERROR; 343 return 0; 344 } 345 346 inBytes=(const uint8_t *)inData+headerSize; 347 outBytes=(uint8_t *)outData+headerSize; 348 349 inIndexes=(const int32_t *)inBytes; 350 351 if(length>=0) { 352 length-=headerSize; 353 if(length<16*4) { 354 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", 355 length); 356 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 357 return 0; 358 } 359 } 360 361 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ 362 for(i=0; i<16; ++i) { 363 indexes[i]=udata_readInt32(ds, inIndexes[i]); 364 } 365 366 /* get the total length of the data */ 367 size=indexes[UCASE_IX_LENGTH]; 368 369 if(length>=0) { 370 if(length<size) { 371 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", 372 length); 373 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 374 return 0; 375 } 376 377 /* copy the data for inaccessible bytes */ 378 if(inBytes!=outBytes) { 379 uprv_memcpy(outBytes, inBytes, size); 380 } 381 382 offset=0; 383 384 /* swap the int32_t indexes[] */ 385 count=indexes[UCASE_IX_INDEX_TOP]*4; 386 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 387 offset+=count; 388 389 /* swap the UTrie */ 390 count=indexes[UCASE_IX_TRIE_SIZE]; 391 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 392 offset+=count; 393 394 /* swap the uint16_t exceptions[] and unfold[] */ 395 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; 396 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 397 offset+=count; 398 399 U_ASSERT(offset==size); 400 } 401 402 return headerSize+size; 403 } 404 405 /* Unicode bidi/shaping data swapping --------------------------------------- */ 406 407 static int32_t U_CALLCONV 408 ubidi_swap(const UDataSwapper *ds, 409 const void *inData, int32_t length, void *outData, 410 UErrorCode *pErrorCode) { 411 const UDataInfo *pInfo; 412 int32_t headerSize; 413 414 const uint8_t *inBytes; 415 uint8_t *outBytes; 416 417 const int32_t *inIndexes; 418 int32_t indexes[16]; 419 420 int32_t i, offset, count, size; 421 422 /* udata_swapDataHeader checks the arguments */ 423 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 424 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 425 return 0; 426 } 427 428 /* check data format and format version */ 429 pInfo=(const UDataInfo *)((const char *)inData+4); 430 if(!( 431 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 432 pInfo->dataFormat[1]==UBIDI_FMT_1 && 433 pInfo->dataFormat[2]==UBIDI_FMT_2 && 434 pInfo->dataFormat[3]==UBIDI_FMT_3 && 435 ((pInfo->formatVersion[0]==1 && 436 pInfo->formatVersion[2]==UTRIE_SHIFT && 437 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 438 pInfo->formatVersion[0]==2) 439 )) { 440 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", 441 pInfo->dataFormat[0], pInfo->dataFormat[1], 442 pInfo->dataFormat[2], pInfo->dataFormat[3], 443 pInfo->formatVersion[0]); 444 *pErrorCode=U_UNSUPPORTED_ERROR; 445 return 0; 446 } 447 448 inBytes=(const uint8_t *)inData+headerSize; 449 outBytes=(uint8_t *)outData+headerSize; 450 451 inIndexes=(const int32_t *)inBytes; 452 453 if(length>=0) { 454 length-=headerSize; 455 if(length<16*4) { 456 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", 457 length); 458 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 459 return 0; 460 } 461 } 462 463 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ 464 for(i=0; i<16; ++i) { 465 indexes[i]=udata_readInt32(ds, inIndexes[i]); 466 } 467 468 /* get the total length of the data */ 469 size=indexes[UBIDI_IX_LENGTH]; 470 471 if(length>=0) { 472 if(length<size) { 473 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", 474 length); 475 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 476 return 0; 477 } 478 479 /* copy the data for inaccessible bytes */ 480 if(inBytes!=outBytes) { 481 uprv_memcpy(outBytes, inBytes, size); 482 } 483 484 offset=0; 485 486 /* swap the int32_t indexes[] */ 487 count=indexes[UBIDI_IX_INDEX_TOP]*4; 488 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 489 offset+=count; 490 491 /* swap the UTrie */ 492 count=indexes[UBIDI_IX_TRIE_SIZE]; 493 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 494 offset+=count; 495 496 /* swap the uint32_t mirrors[] */ 497 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; 498 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 499 offset+=count; 500 501 /* just skip the uint8_t jgArray[] and jgArray2[] */ 502 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; 503 offset+=count; 504 count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2]; 505 offset+=count; 506 507 U_ASSERT(offset==size); 508 } 509 510 return headerSize+size; 511 } 512 513 /* Unicode normalization data swapping -------------------------------------- */ 514 515 #if !UCONFIG_NO_NORMALIZATION 516 517 static int32_t U_CALLCONV 518 unorm_swap(const UDataSwapper *ds, 519 const void *inData, int32_t length, void *outData, 520 UErrorCode *pErrorCode) { 521 const UDataInfo *pInfo; 522 int32_t headerSize; 523 524 const uint8_t *inBytes; 525 uint8_t *outBytes; 526 527 const int32_t *inIndexes; 528 int32_t indexes[32]; 529 530 int32_t i, offset, count, size; 531 532 /* udata_swapDataHeader checks the arguments */ 533 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 534 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 535 return 0; 536 } 537 538 /* check data format and format version */ 539 pInfo=(const UDataInfo *)((const char *)inData+4); 540 if(!( 541 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ 542 pInfo->dataFormat[1]==0x6f && 543 pInfo->dataFormat[2]==0x72 && 544 pInfo->dataFormat[3]==0x6d && 545 pInfo->formatVersion[0]==2 546 )) { 547 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", 548 pInfo->dataFormat[0], pInfo->dataFormat[1], 549 pInfo->dataFormat[2], pInfo->dataFormat[3], 550 pInfo->formatVersion[0]); 551 *pErrorCode=U_UNSUPPORTED_ERROR; 552 return 0; 553 } 554 555 inBytes=(const uint8_t *)inData+headerSize; 556 outBytes=(uint8_t *)outData+headerSize; 557 558 inIndexes=(const int32_t *)inBytes; 559 560 if(length>=0) { 561 length-=headerSize; 562 if(length<32*4) { 563 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", 564 length); 565 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 566 return 0; 567 } 568 } 569 570 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ 571 for(i=0; i<32; ++i) { 572 indexes[i]=udata_readInt32(ds, inIndexes[i]); 573 } 574 575 /* calculate the total length of the data */ 576 size= 577 32*4+ /* size of indexes[] */ 578 indexes[_NORM_INDEX_TRIE_SIZE]+ 579 indexes[_NORM_INDEX_UCHAR_COUNT]*2+ 580 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ 581 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ 582 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ 583 indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 584 585 if(length>=0) { 586 if(length<size) { 587 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", 588 length); 589 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 590 return 0; 591 } 592 593 /* copy the data for inaccessible bytes */ 594 if(inBytes!=outBytes) { 595 uprv_memcpy(outBytes, inBytes, size); 596 } 597 598 offset=0; 599 600 /* swap the indexes[] */ 601 count=32*4; 602 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 603 offset+=count; 604 605 /* swap the main UTrie */ 606 count=indexes[_NORM_INDEX_TRIE_SIZE]; 607 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 608 offset+=count; 609 610 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ 611 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; 612 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 613 offset+=count; 614 615 /* swap the FCD UTrie */ 616 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; 617 if(count!=0) { 618 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 619 offset+=count; 620 } 621 622 /* swap the aux UTrie */ 623 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; 624 if(count!=0) { 625 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 626 offset+=count; 627 } 628 629 /* swap the uint16_t combiningTable[] */ 630 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 631 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 632 offset+=count; 633 } 634 635 return headerSize+size; 636 } 637 638 #endif 639 640 /* Swap 'Test' data from gentest */ 641 static int32_t U_CALLCONV 642 test_swap(const UDataSwapper *ds, 643 const void *inData, int32_t length, void *outData, 644 UErrorCode *pErrorCode) { 645 const UDataInfo *pInfo; 646 int32_t headerSize; 647 648 const uint8_t *inBytes; 649 uint8_t *outBytes; 650 651 int32_t offset; 652 653 /* udata_swapDataHeader checks the arguments */ 654 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 655 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 656 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); 657 return 0; 658 } 659 660 /* check data format and format version */ 661 pInfo=(const UDataInfo *)((const char *)inData+4); 662 if(!( 663 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ 664 pInfo->dataFormat[1]==0x65 && 665 pInfo->dataFormat[2]==0x73 && 666 pInfo->dataFormat[3]==0x74 && 667 pInfo->formatVersion[0]==1 668 )) { 669 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", 670 pInfo->dataFormat[0], pInfo->dataFormat[1], 671 pInfo->dataFormat[2], pInfo->dataFormat[3], 672 pInfo->formatVersion[0]); 673 *pErrorCode=U_UNSUPPORTED_ERROR; 674 return 0; 675 } 676 677 inBytes=(const uint8_t *)inData+headerSize; 678 outBytes=(uint8_t *)outData+headerSize; 679 680 int32_t size16 = 2; // 16bit plus padding 681 int32_t sizeStr = 5; // 4 char inv-str plus null 682 int32_t size = size16 + sizeStr; 683 684 if(length>=0) { 685 if(length<size) { 686 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", 687 length, size); 688 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 689 return 0; 690 } 691 692 offset =0; 693 /* swap a 1 entry array */ 694 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); 695 offset+=size16; 696 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); 697 } 698 699 return headerSize+size; 700 } 701 702 /* swap any data (except a .dat package) ------------------------------------ */ 703 704 static const struct { 705 uint8_t dataFormat[4]; 706 UDataSwapFn *swapFn; 707 } swapFns[]={ 708 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ 709 #if !UCONFIG_NO_LEGACY_CONVERSION 710 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ 711 #endif 712 #if !UCONFIG_NO_CONVERSION 713 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ 714 #endif 715 #if !UCONFIG_NO_IDNA 716 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 717 #endif 718 /* insert data formats here, descending by expected frequency of occurrence */ 719 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ 720 721 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, 722 ucase_swap }, /* dataFormat="cAsE" */ 723 724 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, 725 ubidi_swap }, /* dataFormat="BiDi" */ 726 727 #if !UCONFIG_NO_NORMALIZATION 728 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ 729 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ 730 #endif 731 #if !UCONFIG_NO_COLLATION 732 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ 733 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ 734 #endif 735 #if !UCONFIG_NO_BREAK_ITERATION 736 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 737 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ 738 #endif 739 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ 740 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ 741 #if !UCONFIG_NO_NORMALIZATION 742 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ 743 #endif 744 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ 745 }; 746 747 U_CAPI int32_t U_EXPORT2 748 udata_swap(const UDataSwapper *ds, 749 const void *inData, int32_t length, void *outData, 750 UErrorCode *pErrorCode) { 751 char dataFormatChars[4]; 752 const UDataInfo *pInfo; 753 int32_t i, swappedLength; 754 755 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 756 return 0; 757 } 758 759 /* 760 * Preflight the header first; checks for illegal arguments, too. 761 * Do not swap the header right away because the format-specific swapper 762 * will swap it, get the headerSize again, and also use the header 763 * information. Otherwise we would have to pass some of the information 764 * and not be able to use the UDataSwapFn signature. 765 */ 766 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); 767 768 /* 769 * If we wanted udata_swap() to also handle non-loadable data like a UTrie, 770 * then we could check here for further known magic values and structures. 771 */ 772 if(U_FAILURE(*pErrorCode)) { 773 return 0; /* the data format was not recognized */ 774 } 775 776 pInfo=(const UDataInfo *)((const char *)inData+4); 777 778 { 779 /* convert the data format from ASCII to Unicode to the system charset */ 780 UChar u[4]={ 781 pInfo->dataFormat[0], pInfo->dataFormat[1], 782 pInfo->dataFormat[2], pInfo->dataFormat[3] 783 }; 784 785 if(uprv_isInvariantUString(u, 4)) { 786 u_UCharsToChars(u, dataFormatChars, 4); 787 } else { 788 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; 789 } 790 } 791 792 /* dispatch to the swap function for the dataFormat */ 793 for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) { 794 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { 795 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); 796 797 if(U_FAILURE(*pErrorCode)) { 798 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", 799 pInfo->dataFormat[0], pInfo->dataFormat[1], 800 pInfo->dataFormat[2], pInfo->dataFormat[3], 801 dataFormatChars[0], dataFormatChars[1], 802 dataFormatChars[2], dataFormatChars[3], 803 u_errorName(*pErrorCode)); 804 } else if(swappedLength<(length-15)) { 805 /* swapped less than expected */ 806 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 807 swappedLength, length, 808 pInfo->dataFormat[0], pInfo->dataFormat[1], 809 pInfo->dataFormat[2], pInfo->dataFormat[3], 810 dataFormatChars[0], dataFormatChars[1], 811 dataFormatChars[2], dataFormatChars[3], 812 u_errorName(*pErrorCode)); 813 } 814 815 return swappedLength; 816 } 817 } 818 819 /* the dataFormat was not recognized */ 820 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 821 pInfo->dataFormat[0], pInfo->dataFormat[1], 822 pInfo->dataFormat[2], pInfo->dataFormat[3], 823 dataFormatChars[0], dataFormatChars[1], 824 dataFormatChars[2], dataFormatChars[3]); 825 826 *pErrorCode=U_UNSUPPORTED_ERROR; 827 return 0; 828 } 829