1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2005-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: swapimpl.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005may05 16 * created by: Markus W. Scherer 17 * 18 * Data file swapping functions moved here from the common library 19 * because some data is hardcoded in ICU4C and needs not be swapped any more. 20 * Moving the functions here simplifies testing (for code coverage) because 21 * we need not jump through hoops (like adding snapshots of these files 22 * to testdata). 23 * 24 * The declarations for these functions remain in the internal header files 25 * in icu/source/common/ 26 */ 27 28 #include "unicode/utypes.h" 29 #include "unicode/putil.h" 30 #include "unicode/udata.h" 31 32 /* Explicit include statement for std_string.h is needed 33 * for compilation on certain platforms. (e.g. AIX/VACPP) 34 */ 35 #include "unicode/std_string.h" 36 37 #include "cmemory.h" 38 #include "cstring.h" 39 #include "uinvchar.h" 40 #include "uassert.h" 41 #include "uarrsort.h" 42 #include "ucmndata.h" 43 #include "udataswp.h" 44 45 /* swapping implementations in common */ 46 47 #include "uresdata.h" 48 #include "ucnv_io.h" 49 #include "uprops.h" 50 #include "ucase.h" 51 #include "ubidi_props.h" 52 #include "ucol_swp.h" 53 #include "ucnv_bld.h" 54 #include "unormimp.h" 55 #include "normalizer2impl.h" 56 #include "sprpimpl.h" 57 #include "propname.h" 58 #include "rbbidata.h" 59 #include "utrie.h" 60 #include "utrie2.h" 61 #include "dictionarydata.h" 62 63 /* swapping implementations in i18n */ 64 65 #if !UCONFIG_NO_NORMALIZATION 66 #include "uspoof_impl.h" 67 #endif 68 69 U_NAMESPACE_USE 70 71 /* definitions */ 72 73 /* Unicode property (value) aliases data swapping --------------------------- */ 74 75 static int32_t U_CALLCONV 76 upname_swap(const UDataSwapper *ds, 77 const void *inData, int32_t length, void *outData, 78 UErrorCode *pErrorCode) { 79 /* udata_swapDataHeader checks the arguments */ 80 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 81 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 82 return 0; 83 } 84 85 /* check data format and format version */ 86 const UDataInfo *pInfo= 87 reinterpret_cast<const UDataInfo *>( 88 static_cast<const char *>(inData)+4); 89 if(!( 90 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ 91 pInfo->dataFormat[1]==0x6e && 92 pInfo->dataFormat[2]==0x61 && 93 pInfo->dataFormat[3]==0x6d && 94 pInfo->formatVersion[0]==2 95 )) { 96 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", 97 pInfo->dataFormat[0], pInfo->dataFormat[1], 98 pInfo->dataFormat[2], pInfo->dataFormat[3], 99 pInfo->formatVersion[0]); 100 *pErrorCode=U_UNSUPPORTED_ERROR; 101 return 0; 102 } 103 104 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; 105 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; 106 107 if(length>=0) { 108 length-=headerSize; 109 // formatVersion 2 initially has indexes[8], 32 bytes. 110 if(length<32) { 111 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", 112 (int)length); 113 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 114 return 0; 115 } 116 } 117 118 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); 119 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); 120 if(length>=0) { 121 if(length<totalSize) { 122 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " 123 "for pnames.icu\n", 124 (int)length, (int)totalSize); 125 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 126 return 0; 127 } 128 129 int32_t numBytesIndexesAndValueMaps= 130 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); 131 132 // Swap the indexes[] and the valueMaps[]. 133 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); 134 135 // Copy the rest of the data. 136 if(inBytes!=outBytes) { 137 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, 138 inBytes+numBytesIndexesAndValueMaps, 139 totalSize-numBytesIndexesAndValueMaps); 140 } 141 142 // We need not swap anything else: 143 // 144 // The ByteTries are already byte-serialized, and are fixed on ASCII. 145 // (On an EBCDIC machine, the input string is converted to lowercase ASCII 146 // while matching.) 147 // 148 // The name groups are mostly invariant characters, but since we only 149 // generate, and keep in subversion, ASCII versions of pnames.icu, 150 // and since only ICU4J uses the pnames.icu data file 151 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, 152 // we just copy those bytes too. 153 } 154 155 return headerSize+totalSize; 156 } 157 158 /* Unicode properties data swapping ----------------------------------------- */ 159 160 static int32_t U_CALLCONV 161 uprops_swap(const UDataSwapper *ds, 162 const void *inData, int32_t length, void *outData, 163 UErrorCode *pErrorCode) { 164 const UDataInfo *pInfo; 165 int32_t headerSize, i; 166 167 int32_t dataIndexes[UPROPS_INDEX_COUNT]; 168 const int32_t *inData32; 169 170 /* udata_swapDataHeader checks the arguments */ 171 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 172 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 173 return 0; 174 } 175 176 /* check data format and format version */ 177 pInfo=(const UDataInfo *)((const char *)inData+4); 178 if(!( 179 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ 180 pInfo->dataFormat[1]==0x50 && 181 pInfo->dataFormat[2]==0x72 && 182 pInfo->dataFormat[3]==0x6f && 183 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && 184 (pInfo->formatVersion[0]>=7 || 185 (pInfo->formatVersion[2]==UTRIE_SHIFT && 186 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) 187 )) { 188 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", 189 pInfo->dataFormat[0], pInfo->dataFormat[1], 190 pInfo->dataFormat[2], pInfo->dataFormat[3], 191 pInfo->formatVersion[0]); 192 *pErrorCode=U_UNSUPPORTED_ERROR; 193 return 0; 194 } 195 196 /* the properties file must contain at least the indexes array */ 197 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { 198 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 199 length-headerSize); 200 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 201 return 0; 202 } 203 204 /* read the indexes */ 205 inData32=(const int32_t *)((const char *)inData+headerSize); 206 for(i=0; i<UPROPS_INDEX_COUNT; ++i) { 207 dataIndexes[i]=udata_readInt32(ds, inData32[i]); 208 } 209 210 /* 211 * comments are copied from the data format description in genprops/store.c 212 * indexes[] constants are in uprops.h 213 */ 214 int32_t dataTop; 215 if(length>=0) { 216 int32_t *outData32; 217 218 /* 219 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. 220 * In earlier formatVersions, it is 0 and a lower dataIndexes entry 221 * has the top of the last item. 222 */ 223 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} 224 225 if((length-headerSize)<(4*dataTop)) { 226 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 227 length-headerSize); 228 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 229 return 0; 230 } 231 232 outData32=(int32_t *)((char *)outData+headerSize); 233 234 /* copy everything for inaccessible data (padding) */ 235 if(inData32!=outData32) { 236 uprv_memcpy(outData32, inData32, 4*(size_t)dataTop); 237 } 238 239 /* swap the indexes[16] */ 240 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); 241 242 /* 243 * swap the main properties UTrie 244 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) 245 */ 246 utrie2_swapAnyVersion(ds, 247 inData32+UPROPS_INDEX_COUNT, 248 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), 249 outData32+UPROPS_INDEX_COUNT, 250 pErrorCode); 251 252 /* 253 * swap the properties and exceptions words 254 * P const uint32_t props32[i1-i0]; 255 * E const uint32_t exceptions[i2-i1]; 256 */ 257 ds->swapArray32(ds, 258 inData32+dataIndexes[UPROPS_PROPS32_INDEX], 259 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), 260 outData32+dataIndexes[UPROPS_PROPS32_INDEX], 261 pErrorCode); 262 263 /* 264 * swap the UChars 265 * U const UChar uchars[2*(i3-i2)]; 266 */ 267 ds->swapArray16(ds, 268 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 269 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), 270 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 271 pErrorCode); 272 273 /* 274 * swap the additional UTrie 275 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties 276 */ 277 utrie2_swapAnyVersion(ds, 278 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 279 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 280 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 281 pErrorCode); 282 283 /* 284 * swap the properties vectors 285 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; 286 */ 287 ds->swapArray32(ds, 288 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 289 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), 290 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 291 pErrorCode); 292 293 // swap the Script_Extensions data 294 // SCX const uint16_t scriptExtensions[2*(i7-i6)]; 295 ds->swapArray16(ds, 296 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 297 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), 298 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 299 pErrorCode); 300 } 301 302 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ 303 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; 304 } 305 306 /* Unicode case mapping data swapping --------------------------------------- */ 307 308 static int32_t U_CALLCONV 309 ucase_swap(const UDataSwapper *ds, 310 const void *inData, int32_t length, void *outData, 311 UErrorCode *pErrorCode) { 312 const UDataInfo *pInfo; 313 int32_t headerSize; 314 315 const uint8_t *inBytes; 316 uint8_t *outBytes; 317 318 const int32_t *inIndexes; 319 int32_t indexes[16]; 320 321 int32_t i, offset, count, size; 322 323 /* udata_swapDataHeader checks the arguments */ 324 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 325 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 326 return 0; 327 } 328 329 /* check data format and format version */ 330 pInfo=(const UDataInfo *)((const char *)inData+4); 331 if(!( 332 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ 333 pInfo->dataFormat[1]==UCASE_FMT_1 && 334 pInfo->dataFormat[2]==UCASE_FMT_2 && 335 pInfo->dataFormat[3]==UCASE_FMT_3 && 336 ((pInfo->formatVersion[0]==1 && 337 pInfo->formatVersion[2]==UTRIE_SHIFT && 338 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 339 pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) 340 )) { 341 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", 342 pInfo->dataFormat[0], pInfo->dataFormat[1], 343 pInfo->dataFormat[2], pInfo->dataFormat[3], 344 pInfo->formatVersion[0]); 345 *pErrorCode=U_UNSUPPORTED_ERROR; 346 return 0; 347 } 348 349 inBytes=(const uint8_t *)inData+headerSize; 350 outBytes=(uint8_t *)outData+headerSize; 351 352 inIndexes=(const int32_t *)inBytes; 353 354 if(length>=0) { 355 length-=headerSize; 356 if(length<16*4) { 357 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", 358 length); 359 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 360 return 0; 361 } 362 } 363 364 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ 365 for(i=0; i<16; ++i) { 366 indexes[i]=udata_readInt32(ds, inIndexes[i]); 367 } 368 369 /* get the total length of the data */ 370 size=indexes[UCASE_IX_LENGTH]; 371 372 if(length>=0) { 373 if(length<size) { 374 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", 375 length); 376 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 377 return 0; 378 } 379 380 /* copy the data for inaccessible bytes */ 381 if(inBytes!=outBytes) { 382 uprv_memcpy(outBytes, inBytes, size); 383 } 384 385 offset=0; 386 387 /* swap the int32_t indexes[] */ 388 count=indexes[UCASE_IX_INDEX_TOP]*4; 389 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 390 offset+=count; 391 392 /* swap the UTrie */ 393 count=indexes[UCASE_IX_TRIE_SIZE]; 394 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 395 offset+=count; 396 397 /* swap the uint16_t exceptions[] and unfold[] */ 398 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; 399 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 400 offset+=count; 401 402 U_ASSERT(offset==size); 403 } 404 405 return headerSize+size; 406 } 407 408 /* Unicode bidi/shaping data swapping --------------------------------------- */ 409 410 static int32_t U_CALLCONV 411 ubidi_swap(const UDataSwapper *ds, 412 const void *inData, int32_t length, void *outData, 413 UErrorCode *pErrorCode) { 414 const UDataInfo *pInfo; 415 int32_t headerSize; 416 417 const uint8_t *inBytes; 418 uint8_t *outBytes; 419 420 const int32_t *inIndexes; 421 int32_t indexes[16]; 422 423 int32_t i, offset, count, size; 424 425 /* udata_swapDataHeader checks the arguments */ 426 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 427 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 428 return 0; 429 } 430 431 /* check data format and format version */ 432 pInfo=(const UDataInfo *)((const char *)inData+4); 433 if(!( 434 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 435 pInfo->dataFormat[1]==UBIDI_FMT_1 && 436 pInfo->dataFormat[2]==UBIDI_FMT_2 && 437 pInfo->dataFormat[3]==UBIDI_FMT_3 && 438 ((pInfo->formatVersion[0]==1 && 439 pInfo->formatVersion[2]==UTRIE_SHIFT && 440 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 441 pInfo->formatVersion[0]==2) 442 )) { 443 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", 444 pInfo->dataFormat[0], pInfo->dataFormat[1], 445 pInfo->dataFormat[2], pInfo->dataFormat[3], 446 pInfo->formatVersion[0]); 447 *pErrorCode=U_UNSUPPORTED_ERROR; 448 return 0; 449 } 450 451 inBytes=(const uint8_t *)inData+headerSize; 452 outBytes=(uint8_t *)outData+headerSize; 453 454 inIndexes=(const int32_t *)inBytes; 455 456 if(length>=0) { 457 length-=headerSize; 458 if(length<16*4) { 459 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", 460 length); 461 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 462 return 0; 463 } 464 } 465 466 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ 467 for(i=0; i<16; ++i) { 468 indexes[i]=udata_readInt32(ds, inIndexes[i]); 469 } 470 471 /* get the total length of the data */ 472 size=indexes[UBIDI_IX_LENGTH]; 473 474 if(length>=0) { 475 if(length<size) { 476 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", 477 length); 478 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 479 return 0; 480 } 481 482 /* copy the data for inaccessible bytes */ 483 if(inBytes!=outBytes) { 484 uprv_memcpy(outBytes, inBytes, size); 485 } 486 487 offset=0; 488 489 /* swap the int32_t indexes[] */ 490 count=indexes[UBIDI_IX_INDEX_TOP]*4; 491 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 492 offset+=count; 493 494 /* swap the UTrie */ 495 count=indexes[UBIDI_IX_TRIE_SIZE]; 496 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 497 offset+=count; 498 499 /* swap the uint32_t mirrors[] */ 500 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; 501 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 502 offset+=count; 503 504 /* just skip the uint8_t jgArray[] and jgArray2[] */ 505 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; 506 offset+=count; 507 count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2]; 508 offset+=count; 509 510 U_ASSERT(offset==size); 511 } 512 513 return headerSize+size; 514 } 515 516 /* Unicode normalization data swapping -------------------------------------- */ 517 518 #if !UCONFIG_NO_NORMALIZATION 519 520 static int32_t U_CALLCONV 521 unorm_swap(const UDataSwapper *ds, 522 const void *inData, int32_t length, void *outData, 523 UErrorCode *pErrorCode) { 524 const UDataInfo *pInfo; 525 int32_t headerSize; 526 527 const uint8_t *inBytes; 528 uint8_t *outBytes; 529 530 const int32_t *inIndexes; 531 int32_t indexes[32]; 532 533 int32_t i, offset, count, size; 534 535 /* udata_swapDataHeader checks the arguments */ 536 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 537 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 538 return 0; 539 } 540 541 /* check data format and format version */ 542 pInfo=(const UDataInfo *)((const char *)inData+4); 543 if(!( 544 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ 545 pInfo->dataFormat[1]==0x6f && 546 pInfo->dataFormat[2]==0x72 && 547 pInfo->dataFormat[3]==0x6d && 548 pInfo->formatVersion[0]==2 549 )) { 550 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", 551 pInfo->dataFormat[0], pInfo->dataFormat[1], 552 pInfo->dataFormat[2], pInfo->dataFormat[3], 553 pInfo->formatVersion[0]); 554 *pErrorCode=U_UNSUPPORTED_ERROR; 555 return 0; 556 } 557 558 inBytes=(const uint8_t *)inData+headerSize; 559 outBytes=(uint8_t *)outData+headerSize; 560 561 inIndexes=(const int32_t *)inBytes; 562 563 if(length>=0) { 564 length-=headerSize; 565 if(length<32*4) { 566 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", 567 length); 568 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 569 return 0; 570 } 571 } 572 573 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ 574 for(i=0; i<32; ++i) { 575 indexes[i]=udata_readInt32(ds, inIndexes[i]); 576 } 577 578 /* calculate the total length of the data */ 579 size= 580 32*4+ /* size of indexes[] */ 581 indexes[_NORM_INDEX_TRIE_SIZE]+ 582 indexes[_NORM_INDEX_UCHAR_COUNT]*2+ 583 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ 584 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ 585 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ 586 indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 587 588 if(length>=0) { 589 if(length<size) { 590 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", 591 length); 592 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 593 return 0; 594 } 595 596 /* copy the data for inaccessible bytes */ 597 if(inBytes!=outBytes) { 598 uprv_memcpy(outBytes, inBytes, size); 599 } 600 601 offset=0; 602 603 /* swap the indexes[] */ 604 count=32*4; 605 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 606 offset+=count; 607 608 /* swap the main UTrie */ 609 count=indexes[_NORM_INDEX_TRIE_SIZE]; 610 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 611 offset+=count; 612 613 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ 614 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; 615 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 616 offset+=count; 617 618 /* swap the FCD UTrie */ 619 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; 620 if(count!=0) { 621 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 622 offset+=count; 623 } 624 625 /* swap the aux UTrie */ 626 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; 627 if(count!=0) { 628 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 629 offset+=count; 630 } 631 632 /* swap the uint16_t combiningTable[] */ 633 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 634 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 635 offset+=count; 636 } 637 638 return headerSize+size; 639 } 640 641 #endif 642 643 /* Swap 'Test' data from gentest */ 644 static int32_t U_CALLCONV 645 test_swap(const UDataSwapper *ds, 646 const void *inData, int32_t length, void *outData, 647 UErrorCode *pErrorCode) { 648 const UDataInfo *pInfo; 649 int32_t headerSize; 650 651 const uint8_t *inBytes; 652 uint8_t *outBytes; 653 654 int32_t offset; 655 656 /* udata_swapDataHeader checks the arguments */ 657 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 658 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 659 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); 660 return 0; 661 } 662 663 /* check data format and format version */ 664 pInfo=(const UDataInfo *)((const char *)inData+4); 665 if(!( 666 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ 667 pInfo->dataFormat[1]==0x65 && 668 pInfo->dataFormat[2]==0x73 && 669 pInfo->dataFormat[3]==0x74 && 670 pInfo->formatVersion[0]==1 671 )) { 672 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", 673 pInfo->dataFormat[0], pInfo->dataFormat[1], 674 pInfo->dataFormat[2], pInfo->dataFormat[3], 675 pInfo->formatVersion[0]); 676 *pErrorCode=U_UNSUPPORTED_ERROR; 677 return 0; 678 } 679 680 inBytes=(const uint8_t *)inData+headerSize; 681 outBytes=(uint8_t *)outData+headerSize; 682 683 int32_t size16 = 2; // 16bit plus padding 684 int32_t sizeStr = 5; // 4 char inv-str plus null 685 int32_t size = size16 + sizeStr; 686 687 if(length>=0) { 688 if(length<size) { 689 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", 690 length, size); 691 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 692 return 0; 693 } 694 695 offset =0; 696 /* swap a 1 entry array */ 697 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); 698 offset+=size16; 699 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); 700 } 701 702 return headerSize+size; 703 } 704 705 /* swap any data (except a .dat package) ------------------------------------ */ 706 707 static const struct { 708 uint8_t dataFormat[4]; 709 UDataSwapFn *swapFn; 710 } swapFns[]={ 711 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ 712 #if !UCONFIG_NO_LEGACY_CONVERSION 713 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ 714 #endif 715 #if !UCONFIG_NO_CONVERSION 716 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ 717 #endif 718 #if !UCONFIG_NO_IDNA 719 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 720 #endif 721 /* insert data formats here, descending by expected frequency of occurrence */ 722 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ 723 724 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, 725 ucase_swap }, /* dataFormat="cAsE" */ 726 727 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, 728 ubidi_swap }, /* dataFormat="BiDi" */ 729 730 #if !UCONFIG_NO_NORMALIZATION 731 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ 732 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ 733 #endif 734 #if !UCONFIG_NO_COLLATION 735 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ 736 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ 737 #endif 738 #if !UCONFIG_NO_BREAK_ITERATION 739 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 740 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ 741 #endif 742 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ 743 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ 744 #if !UCONFIG_NO_NORMALIZATION 745 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ 746 #endif 747 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ 748 }; 749 750 U_CAPI int32_t U_EXPORT2 751 udata_swap(const UDataSwapper *ds, 752 const void *inData, int32_t length, void *outData, 753 UErrorCode *pErrorCode) { 754 char dataFormatChars[4]; 755 const UDataInfo *pInfo; 756 int32_t i, swappedLength; 757 758 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 759 return 0; 760 } 761 762 /* 763 * Preflight the header first; checks for illegal arguments, too. 764 * Do not swap the header right away because the format-specific swapper 765 * will swap it, get the headerSize again, and also use the header 766 * information. Otherwise we would have to pass some of the information 767 * and not be able to use the UDataSwapFn signature. 768 */ 769 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); 770 771 /* 772 * If we wanted udata_swap() to also handle non-loadable data like a UTrie, 773 * then we could check here for further known magic values and structures. 774 */ 775 if(U_FAILURE(*pErrorCode)) { 776 return 0; /* the data format was not recognized */ 777 } 778 779 pInfo=(const UDataInfo *)((const char *)inData+4); 780 781 { 782 /* convert the data format from ASCII to Unicode to the system charset */ 783 UChar u[4]={ 784 pInfo->dataFormat[0], pInfo->dataFormat[1], 785 pInfo->dataFormat[2], pInfo->dataFormat[3] 786 }; 787 788 if(uprv_isInvariantUString(u, 4)) { 789 u_UCharsToChars(u, dataFormatChars, 4); 790 } else { 791 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; 792 } 793 } 794 795 /* dispatch to the swap function for the dataFormat */ 796 for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) { 797 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { 798 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); 799 800 if(U_FAILURE(*pErrorCode)) { 801 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", 802 pInfo->dataFormat[0], pInfo->dataFormat[1], 803 pInfo->dataFormat[2], pInfo->dataFormat[3], 804 dataFormatChars[0], dataFormatChars[1], 805 dataFormatChars[2], dataFormatChars[3], 806 u_errorName(*pErrorCode)); 807 } else if(swappedLength<(length-15)) { 808 /* swapped less than expected */ 809 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 810 swappedLength, length, 811 pInfo->dataFormat[0], pInfo->dataFormat[1], 812 pInfo->dataFormat[2], pInfo->dataFormat[3], 813 dataFormatChars[0], dataFormatChars[1], 814 dataFormatChars[2], dataFormatChars[3], 815 u_errorName(*pErrorCode)); 816 } 817 818 return swappedLength; 819 } 820 } 821 822 /* the dataFormat was not recognized */ 823 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 824 pInfo->dataFormat[0], pInfo->dataFormat[1], 825 pInfo->dataFormat[2], pInfo->dataFormat[3], 826 dataFormatChars[0], dataFormatChars[1], 827 dataFormatChars[2], dataFormatChars[3]); 828 829 *pErrorCode=U_UNSUPPORTED_ERROR; 830 return 0; 831 } 832