1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2005-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: swapimpl.cpp 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005may05 16 * created by: Markus W. Scherer 17 * 18 * Data file swapping functions moved here from the common library 19 * because some data is hardcoded in ICU4C and needs not be swapped any more. 20 * Moving the functions here simplifies testing (for code coverage) because 21 * we need not jump through hoops (like adding snapshots of these files 22 * to testdata). 23 * 24 * The declarations for these functions remain in the internal header files 25 * in icu/source/common/ 26 */ 27 28 #include "unicode/utypes.h" 29 #include "unicode/putil.h" 30 #include "unicode/udata.h" 31 32 /* Explicit include statement for std_string.h is needed 33 * for compilation on certain platforms. (e.g. AIX/VACPP) 34 */ 35 #include "unicode/std_string.h" 36 37 #include "cmemory.h" 38 #include "cstring.h" 39 #include "uinvchar.h" 40 #include "uassert.h" 41 #include "uarrsort.h" 42 #include "ucmndata.h" 43 #include "udataswp.h" 44 45 /* swapping implementations in common */ 46 47 #include "uresdata.h" 48 #include "ucnv_io.h" 49 #include "uprops.h" 50 #include "ucase.h" 51 #include "ubidi_props.h" 52 #include "ucol_swp.h" 53 #include "ucnv_bld.h" 54 #include "unormimp.h" 55 #include "normalizer2impl.h" 56 #include "sprpimpl.h" 57 #include "propname.h" 58 #include "rbbidata.h" 59 #include "utrie2.h" 60 #include "dictionarydata.h" 61 62 /* swapping implementations in i18n */ 63 64 #if !UCONFIG_NO_NORMALIZATION 65 #include "uspoof_impl.h" 66 #endif 67 68 U_NAMESPACE_USE 69 70 /* definitions */ 71 72 /* Unicode property (value) aliases data swapping --------------------------- */ 73 74 static int32_t U_CALLCONV 75 upname_swap(const UDataSwapper *ds, 76 const void *inData, int32_t length, void *outData, 77 UErrorCode *pErrorCode) { 78 /* udata_swapDataHeader checks the arguments */ 79 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 80 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 81 return 0; 82 } 83 84 /* check data format and format version */ 85 const UDataInfo *pInfo= 86 reinterpret_cast<const UDataInfo *>( 87 static_cast<const char *>(inData)+4); 88 if(!( 89 pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ 90 pInfo->dataFormat[1]==0x6e && 91 pInfo->dataFormat[2]==0x61 && 92 pInfo->dataFormat[3]==0x6d && 93 pInfo->formatVersion[0]==2 94 )) { 95 udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", 96 pInfo->dataFormat[0], pInfo->dataFormat[1], 97 pInfo->dataFormat[2], pInfo->dataFormat[3], 98 pInfo->formatVersion[0]); 99 *pErrorCode=U_UNSUPPORTED_ERROR; 100 return 0; 101 } 102 103 const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize; 104 uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize; 105 106 if(length>=0) { 107 length-=headerSize; 108 // formatVersion 2 initially has indexes[8], 32 bytes. 109 if(length<32) { 110 udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", 111 (int)length); 112 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 113 return 0; 114 } 115 } 116 117 const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes); 118 int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); 119 if(length>=0) { 120 if(length<totalSize) { 121 udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) " 122 "for pnames.icu\n", 123 (int)length, (int)totalSize); 124 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 125 return 0; 126 } 127 128 int32_t numBytesIndexesAndValueMaps= 129 udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]); 130 131 // Swap the indexes[] and the valueMaps[]. 132 ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); 133 134 // Copy the rest of the data. 135 if(inBytes!=outBytes) { 136 uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, 137 inBytes+numBytesIndexesAndValueMaps, 138 totalSize-numBytesIndexesAndValueMaps); 139 } 140 141 // We need not swap anything else: 142 // 143 // The ByteTries are already byte-serialized, and are fixed on ASCII. 144 // (On an EBCDIC machine, the input string is converted to lowercase ASCII 145 // while matching.) 146 // 147 // The name groups are mostly invariant characters, but since we only 148 // generate, and keep in subversion, ASCII versions of pnames.icu, 149 // and since only ICU4J uses the pnames.icu data file 150 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, 151 // we just copy those bytes too. 152 } 153 154 return headerSize+totalSize; 155 } 156 157 /* Unicode properties data swapping ----------------------------------------- */ 158 159 static int32_t U_CALLCONV 160 uprops_swap(const UDataSwapper *ds, 161 const void *inData, int32_t length, void *outData, 162 UErrorCode *pErrorCode) { 163 const UDataInfo *pInfo; 164 int32_t headerSize, i; 165 166 int32_t dataIndexes[UPROPS_INDEX_COUNT]; 167 const int32_t *inData32; 168 169 /* udata_swapDataHeader checks the arguments */ 170 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 171 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 172 return 0; 173 } 174 175 /* check data format and format version */ 176 pInfo=(const UDataInfo *)((const char *)inData+4); 177 if(!( 178 pInfo->dataFormat[0]==0x55 && /* dataFormat="UPro" */ 179 pInfo->dataFormat[1]==0x50 && 180 pInfo->dataFormat[2]==0x72 && 181 pInfo->dataFormat[3]==0x6f && 182 (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) && 183 (pInfo->formatVersion[0]>=7 || 184 (pInfo->formatVersion[2]==UTRIE_SHIFT && 185 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT)) 186 )) { 187 udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n", 188 pInfo->dataFormat[0], pInfo->dataFormat[1], 189 pInfo->dataFormat[2], pInfo->dataFormat[3], 190 pInfo->formatVersion[0]); 191 *pErrorCode=U_UNSUPPORTED_ERROR; 192 return 0; 193 } 194 195 /* the properties file must contain at least the indexes array */ 196 if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) { 197 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 198 length-headerSize); 199 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 200 return 0; 201 } 202 203 /* read the indexes */ 204 inData32=(const int32_t *)((const char *)inData+headerSize); 205 for(i=0; i<UPROPS_INDEX_COUNT; ++i) { 206 dataIndexes[i]=udata_readInt32(ds, inData32[i]); 207 } 208 209 /* 210 * comments are copied from the data format description in genprops/store.c 211 * indexes[] constants are in uprops.h 212 */ 213 int32_t dataTop; 214 if(length>=0) { 215 int32_t *outData32; 216 217 /* 218 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size. 219 * In earlier formatVersions, it is 0 and a lower dataIndexes entry 220 * has the top of the last item. 221 */ 222 for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {} 223 224 if((length-headerSize)<(4*dataTop)) { 225 udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n", 226 length-headerSize); 227 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 228 return 0; 229 } 230 231 outData32=(int32_t *)((char *)outData+headerSize); 232 233 /* copy everything for inaccessible data (padding) */ 234 if(inData32!=outData32) { 235 uprv_memcpy(outData32, inData32, 4*(size_t)dataTop); 236 } 237 238 /* swap the indexes[16] */ 239 ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode); 240 241 /* 242 * swap the main properties UTrie 243 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) 244 */ 245 utrie2_swapAnyVersion(ds, 246 inData32+UPROPS_INDEX_COUNT, 247 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), 248 outData32+UPROPS_INDEX_COUNT, 249 pErrorCode); 250 251 /* 252 * swap the properties and exceptions words 253 * P const uint32_t props32[i1-i0]; 254 * E const uint32_t exceptions[i2-i1]; 255 */ 256 ds->swapArray32(ds, 257 inData32+dataIndexes[UPROPS_PROPS32_INDEX], 258 4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]), 259 outData32+dataIndexes[UPROPS_PROPS32_INDEX], 260 pErrorCode); 261 262 /* 263 * swap the UChars 264 * U const UChar uchars[2*(i3-i2)]; 265 */ 266 ds->swapArray16(ds, 267 inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 268 4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]), 269 outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX], 270 pErrorCode); 271 272 /* 273 * swap the additional UTrie 274 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties 275 */ 276 utrie2_swapAnyVersion(ds, 277 inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 278 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), 279 outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 280 pErrorCode); 281 282 /* 283 * swap the properties vectors 284 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4]; 285 */ 286 ds->swapArray32(ds, 287 inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 288 4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]), 289 outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX], 290 pErrorCode); 291 292 // swap the Script_Extensions data 293 // SCX const uint16_t scriptExtensions[2*(i7-i6)]; 294 ds->swapArray16(ds, 295 inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 296 4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]), 297 outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX], 298 pErrorCode); 299 } 300 301 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */ 302 return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7]; 303 } 304 305 /* Unicode case mapping data swapping --------------------------------------- */ 306 307 static int32_t U_CALLCONV 308 ucase_swap(const UDataSwapper *ds, 309 const void *inData, int32_t length, void *outData, 310 UErrorCode *pErrorCode) { 311 const UDataInfo *pInfo; 312 int32_t headerSize; 313 314 const uint8_t *inBytes; 315 uint8_t *outBytes; 316 317 const int32_t *inIndexes; 318 int32_t indexes[16]; 319 320 int32_t i, offset, count, size; 321 322 /* udata_swapDataHeader checks the arguments */ 323 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 324 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 325 return 0; 326 } 327 328 /* check data format and format version */ 329 pInfo=(const UDataInfo *)((const char *)inData+4); 330 if(!( 331 pInfo->dataFormat[0]==UCASE_FMT_0 && /* dataFormat="cAsE" */ 332 pInfo->dataFormat[1]==UCASE_FMT_1 && 333 pInfo->dataFormat[2]==UCASE_FMT_2 && 334 pInfo->dataFormat[3]==UCASE_FMT_3 && 335 ((pInfo->formatVersion[0]==1 && 336 pInfo->formatVersion[2]==UTRIE_SHIFT && 337 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 338 pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) 339 )) { 340 udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", 341 pInfo->dataFormat[0], pInfo->dataFormat[1], 342 pInfo->dataFormat[2], pInfo->dataFormat[3], 343 pInfo->formatVersion[0]); 344 *pErrorCode=U_UNSUPPORTED_ERROR; 345 return 0; 346 } 347 348 inBytes=(const uint8_t *)inData+headerSize; 349 outBytes=(uint8_t *)outData+headerSize; 350 351 inIndexes=(const int32_t *)inBytes; 352 353 if(length>=0) { 354 length-=headerSize; 355 if(length<16*4) { 356 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n", 357 length); 358 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 359 return 0; 360 } 361 } 362 363 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */ 364 for(i=0; i<16; ++i) { 365 indexes[i]=udata_readInt32(ds, inIndexes[i]); 366 } 367 368 /* get the total length of the data */ 369 size=indexes[UCASE_IX_LENGTH]; 370 371 if(length>=0) { 372 if(length<size) { 373 udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n", 374 length); 375 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 376 return 0; 377 } 378 379 /* copy the data for inaccessible bytes */ 380 if(inBytes!=outBytes) { 381 uprv_memcpy(outBytes, inBytes, size); 382 } 383 384 offset=0; 385 386 /* swap the int32_t indexes[] */ 387 count=indexes[UCASE_IX_INDEX_TOP]*4; 388 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 389 offset+=count; 390 391 /* swap the UTrie */ 392 count=indexes[UCASE_IX_TRIE_SIZE]; 393 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 394 offset+=count; 395 396 /* swap the uint16_t exceptions[] and unfold[] */ 397 count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2; 398 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 399 offset+=count; 400 401 U_ASSERT(offset==size); 402 } 403 404 return headerSize+size; 405 } 406 407 /* Unicode bidi/shaping data swapping --------------------------------------- */ 408 409 static int32_t U_CALLCONV 410 ubidi_swap(const UDataSwapper *ds, 411 const void *inData, int32_t length, void *outData, 412 UErrorCode *pErrorCode) { 413 const UDataInfo *pInfo; 414 int32_t headerSize; 415 416 const uint8_t *inBytes; 417 uint8_t *outBytes; 418 419 const int32_t *inIndexes; 420 int32_t indexes[16]; 421 422 int32_t i, offset, count, size; 423 424 /* udata_swapDataHeader checks the arguments */ 425 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 426 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 427 return 0; 428 } 429 430 /* check data format and format version */ 431 pInfo=(const UDataInfo *)((const char *)inData+4); 432 if(!( 433 pInfo->dataFormat[0]==UBIDI_FMT_0 && /* dataFormat="BiDi" */ 434 pInfo->dataFormat[1]==UBIDI_FMT_1 && 435 pInfo->dataFormat[2]==UBIDI_FMT_2 && 436 pInfo->dataFormat[3]==UBIDI_FMT_3 && 437 ((pInfo->formatVersion[0]==1 && 438 pInfo->formatVersion[2]==UTRIE_SHIFT && 439 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || 440 pInfo->formatVersion[0]==2) 441 )) { 442 udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n", 443 pInfo->dataFormat[0], pInfo->dataFormat[1], 444 pInfo->dataFormat[2], pInfo->dataFormat[3], 445 pInfo->formatVersion[0]); 446 *pErrorCode=U_UNSUPPORTED_ERROR; 447 return 0; 448 } 449 450 inBytes=(const uint8_t *)inData+headerSize; 451 outBytes=(uint8_t *)outData+headerSize; 452 453 inIndexes=(const int32_t *)inBytes; 454 455 if(length>=0) { 456 length-=headerSize; 457 if(length<16*4) { 458 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n", 459 length); 460 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 461 return 0; 462 } 463 } 464 465 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */ 466 for(i=0; i<16; ++i) { 467 indexes[i]=udata_readInt32(ds, inIndexes[i]); 468 } 469 470 /* get the total length of the data */ 471 size=indexes[UBIDI_IX_LENGTH]; 472 473 if(length>=0) { 474 if(length<size) { 475 udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n", 476 length); 477 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 478 return 0; 479 } 480 481 /* copy the data for inaccessible bytes */ 482 if(inBytes!=outBytes) { 483 uprv_memcpy(outBytes, inBytes, size); 484 } 485 486 offset=0; 487 488 /* swap the int32_t indexes[] */ 489 count=indexes[UBIDI_IX_INDEX_TOP]*4; 490 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 491 offset+=count; 492 493 /* swap the UTrie */ 494 count=indexes[UBIDI_IX_TRIE_SIZE]; 495 utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 496 offset+=count; 497 498 /* swap the uint32_t mirrors[] */ 499 count=indexes[UBIDI_IX_MIRROR_LENGTH]*4; 500 ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 501 offset+=count; 502 503 /* just skip the uint8_t jgArray[] and jgArray2[] */ 504 count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; 505 offset+=count; 506 count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2]; 507 offset+=count; 508 509 U_ASSERT(offset==size); 510 } 511 512 return headerSize+size; 513 } 514 515 /* Unicode normalization data swapping -------------------------------------- */ 516 517 #if !UCONFIG_NO_NORMALIZATION 518 519 static int32_t U_CALLCONV 520 unorm_swap(const UDataSwapper *ds, 521 const void *inData, int32_t length, void *outData, 522 UErrorCode *pErrorCode) { 523 const UDataInfo *pInfo; 524 int32_t headerSize; 525 526 const uint8_t *inBytes; 527 uint8_t *outBytes; 528 529 const int32_t *inIndexes; 530 int32_t indexes[32]; 531 532 int32_t i, offset, count, size; 533 534 /* udata_swapDataHeader checks the arguments */ 535 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 536 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 537 return 0; 538 } 539 540 /* check data format and format version */ 541 pInfo=(const UDataInfo *)((const char *)inData+4); 542 if(!( 543 pInfo->dataFormat[0]==0x4e && /* dataFormat="Norm" */ 544 pInfo->dataFormat[1]==0x6f && 545 pInfo->dataFormat[2]==0x72 && 546 pInfo->dataFormat[3]==0x6d && 547 pInfo->formatVersion[0]==2 548 )) { 549 udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n", 550 pInfo->dataFormat[0], pInfo->dataFormat[1], 551 pInfo->dataFormat[2], pInfo->dataFormat[3], 552 pInfo->formatVersion[0]); 553 *pErrorCode=U_UNSUPPORTED_ERROR; 554 return 0; 555 } 556 557 inBytes=(const uint8_t *)inData+headerSize; 558 outBytes=(uint8_t *)outData+headerSize; 559 560 inIndexes=(const int32_t *)inBytes; 561 562 if(length>=0) { 563 length-=headerSize; 564 if(length<32*4) { 565 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n", 566 length); 567 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 568 return 0; 569 } 570 } 571 572 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */ 573 for(i=0; i<32; ++i) { 574 indexes[i]=udata_readInt32(ds, inIndexes[i]); 575 } 576 577 /* calculate the total length of the data */ 578 size= 579 32*4+ /* size of indexes[] */ 580 indexes[_NORM_INDEX_TRIE_SIZE]+ 581 indexes[_NORM_INDEX_UCHAR_COUNT]*2+ 582 indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+ 583 indexes[_NORM_INDEX_FCD_TRIE_SIZE]+ 584 indexes[_NORM_INDEX_AUX_TRIE_SIZE]+ 585 indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 586 587 if(length>=0) { 588 if(length<size) { 589 udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n", 590 length); 591 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 592 return 0; 593 } 594 595 /* copy the data for inaccessible bytes */ 596 if(inBytes!=outBytes) { 597 uprv_memcpy(outBytes, inBytes, size); 598 } 599 600 offset=0; 601 602 /* swap the indexes[] */ 603 count=32*4; 604 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 605 offset+=count; 606 607 /* swap the main UTrie */ 608 count=indexes[_NORM_INDEX_TRIE_SIZE]; 609 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 610 offset+=count; 611 612 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */ 613 count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2; 614 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 615 offset+=count; 616 617 /* swap the FCD UTrie */ 618 count=indexes[_NORM_INDEX_FCD_TRIE_SIZE]; 619 if(count!=0) { 620 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 621 offset+=count; 622 } 623 624 /* swap the aux UTrie */ 625 count=indexes[_NORM_INDEX_AUX_TRIE_SIZE]; 626 if(count!=0) { 627 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 628 offset+=count; 629 } 630 631 /* swap the uint16_t combiningTable[] */ 632 count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2; 633 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 634 offset+=count; 635 } 636 637 return headerSize+size; 638 } 639 640 #endif 641 642 /* Swap 'Test' data from gentest */ 643 static int32_t U_CALLCONV 644 test_swap(const UDataSwapper *ds, 645 const void *inData, int32_t length, void *outData, 646 UErrorCode *pErrorCode) { 647 const UDataInfo *pInfo; 648 int32_t headerSize; 649 650 const uint8_t *inBytes; 651 uint8_t *outBytes; 652 653 int32_t offset; 654 655 /* udata_swapDataHeader checks the arguments */ 656 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 657 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 658 udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); 659 return 0; 660 } 661 662 /* check data format and format version */ 663 pInfo=(const UDataInfo *)((const char *)inData+4); 664 if(!( 665 pInfo->dataFormat[0]==0x54 && /* dataFormat="Norm" */ 666 pInfo->dataFormat[1]==0x65 && 667 pInfo->dataFormat[2]==0x73 && 668 pInfo->dataFormat[3]==0x74 && 669 pInfo->formatVersion[0]==1 670 )) { 671 udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n", 672 pInfo->dataFormat[0], pInfo->dataFormat[1], 673 pInfo->dataFormat[2], pInfo->dataFormat[3], 674 pInfo->formatVersion[0]); 675 *pErrorCode=U_UNSUPPORTED_ERROR; 676 return 0; 677 } 678 679 inBytes=(const uint8_t *)inData+headerSize; 680 outBytes=(uint8_t *)outData+headerSize; 681 682 int32_t size16 = 2; // 16bit plus padding 683 int32_t sizeStr = 5; // 4 char inv-str plus null 684 int32_t size = size16 + sizeStr; 685 686 if(length>=0) { 687 if(length<size) { 688 udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n", 689 length, size); 690 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 691 return 0; 692 } 693 694 offset =0; 695 /* swap a 1 entry array */ 696 ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode); 697 offset+=size16; 698 ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode); 699 } 700 701 return headerSize+size; 702 } 703 704 /* swap any data (except a .dat package) ------------------------------------ */ 705 706 static const struct { 707 uint8_t dataFormat[4]; 708 UDataSwapFn *swapFn; 709 } swapFns[]={ 710 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap }, /* dataFormat="ResB" */ 711 #if !UCONFIG_NO_LEGACY_CONVERSION 712 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap }, /* dataFormat="cnvt" */ 713 #endif 714 #if !UCONFIG_NO_CONVERSION 715 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases }, /* dataFormat="CvAl" */ 716 #endif 717 #if !UCONFIG_NO_IDNA 718 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap }, /* dataFormat="SPRP" */ 719 #endif 720 /* insert data formats here, descending by expected frequency of occurrence */ 721 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap }, /* dataFormat="UPro" */ 722 723 { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, 724 ucase_swap }, /* dataFormat="cAsE" */ 725 726 { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 }, 727 ubidi_swap }, /* dataFormat="BiDi" */ 728 729 #if !UCONFIG_NO_NORMALIZATION 730 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ 731 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ 732 #endif 733 #if !UCONFIG_NO_COLLATION 734 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ 735 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ 736 #endif 737 #if !UCONFIG_NO_BREAK_ITERATION 738 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ 739 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ 740 #endif 741 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ 742 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ 743 #if !UCONFIG_NO_NORMALIZATION 744 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap }, /* dataFormat="Cfu " */ 745 #endif 746 { { 0x54, 0x65, 0x73, 0x74 }, test_swap } /* dataFormat="Test" */ 747 }; 748 749 U_CAPI int32_t U_EXPORT2 750 udata_swap(const UDataSwapper *ds, 751 const void *inData, int32_t length, void *outData, 752 UErrorCode *pErrorCode) { 753 char dataFormatChars[4]; 754 const UDataInfo *pInfo; 755 int32_t i, swappedLength; 756 757 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 758 return 0; 759 } 760 761 /* 762 * Preflight the header first; checks for illegal arguments, too. 763 * Do not swap the header right away because the format-specific swapper 764 * will swap it, get the headerSize again, and also use the header 765 * information. Otherwise we would have to pass some of the information 766 * and not be able to use the UDataSwapFn signature. 767 */ 768 udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); 769 770 /* 771 * If we wanted udata_swap() to also handle non-loadable data like a UTrie, 772 * then we could check here for further known magic values and structures. 773 */ 774 if(U_FAILURE(*pErrorCode)) { 775 return 0; /* the data format was not recognized */ 776 } 777 778 pInfo=(const UDataInfo *)((const char *)inData+4); 779 780 { 781 /* convert the data format from ASCII to Unicode to the system charset */ 782 UChar u[4]={ 783 pInfo->dataFormat[0], pInfo->dataFormat[1], 784 pInfo->dataFormat[2], pInfo->dataFormat[3] 785 }; 786 787 if(uprv_isInvariantUString(u, 4)) { 788 u_UCharsToChars(u, dataFormatChars, 4); 789 } else { 790 dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?'; 791 } 792 } 793 794 /* dispatch to the swap function for the dataFormat */ 795 for(i=0; i<UPRV_LENGTHOF(swapFns); ++i) { 796 if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) { 797 swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode); 798 799 if(U_FAILURE(*pErrorCode)) { 800 udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n", 801 pInfo->dataFormat[0], pInfo->dataFormat[1], 802 pInfo->dataFormat[2], pInfo->dataFormat[3], 803 dataFormatChars[0], dataFormatChars[1], 804 dataFormatChars[2], dataFormatChars[3], 805 u_errorName(*pErrorCode)); 806 } else if(swappedLength<(length-15)) { 807 /* swapped less than expected */ 808 udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 809 swappedLength, length, 810 pInfo->dataFormat[0], pInfo->dataFormat[1], 811 pInfo->dataFormat[2], pInfo->dataFormat[3], 812 dataFormatChars[0], dataFormatChars[1], 813 dataFormatChars[2], dataFormatChars[3], 814 u_errorName(*pErrorCode)); 815 } 816 817 return swappedLength; 818 } 819 } 820 821 /* the dataFormat was not recognized */ 822 udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n", 823 pInfo->dataFormat[0], pInfo->dataFormat[1], 824 pInfo->dataFormat[2], pInfo->dataFormat[3], 825 dataFormatChars[0], dataFormatChars[1], 826 dataFormatChars[2], dataFormatChars[3]); 827 828 *pErrorCode=U_UNSUPPORTED_ERROR; 829 return 0; 830 } 831