1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2007, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: icuswap.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003aug08 14 * created by: Markus W. Scherer 15 * 16 * This tool takes an ICU data file and "swaps" it, that is, changes its 17 * platform properties between big-/little-endianness and ASCII/EBCDIC charset 18 * families. 19 * The modified data file is written to a new file. 20 * Useful as an install-time tool for shipping only one flavor of ICU data 21 * and preparing data files for the target platform. 22 * Will not work with data DLLs (shared libraries). 23 */ 24 25 #include "unicode/utypes.h" 26 #include "unicode/putil.h" 27 #include "unicode/udata.h" 28 #include "cmemory.h" 29 #include "cstring.h" 30 #include "uinvchar.h" 31 #include "uarrsort.h" 32 #include "ucmndata.h" 33 #include "udataswp.h" 34 #include "swapimpl.h" 35 #include "toolutil.h" 36 #include "uoptions.h" 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 /* definitions */ 43 44 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 45 #define DEFAULT_PADDING_LENGTH 15 46 47 static UOption options[]={ 48 UOPTION_HELP_H, 49 UOPTION_HELP_QUESTION_MARK, 50 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) 51 }; 52 53 enum { 54 OPT_HELP_H, 55 OPT_HELP_QUESTION_MARK, 56 OPT_OUT_TYPE 57 }; 58 59 static int32_t 60 fileSize(FILE *f) { 61 int32_t size; 62 63 fseek(f, 0, SEEK_END); 64 size=(int32_t)ftell(f); 65 fseek(f, 0, SEEK_SET); 66 return size; 67 } 68 69 /** 70 * Swap an ICU .dat package, including swapping of enclosed items. 71 */ 72 U_CFUNC int32_t U_CALLCONV 73 udata_swapPackage(const char *inFilename, const char *outFilename, 74 const UDataSwapper *ds, 75 const void *inData, int32_t length, void *outData, 76 UErrorCode *pErrorCode); 77 78 U_CDECL_BEGIN 79 static void U_CALLCONV 80 printError(void *context, const char *fmt, va_list args) { 81 vfprintf((FILE *)context, fmt, args); 82 } 83 U_CDECL_END 84 85 static int 86 printUsage(const char *pname, UBool ishelp) { 87 fprintf(stderr, 88 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", 89 ishelp ? 'U' : 'u', pname); 90 if(ishelp) { 91 fprintf(stderr, 92 "\nOptions: -h, -?, --help print this message and exit\n" 93 " Read the input file, swap its platform properties according\n" 94 " to the -t or --type option, and write the result to the output file.\n" 95 " -tl change to little-endian/ASCII charset family\n" 96 " -tb change to big-endian/ASCII charset family\n" 97 " -te change to big-endian/EBCDIC charset family\n"); 98 } 99 100 return !ishelp; 101 } 102 103 extern int 104 main(int argc, char *argv[]) { 105 FILE *in, *out; 106 const char *pname; 107 char *data; 108 int32_t length; 109 UBool ishelp; 110 int rc; 111 112 UDataSwapper *ds; 113 const UDataInfo *pInfo; 114 UErrorCode errorCode; 115 uint8_t outCharset; 116 UBool outIsBigEndian; 117 118 U_MAIN_INIT_ARGS(argc, argv); 119 120 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n"); 121 122 /* get the program basename */ 123 pname=strrchr(argv[0], U_FILE_SEP_CHAR); 124 if(pname==NULL) { 125 pname=strrchr(argv[0], '/'); 126 } 127 if(pname!=NULL) { 128 ++pname; 129 } else { 130 pname=argv[0]; 131 } 132 133 argc=u_parseArgs(argc, argv, LENGTHOF(options), options); 134 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; 135 if(ishelp || argc!=3) { 136 return printUsage(pname, ishelp); 137 } 138 139 /* parse the output type option */ 140 data=(char *)options[OPT_OUT_TYPE].value; 141 if(data[0]==0 || data[1]!=0) { 142 /* the type must be exactly one letter */ 143 return printUsage(pname, FALSE); 144 } 145 switch(data[0]) { 146 case 'l': 147 outIsBigEndian=FALSE; 148 outCharset=U_ASCII_FAMILY; 149 break; 150 case 'b': 151 outIsBigEndian=TRUE; 152 outCharset=U_ASCII_FAMILY; 153 break; 154 case 'e': 155 outIsBigEndian=TRUE; 156 outCharset=U_EBCDIC_FAMILY; 157 break; 158 default: 159 return printUsage(pname, FALSE); 160 } 161 162 in=out=NULL; 163 data=NULL; 164 165 /* open the input file, get its length, allocate memory for it, read the file */ 166 in=fopen(argv[1], "rb"); 167 if(in==NULL) { 168 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); 169 rc=2; 170 goto done; 171 } 172 173 length=fileSize(in); 174 if(length<DEFAULT_PADDING_LENGTH) { 175 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]); 176 rc=2; 177 goto done; 178 } 179 180 /* 181 * +15: udata_swapPackage() may need to add a few padding bytes to the 182 * last item if charset swapping is done, 183 * because the last item may be resorted into the middle and then needs 184 * additional padding bytes 185 */ 186 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH); 187 if(data==NULL) { 188 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]); 189 rc=2; 190 goto done; 191 } 192 193 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */ 194 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH); 195 196 if(length!=(int32_t)fread(data, 1, length, in)) { 197 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]); 198 rc=3; 199 goto done; 200 } 201 202 fclose(in); 203 in=NULL; 204 205 /* swap the data in-place */ 206 errorCode=U_ZERO_ERROR; 207 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode); 208 if(U_FAILURE(errorCode)) { 209 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n", 210 pname, argv[1], u_errorName(errorCode)); 211 rc=4; 212 goto done; 213 } 214 215 ds->printError=printError; 216 ds->printErrorContext=stderr; 217 218 /* speculative cast, protected by the following length check */ 219 pInfo=(const UDataInfo *)((const char *)data+4); 220 221 if( length>=20 && 222 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 223 pInfo->dataFormat[1]==0x6d && 224 pInfo->dataFormat[2]==0x6e && 225 pInfo->dataFormat[3]==0x44 226 ) { 227 /* 228 * swap the .dat package 229 * udata_swapPackage() needs to rename ToC name entries from the old package 230 * name to the new one. 231 * We pass it the filenames, and udata_swapPackage() will extract the 232 * package names. 233 */ 234 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode); 235 udata_closeSwapper(ds); 236 if(U_FAILURE(errorCode)) { 237 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n", 238 pname, argv[1], u_errorName(errorCode)); 239 rc=4; 240 goto done; 241 } 242 } else { 243 /* swap the data, which is not a .dat package */ 244 length=udata_swap(ds, data, length, data, &errorCode); 245 udata_closeSwapper(ds); 246 if(U_FAILURE(errorCode)) { 247 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", 248 pname, argv[1], u_errorName(errorCode)); 249 rc=4; 250 goto done; 251 } 252 } 253 254 out=fopen(argv[2], "wb"); 255 if(out==NULL) { 256 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); 257 rc=5; 258 goto done; 259 } 260 261 if(length!=(int32_t)fwrite(data, 1, length, out)) { 262 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); 263 rc=6; 264 goto done; 265 } 266 267 fclose(out); 268 out=NULL; 269 270 /* all done */ 271 rc=0; 272 273 done: 274 if(in!=NULL) { 275 fclose(in); 276 } 277 if(out!=NULL) { 278 fclose(out); 279 } 280 if(data!=NULL) { 281 free(data); 282 } 283 return rc; 284 } 285 286 /* swap .dat package files -------------------------------------------------- */ 287 288 static int32_t 289 extractPackageName(const UDataSwapper *ds, const char *filename, 290 char pkg[], int32_t capacity, 291 UErrorCode *pErrorCode) { 292 const char *basename; 293 int32_t len; 294 295 if(U_FAILURE(*pErrorCode)) { 296 return 0; 297 } 298 299 basename=findBasename(filename); 300 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ 301 302 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { 303 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", 304 basename); 305 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 306 return 0; 307 } 308 309 if(len>=capacity) { 310 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", 311 (long)capacity); 312 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 313 return 0; 314 } 315 316 uprv_memcpy(pkg, basename, len); 317 pkg[len]=0; 318 return len; 319 } 320 321 struct ToCEntry { 322 uint32_t nameOffset, inOffset, outOffset, length; 323 }; 324 325 U_CDECL_BEGIN 326 static int32_t U_CALLCONV 327 compareToCEntries(const void *context, const void *left, const void *right) { 328 const char *chars=(const char *)context; 329 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, 330 chars+((const ToCEntry *)right)->nameOffset); 331 } 332 U_CDECL_END 333 334 U_CFUNC int32_t U_CALLCONV 335 udata_swapPackage(const char *inFilename, const char *outFilename, 336 const UDataSwapper *ds, 337 const void *inData, int32_t length, void *outData, 338 UErrorCode *pErrorCode) { 339 const UDataInfo *pInfo; 340 int32_t headerSize; 341 342 const uint8_t *inBytes; 343 uint8_t *outBytes; 344 345 uint32_t itemCount, offset, i; 346 int32_t itemLength; 347 348 const UDataOffsetTOCEntry *inEntries; 349 UDataOffsetTOCEntry *outEntries; 350 351 ToCEntry *table; 352 353 char inPkgName[32], outPkgName[32]; 354 int32_t inPkgNameLength, outPkgNameLength; 355 356 /* udata_swapDataHeader checks the arguments */ 357 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 358 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 359 return 0; 360 } 361 362 /* check data format and format version */ 363 pInfo=(const UDataInfo *)((const char *)inData+4); 364 if(!( 365 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 366 pInfo->dataFormat[1]==0x6d && 367 pInfo->dataFormat[2]==0x6e && 368 pInfo->dataFormat[3]==0x44 && 369 pInfo->formatVersion[0]==1 370 )) { 371 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 372 pInfo->dataFormat[0], pInfo->dataFormat[1], 373 pInfo->dataFormat[2], pInfo->dataFormat[3], 374 pInfo->formatVersion[0]); 375 *pErrorCode=U_UNSUPPORTED_ERROR; 376 return 0; 377 } 378 379 /* 380 * We need to change the ToC name entries so that they have the correct 381 * package name prefix. 382 * Extract the package names from the in/out filenames. 383 */ 384 inPkgNameLength=extractPackageName( 385 ds, inFilename, 386 inPkgName, (int32_t)sizeof(inPkgName), 387 pErrorCode); 388 outPkgNameLength=extractPackageName( 389 ds, outFilename, 390 outPkgName, (int32_t)sizeof(outPkgName), 391 pErrorCode); 392 if(U_FAILURE(*pErrorCode)) { 393 return 0; 394 } 395 396 /* 397 * It is possible to work with inPkgNameLength!=outPkgNameLength, 398 * but then the length of the data file would change more significantly, 399 * which we are not currently prepared for. 400 */ 401 if(inPkgNameLength!=outPkgNameLength) { 402 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", 403 inPkgName, outPkgName); 404 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 405 return 0; 406 } 407 408 inBytes=(const uint8_t *)inData+headerSize; 409 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 410 411 if(length<0) { 412 /* preflighting */ 413 itemCount=ds->readUInt32(*(const uint32_t *)inBytes); 414 if(itemCount==0) { 415 /* no items: count only the item count and return */ 416 return headerSize+4; 417 } 418 419 /* read the last item's offset and preflight it */ 420 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); 421 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); 422 423 if(U_SUCCESS(*pErrorCode)) { 424 return headerSize+offset+(uint32_t)itemLength; 425 } else { 426 return 0; 427 } 428 } else { 429 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 430 length-=headerSize; 431 if(length<4) { 432 /* itemCount does not fit */ 433 offset=0xffffffff; 434 itemCount=0; /* make compilers happy */ 435 } else { 436 itemCount=ds->readUInt32(*(const uint32_t *)inBytes); 437 if(itemCount==0) { 438 offset=4; 439 } else if((uint32_t)length<(4+8*itemCount)) { 440 /* ToC table does not fit */ 441 offset=0xffffffff; 442 } else { 443 /* offset of the last item plus at least 20 bytes for its header */ 444 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); 445 } 446 } 447 if((uint32_t)length<offset) { 448 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n", 449 length); 450 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 451 return 0; 452 } 453 454 outBytes=(uint8_t *)outData+headerSize; 455 456 /* swap the item count */ 457 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); 458 459 if(itemCount==0) { 460 /* no items: just return now */ 461 return headerSize+4; 462 } 463 464 /* swap the item name strings */ 465 offset=4+8*itemCount; 466 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); 467 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); 468 if(U_FAILURE(*pErrorCode)) { 469 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); 470 return 0; 471 } 472 /* keep offset and itemLength in case we allocate and copy the strings below */ 473 474 /* swap the package names into the output charset */ 475 if(ds->outCharset!=U_CHARSET_FAMILY) { 476 UDataSwapper *ds2; 477 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); 478 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); 479 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); 480 udata_closeSwapper(ds2); 481 if(U_FAILURE(*pErrorCode)) { 482 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); 483 } 484 } 485 486 /* change the prefix of each ToC entry name from the old to the new package name */ 487 { 488 char *entryName; 489 490 for(i=0; i<itemCount; ++i) { 491 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); 492 493 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { 494 uprv_memcpy(entryName, outPkgName, inPkgNameLength); 495 } else { 496 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", 497 (long)i); 498 *pErrorCode=U_INVALID_FORMAT_ERROR; 499 return 0; 500 } 501 } 502 } 503 504 /* 505 * Allocate the ToC table and, if necessary, a temporary buffer for 506 * pseudo-in-place swapping. 507 * 508 * We cannot swap in-place because: 509 * 510 * 1. If the swapping of an item fails mid-way, then in-place swapping 511 * has destroyed its data. 512 * Out-of-place swapping allows us to then copy its original data. 513 * 514 * 2. If swapping changes the charset family, then we must resort 515 * not only the ToC table but also the data items themselves. 516 * This requires a permutation and is best done with separate in/out 517 * buffers. 518 * 519 * We swapped the strings above to avoid the malloc below if string swapping fails. 520 */ 521 if(inData==outData) { 522 /* +15: prepare for extra padding of a newly-last item */ 523 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH); 524 if(table!=NULL) { 525 outBytes=(uint8_t *)(table+itemCount); 526 527 /* copy the item count and the swapped strings */ 528 uprv_memcpy(outBytes, inBytes, 4); 529 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); 530 } 531 } else { 532 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); 533 } 534 if(table==NULL) { 535 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", 536 inData==outData ? 537 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH : 538 itemCount*sizeof(ToCEntry)); 539 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 540 return 0; 541 } 542 outEntries=(UDataOffsetTOCEntry *)(outBytes+4); 543 544 /* read the ToC table */ 545 for(i=0; i<itemCount; ++i) { 546 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); 547 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); 548 if(i>0) { 549 table[i-1].length=table[i].inOffset-table[i-1].inOffset; 550 } 551 } 552 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; 553 554 if(ds->inCharset==ds->outCharset) { 555 /* no charset swapping, no resorting: keep item offsets the same */ 556 for(i=0; i<itemCount; ++i) { 557 table[i].outOffset=table[i].inOffset; 558 } 559 } else { 560 /* charset swapping: resort items by their swapped names */ 561 562 /* 563 * Before the actual sorting, we need to make sure that each item 564 * has a length that is a multiple of 16 bytes so that all items 565 * are 16-aligned. 566 * Only the old last item may be missing up to 15 padding bytes. 567 * Add padding bytes for it. 568 * Since the icuswap main() function has already allocated enough 569 * input buffer space and set the last 15 bytes there to 0xaa, 570 * we only need to increase the total data length and the length 571 * of the last item here. 572 */ 573 if((length&0xf)!=0) { 574 int32_t delta=16-(length&0xf); 575 length+=delta; 576 table[itemCount-1].length+=(uint32_t)delta; 577 } 578 579 /* Save the offset before we sort the TOC. */ 580 offset=table[0].inOffset; 581 /* sort the TOC entries */ 582 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), 583 compareToCEntries, outBytes, FALSE, pErrorCode); 584 585 /* 586 * Note: Before sorting, the inOffset values were in order. 587 * Now the outOffset values are in order. 588 */ 589 590 /* assign outOffset values */ 591 for(i=0; i<itemCount; ++i) { 592 table[i].outOffset=offset; 593 offset+=table[i].length; 594 } 595 } 596 597 /* write the output ToC table */ 598 for(i=0; i<itemCount; ++i) { 599 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); 600 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); 601 } 602 603 /* swap each data item */ 604 for(i=0; i<itemCount; ++i) { 605 /* first copy the item bytes to make sure that unreachable bytes are copied */ 606 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); 607 608 /* swap the item */ 609 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, 610 outBytes+table[i].outOffset, pErrorCode); 611 612 if(U_FAILURE(*pErrorCode)) { 613 if(ds->outCharset==U_CHARSET_FAMILY) { 614 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" 615 " at inOffset 0x%x length 0x%x - %s\n" 616 " the data item will be copied, not swapped\n\n", 617 (char *)outBytes+table[i].nameOffset, 618 table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); 619 } else { 620 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" 621 " at inOffset 0x%x length 0x%x - %s\n" 622 " the data item will be copied, not swapped\n\n", 623 table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); 624 } 625 /* reset the error code, copy the data item, and continue */ 626 *pErrorCode=U_ZERO_ERROR; 627 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); 628 } 629 } 630 631 if(inData==outData) { 632 /* copy the data from the temporary buffer to the in-place buffer */ 633 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); 634 } 635 uprv_free(table); 636 637 return headerSize+length; 638 } 639 } 640 641 /* 642 * Hey, Emacs, please set the following: 643 * 644 * Local Variables: 645 * indent-tabs-mode: nil 646 * End: 647 * 648 */ 649