1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: package.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005aug25 14 * created by: Markus W. Scherer 15 * 16 * Read, modify, and write ICU .dat data package files. 17 * This is an integral part of the icupkg tool, moved to the toolutil library 18 * because parts of tool implementations tend to be later shared by 19 * other tools. 20 * Subsumes functionality and implementation code from 21 * gencmn, decmn, and icuswap tools. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/putil.h" 26 #include "unicode/udata.h" 27 #include "cstring.h" 28 #include "uarrsort.h" 29 #include "ucmndata.h" 30 #include "udataswp.h" 31 #include "swapimpl.h" 32 #include "toolutil.h" 33 #include "package.h" 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 // general definitions ----------------------------------------------------- *** 40 41 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 42 43 /* UDataInfo cf. udata.h */ 44 static const UDataInfo dataInfo={ 45 (uint16_t)sizeof(UDataInfo), 46 0, 47 48 U_IS_BIG_ENDIAN, 49 U_CHARSET_FAMILY, 50 (uint8_t)sizeof(UChar), 51 0, 52 53 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 54 {1, 0, 0, 0}, /* formatVersion */ 55 {3, 0, 0, 0} /* dataVersion */ 56 }; 57 58 U_CDECL_BEGIN 59 static void U_CALLCONV 60 printPackageError(void *context, const char *fmt, va_list args) { 61 vfprintf((FILE *)context, fmt, args); 62 } 63 U_CDECL_END 64 65 static uint16_t 66 readSwapUInt16(uint16_t x) { 67 return (uint16_t)((x<<8)|(x>>8)); 68 } 69 70 // platform types ---------------------------------------------------------- *** 71 72 static const char *types="lb?e"; 73 74 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 75 76 static inline int32_t 77 makeTypeEnum(uint8_t charset, UBool isBigEndian) { 78 return 2*(int32_t)charset+isBigEndian; 79 } 80 81 static inline int32_t 82 makeTypeEnum(char type) { 83 return 84 type == 'l' ? TYPE_L : 85 type == 'b' ? TYPE_B : 86 type == 'e' ? TYPE_E : 87 -1; 88 } 89 90 static inline char 91 makeTypeLetter(uint8_t charset, UBool isBigEndian) { 92 return types[makeTypeEnum(charset, isBigEndian)]; 93 } 94 95 static inline char 96 makeTypeLetter(int32_t typeEnum) { 97 return types[typeEnum]; 98 } 99 100 static void 101 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 102 int32_t typeEnum=makeTypeEnum(type); 103 charset=(uint8_t)(typeEnum>>1); 104 isBigEndian=(UBool)(typeEnum&1); 105 } 106 107 U_CFUNC const UDataInfo * 108 getDataInfo(const uint8_t *data, int32_t length, 109 int32_t &infoLength, int32_t &headerLength, 110 UErrorCode *pErrorCode) { 111 const DataHeader *pHeader; 112 const UDataInfo *pInfo; 113 114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 115 return NULL; 116 } 117 if( data==NULL || 118 (length>=0 && length<(int32_t)sizeof(DataHeader)) 119 ) { 120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 121 return NULL; 122 } 123 124 pHeader=(const DataHeader *)data; 125 pInfo=&pHeader->info; 126 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 127 pHeader->dataHeader.magic1!=0xda || 128 pHeader->dataHeader.magic2!=0x27 || 129 pInfo->sizeofUChar!=2 130 ) { 131 *pErrorCode=U_UNSUPPORTED_ERROR; 132 return NULL; 133 } 134 135 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 136 headerLength=pHeader->dataHeader.headerSize; 137 infoLength=pInfo->size; 138 } else { 139 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 140 infoLength=readSwapUInt16(pInfo->size); 141 } 142 143 if( headerLength<(int32_t)sizeof(DataHeader) || 144 infoLength<(int32_t)sizeof(UDataInfo) || 145 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 146 (length>=0 && length<headerLength) 147 ) { 148 *pErrorCode=U_UNSUPPORTED_ERROR; 149 return NULL; 150 } 151 152 return pInfo; 153 } 154 155 static int32_t 156 getTypeEnumForInputData(const uint8_t *data, int32_t length, 157 UErrorCode *pErrorCode) { 158 const UDataInfo *pInfo; 159 int32_t infoLength, headerLength; 160 161 /* getDataInfo() checks for illegal arguments */ 162 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 163 if(pInfo==NULL) { 164 return -1; 165 } 166 167 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 168 } 169 170 // file handling ----------------------------------------------------------- *** 171 172 static void 173 extractPackageName(const char *filename, 174 char pkg[], int32_t capacity) { 175 const char *basename; 176 int32_t len; 177 178 basename=findBasename(filename); 179 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 180 181 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 182 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 183 basename); 184 exit(U_ILLEGAL_ARGUMENT_ERROR); 185 } 186 187 if(len>=capacity) { 188 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 189 basename, (long)capacity); 190 exit(U_ILLEGAL_ARGUMENT_ERROR); 191 } 192 193 memcpy(pkg, basename, len); 194 pkg[len]=0; 195 } 196 197 static int32_t 198 getFileLength(FILE *f) { 199 int32_t length; 200 201 fseek(f, 0, SEEK_END); 202 length=(int32_t)ftell(f); 203 fseek(f, 0, SEEK_SET); 204 return length; 205 } 206 207 /* 208 * Turn tree separators and alternate file separators into normal file separators. 209 */ 210 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 211 #define treeToPath(s) 212 #else 213 static void 214 treeToPath(char *s) { 215 char *t; 216 217 for(t=s; *t!=0; ++t) { 218 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 219 *t=U_FILE_SEP_CHAR; 220 } 221 } 222 } 223 #endif 224 225 /* 226 * Turn file separators into tree separators. 227 */ 228 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 229 #define pathToTree(s) 230 #else 231 static void 232 pathToTree(char *s) { 233 char *t; 234 235 for(t=s; *t!=0; ++t) { 236 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 237 *t=U_TREE_ENTRY_SEP_CHAR; 238 } 239 } 240 } 241 #endif 242 243 /* 244 * Prepend the path (if any) to the name and run the name through treeToName(). 245 */ 246 static void 247 makeFullFilename(const char *path, const char *name, 248 char *filename, int32_t capacity) { 249 char *s; 250 251 // prepend the path unless NULL or empty 252 if(path!=NULL && path[0]!=0) { 253 if((int32_t)(strlen(path)+1)>=capacity) { 254 fprintf(stderr, "pathname too long: \"%s\"\n", path); 255 exit(U_BUFFER_OVERFLOW_ERROR); 256 } 257 strcpy(filename, path); 258 259 // make sure the path ends with a file separator 260 s=strchr(filename, 0); 261 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 262 *s++=U_FILE_SEP_CHAR; 263 } 264 } else { 265 s=filename; 266 } 267 268 // turn the name into a filename, turn tree separators into file separators 269 if((int32_t)((s-filename)+strlen(name))>=capacity) { 270 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 271 exit(U_BUFFER_OVERFLOW_ERROR); 272 } 273 strcpy(s, name); 274 treeToPath(s); 275 } 276 277 static void 278 makeFullFilenameAndDirs(const char *path, const char *name, 279 char *filename, int32_t capacity) { 280 char *sep; 281 UErrorCode errorCode; 282 283 makeFullFilename(path, name, filename, capacity); 284 285 // make tree directories 286 errorCode=U_ZERO_ERROR; 287 sep=strchr(filename, 0)-strlen(name); 288 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 289 if(sep!=filename) { 290 *sep=0; // truncate temporarily 291 uprv_mkdir(filename, &errorCode); 292 if(U_FAILURE(errorCode)) { 293 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 294 exit(U_FILE_ACCESS_ERROR); 295 } 296 } 297 *sep++=U_FILE_SEP_CHAR; // restore file separator character 298 } 299 } 300 301 static uint8_t * 302 readFile(const char *path, const char *name, int32_t &length, char &type) { 303 char filename[1024]; 304 FILE *file; 305 uint8_t *data; 306 UErrorCode errorCode; 307 int32_t fileLength, typeEnum; 308 309 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 310 311 /* open the input file, get its length, allocate memory for it, read the file */ 312 file=fopen(filename, "rb"); 313 if(file==NULL) { 314 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 315 exit(U_FILE_ACCESS_ERROR); 316 } 317 318 /* get the file length */ 319 fileLength=getFileLength(file); 320 if(ferror(file) || fileLength<=0) { 321 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 322 fclose(file); 323 exit(U_FILE_ACCESS_ERROR); 324 } 325 326 /* allocate the buffer, pad to multiple of 16 */ 327 length=(fileLength+0xf)&~0xf; 328 data=(uint8_t *)malloc(length); 329 if(data==NULL) { 330 fclose(file); 331 exit(U_MEMORY_ALLOCATION_ERROR); 332 } 333 334 /* read the file */ 335 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 336 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 337 fclose(file); 338 free(data); 339 exit(U_FILE_ACCESS_ERROR); 340 } 341 342 /* pad the file to a multiple of 16 using the usual padding byte */ 343 if(fileLength<length) { 344 memset(data+fileLength, 0xaa, length-fileLength); 345 } 346 347 fclose(file); 348 349 // minimum check for ICU-format data 350 errorCode=U_ZERO_ERROR; 351 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 352 if(typeEnum<0 || U_FAILURE(errorCode)) { 353 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 354 free(data); 355 #if !UCONFIG_NO_LEGACY_CONVERSION 356 exit(U_INVALID_FORMAT_ERROR); 357 #else 358 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 359 exit(0); 360 #endif 361 } 362 type=makeTypeLetter(typeEnum); 363 364 return data; 365 } 366 367 // .dat package file representation ---------------------------------------- *** 368 369 U_CDECL_BEGIN 370 371 static int32_t U_CALLCONV 372 compareItems(const void * /*context*/, const void *left, const void *right) { 373 U_NAMESPACE_USE 374 375 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 376 } 377 378 U_CDECL_END 379 380 U_NAMESPACE_BEGIN 381 382 Package::Package() { 383 inPkgName[0]=0; 384 inData=NULL; 385 inLength=0; 386 inCharset=U_CHARSET_FAMILY; 387 inIsBigEndian=U_IS_BIG_ENDIAN; 388 389 itemCount=0; 390 inStringTop=outStringTop=0; 391 392 matchMode=0; 393 findPrefix=findSuffix=NULL; 394 findPrefixLength=findSuffixLength=0; 395 findNextIndex=-1; 396 397 // create a header for an empty package 398 DataHeader *pHeader; 399 pHeader=(DataHeader *)header; 400 pHeader->dataHeader.magic1=0xda; 401 pHeader->dataHeader.magic2=0x27; 402 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 403 headerLength=(int32_t)(4+sizeof(dataInfo)); 404 if(headerLength&0xf) { 405 /* NUL-pad the header to a multiple of 16 */ 406 int32_t length=(headerLength+0xf)&~0xf; 407 memset(header+headerLength, 0, length-headerLength); 408 headerLength=length; 409 } 410 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 411 } 412 413 Package::~Package() { 414 int32_t idx; 415 416 free(inData); 417 418 for(idx=0; idx<itemCount; ++idx) { 419 if(items[idx].isDataOwned) { 420 free(items[idx].data); 421 } 422 } 423 } 424 425 void 426 Package::readPackage(const char *filename) { 427 UDataSwapper *ds; 428 const UDataInfo *pInfo; 429 UErrorCode errorCode; 430 431 const uint8_t *inBytes; 432 433 int32_t length, offset, i; 434 int32_t itemLength, typeEnum; 435 char type; 436 437 const UDataOffsetTOCEntry *inEntries; 438 439 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 440 441 /* read the file */ 442 inData=readFile(NULL, filename, inLength, type); 443 length=inLength; 444 445 /* 446 * swap the header - even if the swapping itself is a no-op 447 * because it tells us the header length 448 */ 449 errorCode=U_ZERO_ERROR; 450 makeTypeProps(type, inCharset, inIsBigEndian); 451 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 452 if(U_FAILURE(errorCode)) { 453 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 454 filename, u_errorName(errorCode)); 455 exit(errorCode); 456 } 457 458 ds->printError=printPackageError; 459 ds->printErrorContext=stderr; 460 461 headerLength=sizeof(header); 462 if(length<headerLength) { 463 headerLength=length; 464 } 465 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 466 if(U_FAILURE(errorCode)) { 467 exit(errorCode); 468 } 469 470 /* check data format and format version */ 471 pInfo=(const UDataInfo *)((const char *)inData+4); 472 if(!( 473 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 474 pInfo->dataFormat[1]==0x6d && 475 pInfo->dataFormat[2]==0x6e && 476 pInfo->dataFormat[3]==0x44 && 477 pInfo->formatVersion[0]==1 478 )) { 479 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 480 pInfo->dataFormat[0], pInfo->dataFormat[1], 481 pInfo->dataFormat[2], pInfo->dataFormat[3], 482 pInfo->formatVersion[0]); 483 exit(U_UNSUPPORTED_ERROR); 484 } 485 inIsBigEndian=(UBool)pInfo->isBigEndian; 486 inCharset=pInfo->charsetFamily; 487 488 inBytes=(const uint8_t *)inData+headerLength; 489 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 490 491 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 492 length-=headerLength; 493 if(length<4) { 494 /* itemCount does not fit */ 495 offset=0x7fffffff; 496 } else { 497 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 498 if(itemCount==0) { 499 offset=4; 500 } else if(length<(4+8*itemCount)) { 501 /* ToC table does not fit */ 502 offset=0x7fffffff; 503 } else { 504 /* offset of the last item plus at least 20 bytes for its header */ 505 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 506 } 507 } 508 if(length<offset) { 509 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 510 (long)length); 511 exit(U_INDEX_OUTOFBOUNDS_ERROR); 512 } 513 /* do not modify the package length variable until the last item's length is set */ 514 515 if(itemCount>0) { 516 char prefix[MAX_PKG_NAME_LENGTH+4]; 517 char *s, *inItemStrings; 518 int32_t inPkgNameLength, prefixLength, stringsOffset; 519 520 if(itemCount>MAX_FILE_COUNT) { 521 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 522 exit(U_BUFFER_OVERFLOW_ERROR); 523 } 524 525 /* swap the item name strings */ 526 stringsOffset=4+8*itemCount; 527 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 528 529 // don't include padding bytes at the end of the item names 530 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 531 --itemLength; 532 } 533 534 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 535 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 536 exit(U_BUFFER_OVERFLOW_ERROR); 537 } 538 539 inItemStrings=inStrings+inStringTop; 540 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 541 if(U_FAILURE(errorCode)) { 542 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 543 exit(U_INVALID_FORMAT_ERROR); 544 } 545 inStringTop+=itemLength; 546 547 // reset the Item entries 548 memset(items, 0, itemCount*sizeof(Item)); 549 550 inPkgNameLength=strlen(inPkgName); 551 memcpy(prefix, inPkgName, inPkgNameLength); 552 prefixLength=inPkgNameLength; 553 554 /* 555 * Get the common prefix of the items. 556 * New-style ICU .dat packages use tree separators ('/') between package names, 557 * tree names, and item names, 558 * while old-style ICU .dat packages (before multi-tree support) 559 * use an underscore ('_') between package and item names. 560 */ 561 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 562 s=inItemStrings+offset; 563 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 564 0==memcmp(s, inPkgName, inPkgNameLength) && 565 s[inPkgNameLength]=='_' 566 ) { 567 // old-style .dat package 568 prefix[prefixLength++]='_'; 569 } else { 570 // new-style .dat package 571 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 572 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 573 // then the test in the loop below will fail 574 } 575 prefix[prefixLength]=0; 576 577 /* read the ToC table */ 578 for(i=0; i<itemCount; ++i) { 579 // skip the package part of the item name, error if it does not match the actual package name 580 // or if nothing follows the package name 581 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 582 s=inItemStrings+offset; 583 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 584 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 585 s, prefix); 586 exit(U_UNSUPPORTED_ERROR); 587 } 588 items[i].name=s+prefixLength; 589 590 // set the item's data 591 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 592 if(i>0) { 593 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 594 595 // set the previous item's platform type 596 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 597 if(typeEnum<0 || U_FAILURE(errorCode)) { 598 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 599 exit(U_INVALID_FORMAT_ERROR); 600 } 601 items[i-1].type=makeTypeLetter(typeEnum); 602 } 603 items[i].isDataOwned=FALSE; 604 } 605 // set the last item's length 606 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 607 608 // set the last item's platform type 609 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 610 if(typeEnum<0 || U_FAILURE(errorCode)) { 611 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 612 exit(U_INVALID_FORMAT_ERROR); 613 } 614 items[itemCount-1].type=makeTypeLetter(typeEnum); 615 616 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 617 // sort the item names for the local charset 618 sortItems(); 619 } 620 } 621 622 udata_closeSwapper(ds); 623 } 624 625 char 626 Package::getInType() { 627 return makeTypeLetter(inCharset, inIsBigEndian); 628 } 629 630 void 631 Package::writePackage(const char *filename, char outType, const char *comment) { 632 char prefix[MAX_PKG_NAME_LENGTH+4]; 633 UDataOffsetTOCEntry entry; 634 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 635 FILE *file; 636 Item *pItem; 637 char *name; 638 UErrorCode errorCode; 639 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 640 uint8_t outCharset; 641 UBool outIsBigEndian; 642 643 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 644 645 // if there is an explicit comment, then use it, else use what's in the current header 646 if(comment!=NULL) { 647 /* get the header size minus the current comment */ 648 DataHeader *pHeader; 649 int32_t length; 650 651 pHeader=(DataHeader *)header; 652 headerLength=4+pHeader->info.size; 653 length=(int32_t)strlen(comment); 654 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 655 fprintf(stderr, "icupkg: comment too long\n"); 656 exit(U_BUFFER_OVERFLOW_ERROR); 657 } 658 memcpy(header+headerLength, comment, length+1); 659 headerLength+=length; 660 if(headerLength&0xf) { 661 /* NUL-pad the header to a multiple of 16 */ 662 length=(headerLength+0xf)&~0xf; 663 memset(header+headerLength, 0, length-headerLength); 664 headerLength=length; 665 } 666 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 667 } 668 669 makeTypeProps(outType, outCharset, outIsBigEndian); 670 671 // open (TYPE_COUNT-2) swappers 672 // one is a no-op for local type==outType 673 // one type (TYPE_LE) is bogus 674 errorCode=U_ZERO_ERROR; 675 i=makeTypeEnum(outType); 676 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 677 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 678 ds[TYPE_LE]=NULL; 679 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 680 if(U_FAILURE(errorCode)) { 681 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 682 exit(errorCode); 683 } 684 for(i=0; i<TYPE_COUNT; ++i) { 685 if(ds[i]!=NULL) { 686 ds[i]->printError=printPackageError; 687 ds[i]->printErrorContext=stderr; 688 } 689 } 690 691 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 692 693 // create the file and write its contents 694 file=fopen(filename, "wb"); 695 if(file==NULL) { 696 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 697 exit(U_FILE_ACCESS_ERROR); 698 } 699 700 // swap and write the header 701 if(dsLocalToOut!=NULL) { 702 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 703 if(U_FAILURE(errorCode)) { 704 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 705 exit(errorCode); 706 } 707 } 708 length=(int32_t)fwrite(header, 1, headerLength, file); 709 if(length!=headerLength) { 710 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 711 exit(U_FILE_ACCESS_ERROR); 712 } 713 714 // prepare and swap the package name with a tree separator 715 // for prepending to item names 716 strcat(prefix, U_TREE_ENTRY_SEP_STRING); 717 prefixLength=(int32_t)strlen(prefix); 718 if(dsLocalToOut!=NULL) { 719 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 720 if(U_FAILURE(errorCode)) { 721 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 722 exit(errorCode); 723 } 724 725 // swap and sort the item names (sorting needs to be done in the output charset) 726 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 727 if(U_FAILURE(errorCode)) { 728 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 729 exit(errorCode); 730 } 731 sortItems(); 732 } 733 734 // create the output item names in sorted order, with the package name prepended to each 735 for(i=0; i<itemCount; ++i) { 736 length=(int32_t)strlen(items[i].name); 737 name=allocString(FALSE, length+prefixLength); 738 memcpy(name, prefix, prefixLength); 739 memcpy(name+prefixLength, items[i].name, length+1); 740 items[i].name=name; 741 } 742 743 // calculate offsets for item names and items, pad to 16-align items 744 // align only the first item; each item's length is a multiple of 16 745 basenameOffset=4+8*itemCount; 746 offset=basenameOffset+outStringTop; 747 if((length=(offset&15))!=0) { 748 length=16-length; 749 memset(allocString(FALSE, length-1), 0xaa, length); 750 offset+=length; 751 } 752 753 // write the table of contents 754 // first the itemCount 755 outInt32=itemCount; 756 if(dsLocalToOut!=NULL) { 757 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 758 if(U_FAILURE(errorCode)) { 759 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 760 exit(errorCode); 761 } 762 } 763 length=(int32_t)fwrite(&outInt32, 1, 4, file); 764 if(length!=4) { 765 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 766 exit(U_FILE_ACCESS_ERROR); 767 } 768 769 // then write the item entries (and collect the maxItemLength) 770 maxItemLength=0; 771 for(i=0; i<itemCount; ++i) { 772 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 773 entry.dataOffset=(uint32_t)offset; 774 if(dsLocalToOut!=NULL) { 775 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 776 if(U_FAILURE(errorCode)) { 777 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 778 exit(errorCode); 779 } 780 } 781 length=(int32_t)fwrite(&entry, 1, 8, file); 782 if(length!=8) { 783 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 784 exit(U_FILE_ACCESS_ERROR); 785 } 786 787 length=items[i].length; 788 if(length>maxItemLength) { 789 maxItemLength=length; 790 } 791 offset+=length; 792 } 793 794 // write the item names 795 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 796 if(length!=outStringTop) { 797 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 798 exit(U_FILE_ACCESS_ERROR); 799 } 800 801 // write the items 802 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 803 int32_t type=makeTypeEnum(pItem->type); 804 if(ds[type]!=NULL) { 805 // swap each item from its platform properties to the desired ones 806 udata_swap( 807 ds[type], 808 pItem->data, pItem->length, pItem->data, 809 &errorCode); 810 if(U_FAILURE(errorCode)) { 811 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 812 exit(errorCode); 813 } 814 } 815 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 816 if(length!=pItem->length) { 817 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 818 exit(U_FILE_ACCESS_ERROR); 819 } 820 } 821 822 if(ferror(file)) { 823 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 824 exit(U_FILE_ACCESS_ERROR); 825 } 826 827 fclose(file); 828 for(i=0; i<TYPE_COUNT; ++i) { 829 udata_closeSwapper(ds[i]); 830 } 831 } 832 833 int32_t 834 Package::findItem(const char *name, int32_t length) const { 835 int32_t i, start, limit; 836 int result; 837 838 /* do a binary search for the string */ 839 start=0; 840 limit=itemCount; 841 while(start<limit) { 842 i=(start+limit)/2; 843 if(length>=0) { 844 result=strncmp(name, items[i].name, length); 845 } else { 846 result=strcmp(name, items[i].name); 847 } 848 849 if(result==0) { 850 /* found */ 851 if(length>=0) { 852 /* 853 * if we compared just prefixes, then we may need to back up 854 * to the first item with this prefix 855 */ 856 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 857 --i; 858 } 859 } 860 return i; 861 } else if(result<0) { 862 limit=i; 863 } else /* result>0 */ { 864 start=i+1; 865 } 866 } 867 868 return ~start; /* not found, return binary-not of the insertion point */ 869 } 870 871 void 872 Package::findItems(const char *pattern) { 873 const char *wild; 874 875 if(pattern==NULL || *pattern==0) { 876 findNextIndex=-1; 877 return; 878 } 879 880 findPrefix=pattern; 881 findSuffix=NULL; 882 findSuffixLength=0; 883 884 wild=strchr(pattern, '*'); 885 if(wild==NULL) { 886 // no wildcard 887 findPrefixLength=(int32_t)strlen(pattern); 888 } else { 889 // one wildcard 890 findPrefixLength=(int32_t)(wild-pattern); 891 findSuffix=wild+1; 892 findSuffixLength=(int32_t)strlen(findSuffix); 893 if(NULL!=strchr(findSuffix, '*')) { 894 // two or more wildcards 895 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 896 exit(U_PARSE_ERROR); 897 } 898 } 899 900 if(findPrefixLength==0) { 901 findNextIndex=0; 902 } else { 903 findNextIndex=findItem(findPrefix, findPrefixLength); 904 } 905 } 906 907 int32_t 908 Package::findNextItem() { 909 const char *name, *middle, *treeSep; 910 int32_t idx, nameLength, middleLength; 911 912 if(findNextIndex<0) { 913 return -1; 914 } 915 916 while(findNextIndex<itemCount) { 917 idx=findNextIndex++; 918 name=items[idx].name; 919 nameLength=(int32_t)strlen(name); 920 if(nameLength<(findPrefixLength+findSuffixLength)) { 921 // item name too short for prefix & suffix 922 continue; 923 } 924 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 925 // left the range of names with this prefix 926 break; 927 } 928 middle=name+findPrefixLength; 929 middleLength=nameLength-findPrefixLength-findSuffixLength; 930 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 931 // suffix does not match 932 continue; 933 } 934 // prefix & suffix match 935 936 if(matchMode&MATCH_NOSLASH) { 937 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 938 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 939 // the middle (matching the * wildcard) contains a tree separator / 940 continue; 941 } 942 } 943 944 // found a matching item 945 return idx; 946 } 947 948 // no more items 949 findNextIndex=-1; 950 return -1; 951 } 952 953 void 954 Package::setMatchMode(uint32_t mode) { 955 matchMode=mode; 956 } 957 958 void 959 Package::addItem(const char *name) { 960 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 961 } 962 963 void 964 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 965 int32_t idx; 966 967 idx=findItem(name); 968 if(idx<0) { 969 // new item, make space at the insertion point 970 if(itemCount>=MAX_FILE_COUNT) { 971 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 972 exit(U_BUFFER_OVERFLOW_ERROR); 973 } 974 // move the following items down 975 idx=~idx; 976 if(idx<itemCount) { 977 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 978 } 979 ++itemCount; 980 981 // reset this Item entry 982 memset(items+idx, 0, sizeof(Item)); 983 984 // copy the item's name 985 items[idx].name=allocString(TRUE, strlen(name)); 986 strcpy(items[idx].name, name); 987 pathToTree(items[idx].name); 988 } else { 989 // same-name item found, replace it 990 if(items[idx].isDataOwned) { 991 free(items[idx].data); 992 } 993 994 // keep the item's name since it is the same 995 } 996 997 // set the item's data 998 items[idx].data=data; 999 items[idx].length=length; 1000 items[idx].isDataOwned=isDataOwned; 1001 items[idx].type=type; 1002 } 1003 1004 void 1005 Package::addFile(const char *filesPath, const char *name) { 1006 uint8_t *data; 1007 int32_t length; 1008 char type; 1009 1010 data=readFile(filesPath, name, length, type); 1011 // readFile() exits the tool if it fails 1012 addItem(name, data, length, TRUE, type); 1013 } 1014 1015 void 1016 Package::addItems(const Package &listPkg) { 1017 const Item *pItem; 1018 int32_t i; 1019 1020 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1021 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1022 } 1023 } 1024 1025 void 1026 Package::removeItem(int32_t idx) { 1027 if(idx>=0) { 1028 // remove the item 1029 if(items[idx].isDataOwned) { 1030 free(items[idx].data); 1031 } 1032 1033 // move the following items up 1034 if((idx+1)<itemCount) { 1035 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1036 } 1037 --itemCount; 1038 1039 if(idx<=findNextIndex) { 1040 --findNextIndex; 1041 } 1042 } 1043 } 1044 1045 void 1046 Package::removeItems(const char *pattern) { 1047 int32_t idx; 1048 1049 findItems(pattern); 1050 while((idx=findNextItem())>=0) { 1051 removeItem(idx); 1052 } 1053 } 1054 1055 void 1056 Package::removeItems(const Package &listPkg) { 1057 const Item *pItem; 1058 int32_t i; 1059 1060 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1061 removeItems(pItem->name); 1062 } 1063 } 1064 1065 void 1066 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1067 char filename[1024]; 1068 UDataSwapper *ds; 1069 FILE *file; 1070 Item *pItem; 1071 int32_t fileLength; 1072 uint8_t itemCharset, outCharset; 1073 UBool itemIsBigEndian, outIsBigEndian; 1074 1075 if(idx<0 || itemCount<=idx) { 1076 return; 1077 } 1078 pItem=items+idx; 1079 1080 // swap the data to the outType 1081 // outType==0: don't swap 1082 if(outType!=0 && pItem->type!=outType) { 1083 // open the swapper 1084 UErrorCode errorCode=U_ZERO_ERROR; 1085 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1086 makeTypeProps(outType, outCharset, outIsBigEndian); 1087 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1088 if(U_FAILURE(errorCode)) { 1089 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1090 (long)idx, u_errorName(errorCode)); 1091 exit(errorCode); 1092 } 1093 1094 ds->printError=printPackageError; 1095 ds->printErrorContext=stderr; 1096 1097 // swap the item from its platform properties to the desired ones 1098 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1099 if(U_FAILURE(errorCode)) { 1100 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1101 exit(errorCode); 1102 } 1103 udata_closeSwapper(ds); 1104 pItem->type=outType; 1105 } 1106 1107 // create the file and write its contents 1108 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1109 file=fopen(filename, "wb"); 1110 if(file==NULL) { 1111 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1112 exit(U_FILE_ACCESS_ERROR); 1113 } 1114 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1115 1116 if(ferror(file) || fileLength!=pItem->length) { 1117 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1118 exit(U_FILE_ACCESS_ERROR); 1119 } 1120 fclose(file); 1121 } 1122 1123 void 1124 Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1125 extractItem(filesPath, items[idx].name, idx, outType); 1126 } 1127 1128 void 1129 Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1130 int32_t idx; 1131 1132 findItems(pattern); 1133 while((idx=findNextItem())>=0) { 1134 extractItem(filesPath, idx, outType); 1135 } 1136 } 1137 1138 void 1139 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1140 const Item *pItem; 1141 int32_t i; 1142 1143 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1144 extractItems(filesPath, pItem->name, outType); 1145 } 1146 } 1147 1148 int32_t 1149 Package::getItemCount() const { 1150 return itemCount; 1151 } 1152 1153 const Item * 1154 Package::getItem(int32_t idx) const { 1155 if (0 <= idx && idx < itemCount) { 1156 return &items[idx]; 1157 } 1158 return NULL; 1159 } 1160 1161 void 1162 Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1163 // check dependency: make sure the target item is in the package 1164 Package *me=(Package *)context; 1165 if(me->findItem(targetName)<0) { 1166 me->isMissingItems=TRUE; 1167 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1168 } 1169 } 1170 1171 UBool 1172 Package::checkDependencies() { 1173 isMissingItems=FALSE; 1174 enumDependencies(this, checkDependency); 1175 return (UBool)!isMissingItems; 1176 } 1177 1178 void 1179 Package::enumDependencies(void *context, CheckDependency check) { 1180 int32_t i; 1181 1182 for(i=0; i<itemCount; ++i) { 1183 enumDependencies(items+i, context, check); 1184 } 1185 } 1186 1187 char * 1188 Package::allocString(UBool in, int32_t length) { 1189 char *p; 1190 int32_t top; 1191 1192 if(in) { 1193 top=inStringTop; 1194 p=inStrings+top; 1195 } else { 1196 top=outStringTop; 1197 p=outStrings+top; 1198 } 1199 top+=length+1; 1200 1201 if(top>STRING_STORE_SIZE) { 1202 fprintf(stderr, "icupkg: string storage overflow\n"); 1203 exit(U_BUFFER_OVERFLOW_ERROR); 1204 } 1205 if(in) { 1206 inStringTop=top; 1207 } else { 1208 outStringTop=top; 1209 } 1210 return p; 1211 } 1212 1213 void 1214 Package::sortItems() { 1215 UErrorCode errorCode=U_ZERO_ERROR; 1216 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1217 if(U_FAILURE(errorCode)) { 1218 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1219 exit(errorCode); 1220 } 1221 } 1222 1223 U_NAMESPACE_END 1224