1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: package.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005aug25 14 * created by: Markus W. Scherer 15 * 16 * Read, modify, and write ICU .dat data package files. 17 * This is an integral part of the icupkg tool, moved to the toolutil library 18 * because parts of tool implementations tend to be later shared by 19 * other tools. 20 * Subsumes functionality and implementation code from 21 * gencmn, decmn, and icuswap tools. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/putil.h" 26 #include "unicode/udata.h" 27 #include "cstring.h" 28 #include "uarrsort.h" 29 #include "ucmndata.h" 30 #include "udataswp.h" 31 #include "swapimpl.h" 32 #include "toolutil.h" 33 #include "package.h" 34 #include "cmemory.h" 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 41 static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43 // general definitions ----------------------------------------------------- *** 44 45 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47 /* UDataInfo cf. udata.h */ 48 static const UDataInfo dataInfo={ 49 (uint16_t)sizeof(UDataInfo), 50 0, 51 52 U_IS_BIG_ENDIAN, 53 U_CHARSET_FAMILY, 54 (uint8_t)sizeof(UChar), 55 0, 56 57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 58 {1, 0, 0, 0}, /* formatVersion */ 59 {3, 0, 0, 0} /* dataVersion */ 60 }; 61 62 U_CDECL_BEGIN 63 static void U_CALLCONV 64 printPackageError(void *context, const char *fmt, va_list args) { 65 vfprintf((FILE *)context, fmt, args); 66 } 67 U_CDECL_END 68 69 static uint16_t 70 readSwapUInt16(uint16_t x) { 71 return (uint16_t)((x<<8)|(x>>8)); 72 } 73 74 // platform types ---------------------------------------------------------- *** 75 76 static const char *types="lb?e"; 77 78 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 79 80 static inline int32_t 81 makeTypeEnum(uint8_t charset, UBool isBigEndian) { 82 return 2*(int32_t)charset+isBigEndian; 83 } 84 85 static inline int32_t 86 makeTypeEnum(char type) { 87 return 88 type == 'l' ? TYPE_L : 89 type == 'b' ? TYPE_B : 90 type == 'e' ? TYPE_E : 91 -1; 92 } 93 94 static inline char 95 makeTypeLetter(uint8_t charset, UBool isBigEndian) { 96 return types[makeTypeEnum(charset, isBigEndian)]; 97 } 98 99 static inline char 100 makeTypeLetter(int32_t typeEnum) { 101 return types[typeEnum]; 102 } 103 104 static void 105 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 106 int32_t typeEnum=makeTypeEnum(type); 107 charset=(uint8_t)(typeEnum>>1); 108 isBigEndian=(UBool)(typeEnum&1); 109 } 110 111 U_CFUNC const UDataInfo * 112 getDataInfo(const uint8_t *data, int32_t length, 113 int32_t &infoLength, int32_t &headerLength, 114 UErrorCode *pErrorCode) { 115 const DataHeader *pHeader; 116 const UDataInfo *pInfo; 117 118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 119 return NULL; 120 } 121 if( data==NULL || 122 (length>=0 && length<(int32_t)sizeof(DataHeader)) 123 ) { 124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 125 return NULL; 126 } 127 128 pHeader=(const DataHeader *)data; 129 pInfo=&pHeader->info; 130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 131 pHeader->dataHeader.magic1!=0xda || 132 pHeader->dataHeader.magic2!=0x27 || 133 pInfo->sizeofUChar!=2 134 ) { 135 *pErrorCode=U_UNSUPPORTED_ERROR; 136 return NULL; 137 } 138 139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 140 headerLength=pHeader->dataHeader.headerSize; 141 infoLength=pInfo->size; 142 } else { 143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 144 infoLength=readSwapUInt16(pInfo->size); 145 } 146 147 if( headerLength<(int32_t)sizeof(DataHeader) || 148 infoLength<(int32_t)sizeof(UDataInfo) || 149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 150 (length>=0 && length<headerLength) 151 ) { 152 *pErrorCode=U_UNSUPPORTED_ERROR; 153 return NULL; 154 } 155 156 return pInfo; 157 } 158 159 static int32_t 160 getTypeEnumForInputData(const uint8_t *data, int32_t length, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t infoLength, headerLength; 164 165 /* getDataInfo() checks for illegal arguments */ 166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 167 if(pInfo==NULL) { 168 return -1; 169 } 170 171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 172 } 173 174 // file handling ----------------------------------------------------------- *** 175 176 static void 177 extractPackageName(const char *filename, 178 char pkg[], int32_t capacity) { 179 const char *basename; 180 int32_t len; 181 182 basename=findBasename(filename); 183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 184 185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 187 basename); 188 exit(U_ILLEGAL_ARGUMENT_ERROR); 189 } 190 191 if(len>=capacity) { 192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 193 basename, (long)capacity); 194 exit(U_ILLEGAL_ARGUMENT_ERROR); 195 } 196 197 memcpy(pkg, basename, len); 198 pkg[len]=0; 199 } 200 201 static int32_t 202 getFileLength(FILE *f) { 203 int32_t length; 204 205 fseek(f, 0, SEEK_END); 206 length=(int32_t)ftell(f); 207 fseek(f, 0, SEEK_SET); 208 return length; 209 } 210 211 /* 212 * Turn tree separators and alternate file separators into normal file separators. 213 */ 214 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 215 #define treeToPath(s) 216 #else 217 static void 218 treeToPath(char *s) { 219 char *t; 220 221 for(t=s; *t!=0; ++t) { 222 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 223 *t=U_FILE_SEP_CHAR; 224 } 225 } 226 } 227 #endif 228 229 /* 230 * Turn file separators into tree separators. 231 */ 232 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 233 #define pathToTree(s) 234 #else 235 static void 236 pathToTree(char *s) { 237 char *t; 238 239 for(t=s; *t!=0; ++t) { 240 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 241 *t=U_TREE_ENTRY_SEP_CHAR; 242 } 243 } 244 } 245 #endif 246 247 /* 248 * Prepend the path (if any) to the name and run the name through treeToName(). 249 */ 250 static void 251 makeFullFilename(const char *path, const char *name, 252 char *filename, int32_t capacity) { 253 char *s; 254 255 // prepend the path unless NULL or empty 256 if(path!=NULL && path[0]!=0) { 257 if((int32_t)(strlen(path)+1)>=capacity) { 258 fprintf(stderr, "pathname too long: \"%s\"\n", path); 259 exit(U_BUFFER_OVERFLOW_ERROR); 260 } 261 strcpy(filename, path); 262 263 // make sure the path ends with a file separator 264 s=strchr(filename, 0); 265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 266 *s++=U_FILE_SEP_CHAR; 267 } 268 } else { 269 s=filename; 270 } 271 272 // turn the name into a filename, turn tree separators into file separators 273 if((int32_t)((s-filename)+strlen(name))>=capacity) { 274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 275 exit(U_BUFFER_OVERFLOW_ERROR); 276 } 277 strcpy(s, name); 278 treeToPath(s); 279 } 280 281 static void 282 makeFullFilenameAndDirs(const char *path, const char *name, 283 char *filename, int32_t capacity) { 284 char *sep; 285 UErrorCode errorCode; 286 287 makeFullFilename(path, name, filename, capacity); 288 289 // make tree directories 290 errorCode=U_ZERO_ERROR; 291 sep=strchr(filename, 0)-strlen(name); 292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 293 if(sep!=filename) { 294 *sep=0; // truncate temporarily 295 uprv_mkdir(filename, &errorCode); 296 if(U_FAILURE(errorCode)) { 297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 298 exit(U_FILE_ACCESS_ERROR); 299 } 300 } 301 *sep++=U_FILE_SEP_CHAR; // restore file separator character 302 } 303 } 304 305 static uint8_t * 306 readFile(const char *path, const char *name, int32_t &length, char &type) { 307 char filename[1024]; 308 FILE *file; 309 uint8_t *data; 310 UErrorCode errorCode; 311 int32_t fileLength, typeEnum; 312 313 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 314 315 /* open the input file, get its length, allocate memory for it, read the file */ 316 file=fopen(filename, "rb"); 317 if(file==NULL) { 318 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 319 exit(U_FILE_ACCESS_ERROR); 320 } 321 322 /* get the file length */ 323 fileLength=getFileLength(file); 324 if(ferror(file) || fileLength<=0) { 325 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 326 fclose(file); 327 exit(U_FILE_ACCESS_ERROR); 328 } 329 330 /* allocate the buffer, pad to multiple of 16 */ 331 length=(fileLength+0xf)&~0xf; 332 data=(uint8_t *)malloc(length); 333 if(data==NULL) { 334 fclose(file); 335 exit(U_MEMORY_ALLOCATION_ERROR); 336 } 337 338 /* read the file */ 339 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 340 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 341 fclose(file); 342 free(data); 343 exit(U_FILE_ACCESS_ERROR); 344 } 345 346 /* pad the file to a multiple of 16 using the usual padding byte */ 347 if(fileLength<length) { 348 memset(data+fileLength, 0xaa, length-fileLength); 349 } 350 351 fclose(file); 352 353 // minimum check for ICU-format data 354 errorCode=U_ZERO_ERROR; 355 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 356 if(typeEnum<0 || U_FAILURE(errorCode)) { 357 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 358 free(data); 359 #if !UCONFIG_NO_LEGACY_CONVERSION 360 exit(U_INVALID_FORMAT_ERROR); 361 #else 362 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 363 exit(0); 364 #endif 365 } 366 type=makeTypeLetter(typeEnum); 367 368 return data; 369 } 370 371 // .dat package file representation ---------------------------------------- *** 372 373 U_CDECL_BEGIN 374 375 static int32_t U_CALLCONV 376 compareItems(const void * /*context*/, const void *left, const void *right) { 377 U_NAMESPACE_USE 378 379 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 380 } 381 382 U_CDECL_END 383 384 U_NAMESPACE_BEGIN 385 386 Package::Package() { 387 inPkgName[0]=0; 388 inData=NULL; 389 inLength=0; 390 inCharset=U_CHARSET_FAMILY; 391 inIsBigEndian=U_IS_BIG_ENDIAN; 392 393 itemCount=0; 394 itemMax=0; 395 items=NULL; 396 397 inStringTop=outStringTop=0; 398 399 matchMode=0; 400 findPrefix=findSuffix=NULL; 401 findPrefixLength=findSuffixLength=0; 402 findNextIndex=-1; 403 404 // create a header for an empty package 405 DataHeader *pHeader; 406 pHeader=(DataHeader *)header; 407 pHeader->dataHeader.magic1=0xda; 408 pHeader->dataHeader.magic2=0x27; 409 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 410 headerLength=(int32_t)(4+sizeof(dataInfo)); 411 if(headerLength&0xf) { 412 /* NUL-pad the header to a multiple of 16 */ 413 int32_t length=(headerLength+0xf)&~0xf; 414 memset(header+headerLength, 0, length-headerLength); 415 headerLength=length; 416 } 417 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 418 } 419 420 Package::~Package() { 421 int32_t idx; 422 423 free(inData); 424 425 for(idx=0; idx<itemCount; ++idx) { 426 if(items[idx].isDataOwned) { 427 free(items[idx].data); 428 } 429 } 430 431 uprv_free((void*)items); 432 } 433 434 void 435 Package::readPackage(const char *filename) { 436 UDataSwapper *ds; 437 const UDataInfo *pInfo; 438 UErrorCode errorCode; 439 440 const uint8_t *inBytes; 441 442 int32_t length, offset, i; 443 int32_t itemLength, typeEnum; 444 char type; 445 446 const UDataOffsetTOCEntry *inEntries; 447 448 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 449 450 /* read the file */ 451 inData=readFile(NULL, filename, inLength, type); 452 length=inLength; 453 454 /* 455 * swap the header - even if the swapping itself is a no-op 456 * because it tells us the header length 457 */ 458 errorCode=U_ZERO_ERROR; 459 makeTypeProps(type, inCharset, inIsBigEndian); 460 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 461 if(U_FAILURE(errorCode)) { 462 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 463 filename, u_errorName(errorCode)); 464 exit(errorCode); 465 } 466 467 ds->printError=printPackageError; 468 ds->printErrorContext=stderr; 469 470 headerLength=sizeof(header); 471 if(length<headerLength) { 472 headerLength=length; 473 } 474 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 475 if(U_FAILURE(errorCode)) { 476 exit(errorCode); 477 } 478 479 /* check data format and format version */ 480 pInfo=(const UDataInfo *)((const char *)inData+4); 481 if(!( 482 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 483 pInfo->dataFormat[1]==0x6d && 484 pInfo->dataFormat[2]==0x6e && 485 pInfo->dataFormat[3]==0x44 && 486 pInfo->formatVersion[0]==1 487 )) { 488 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 489 pInfo->dataFormat[0], pInfo->dataFormat[1], 490 pInfo->dataFormat[2], pInfo->dataFormat[3], 491 pInfo->formatVersion[0]); 492 exit(U_UNSUPPORTED_ERROR); 493 } 494 inIsBigEndian=(UBool)pInfo->isBigEndian; 495 inCharset=pInfo->charsetFamily; 496 497 inBytes=(const uint8_t *)inData+headerLength; 498 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 499 500 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 501 length-=headerLength; 502 if(length<4) { 503 /* itemCount does not fit */ 504 offset=0x7fffffff; 505 } else { 506 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 507 setItemCapacity(itemCount); /* resize so there's space */ 508 if(itemCount==0) { 509 offset=4; 510 } else if(length<(4+8*itemCount)) { 511 /* ToC table does not fit */ 512 offset=0x7fffffff; 513 } else { 514 /* offset of the last item plus at least 20 bytes for its header */ 515 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 516 } 517 } 518 if(length<offset) { 519 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 520 (long)length); 521 exit(U_INDEX_OUTOFBOUNDS_ERROR); 522 } 523 /* do not modify the package length variable until the last item's length is set */ 524 525 if(itemCount>0) { 526 char prefix[MAX_PKG_NAME_LENGTH+4]; 527 char *s, *inItemStrings; 528 int32_t inPkgNameLength, prefixLength, stringsOffset; 529 530 if(itemCount>itemMax) { 531 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 532 exit(U_BUFFER_OVERFLOW_ERROR); 533 } 534 535 /* swap the item name strings */ 536 stringsOffset=4+8*itemCount; 537 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 538 539 // don't include padding bytes at the end of the item names 540 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 541 --itemLength; 542 } 543 544 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 545 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 546 exit(U_BUFFER_OVERFLOW_ERROR); 547 } 548 549 inItemStrings=inStrings+inStringTop; 550 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 551 if(U_FAILURE(errorCode)) { 552 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 553 exit(U_INVALID_FORMAT_ERROR); 554 } 555 inStringTop+=itemLength; 556 557 // reset the Item entries 558 memset(items, 0, itemCount*sizeof(Item)); 559 560 inPkgNameLength=strlen(inPkgName); 561 memcpy(prefix, inPkgName, inPkgNameLength); 562 prefixLength=inPkgNameLength; 563 564 /* 565 * Get the common prefix of the items. 566 * New-style ICU .dat packages use tree separators ('/') between package names, 567 * tree names, and item names, 568 * while old-style ICU .dat packages (before multi-tree support) 569 * use an underscore ('_') between package and item names. 570 */ 571 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 572 s=inItemStrings+offset; 573 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 574 0==memcmp(s, inPkgName, inPkgNameLength) && 575 s[inPkgNameLength]=='_' 576 ) { 577 // old-style .dat package 578 prefix[prefixLength++]='_'; 579 } else { 580 // new-style .dat package 581 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 582 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 583 // then the test in the loop below will fail 584 } 585 prefix[prefixLength]=0; 586 587 /* read the ToC table */ 588 for(i=0; i<itemCount; ++i) { 589 // skip the package part of the item name, error if it does not match the actual package name 590 // or if nothing follows the package name 591 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 592 s=inItemStrings+offset; 593 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 594 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 595 s, prefix); 596 exit(U_UNSUPPORTED_ERROR); 597 } 598 items[i].name=s+prefixLength; 599 600 // set the item's data 601 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 602 if(i>0) { 603 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 604 605 // set the previous item's platform type 606 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 607 if(typeEnum<0 || U_FAILURE(errorCode)) { 608 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 609 exit(U_INVALID_FORMAT_ERROR); 610 } 611 items[i-1].type=makeTypeLetter(typeEnum); 612 } 613 items[i].isDataOwned=FALSE; 614 } 615 // set the last item's length 616 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 617 618 // set the last item's platform type 619 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 620 if(typeEnum<0 || U_FAILURE(errorCode)) { 621 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 622 exit(U_INVALID_FORMAT_ERROR); 623 } 624 items[itemCount-1].type=makeTypeLetter(typeEnum); 625 626 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 627 // sort the item names for the local charset 628 sortItems(); 629 } 630 } 631 632 udata_closeSwapper(ds); 633 } 634 635 char 636 Package::getInType() { 637 return makeTypeLetter(inCharset, inIsBigEndian); 638 } 639 640 void 641 Package::writePackage(const char *filename, char outType, const char *comment) { 642 char prefix[MAX_PKG_NAME_LENGTH+4]; 643 UDataOffsetTOCEntry entry; 644 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 645 FILE *file; 646 Item *pItem; 647 char *name; 648 UErrorCode errorCode; 649 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 650 uint8_t outCharset; 651 UBool outIsBigEndian; 652 653 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 654 655 // if there is an explicit comment, then use it, else use what's in the current header 656 if(comment!=NULL) { 657 /* get the header size minus the current comment */ 658 DataHeader *pHeader; 659 int32_t length; 660 661 pHeader=(DataHeader *)header; 662 headerLength=4+pHeader->info.size; 663 length=(int32_t)strlen(comment); 664 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 665 fprintf(stderr, "icupkg: comment too long\n"); 666 exit(U_BUFFER_OVERFLOW_ERROR); 667 } 668 memcpy(header+headerLength, comment, length+1); 669 headerLength+=length; 670 if(headerLength&0xf) { 671 /* NUL-pad the header to a multiple of 16 */ 672 length=(headerLength+0xf)&~0xf; 673 memset(header+headerLength, 0, length-headerLength); 674 headerLength=length; 675 } 676 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 677 } 678 679 makeTypeProps(outType, outCharset, outIsBigEndian); 680 681 // open (TYPE_COUNT-2) swappers 682 // one is a no-op for local type==outType 683 // one type (TYPE_LE) is bogus 684 errorCode=U_ZERO_ERROR; 685 i=makeTypeEnum(outType); 686 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 687 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 688 ds[TYPE_LE]=NULL; 689 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 690 if(U_FAILURE(errorCode)) { 691 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 692 exit(errorCode); 693 } 694 for(i=0; i<TYPE_COUNT; ++i) { 695 if(ds[i]!=NULL) { 696 ds[i]->printError=printPackageError; 697 ds[i]->printErrorContext=stderr; 698 } 699 } 700 701 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 702 703 // create the file and write its contents 704 file=fopen(filename, "wb"); 705 if(file==NULL) { 706 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 707 exit(U_FILE_ACCESS_ERROR); 708 } 709 710 // swap and write the header 711 if(dsLocalToOut!=NULL) { 712 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 713 if(U_FAILURE(errorCode)) { 714 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 715 exit(errorCode); 716 } 717 } 718 length=(int32_t)fwrite(header, 1, headerLength, file); 719 if(length!=headerLength) { 720 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 721 exit(U_FILE_ACCESS_ERROR); 722 } 723 724 // prepare and swap the package name with a tree separator 725 // for prepending to item names 726 strcat(prefix, U_TREE_ENTRY_SEP_STRING); 727 prefixLength=(int32_t)strlen(prefix); 728 if(dsLocalToOut!=NULL) { 729 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 730 if(U_FAILURE(errorCode)) { 731 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 732 exit(errorCode); 733 } 734 735 // swap and sort the item names (sorting needs to be done in the output charset) 736 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 737 if(U_FAILURE(errorCode)) { 738 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 739 exit(errorCode); 740 } 741 sortItems(); 742 } 743 744 // create the output item names in sorted order, with the package name prepended to each 745 for(i=0; i<itemCount; ++i) { 746 length=(int32_t)strlen(items[i].name); 747 name=allocString(FALSE, length+prefixLength); 748 memcpy(name, prefix, prefixLength); 749 memcpy(name+prefixLength, items[i].name, length+1); 750 items[i].name=name; 751 } 752 753 // calculate offsets for item names and items, pad to 16-align items 754 // align only the first item; each item's length is a multiple of 16 755 basenameOffset=4+8*itemCount; 756 offset=basenameOffset+outStringTop; 757 if((length=(offset&15))!=0) { 758 length=16-length; 759 memset(allocString(FALSE, length-1), 0xaa, length); 760 offset+=length; 761 } 762 763 // write the table of contents 764 // first the itemCount 765 outInt32=itemCount; 766 if(dsLocalToOut!=NULL) { 767 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 768 if(U_FAILURE(errorCode)) { 769 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 770 exit(errorCode); 771 } 772 } 773 length=(int32_t)fwrite(&outInt32, 1, 4, file); 774 if(length!=4) { 775 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 776 exit(U_FILE_ACCESS_ERROR); 777 } 778 779 // then write the item entries (and collect the maxItemLength) 780 maxItemLength=0; 781 for(i=0; i<itemCount; ++i) { 782 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 783 entry.dataOffset=(uint32_t)offset; 784 if(dsLocalToOut!=NULL) { 785 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 786 if(U_FAILURE(errorCode)) { 787 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 788 exit(errorCode); 789 } 790 } 791 length=(int32_t)fwrite(&entry, 1, 8, file); 792 if(length!=8) { 793 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 794 exit(U_FILE_ACCESS_ERROR); 795 } 796 797 length=items[i].length; 798 if(length>maxItemLength) { 799 maxItemLength=length; 800 } 801 offset+=length; 802 } 803 804 // write the item names 805 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 806 if(length!=outStringTop) { 807 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 808 exit(U_FILE_ACCESS_ERROR); 809 } 810 811 // write the items 812 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 813 int32_t type=makeTypeEnum(pItem->type); 814 if(ds[type]!=NULL) { 815 // swap each item from its platform properties to the desired ones 816 udata_swap( 817 ds[type], 818 pItem->data, pItem->length, pItem->data, 819 &errorCode); 820 if(U_FAILURE(errorCode)) { 821 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 822 exit(errorCode); 823 } 824 } 825 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 826 if(length!=pItem->length) { 827 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 828 exit(U_FILE_ACCESS_ERROR); 829 } 830 } 831 832 if(ferror(file)) { 833 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 834 exit(U_FILE_ACCESS_ERROR); 835 } 836 837 fclose(file); 838 for(i=0; i<TYPE_COUNT; ++i) { 839 udata_closeSwapper(ds[i]); 840 } 841 } 842 843 int32_t 844 Package::findItem(const char *name, int32_t length) const { 845 int32_t i, start, limit; 846 int result; 847 848 /* do a binary search for the string */ 849 start=0; 850 limit=itemCount; 851 while(start<limit) { 852 i=(start+limit)/2; 853 if(length>=0) { 854 result=strncmp(name, items[i].name, length); 855 } else { 856 result=strcmp(name, items[i].name); 857 } 858 859 if(result==0) { 860 /* found */ 861 if(length>=0) { 862 /* 863 * if we compared just prefixes, then we may need to back up 864 * to the first item with this prefix 865 */ 866 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 867 --i; 868 } 869 } 870 return i; 871 } else if(result<0) { 872 limit=i; 873 } else /* result>0 */ { 874 start=i+1; 875 } 876 } 877 878 return ~start; /* not found, return binary-not of the insertion point */ 879 } 880 881 void 882 Package::findItems(const char *pattern) { 883 const char *wild; 884 885 if(pattern==NULL || *pattern==0) { 886 findNextIndex=-1; 887 return; 888 } 889 890 findPrefix=pattern; 891 findSuffix=NULL; 892 findSuffixLength=0; 893 894 wild=strchr(pattern, '*'); 895 if(wild==NULL) { 896 // no wildcard 897 findPrefixLength=(int32_t)strlen(pattern); 898 } else { 899 // one wildcard 900 findPrefixLength=(int32_t)(wild-pattern); 901 findSuffix=wild+1; 902 findSuffixLength=(int32_t)strlen(findSuffix); 903 if(NULL!=strchr(findSuffix, '*')) { 904 // two or more wildcards 905 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 906 exit(U_PARSE_ERROR); 907 } 908 } 909 910 if(findPrefixLength==0) { 911 findNextIndex=0; 912 } else { 913 findNextIndex=findItem(findPrefix, findPrefixLength); 914 } 915 } 916 917 int32_t 918 Package::findNextItem() { 919 const char *name, *middle, *treeSep; 920 int32_t idx, nameLength, middleLength; 921 922 if(findNextIndex<0) { 923 return -1; 924 } 925 926 while(findNextIndex<itemCount) { 927 idx=findNextIndex++; 928 name=items[idx].name; 929 nameLength=(int32_t)strlen(name); 930 if(nameLength<(findPrefixLength+findSuffixLength)) { 931 // item name too short for prefix & suffix 932 continue; 933 } 934 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 935 // left the range of names with this prefix 936 break; 937 } 938 middle=name+findPrefixLength; 939 middleLength=nameLength-findPrefixLength-findSuffixLength; 940 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 941 // suffix does not match 942 continue; 943 } 944 // prefix & suffix match 945 946 if(matchMode&MATCH_NOSLASH) { 947 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 948 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 949 // the middle (matching the * wildcard) contains a tree separator / 950 continue; 951 } 952 } 953 954 // found a matching item 955 return idx; 956 } 957 958 // no more items 959 findNextIndex=-1; 960 return -1; 961 } 962 963 void 964 Package::setMatchMode(uint32_t mode) { 965 matchMode=mode; 966 } 967 968 void 969 Package::addItem(const char *name) { 970 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 971 } 972 973 void 974 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 975 int32_t idx; 976 977 idx=findItem(name); 978 if(idx<0) { 979 // new item, make space at the insertion point 980 ensureItemCapacity(); 981 // move the following items down 982 idx=~idx; 983 if(idx<itemCount) { 984 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 985 } 986 ++itemCount; 987 988 // reset this Item entry 989 memset(items+idx, 0, sizeof(Item)); 990 991 // copy the item's name 992 items[idx].name=allocString(TRUE, strlen(name)); 993 strcpy(items[idx].name, name); 994 pathToTree(items[idx].name); 995 } else { 996 // same-name item found, replace it 997 if(items[idx].isDataOwned) { 998 free(items[idx].data); 999 } 1000 1001 // keep the item's name since it is the same 1002 } 1003 1004 // set the item's data 1005 items[idx].data=data; 1006 items[idx].length=length; 1007 items[idx].isDataOwned=isDataOwned; 1008 items[idx].type=type; 1009 } 1010 1011 void 1012 Package::addFile(const char *filesPath, const char *name) { 1013 uint8_t *data; 1014 int32_t length; 1015 char type; 1016 1017 data=readFile(filesPath, name, length, type); 1018 // readFile() exits the tool if it fails 1019 addItem(name, data, length, TRUE, type); 1020 } 1021 1022 void 1023 Package::addItems(const Package &listPkg) { 1024 const Item *pItem; 1025 int32_t i; 1026 1027 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1028 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1029 } 1030 } 1031 1032 void 1033 Package::removeItem(int32_t idx) { 1034 if(idx>=0) { 1035 // remove the item 1036 if(items[idx].isDataOwned) { 1037 free(items[idx].data); 1038 } 1039 1040 // move the following items up 1041 if((idx+1)<itemCount) { 1042 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1043 } 1044 --itemCount; 1045 1046 if(idx<=findNextIndex) { 1047 --findNextIndex; 1048 } 1049 } 1050 } 1051 1052 void 1053 Package::removeItems(const char *pattern) { 1054 int32_t idx; 1055 1056 findItems(pattern); 1057 while((idx=findNextItem())>=0) { 1058 removeItem(idx); 1059 } 1060 } 1061 1062 void 1063 Package::removeItems(const Package &listPkg) { 1064 const Item *pItem; 1065 int32_t i; 1066 1067 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1068 removeItems(pItem->name); 1069 } 1070 } 1071 1072 void 1073 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1074 char filename[1024]; 1075 UDataSwapper *ds; 1076 FILE *file; 1077 Item *pItem; 1078 int32_t fileLength; 1079 uint8_t itemCharset, outCharset; 1080 UBool itemIsBigEndian, outIsBigEndian; 1081 1082 if(idx<0 || itemCount<=idx) { 1083 return; 1084 } 1085 pItem=items+idx; 1086 1087 // swap the data to the outType 1088 // outType==0: don't swap 1089 if(outType!=0 && pItem->type!=outType) { 1090 // open the swapper 1091 UErrorCode errorCode=U_ZERO_ERROR; 1092 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1093 makeTypeProps(outType, outCharset, outIsBigEndian); 1094 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1095 if(U_FAILURE(errorCode)) { 1096 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1097 (long)idx, u_errorName(errorCode)); 1098 exit(errorCode); 1099 } 1100 1101 ds->printError=printPackageError; 1102 ds->printErrorContext=stderr; 1103 1104 // swap the item from its platform properties to the desired ones 1105 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1106 if(U_FAILURE(errorCode)) { 1107 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1108 exit(errorCode); 1109 } 1110 udata_closeSwapper(ds); 1111 pItem->type=outType; 1112 } 1113 1114 // create the file and write its contents 1115 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1116 file=fopen(filename, "wb"); 1117 if(file==NULL) { 1118 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1119 exit(U_FILE_ACCESS_ERROR); 1120 } 1121 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1122 1123 if(ferror(file) || fileLength!=pItem->length) { 1124 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1125 exit(U_FILE_ACCESS_ERROR); 1126 } 1127 fclose(file); 1128 } 1129 1130 void 1131 Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1132 extractItem(filesPath, items[idx].name, idx, outType); 1133 } 1134 1135 void 1136 Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1137 int32_t idx; 1138 1139 findItems(pattern); 1140 while((idx=findNextItem())>=0) { 1141 extractItem(filesPath, idx, outType); 1142 } 1143 } 1144 1145 void 1146 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1147 const Item *pItem; 1148 int32_t i; 1149 1150 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1151 extractItems(filesPath, pItem->name, outType); 1152 } 1153 } 1154 1155 int32_t 1156 Package::getItemCount() const { 1157 return itemCount; 1158 } 1159 1160 const Item * 1161 Package::getItem(int32_t idx) const { 1162 if (0 <= idx && idx < itemCount) { 1163 return &items[idx]; 1164 } 1165 return NULL; 1166 } 1167 1168 void 1169 Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1170 // check dependency: make sure the target item is in the package 1171 Package *me=(Package *)context; 1172 if(me->findItem(targetName)<0) { 1173 me->isMissingItems=TRUE; 1174 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1175 } 1176 } 1177 1178 UBool 1179 Package::checkDependencies() { 1180 isMissingItems=FALSE; 1181 enumDependencies(this, checkDependency); 1182 return (UBool)!isMissingItems; 1183 } 1184 1185 void 1186 Package::enumDependencies(void *context, CheckDependency check) { 1187 int32_t i; 1188 1189 for(i=0; i<itemCount; ++i) { 1190 enumDependencies(items+i, context, check); 1191 } 1192 } 1193 1194 char * 1195 Package::allocString(UBool in, int32_t length) { 1196 char *p; 1197 int32_t top; 1198 1199 if(in) { 1200 top=inStringTop; 1201 p=inStrings+top; 1202 } else { 1203 top=outStringTop; 1204 p=outStrings+top; 1205 } 1206 top+=length+1; 1207 1208 if(top>STRING_STORE_SIZE) { 1209 fprintf(stderr, "icupkg: string storage overflow\n"); 1210 exit(U_BUFFER_OVERFLOW_ERROR); 1211 } 1212 if(in) { 1213 inStringTop=top; 1214 } else { 1215 outStringTop=top; 1216 } 1217 return p; 1218 } 1219 1220 void 1221 Package::sortItems() { 1222 UErrorCode errorCode=U_ZERO_ERROR; 1223 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1224 if(U_FAILURE(errorCode)) { 1225 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1226 exit(errorCode); 1227 } 1228 } 1229 1230 void Package::setItemCapacity(int32_t max) 1231 { 1232 if(max<=itemMax) { 1233 return; 1234 } 1235 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1236 Item *oldItems = items; 1237 if(newItems == NULL) { 1238 fprintf(stderr, "icupkg: Out of memory trying to allocate %ld bytes for %d items\n", max*sizeof(items[0]), max); 1239 exit(U_MEMORY_ALLOCATION_ERROR); 1240 } 1241 if(items && itemCount>0) { 1242 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1243 } 1244 itemMax = max; 1245 items = newItems; 1246 uprv_free(oldItems); 1247 } 1248 1249 void Package::ensureItemCapacity() 1250 { 1251 if((itemCount+1)>itemMax) { 1252 setItemCapacity(itemCount+kItemsChunk); 1253 } 1254 } 1255 1256 U_NAMESPACE_END 1257