1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2009, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: package.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005aug25 14 * created by: Markus W. Scherer 15 * 16 * Read, modify, and write ICU .dat data package files. 17 * This is an integral part of the icupkg tool, moved to the toolutil library 18 * because parts of tool implementations tend to be later shared by 19 * other tools. 20 * Subsumes functionality and implementation code from 21 * gencmn, decmn, and icuswap tools. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/putil.h" 26 #include "unicode/udata.h" 27 #include "cstring.h" 28 #include "uarrsort.h" 29 #include "ucmndata.h" 30 #include "udataswp.h" 31 #include "swapimpl.h" 32 #include "toolutil.h" 33 #include "package.h" 34 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 // general definitions ----------------------------------------------------- *** 40 41 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 42 43 /* UDataInfo cf. udata.h */ 44 static const UDataInfo dataInfo={ 45 (uint16_t)sizeof(UDataInfo), 46 0, 47 48 U_IS_BIG_ENDIAN, 49 U_CHARSET_FAMILY, 50 (uint8_t)sizeof(UChar), 51 0, 52 53 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 54 {1, 0, 0, 0}, /* formatVersion */ 55 {3, 0, 0, 0} /* dataVersion */ 56 }; 57 58 U_CDECL_BEGIN 59 static void U_CALLCONV 60 printPackageError(void *context, const char *fmt, va_list args) { 61 vfprintf((FILE *)context, fmt, args); 62 } 63 U_CDECL_END 64 65 static uint16_t 66 readSwapUInt16(uint16_t x) { 67 return (uint16_t)((x<<8)|(x>>8)); 68 } 69 70 // platform types ---------------------------------------------------------- *** 71 72 static const char *types="lb?e"; 73 74 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 75 76 static inline int32_t 77 makeTypeEnum(uint8_t charset, UBool isBigEndian) { 78 return 2*(int32_t)charset+isBigEndian; 79 } 80 81 static inline int32_t 82 makeTypeEnum(char type) { 83 return 84 type == 'l' ? TYPE_L : 85 type == 'b' ? TYPE_B : 86 type == 'e' ? TYPE_E : 87 -1; 88 } 89 90 static inline char 91 makeTypeLetter(uint8_t charset, UBool isBigEndian) { 92 return types[makeTypeEnum(charset, isBigEndian)]; 93 } 94 95 static inline char 96 makeTypeLetter(int32_t typeEnum) { 97 return types[typeEnum]; 98 } 99 100 static void 101 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 102 int32_t typeEnum=makeTypeEnum(type); 103 charset=(uint8_t)(typeEnum>>1); 104 isBigEndian=(UBool)(typeEnum&1); 105 } 106 107 U_CFUNC const UDataInfo * 108 getDataInfo(const uint8_t *data, int32_t length, 109 int32_t &infoLength, int32_t &headerLength, 110 UErrorCode *pErrorCode) { 111 const DataHeader *pHeader; 112 const UDataInfo *pInfo; 113 114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 115 return NULL; 116 } 117 if( data==NULL || 118 (length>=0 && length<(int32_t)sizeof(DataHeader)) 119 ) { 120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 121 return NULL; 122 } 123 124 pHeader=(const DataHeader *)data; 125 pInfo=&pHeader->info; 126 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 127 pHeader->dataHeader.magic1!=0xda || 128 pHeader->dataHeader.magic2!=0x27 || 129 pInfo->sizeofUChar!=2 130 ) { 131 *pErrorCode=U_UNSUPPORTED_ERROR; 132 return NULL; 133 } 134 135 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 136 headerLength=pHeader->dataHeader.headerSize; 137 infoLength=pInfo->size; 138 } else { 139 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 140 infoLength=readSwapUInt16(pInfo->size); 141 } 142 143 if( headerLength<(int32_t)sizeof(DataHeader) || 144 infoLength<(int32_t)sizeof(UDataInfo) || 145 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 146 (length>=0 && length<headerLength) 147 ) { 148 *pErrorCode=U_UNSUPPORTED_ERROR; 149 return NULL; 150 } 151 152 return pInfo; 153 } 154 155 static int32_t 156 getTypeEnumForInputData(const uint8_t *data, int32_t length, 157 UErrorCode *pErrorCode) { 158 const UDataInfo *pInfo; 159 int32_t infoLength, headerLength; 160 161 /* getDataInfo() checks for illegal arguments */ 162 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 163 if(pInfo==NULL) { 164 return -1; 165 } 166 167 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 168 } 169 170 // file handling ----------------------------------------------------------- *** 171 172 static void 173 extractPackageName(const char *filename, 174 char pkg[], int32_t capacity) { 175 const char *basename; 176 int32_t len; 177 178 basename=findBasename(filename); 179 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 180 181 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 182 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 183 basename); 184 exit(U_ILLEGAL_ARGUMENT_ERROR); 185 } 186 187 if(len>=capacity) { 188 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 189 basename, (long)capacity); 190 exit(U_ILLEGAL_ARGUMENT_ERROR); 191 } 192 193 memcpy(pkg, basename, len); 194 pkg[len]=0; 195 } 196 197 static int32_t 198 getFileLength(FILE *f) { 199 int32_t length; 200 201 fseek(f, 0, SEEK_END); 202 length=(int32_t)ftell(f); 203 fseek(f, 0, SEEK_SET); 204 return length; 205 } 206 207 /* 208 * Turn tree separators and alternate file separators into normal file separators. 209 */ 210 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 211 #define treeToPath(s) 212 #else 213 static void 214 treeToPath(char *s) { 215 char *t; 216 217 for(t=s; *t!=0; ++t) { 218 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 219 *t=U_FILE_SEP_CHAR; 220 } 221 } 222 } 223 #endif 224 225 /* 226 * Turn file separators into tree separators. 227 */ 228 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 229 #define pathToTree(s) 230 #else 231 static void 232 pathToTree(char *s) { 233 char *t; 234 235 for(t=s; *t!=0; ++t) { 236 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 237 *t=U_TREE_ENTRY_SEP_CHAR; 238 } 239 } 240 } 241 #endif 242 243 /* 244 * Prepend the path (if any) to the name and run the name through treeToName(). 245 */ 246 static void 247 makeFullFilename(const char *path, const char *name, 248 char *filename, int32_t capacity) { 249 char *s; 250 251 // prepend the path unless NULL or empty 252 if(path!=NULL && path[0]!=0) { 253 if((int32_t)(strlen(path)+1)>=capacity) { 254 fprintf(stderr, "pathname too long: \"%s\"\n", path); 255 exit(U_BUFFER_OVERFLOW_ERROR); 256 } 257 strcpy(filename, path); 258 259 // make sure the path ends with a file separator 260 s=strchr(filename, 0); 261 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 262 *s++=U_FILE_SEP_CHAR; 263 } 264 } else { 265 s=filename; 266 } 267 268 // turn the name into a filename, turn tree separators into file separators 269 if((int32_t)((s-filename)+strlen(name))>=capacity) { 270 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 271 exit(U_BUFFER_OVERFLOW_ERROR); 272 } 273 strcpy(s, name); 274 treeToPath(s); 275 } 276 277 static void 278 makeFullFilenameAndDirs(const char *path, const char *name, 279 char *filename, int32_t capacity) { 280 char *sep; 281 UErrorCode errorCode; 282 283 makeFullFilename(path, name, filename, capacity); 284 285 // make tree directories 286 errorCode=U_ZERO_ERROR; 287 sep=strchr(filename, 0)-strlen(name); 288 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 289 if(sep!=filename) { 290 *sep=0; // truncate temporarily 291 uprv_mkdir(filename, &errorCode); 292 if(U_FAILURE(errorCode)) { 293 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 294 exit(U_FILE_ACCESS_ERROR); 295 } 296 } 297 *sep++=U_FILE_SEP_CHAR; // restore file separator character 298 } 299 } 300 301 static uint8_t * 302 readFile(const char *path, const char *name, int32_t &length, char &type) { 303 char filename[1024]; 304 FILE *file; 305 uint8_t *data; 306 UErrorCode errorCode; 307 int32_t fileLength, typeEnum; 308 309 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 310 311 /* open the input file, get its length, allocate memory for it, read the file */ 312 file=fopen(filename, "rb"); 313 if(file==NULL) { 314 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 315 exit(U_FILE_ACCESS_ERROR); 316 } 317 318 /* get the file length */ 319 fileLength=getFileLength(file); 320 if(ferror(file) || fileLength<=0) { 321 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 322 fclose(file); 323 exit(U_FILE_ACCESS_ERROR); 324 } 325 326 /* allocate the buffer, pad to multiple of 16 */ 327 length=(fileLength+0xf)&~0xf; 328 data=(uint8_t *)malloc(length); 329 if(data==NULL) { 330 fclose(file); 331 exit(U_MEMORY_ALLOCATION_ERROR); 332 } 333 334 /* read the file */ 335 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 336 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 337 fclose(file); 338 free(data); 339 exit(U_FILE_ACCESS_ERROR); 340 } 341 342 /* pad the file to a multiple of 16 using the usual padding byte */ 343 if(fileLength<length) { 344 memset(data+fileLength, 0xaa, length-fileLength); 345 } 346 347 fclose(file); 348 349 // minimum check for ICU-format data 350 errorCode=U_ZERO_ERROR; 351 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 352 if(typeEnum<0 || U_FAILURE(errorCode)) { 353 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 354 free(data); 355 exit(U_INVALID_FORMAT_ERROR); 356 } 357 type=makeTypeLetter(typeEnum); 358 359 return data; 360 } 361 362 // .dat package file representation ---------------------------------------- *** 363 364 U_CDECL_BEGIN 365 366 static int32_t U_CALLCONV 367 compareItems(const void * /*context*/, const void *left, const void *right) { 368 U_NAMESPACE_USE 369 370 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 371 } 372 373 U_CDECL_END 374 375 U_NAMESPACE_BEGIN 376 377 Package::Package() { 378 inPkgName[0]=0; 379 inData=NULL; 380 inLength=0; 381 inCharset=U_CHARSET_FAMILY; 382 inIsBigEndian=U_IS_BIG_ENDIAN; 383 384 itemCount=0; 385 inStringTop=outStringTop=0; 386 387 matchMode=0; 388 findPrefix=findSuffix=NULL; 389 findPrefixLength=findSuffixLength=0; 390 findNextIndex=-1; 391 392 // create a header for an empty package 393 DataHeader *pHeader; 394 pHeader=(DataHeader *)header; 395 pHeader->dataHeader.magic1=0xda; 396 pHeader->dataHeader.magic2=0x27; 397 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 398 headerLength=(int32_t)(4+sizeof(dataInfo)); 399 if(headerLength&0xf) { 400 /* NUL-pad the header to a multiple of 16 */ 401 int32_t length=(headerLength+0xf)&~0xf; 402 memset(header+headerLength, 0, length-headerLength); 403 headerLength=length; 404 } 405 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 406 } 407 408 Package::~Package() { 409 int32_t idx; 410 411 free(inData); 412 413 for(idx=0; idx<itemCount; ++idx) { 414 if(items[idx].isDataOwned) { 415 free(items[idx].data); 416 } 417 } 418 } 419 420 void 421 Package::readPackage(const char *filename) { 422 UDataSwapper *ds; 423 const UDataInfo *pInfo; 424 UErrorCode errorCode; 425 426 const uint8_t *inBytes; 427 428 int32_t length, offset, i; 429 int32_t itemLength, typeEnum; 430 char type; 431 432 const UDataOffsetTOCEntry *inEntries; 433 434 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 435 436 /* read the file */ 437 inData=readFile(NULL, filename, inLength, type); 438 length=inLength; 439 440 /* 441 * swap the header - even if the swapping itself is a no-op 442 * because it tells us the header length 443 */ 444 errorCode=U_ZERO_ERROR; 445 makeTypeProps(type, inCharset, inIsBigEndian); 446 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 447 if(U_FAILURE(errorCode)) { 448 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 449 filename, u_errorName(errorCode)); 450 exit(errorCode); 451 } 452 453 ds->printError=printPackageError; 454 ds->printErrorContext=stderr; 455 456 headerLength=sizeof(header); 457 if(length<headerLength) { 458 headerLength=length; 459 } 460 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 461 if(U_FAILURE(errorCode)) { 462 exit(errorCode); 463 } 464 465 /* check data format and format version */ 466 pInfo=(const UDataInfo *)((const char *)inData+4); 467 if(!( 468 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 469 pInfo->dataFormat[1]==0x6d && 470 pInfo->dataFormat[2]==0x6e && 471 pInfo->dataFormat[3]==0x44 && 472 pInfo->formatVersion[0]==1 473 )) { 474 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 475 pInfo->dataFormat[0], pInfo->dataFormat[1], 476 pInfo->dataFormat[2], pInfo->dataFormat[3], 477 pInfo->formatVersion[0]); 478 exit(U_UNSUPPORTED_ERROR); 479 } 480 inIsBigEndian=(UBool)pInfo->isBigEndian; 481 inCharset=pInfo->charsetFamily; 482 483 inBytes=(const uint8_t *)inData+headerLength; 484 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 485 486 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 487 length-=headerLength; 488 if(length<4) { 489 /* itemCount does not fit */ 490 offset=0x7fffffff; 491 } else { 492 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 493 if(itemCount==0) { 494 offset=4; 495 } else if(length<(4+8*itemCount)) { 496 /* ToC table does not fit */ 497 offset=0x7fffffff; 498 } else { 499 /* offset of the last item plus at least 20 bytes for its header */ 500 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 501 } 502 } 503 if(length<offset) { 504 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 505 (long)length); 506 exit(U_INDEX_OUTOFBOUNDS_ERROR); 507 } 508 /* do not modify the package length variable until the last item's length is set */ 509 510 if(itemCount>0) { 511 char prefix[MAX_PKG_NAME_LENGTH+4]; 512 char *s, *inItemStrings; 513 int32_t inPkgNameLength, prefixLength, stringsOffset; 514 515 if(itemCount>MAX_FILE_COUNT) { 516 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 517 exit(U_BUFFER_OVERFLOW_ERROR); 518 } 519 520 /* swap the item name strings */ 521 stringsOffset=4+8*itemCount; 522 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 523 524 // don't include padding bytes at the end of the item names 525 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 526 --itemLength; 527 } 528 529 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 530 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 531 exit(U_BUFFER_OVERFLOW_ERROR); 532 } 533 534 inItemStrings=inStrings+inStringTop; 535 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 536 if(U_FAILURE(errorCode)) { 537 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 538 exit(U_INVALID_FORMAT_ERROR); 539 } 540 inStringTop+=itemLength; 541 542 // reset the Item entries 543 memset(items, 0, itemCount*sizeof(Item)); 544 545 inPkgNameLength=strlen(inPkgName); 546 memcpy(prefix, inPkgName, inPkgNameLength); 547 prefixLength=inPkgNameLength; 548 549 /* 550 * Get the common prefix of the items. 551 * New-style ICU .dat packages use tree separators ('/') between package names, 552 * tree names, and item names, 553 * while old-style ICU .dat packages (before multi-tree support) 554 * use an underscore ('_') between package and item names. 555 */ 556 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 557 s=inItemStrings+offset; 558 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 559 0==memcmp(s, inPkgName, inPkgNameLength) && 560 s[inPkgNameLength]=='_' 561 ) { 562 // old-style .dat package 563 prefix[prefixLength++]='_'; 564 } else { 565 // new-style .dat package 566 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 567 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 568 // then the test in the loop below will fail 569 } 570 prefix[prefixLength]=0; 571 572 /* read the ToC table */ 573 for(i=0; i<itemCount; ++i) { 574 // skip the package part of the item name, error if it does not match the actual package name 575 // or if nothing follows the package name 576 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 577 s=inItemStrings+offset; 578 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 579 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 580 s, prefix); 581 exit(U_UNSUPPORTED_ERROR); 582 } 583 items[i].name=s+prefixLength; 584 585 // set the item's data 586 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 587 if(i>0) { 588 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 589 590 // set the previous item's platform type 591 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 592 if(typeEnum<0 || U_FAILURE(errorCode)) { 593 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 594 exit(U_INVALID_FORMAT_ERROR); 595 } 596 items[i-1].type=makeTypeLetter(typeEnum); 597 } 598 items[i].isDataOwned=FALSE; 599 } 600 // set the last item's length 601 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 602 603 // set the last item's platform type 604 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 605 if(typeEnum<0 || U_FAILURE(errorCode)) { 606 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 607 exit(U_INVALID_FORMAT_ERROR); 608 } 609 items[itemCount-1].type=makeTypeLetter(typeEnum); 610 611 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 612 // sort the item names for the local charset 613 sortItems(); 614 } 615 } 616 617 udata_closeSwapper(ds); 618 } 619 620 char 621 Package::getInType() { 622 return makeTypeLetter(inCharset, inIsBigEndian); 623 } 624 625 void 626 Package::writePackage(const char *filename, char outType, const char *comment) { 627 char prefix[MAX_PKG_NAME_LENGTH+4]; 628 UDataOffsetTOCEntry entry; 629 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 630 FILE *file; 631 Item *pItem; 632 char *name; 633 UErrorCode errorCode; 634 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 635 uint8_t outCharset; 636 UBool outIsBigEndian; 637 638 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 639 640 // if there is an explicit comment, then use it, else use what's in the current header 641 if(comment!=NULL) { 642 /* get the header size minus the current comment */ 643 DataHeader *pHeader; 644 int32_t length; 645 646 pHeader=(DataHeader *)header; 647 headerLength=4+pHeader->info.size; 648 length=(int32_t)strlen(comment); 649 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 650 fprintf(stderr, "icupkg: comment too long\n"); 651 exit(U_BUFFER_OVERFLOW_ERROR); 652 } 653 memcpy(header+headerLength, comment, length+1); 654 headerLength+=length; 655 if(headerLength&0xf) { 656 /* NUL-pad the header to a multiple of 16 */ 657 length=(headerLength+0xf)&~0xf; 658 memset(header+headerLength, 0, length-headerLength); 659 headerLength=length; 660 } 661 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 662 } 663 664 makeTypeProps(outType, outCharset, outIsBigEndian); 665 666 // open (TYPE_COUNT-2) swappers 667 // one is a no-op for local type==outType 668 // one type (TYPE_LE) is bogus 669 errorCode=U_ZERO_ERROR; 670 i=makeTypeEnum(outType); 671 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 672 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 673 ds[TYPE_LE]=NULL; 674 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 675 if(U_FAILURE(errorCode)) { 676 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 677 exit(errorCode); 678 } 679 for(i=0; i<TYPE_COUNT; ++i) { 680 if(ds[i]!=NULL) { 681 ds[i]->printError=printPackageError; 682 ds[i]->printErrorContext=stderr; 683 } 684 } 685 686 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 687 688 // create the file and write its contents 689 file=fopen(filename, "wb"); 690 if(file==NULL) { 691 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 692 exit(U_FILE_ACCESS_ERROR); 693 } 694 695 // swap and write the header 696 if(dsLocalToOut!=NULL) { 697 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 698 if(U_FAILURE(errorCode)) { 699 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 700 exit(errorCode); 701 } 702 } 703 length=(int32_t)fwrite(header, 1, headerLength, file); 704 if(length!=headerLength) { 705 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 706 exit(U_FILE_ACCESS_ERROR); 707 } 708 709 // prepare and swap the package name with a tree separator 710 // for prepending to item names 711 strcat(prefix, U_TREE_ENTRY_SEP_STRING); 712 prefixLength=(int32_t)strlen(prefix); 713 if(dsLocalToOut!=NULL) { 714 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 715 if(U_FAILURE(errorCode)) { 716 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 717 exit(errorCode); 718 } 719 720 // swap and sort the item names (sorting needs to be done in the output charset) 721 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 722 if(U_FAILURE(errorCode)) { 723 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 724 exit(errorCode); 725 } 726 sortItems(); 727 } 728 729 // create the output item names in sorted order, with the package name prepended to each 730 for(i=0; i<itemCount; ++i) { 731 length=(int32_t)strlen(items[i].name); 732 name=allocString(FALSE, length+prefixLength); 733 memcpy(name, prefix, prefixLength); 734 memcpy(name+prefixLength, items[i].name, length+1); 735 items[i].name=name; 736 } 737 738 // calculate offsets for item names and items, pad to 16-align items 739 // align only the first item; each item's length is a multiple of 16 740 basenameOffset=4+8*itemCount; 741 offset=basenameOffset+outStringTop; 742 if((length=(offset&15))!=0) { 743 length=16-length; 744 memset(allocString(FALSE, length-1), 0xaa, length); 745 offset+=length; 746 } 747 748 // write the table of contents 749 // first the itemCount 750 outInt32=itemCount; 751 if(dsLocalToOut!=NULL) { 752 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 753 if(U_FAILURE(errorCode)) { 754 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 755 exit(errorCode); 756 } 757 } 758 length=(int32_t)fwrite(&outInt32, 1, 4, file); 759 if(length!=4) { 760 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 761 exit(U_FILE_ACCESS_ERROR); 762 } 763 764 // then write the item entries (and collect the maxItemLength) 765 maxItemLength=0; 766 for(i=0; i<itemCount; ++i) { 767 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 768 entry.dataOffset=(uint32_t)offset; 769 if(dsLocalToOut!=NULL) { 770 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 771 if(U_FAILURE(errorCode)) { 772 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 773 exit(errorCode); 774 } 775 } 776 length=(int32_t)fwrite(&entry, 1, 8, file); 777 if(length!=8) { 778 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 779 exit(U_FILE_ACCESS_ERROR); 780 } 781 782 length=items[i].length; 783 if(length>maxItemLength) { 784 maxItemLength=length; 785 } 786 offset+=length; 787 } 788 789 // write the item names 790 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 791 if(length!=outStringTop) { 792 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 793 exit(U_FILE_ACCESS_ERROR); 794 } 795 796 // write the items 797 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 798 int32_t type=makeTypeEnum(pItem->type); 799 if(ds[type]!=NULL) { 800 // swap each item from its platform properties to the desired ones 801 udata_swap( 802 ds[type], 803 pItem->data, pItem->length, pItem->data, 804 &errorCode); 805 if(U_FAILURE(errorCode)) { 806 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 807 exit(errorCode); 808 } 809 } 810 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 811 if(length!=pItem->length) { 812 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 813 exit(U_FILE_ACCESS_ERROR); 814 } 815 } 816 817 if(ferror(file)) { 818 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 819 exit(U_FILE_ACCESS_ERROR); 820 } 821 822 fclose(file); 823 for(i=0; i<TYPE_COUNT; ++i) { 824 udata_closeSwapper(ds[i]); 825 } 826 } 827 828 int32_t 829 Package::findItem(const char *name, int32_t length) const { 830 int32_t i, start, limit; 831 int result; 832 833 /* do a binary search for the string */ 834 start=0; 835 limit=itemCount; 836 while(start<limit) { 837 i=(start+limit)/2; 838 if(length>=0) { 839 result=strncmp(name, items[i].name, length); 840 } else { 841 result=strcmp(name, items[i].name); 842 } 843 844 if(result==0) { 845 /* found */ 846 if(length>=0) { 847 /* 848 * if we compared just prefixes, then we may need to back up 849 * to the first item with this prefix 850 */ 851 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 852 --i; 853 } 854 } 855 return i; 856 } else if(result<0) { 857 limit=i; 858 } else /* result>0 */ { 859 start=i+1; 860 } 861 } 862 863 return ~start; /* not found, return binary-not of the insertion point */ 864 } 865 866 void 867 Package::findItems(const char *pattern) { 868 const char *wild; 869 870 if(pattern==NULL || *pattern==0) { 871 findNextIndex=-1; 872 return; 873 } 874 875 findPrefix=pattern; 876 findSuffix=NULL; 877 findSuffixLength=0; 878 879 wild=strchr(pattern, '*'); 880 if(wild==NULL) { 881 // no wildcard 882 findPrefixLength=(int32_t)strlen(pattern); 883 } else { 884 // one wildcard 885 findPrefixLength=(int32_t)(wild-pattern); 886 findSuffix=wild+1; 887 findSuffixLength=(int32_t)strlen(findSuffix); 888 if(NULL!=strchr(findSuffix, '*')) { 889 // two or more wildcards 890 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 891 exit(U_PARSE_ERROR); 892 } 893 } 894 895 if(findPrefixLength==0) { 896 findNextIndex=0; 897 } else { 898 findNextIndex=findItem(findPrefix, findPrefixLength); 899 } 900 } 901 902 int32_t 903 Package::findNextItem() { 904 const char *name, *middle, *treeSep; 905 int32_t idx, nameLength, middleLength; 906 907 if(findNextIndex<0) { 908 return -1; 909 } 910 911 while(findNextIndex<itemCount) { 912 idx=findNextIndex++; 913 name=items[idx].name; 914 nameLength=(int32_t)strlen(name); 915 if(nameLength<(findPrefixLength+findSuffixLength)) { 916 // item name too short for prefix & suffix 917 continue; 918 } 919 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 920 // left the range of names with this prefix 921 break; 922 } 923 middle=name+findPrefixLength; 924 middleLength=nameLength-findPrefixLength-findSuffixLength; 925 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 926 // suffix does not match 927 continue; 928 } 929 // prefix & suffix match 930 931 if(matchMode&MATCH_NOSLASH) { 932 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 933 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 934 // the middle (matching the * wildcard) contains a tree separator / 935 continue; 936 } 937 } 938 939 // found a matching item 940 return idx; 941 } 942 943 // no more items 944 findNextIndex=-1; 945 return -1; 946 } 947 948 void 949 Package::setMatchMode(uint32_t mode) { 950 matchMode=mode; 951 } 952 953 void 954 Package::addItem(const char *name) { 955 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 956 } 957 958 void 959 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 960 int32_t idx; 961 962 idx=findItem(name); 963 if(idx<0) { 964 // new item, make space at the insertion point 965 if(itemCount>=MAX_FILE_COUNT) { 966 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 967 exit(U_BUFFER_OVERFLOW_ERROR); 968 } 969 // move the following items down 970 idx=~idx; 971 if(idx<itemCount) { 972 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 973 } 974 ++itemCount; 975 976 // reset this Item entry 977 memset(items+idx, 0, sizeof(Item)); 978 979 // copy the item's name 980 items[idx].name=allocString(TRUE, strlen(name)); 981 strcpy(items[idx].name, name); 982 pathToTree(items[idx].name); 983 } else { 984 // same-name item found, replace it 985 if(items[idx].isDataOwned) { 986 free(items[idx].data); 987 } 988 989 // keep the item's name since it is the same 990 } 991 992 // set the item's data 993 items[idx].data=data; 994 items[idx].length=length; 995 items[idx].isDataOwned=isDataOwned; 996 items[idx].type=type; 997 } 998 999 void 1000 Package::addFile(const char *filesPath, const char *name) { 1001 uint8_t *data; 1002 int32_t length; 1003 char type; 1004 1005 data=readFile(filesPath, name, length, type); 1006 // readFile() exits the tool if it fails 1007 addItem(name, data, length, TRUE, type); 1008 } 1009 1010 void 1011 Package::addItems(const Package &listPkg) { 1012 const Item *pItem; 1013 int32_t i; 1014 1015 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1016 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1017 } 1018 } 1019 1020 void 1021 Package::removeItem(int32_t idx) { 1022 if(idx>=0) { 1023 // remove the item 1024 if(items[idx].isDataOwned) { 1025 free(items[idx].data); 1026 } 1027 1028 // move the following items up 1029 if((idx+1)<itemCount) { 1030 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1031 } 1032 --itemCount; 1033 1034 if(idx<=findNextIndex) { 1035 --findNextIndex; 1036 } 1037 } 1038 } 1039 1040 void 1041 Package::removeItems(const char *pattern) { 1042 int32_t idx; 1043 1044 findItems(pattern); 1045 while((idx=findNextItem())>=0) { 1046 removeItem(idx); 1047 } 1048 } 1049 1050 void 1051 Package::removeItems(const Package &listPkg) { 1052 const Item *pItem; 1053 int32_t i; 1054 1055 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1056 removeItems(pItem->name); 1057 } 1058 } 1059 1060 void 1061 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1062 char filename[1024]; 1063 UDataSwapper *ds; 1064 FILE *file; 1065 Item *pItem; 1066 int32_t fileLength; 1067 uint8_t itemCharset, outCharset; 1068 UBool itemIsBigEndian, outIsBigEndian; 1069 1070 if(idx<0 || itemCount<=idx) { 1071 return; 1072 } 1073 pItem=items+idx; 1074 1075 // swap the data to the outType 1076 // outType==0: don't swap 1077 if(outType!=0 && pItem->type!=outType) { 1078 // open the swapper 1079 UErrorCode errorCode=U_ZERO_ERROR; 1080 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1081 makeTypeProps(outType, outCharset, outIsBigEndian); 1082 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1083 if(U_FAILURE(errorCode)) { 1084 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1085 (long)idx, u_errorName(errorCode)); 1086 exit(errorCode); 1087 } 1088 1089 ds->printError=printPackageError; 1090 ds->printErrorContext=stderr; 1091 1092 // swap the item from its platform properties to the desired ones 1093 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1094 if(U_FAILURE(errorCode)) { 1095 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1096 exit(errorCode); 1097 } 1098 udata_closeSwapper(ds); 1099 } 1100 1101 // create the file and write its contents 1102 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1103 file=fopen(filename, "wb"); 1104 if(file==NULL) { 1105 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1106 exit(U_FILE_ACCESS_ERROR); 1107 } 1108 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1109 1110 if(ferror(file) || fileLength!=pItem->length) { 1111 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1112 exit(U_FILE_ACCESS_ERROR); 1113 } 1114 fclose(file); 1115 } 1116 1117 void 1118 Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1119 extractItem(filesPath, items[idx].name, idx, outType); 1120 } 1121 1122 void 1123 Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1124 int32_t idx; 1125 1126 findItems(pattern); 1127 while((idx=findNextItem())>=0) { 1128 extractItem(filesPath, idx, outType); 1129 } 1130 } 1131 1132 void 1133 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1134 const Item *pItem; 1135 int32_t i; 1136 1137 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1138 extractItems(filesPath, pItem->name, outType); 1139 } 1140 } 1141 1142 int32_t 1143 Package::getItemCount() const { 1144 return itemCount; 1145 } 1146 1147 const Item * 1148 Package::getItem(int32_t idx) const { 1149 if (0 <= idx && idx < itemCount) { 1150 return &items[idx]; 1151 } 1152 return NULL; 1153 } 1154 1155 void 1156 Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1157 // check dependency: make sure the target item is in the package 1158 Package *me=(Package *)context; 1159 if(me->findItem(targetName)<0) { 1160 me->isMissingItems=TRUE; 1161 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1162 } 1163 } 1164 1165 UBool 1166 Package::checkDependencies() { 1167 isMissingItems=FALSE; 1168 enumDependencies(this, checkDependency); 1169 return (UBool)!isMissingItems; 1170 } 1171 1172 void 1173 Package::enumDependencies(void *context, CheckDependency check) { 1174 int32_t i; 1175 1176 for(i=0; i<itemCount; ++i) { 1177 enumDependencies(items+i, context, check); 1178 } 1179 } 1180 1181 char * 1182 Package::allocString(UBool in, int32_t length) { 1183 char *p; 1184 int32_t top; 1185 1186 if(in) { 1187 top=inStringTop; 1188 p=inStrings+top; 1189 } else { 1190 top=outStringTop; 1191 p=outStrings+top; 1192 } 1193 top+=length+1; 1194 1195 if(top>STRING_STORE_SIZE) { 1196 fprintf(stderr, "icupkg: string storage overflow\n"); 1197 exit(U_BUFFER_OVERFLOW_ERROR); 1198 } 1199 if(in) { 1200 inStringTop=top; 1201 } else { 1202 outStringTop=top; 1203 } 1204 return p; 1205 } 1206 1207 void 1208 Package::sortItems() { 1209 UErrorCode errorCode=U_ZERO_ERROR; 1210 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1211 if(U_FAILURE(errorCode)) { 1212 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1213 exit(errorCode); 1214 } 1215 } 1216 1217 U_NAMESPACE_END 1218