1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1999-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: package.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2005aug25 14 * created by: Markus W. Scherer 15 * 16 * Read, modify, and write ICU .dat data package files. 17 * This is an integral part of the icupkg tool, moved to the toolutil library 18 * because parts of tool implementations tend to be later shared by 19 * other tools. 20 * Subsumes functionality and implementation code from 21 * gencmn, decmn, and icuswap tools. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/putil.h" 26 #include "unicode/udata.h" 27 #include "cstring.h" 28 #include "uarrsort.h" 29 #include "ucmndata.h" 30 #include "udataswp.h" 31 #include "swapimpl.h" 32 #include "toolutil.h" 33 #include "package.h" 34 #include "cmemory.h" 35 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 40 41 static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43 // general definitions ----------------------------------------------------- *** 44 45 /* UDataInfo cf. udata.h */ 46 static const UDataInfo dataInfo={ 47 (uint16_t)sizeof(UDataInfo), 48 0, 49 50 U_IS_BIG_ENDIAN, 51 U_CHARSET_FAMILY, 52 (uint8_t)sizeof(UChar), 53 0, 54 55 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 56 {1, 0, 0, 0}, /* formatVersion */ 57 {3, 0, 0, 0} /* dataVersion */ 58 }; 59 60 U_CDECL_BEGIN 61 static void U_CALLCONV 62 printPackageError(void *context, const char *fmt, va_list args) { 63 vfprintf((FILE *)context, fmt, args); 64 } 65 U_CDECL_END 66 67 static uint16_t 68 readSwapUInt16(uint16_t x) { 69 return (uint16_t)((x<<8)|(x>>8)); 70 } 71 72 // platform types ---------------------------------------------------------- *** 73 74 static const char *types="lb?e"; 75 76 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 77 78 static inline int32_t 79 makeTypeEnum(uint8_t charset, UBool isBigEndian) { 80 return 2*(int32_t)charset+isBigEndian; 81 } 82 83 static inline int32_t 84 makeTypeEnum(char type) { 85 return 86 type == 'l' ? TYPE_L : 87 type == 'b' ? TYPE_B : 88 type == 'e' ? TYPE_E : 89 -1; 90 } 91 92 static inline char 93 makeTypeLetter(uint8_t charset, UBool isBigEndian) { 94 return types[makeTypeEnum(charset, isBigEndian)]; 95 } 96 97 static inline char 98 makeTypeLetter(int32_t typeEnum) { 99 return types[typeEnum]; 100 } 101 102 static void 103 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 104 int32_t typeEnum=makeTypeEnum(type); 105 charset=(uint8_t)(typeEnum>>1); 106 isBigEndian=(UBool)(typeEnum&1); 107 } 108 109 U_CFUNC const UDataInfo * 110 getDataInfo(const uint8_t *data, int32_t length, 111 int32_t &infoLength, int32_t &headerLength, 112 UErrorCode *pErrorCode) { 113 const DataHeader *pHeader; 114 const UDataInfo *pInfo; 115 116 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 117 return NULL; 118 } 119 if( data==NULL || 120 (length>=0 && length<(int32_t)sizeof(DataHeader)) 121 ) { 122 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 123 return NULL; 124 } 125 126 pHeader=(const DataHeader *)data; 127 pInfo=&pHeader->info; 128 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 129 pHeader->dataHeader.magic1!=0xda || 130 pHeader->dataHeader.magic2!=0x27 || 131 pInfo->sizeofUChar!=2 132 ) { 133 *pErrorCode=U_UNSUPPORTED_ERROR; 134 return NULL; 135 } 136 137 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 138 headerLength=pHeader->dataHeader.headerSize; 139 infoLength=pInfo->size; 140 } else { 141 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 142 infoLength=readSwapUInt16(pInfo->size); 143 } 144 145 if( headerLength<(int32_t)sizeof(DataHeader) || 146 infoLength<(int32_t)sizeof(UDataInfo) || 147 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 148 (length>=0 && length<headerLength) 149 ) { 150 *pErrorCode=U_UNSUPPORTED_ERROR; 151 return NULL; 152 } 153 154 return pInfo; 155 } 156 157 static int32_t 158 getTypeEnumForInputData(const uint8_t *data, int32_t length, 159 UErrorCode *pErrorCode) { 160 const UDataInfo *pInfo; 161 int32_t infoLength, headerLength; 162 163 /* getDataInfo() checks for illegal arguments */ 164 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 165 if(pInfo==NULL) { 166 return -1; 167 } 168 169 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 170 } 171 172 // file handling ----------------------------------------------------------- *** 173 174 static void 175 extractPackageName(const char *filename, 176 char pkg[], int32_t capacity) { 177 const char *basename; 178 int32_t len; 179 180 basename=findBasename(filename); 181 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 182 183 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 184 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 185 basename); 186 exit(U_ILLEGAL_ARGUMENT_ERROR); 187 } 188 189 if(len>=capacity) { 190 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 191 basename, (long)capacity); 192 exit(U_ILLEGAL_ARGUMENT_ERROR); 193 } 194 195 memcpy(pkg, basename, len); 196 pkg[len]=0; 197 } 198 199 static int32_t 200 getFileLength(FILE *f) { 201 int32_t length; 202 203 fseek(f, 0, SEEK_END); 204 length=(int32_t)ftell(f); 205 fseek(f, 0, SEEK_SET); 206 return length; 207 } 208 209 /* 210 * Turn tree separators and alternate file separators into normal file separators. 211 */ 212 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 213 #define treeToPath(s) 214 #else 215 static void 216 treeToPath(char *s) { 217 char *t; 218 219 for(t=s; *t!=0; ++t) { 220 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 221 *t=U_FILE_SEP_CHAR; 222 } 223 } 224 } 225 #endif 226 227 /* 228 * Turn file separators into tree separators. 229 */ 230 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 231 #define pathToTree(s) 232 #else 233 static void 234 pathToTree(char *s) { 235 char *t; 236 237 for(t=s; *t!=0; ++t) { 238 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 239 *t=U_TREE_ENTRY_SEP_CHAR; 240 } 241 } 242 } 243 #endif 244 245 /* 246 * Prepend the path (if any) to the name and run the name through treeToName(). 247 */ 248 static void 249 makeFullFilename(const char *path, const char *name, 250 char *filename, int32_t capacity) { 251 char *s; 252 253 // prepend the path unless NULL or empty 254 if(path!=NULL && path[0]!=0) { 255 if((int32_t)(strlen(path)+1)>=capacity) { 256 fprintf(stderr, "pathname too long: \"%s\"\n", path); 257 exit(U_BUFFER_OVERFLOW_ERROR); 258 } 259 strcpy(filename, path); 260 261 // make sure the path ends with a file separator 262 s=strchr(filename, 0); 263 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 264 *s++=U_FILE_SEP_CHAR; 265 } 266 } else { 267 s=filename; 268 } 269 270 // turn the name into a filename, turn tree separators into file separators 271 if((int32_t)((s-filename)+strlen(name))>=capacity) { 272 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 273 exit(U_BUFFER_OVERFLOW_ERROR); 274 } 275 strcpy(s, name); 276 treeToPath(s); 277 } 278 279 static void 280 makeFullFilenameAndDirs(const char *path, const char *name, 281 char *filename, int32_t capacity) { 282 char *sep; 283 UErrorCode errorCode; 284 285 makeFullFilename(path, name, filename, capacity); 286 287 // make tree directories 288 errorCode=U_ZERO_ERROR; 289 sep=strchr(filename, 0)-strlen(name); 290 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 291 if(sep!=filename) { 292 *sep=0; // truncate temporarily 293 uprv_mkdir(filename, &errorCode); 294 if(U_FAILURE(errorCode)) { 295 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 296 exit(U_FILE_ACCESS_ERROR); 297 } 298 } 299 *sep++=U_FILE_SEP_CHAR; // restore file separator character 300 } 301 } 302 303 static uint8_t * 304 readFile(const char *path, const char *name, int32_t &length, char &type) { 305 char filename[1024]; 306 FILE *file; 307 uint8_t *data; 308 UErrorCode errorCode; 309 int32_t fileLength, typeEnum; 310 311 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 312 313 /* open the input file, get its length, allocate memory for it, read the file */ 314 file=fopen(filename, "rb"); 315 if(file==NULL) { 316 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 317 exit(U_FILE_ACCESS_ERROR); 318 } 319 320 /* get the file length */ 321 fileLength=getFileLength(file); 322 if(ferror(file) || fileLength<=0) { 323 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 324 fclose(file); 325 exit(U_FILE_ACCESS_ERROR); 326 } 327 328 /* allocate the buffer, pad to multiple of 16 */ 329 length=(fileLength+0xf)&~0xf; 330 data=(uint8_t *)uprv_malloc(length); 331 if(data==NULL) { 332 fclose(file); 333 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 334 exit(U_MEMORY_ALLOCATION_ERROR); 335 } 336 337 /* read the file */ 338 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 339 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 340 fclose(file); 341 free(data); 342 exit(U_FILE_ACCESS_ERROR); 343 } 344 345 /* pad the file to a multiple of 16 using the usual padding byte */ 346 if(fileLength<length) { 347 memset(data+fileLength, 0xaa, length-fileLength); 348 } 349 350 fclose(file); 351 352 // minimum check for ICU-format data 353 errorCode=U_ZERO_ERROR; 354 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 355 if(typeEnum<0 || U_FAILURE(errorCode)) { 356 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 357 free(data); 358 #if !UCONFIG_NO_LEGACY_CONVERSION 359 exit(U_INVALID_FORMAT_ERROR); 360 #else 361 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 362 exit(0); 363 #endif 364 } 365 type=makeTypeLetter(typeEnum); 366 367 return data; 368 } 369 370 // .dat package file representation ---------------------------------------- *** 371 372 U_CDECL_BEGIN 373 374 static int32_t U_CALLCONV 375 compareItems(const void * /*context*/, const void *left, const void *right) { 376 U_NAMESPACE_USE 377 378 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 379 } 380 381 U_CDECL_END 382 383 U_NAMESPACE_BEGIN 384 385 Package::Package() 386 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { 387 inPkgName[0]=0; 388 pkgPrefix[0]=0; 389 inData=NULL; 390 inLength=0; 391 inCharset=U_CHARSET_FAMILY; 392 inIsBigEndian=U_IS_BIG_ENDIAN; 393 394 itemCount=0; 395 itemMax=0; 396 items=NULL; 397 398 inStringTop=outStringTop=0; 399 400 matchMode=0; 401 findPrefix=findSuffix=NULL; 402 findPrefixLength=findSuffixLength=0; 403 findNextIndex=-1; 404 405 // create a header for an empty package 406 DataHeader *pHeader; 407 pHeader=(DataHeader *)header; 408 pHeader->dataHeader.magic1=0xda; 409 pHeader->dataHeader.magic2=0x27; 410 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 411 headerLength=(int32_t)(4+sizeof(dataInfo)); 412 if(headerLength&0xf) { 413 /* NUL-pad the header to a multiple of 16 */ 414 int32_t length=(headerLength+0xf)&~0xf; 415 memset(header+headerLength, 0, length-headerLength); 416 headerLength=length; 417 } 418 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 419 } 420 421 Package::~Package() { 422 int32_t idx; 423 424 free(inData); 425 426 for(idx=0; idx<itemCount; ++idx) { 427 if(items[idx].isDataOwned) { 428 free(items[idx].data); 429 } 430 } 431 432 uprv_free((void*)items); 433 } 434 435 void 436 Package::setPrefix(const char *p) { 437 if(strlen(p)>=sizeof(pkgPrefix)) { 438 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); 439 exit(U_ILLEGAL_ARGUMENT_ERROR); 440 } 441 strcpy(pkgPrefix, p); 442 } 443 444 void 445 Package::readPackage(const char *filename) { 446 UDataSwapper *ds; 447 const UDataInfo *pInfo; 448 UErrorCode errorCode; 449 450 const uint8_t *inBytes; 451 452 int32_t length, offset, i; 453 int32_t itemLength, typeEnum; 454 char type; 455 456 const UDataOffsetTOCEntry *inEntries; 457 458 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 459 460 /* read the file */ 461 inData=readFile(NULL, filename, inLength, type); 462 length=inLength; 463 464 /* 465 * swap the header - even if the swapping itself is a no-op 466 * because it tells us the header length 467 */ 468 errorCode=U_ZERO_ERROR; 469 makeTypeProps(type, inCharset, inIsBigEndian); 470 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 471 if(U_FAILURE(errorCode)) { 472 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 473 filename, u_errorName(errorCode)); 474 exit(errorCode); 475 } 476 477 ds->printError=printPackageError; 478 ds->printErrorContext=stderr; 479 480 headerLength=sizeof(header); 481 if(length<headerLength) { 482 headerLength=length; 483 } 484 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 485 if(U_FAILURE(errorCode)) { 486 exit(errorCode); 487 } 488 489 /* check data format and format version */ 490 pInfo=(const UDataInfo *)((const char *)inData+4); 491 if(!( 492 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 493 pInfo->dataFormat[1]==0x6d && 494 pInfo->dataFormat[2]==0x6e && 495 pInfo->dataFormat[3]==0x44 && 496 pInfo->formatVersion[0]==1 497 )) { 498 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 499 pInfo->dataFormat[0], pInfo->dataFormat[1], 500 pInfo->dataFormat[2], pInfo->dataFormat[3], 501 pInfo->formatVersion[0]); 502 exit(U_UNSUPPORTED_ERROR); 503 } 504 inIsBigEndian=(UBool)pInfo->isBigEndian; 505 inCharset=pInfo->charsetFamily; 506 507 inBytes=(const uint8_t *)inData+headerLength; 508 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 509 510 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 511 length-=headerLength; 512 if(length<4) { 513 /* itemCount does not fit */ 514 offset=0x7fffffff; 515 } else { 516 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 517 setItemCapacity(itemCount); /* resize so there's space */ 518 if(itemCount==0) { 519 offset=4; 520 } else if(length<(4+8*itemCount)) { 521 /* ToC table does not fit */ 522 offset=0x7fffffff; 523 } else { 524 /* offset of the last item plus at least 20 bytes for its header */ 525 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 526 } 527 } 528 if(length<offset) { 529 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 530 (long)length); 531 exit(U_INDEX_OUTOFBOUNDS_ERROR); 532 } 533 /* do not modify the package length variable until the last item's length is set */ 534 535 if(itemCount<=0) { 536 if(doAutoPrefix) { 537 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); 538 exit(U_INVALID_FORMAT_ERROR); 539 } 540 } else { 541 char prefix[MAX_PKG_NAME_LENGTH+4]; 542 char *s, *inItemStrings; 543 544 if(itemCount>itemMax) { 545 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 546 exit(U_BUFFER_OVERFLOW_ERROR); 547 } 548 549 /* swap the item name strings */ 550 int32_t stringsOffset=4+8*itemCount; 551 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 552 553 // don't include padding bytes at the end of the item names 554 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 555 --itemLength; 556 } 557 558 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 559 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 560 exit(U_BUFFER_OVERFLOW_ERROR); 561 } 562 563 inItemStrings=inStrings+inStringTop; 564 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 565 if(U_FAILURE(errorCode)) { 566 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 567 exit(U_INVALID_FORMAT_ERROR); 568 } 569 inStringTop+=itemLength; 570 571 // reset the Item entries 572 memset(items, 0, itemCount*sizeof(Item)); 573 574 /* 575 * Get the common prefix of the items. 576 * New-style ICU .dat packages use tree separators ('/') between package names, 577 * tree names, and item names, 578 * while old-style ICU .dat packages (before multi-tree support) 579 * use an underscore ('_') between package and item names. 580 */ 581 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 582 s=inItemStrings+offset; // name of the first entry 583 int32_t prefixLength; 584 if(doAutoPrefix) { 585 // Use the first entry's prefix. Must be a new-style package. 586 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); 587 if(prefixLimit==NULL) { 588 fprintf(stderr, 589 "icupkg: --auto_toc_prefix[_with_type] but " 590 "the first entry \"%s\" does not contain a '%c'\n", 591 s, U_TREE_ENTRY_SEP_CHAR); 592 exit(U_INVALID_FORMAT_ERROR); 593 } 594 prefixLength=(int32_t)(prefixLimit-s); 595 if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) { 596 fprintf(stderr, 597 "icupkg: --auto_toc_prefix[_with_type] but " 598 "the prefix of the first entry \"%s\" is empty or too long\n", 599 s); 600 exit(U_INVALID_FORMAT_ERROR); 601 } 602 if(prefixEndsWithType && s[prefixLength-1]!=type) { 603 fprintf(stderr, 604 "icupkg: --auto_toc_prefix_with_type but " 605 "the prefix of the first entry \"%s\" does not end with '%c'\n", 606 s, type); 607 exit(U_INVALID_FORMAT_ERROR); 608 } 609 memcpy(pkgPrefix, s, prefixLength); 610 pkgPrefix[prefixLength]=0; 611 memcpy(prefix, s, ++prefixLength); // include the / 612 } else { 613 // Use the package basename as prefix. 614 int32_t inPkgNameLength=strlen(inPkgName); 615 memcpy(prefix, inPkgName, inPkgNameLength); 616 prefixLength=inPkgNameLength; 617 618 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 619 0==memcmp(s, inPkgName, inPkgNameLength) && 620 s[inPkgNameLength]=='_' 621 ) { 622 // old-style .dat package 623 prefix[prefixLength++]='_'; 624 } else { 625 // new-style .dat package 626 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 627 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 628 // then the test in the loop below will fail 629 } 630 } 631 prefix[prefixLength]=0; 632 633 /* read the ToC table */ 634 for(i=0; i<itemCount; ++i) { 635 // skip the package part of the item name, error if it does not match the actual package name 636 // or if nothing follows the package name 637 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 638 s=inItemStrings+offset; 639 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 640 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 641 s, prefix); 642 exit(U_INVALID_FORMAT_ERROR); 643 } 644 items[i].name=s+prefixLength; 645 646 // set the item's data 647 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 648 if(i>0) { 649 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 650 651 // set the previous item's platform type 652 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 653 if(typeEnum<0 || U_FAILURE(errorCode)) { 654 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 655 exit(U_INVALID_FORMAT_ERROR); 656 } 657 items[i-1].type=makeTypeLetter(typeEnum); 658 } 659 items[i].isDataOwned=FALSE; 660 } 661 // set the last item's length 662 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 663 664 // set the last item's platform type 665 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 666 if(typeEnum<0 || U_FAILURE(errorCode)) { 667 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 668 exit(U_INVALID_FORMAT_ERROR); 669 } 670 items[itemCount-1].type=makeTypeLetter(typeEnum); 671 672 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 673 // sort the item names for the local charset 674 sortItems(); 675 } 676 } 677 678 udata_closeSwapper(ds); 679 } 680 681 char 682 Package::getInType() { 683 return makeTypeLetter(inCharset, inIsBigEndian); 684 } 685 686 void 687 Package::writePackage(const char *filename, char outType, const char *comment) { 688 char prefix[MAX_PKG_NAME_LENGTH+4]; 689 UDataOffsetTOCEntry entry; 690 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 691 FILE *file; 692 Item *pItem; 693 char *name; 694 UErrorCode errorCode; 695 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 696 uint8_t outCharset; 697 UBool outIsBigEndian; 698 699 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 700 701 // if there is an explicit comment, then use it, else use what's in the current header 702 if(comment!=NULL) { 703 /* get the header size minus the current comment */ 704 DataHeader *pHeader; 705 int32_t length; 706 707 pHeader=(DataHeader *)header; 708 headerLength=4+pHeader->info.size; 709 length=(int32_t)strlen(comment); 710 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 711 fprintf(stderr, "icupkg: comment too long\n"); 712 exit(U_BUFFER_OVERFLOW_ERROR); 713 } 714 memcpy(header+headerLength, comment, length+1); 715 headerLength+=length; 716 if(headerLength&0xf) { 717 /* NUL-pad the header to a multiple of 16 */ 718 length=(headerLength+0xf)&~0xf; 719 memset(header+headerLength, 0, length-headerLength); 720 headerLength=length; 721 } 722 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 723 } 724 725 makeTypeProps(outType, outCharset, outIsBigEndian); 726 727 // open (TYPE_COUNT-2) swappers 728 // one is a no-op for local type==outType 729 // one type (TYPE_LE) is bogus 730 errorCode=U_ZERO_ERROR; 731 i=makeTypeEnum(outType); 732 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 733 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 734 ds[TYPE_LE]=NULL; 735 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 736 if(U_FAILURE(errorCode)) { 737 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 738 exit(errorCode); 739 } 740 for(i=0; i<TYPE_COUNT; ++i) { 741 if(ds[i]!=NULL) { 742 ds[i]->printError=printPackageError; 743 ds[i]->printErrorContext=stderr; 744 } 745 } 746 747 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 748 749 // create the file and write its contents 750 file=fopen(filename, "wb"); 751 if(file==NULL) { 752 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 753 exit(U_FILE_ACCESS_ERROR); 754 } 755 756 // swap and write the header 757 if(dsLocalToOut!=NULL) { 758 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 759 if(U_FAILURE(errorCode)) { 760 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 761 exit(errorCode); 762 } 763 } 764 length=(int32_t)fwrite(header, 1, headerLength, file); 765 if(length!=headerLength) { 766 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 767 exit(U_FILE_ACCESS_ERROR); 768 } 769 770 // prepare and swap the package name with a tree separator 771 // for prepending to item names 772 if(pkgPrefix[0]==0) { 773 prefixLength=(int32_t)strlen(prefix); 774 } else { 775 prefixLength=(int32_t)strlen(pkgPrefix); 776 memcpy(prefix, pkgPrefix, prefixLength); 777 if(prefixEndsWithType) { 778 prefix[prefixLength-1]=outType; 779 } 780 } 781 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 782 prefix[prefixLength]=0; 783 if(dsLocalToOut!=NULL) { 784 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 785 if(U_FAILURE(errorCode)) { 786 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 787 exit(errorCode); 788 } 789 790 // swap and sort the item names (sorting needs to be done in the output charset) 791 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 792 if(U_FAILURE(errorCode)) { 793 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 794 exit(errorCode); 795 } 796 sortItems(); 797 } 798 799 // create the output item names in sorted order, with the package name prepended to each 800 for(i=0; i<itemCount; ++i) { 801 length=(int32_t)strlen(items[i].name); 802 name=allocString(FALSE, length+prefixLength); 803 memcpy(name, prefix, prefixLength); 804 memcpy(name+prefixLength, items[i].name, length+1); 805 items[i].name=name; 806 } 807 808 // calculate offsets for item names and items, pad to 16-align items 809 // align only the first item; each item's length is a multiple of 16 810 basenameOffset=4+8*itemCount; 811 offset=basenameOffset+outStringTop; 812 if((length=(offset&15))!=0) { 813 length=16-length; 814 memset(allocString(FALSE, length-1), 0xaa, length); 815 offset+=length; 816 } 817 818 // write the table of contents 819 // first the itemCount 820 outInt32=itemCount; 821 if(dsLocalToOut!=NULL) { 822 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 823 if(U_FAILURE(errorCode)) { 824 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 825 exit(errorCode); 826 } 827 } 828 length=(int32_t)fwrite(&outInt32, 1, 4, file); 829 if(length!=4) { 830 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 831 exit(U_FILE_ACCESS_ERROR); 832 } 833 834 // then write the item entries (and collect the maxItemLength) 835 maxItemLength=0; 836 for(i=0; i<itemCount; ++i) { 837 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 838 entry.dataOffset=(uint32_t)offset; 839 if(dsLocalToOut!=NULL) { 840 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 841 if(U_FAILURE(errorCode)) { 842 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 843 exit(errorCode); 844 } 845 } 846 length=(int32_t)fwrite(&entry, 1, 8, file); 847 if(length!=8) { 848 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 849 exit(U_FILE_ACCESS_ERROR); 850 } 851 852 length=items[i].length; 853 if(length>maxItemLength) { 854 maxItemLength=length; 855 } 856 offset+=length; 857 } 858 859 // write the item names 860 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 861 if(length!=outStringTop) { 862 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 863 exit(U_FILE_ACCESS_ERROR); 864 } 865 866 // write the items 867 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 868 int32_t type=makeTypeEnum(pItem->type); 869 if(ds[type]!=NULL) { 870 // swap each item from its platform properties to the desired ones 871 udata_swap( 872 ds[type], 873 pItem->data, pItem->length, pItem->data, 874 &errorCode); 875 if(U_FAILURE(errorCode)) { 876 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 877 exit(errorCode); 878 } 879 } 880 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 881 if(length!=pItem->length) { 882 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 883 exit(U_FILE_ACCESS_ERROR); 884 } 885 } 886 887 if(ferror(file)) { 888 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 889 exit(U_FILE_ACCESS_ERROR); 890 } 891 892 fclose(file); 893 for(i=0; i<TYPE_COUNT; ++i) { 894 udata_closeSwapper(ds[i]); 895 } 896 } 897 898 int32_t 899 Package::findItem(const char *name, int32_t length) const { 900 int32_t i, start, limit; 901 int result; 902 903 /* do a binary search for the string */ 904 start=0; 905 limit=itemCount; 906 while(start<limit) { 907 i=(start+limit)/2; 908 if(length>=0) { 909 result=strncmp(name, items[i].name, length); 910 } else { 911 result=strcmp(name, items[i].name); 912 } 913 914 if(result==0) { 915 /* found */ 916 if(length>=0) { 917 /* 918 * if we compared just prefixes, then we may need to back up 919 * to the first item with this prefix 920 */ 921 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 922 --i; 923 } 924 } 925 return i; 926 } else if(result<0) { 927 limit=i; 928 } else /* result>0 */ { 929 start=i+1; 930 } 931 } 932 933 return ~start; /* not found, return binary-not of the insertion point */ 934 } 935 936 void 937 Package::findItems(const char *pattern) { 938 const char *wild; 939 940 if(pattern==NULL || *pattern==0) { 941 findNextIndex=-1; 942 return; 943 } 944 945 findPrefix=pattern; 946 findSuffix=NULL; 947 findSuffixLength=0; 948 949 wild=strchr(pattern, '*'); 950 if(wild==NULL) { 951 // no wildcard 952 findPrefixLength=(int32_t)strlen(pattern); 953 } else { 954 // one wildcard 955 findPrefixLength=(int32_t)(wild-pattern); 956 findSuffix=wild+1; 957 findSuffixLength=(int32_t)strlen(findSuffix); 958 if(NULL!=strchr(findSuffix, '*')) { 959 // two or more wildcards 960 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 961 exit(U_PARSE_ERROR); 962 } 963 } 964 965 if(findPrefixLength==0) { 966 findNextIndex=0; 967 } else { 968 findNextIndex=findItem(findPrefix, findPrefixLength); 969 } 970 } 971 972 int32_t 973 Package::findNextItem() { 974 const char *name, *middle, *treeSep; 975 int32_t idx, nameLength, middleLength; 976 977 if(findNextIndex<0) { 978 return -1; 979 } 980 981 while(findNextIndex<itemCount) { 982 idx=findNextIndex++; 983 name=items[idx].name; 984 nameLength=(int32_t)strlen(name); 985 if(nameLength<(findPrefixLength+findSuffixLength)) { 986 // item name too short for prefix & suffix 987 continue; 988 } 989 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 990 // left the range of names with this prefix 991 break; 992 } 993 middle=name+findPrefixLength; 994 middleLength=nameLength-findPrefixLength-findSuffixLength; 995 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 996 // suffix does not match 997 continue; 998 } 999 // prefix & suffix match 1000 1001 if(matchMode&MATCH_NOSLASH) { 1002 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 1003 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 1004 // the middle (matching the * wildcard) contains a tree separator / 1005 continue; 1006 } 1007 } 1008 1009 // found a matching item 1010 return idx; 1011 } 1012 1013 // no more items 1014 findNextIndex=-1; 1015 return -1; 1016 } 1017 1018 void 1019 Package::setMatchMode(uint32_t mode) { 1020 matchMode=mode; 1021 } 1022 1023 void 1024 Package::addItem(const char *name) { 1025 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 1026 } 1027 1028 void 1029 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 1030 int32_t idx; 1031 1032 idx=findItem(name); 1033 if(idx<0) { 1034 // new item, make space at the insertion point 1035 ensureItemCapacity(); 1036 // move the following items down 1037 idx=~idx; 1038 if(idx<itemCount) { 1039 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 1040 } 1041 ++itemCount; 1042 1043 // reset this Item entry 1044 memset(items+idx, 0, sizeof(Item)); 1045 1046 // copy the item's name 1047 items[idx].name=allocString(TRUE, strlen(name)); 1048 strcpy(items[idx].name, name); 1049 pathToTree(items[idx].name); 1050 } else { 1051 // same-name item found, replace it 1052 if(items[idx].isDataOwned) { 1053 free(items[idx].data); 1054 } 1055 1056 // keep the item's name since it is the same 1057 } 1058 1059 // set the item's data 1060 items[idx].data=data; 1061 items[idx].length=length; 1062 items[idx].isDataOwned=isDataOwned; 1063 items[idx].type=type; 1064 } 1065 1066 void 1067 Package::addFile(const char *filesPath, const char *name) { 1068 uint8_t *data; 1069 int32_t length; 1070 char type; 1071 1072 data=readFile(filesPath, name, length, type); 1073 // readFile() exits the tool if it fails 1074 addItem(name, data, length, TRUE, type); 1075 } 1076 1077 void 1078 Package::addItems(const Package &listPkg) { 1079 const Item *pItem; 1080 int32_t i; 1081 1082 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1083 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1084 } 1085 } 1086 1087 void 1088 Package::removeItem(int32_t idx) { 1089 if(idx>=0) { 1090 // remove the item 1091 if(items[idx].isDataOwned) { 1092 free(items[idx].data); 1093 } 1094 1095 // move the following items up 1096 if((idx+1)<itemCount) { 1097 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1098 } 1099 --itemCount; 1100 1101 if(idx<=findNextIndex) { 1102 --findNextIndex; 1103 } 1104 } 1105 } 1106 1107 void 1108 Package::removeItems(const char *pattern) { 1109 int32_t idx; 1110 1111 findItems(pattern); 1112 while((idx=findNextItem())>=0) { 1113 removeItem(idx); 1114 } 1115 } 1116 1117 void 1118 Package::removeItems(const Package &listPkg) { 1119 const Item *pItem; 1120 int32_t i; 1121 1122 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1123 removeItems(pItem->name); 1124 } 1125 } 1126 1127 void 1128 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1129 char filename[1024]; 1130 UDataSwapper *ds; 1131 FILE *file; 1132 Item *pItem; 1133 int32_t fileLength; 1134 uint8_t itemCharset, outCharset; 1135 UBool itemIsBigEndian, outIsBigEndian; 1136 1137 if(idx<0 || itemCount<=idx) { 1138 return; 1139 } 1140 pItem=items+idx; 1141 1142 // swap the data to the outType 1143 // outType==0: don't swap 1144 if(outType!=0 && pItem->type!=outType) { 1145 // open the swapper 1146 UErrorCode errorCode=U_ZERO_ERROR; 1147 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1148 makeTypeProps(outType, outCharset, outIsBigEndian); 1149 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1150 if(U_FAILURE(errorCode)) { 1151 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1152 (long)idx, u_errorName(errorCode)); 1153 exit(errorCode); 1154 } 1155 1156 ds->printError=printPackageError; 1157 ds->printErrorContext=stderr; 1158 1159 // swap the item from its platform properties to the desired ones 1160 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1161 if(U_FAILURE(errorCode)) { 1162 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1163 exit(errorCode); 1164 } 1165 udata_closeSwapper(ds); 1166 pItem->type=outType; 1167 } 1168 1169 // create the file and write its contents 1170 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1171 file=fopen(filename, "wb"); 1172 if(file==NULL) { 1173 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1174 exit(U_FILE_ACCESS_ERROR); 1175 } 1176 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1177 1178 if(ferror(file) || fileLength!=pItem->length) { 1179 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1180 exit(U_FILE_ACCESS_ERROR); 1181 } 1182 fclose(file); 1183 } 1184 1185 void 1186 Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1187 extractItem(filesPath, items[idx].name, idx, outType); 1188 } 1189 1190 void 1191 Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1192 int32_t idx; 1193 1194 findItems(pattern); 1195 while((idx=findNextItem())>=0) { 1196 extractItem(filesPath, idx, outType); 1197 } 1198 } 1199 1200 void 1201 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1202 const Item *pItem; 1203 int32_t i; 1204 1205 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1206 extractItems(filesPath, pItem->name, outType); 1207 } 1208 } 1209 1210 int32_t 1211 Package::getItemCount() const { 1212 return itemCount; 1213 } 1214 1215 const Item * 1216 Package::getItem(int32_t idx) const { 1217 if (0 <= idx && idx < itemCount) { 1218 return &items[idx]; 1219 } 1220 return NULL; 1221 } 1222 1223 void 1224 Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1225 // check dependency: make sure the target item is in the package 1226 Package *me=(Package *)context; 1227 if(me->findItem(targetName)<0) { 1228 me->isMissingItems=TRUE; 1229 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1230 } 1231 } 1232 1233 UBool 1234 Package::checkDependencies() { 1235 isMissingItems=FALSE; 1236 enumDependencies(this, checkDependency); 1237 return (UBool)!isMissingItems; 1238 } 1239 1240 void 1241 Package::enumDependencies(void *context, CheckDependency check) { 1242 int32_t i; 1243 1244 for(i=0; i<itemCount; ++i) { 1245 enumDependencies(items+i, context, check); 1246 } 1247 } 1248 1249 char * 1250 Package::allocString(UBool in, int32_t length) { 1251 char *p; 1252 int32_t top; 1253 1254 if(in) { 1255 top=inStringTop; 1256 p=inStrings+top; 1257 } else { 1258 top=outStringTop; 1259 p=outStrings+top; 1260 } 1261 top+=length+1; 1262 1263 if(top>STRING_STORE_SIZE) { 1264 fprintf(stderr, "icupkg: string storage overflow\n"); 1265 exit(U_BUFFER_OVERFLOW_ERROR); 1266 } 1267 if(in) { 1268 inStringTop=top; 1269 } else { 1270 outStringTop=top; 1271 } 1272 return p; 1273 } 1274 1275 void 1276 Package::sortItems() { 1277 UErrorCode errorCode=U_ZERO_ERROR; 1278 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1279 if(U_FAILURE(errorCode)) { 1280 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1281 exit(errorCode); 1282 } 1283 } 1284 1285 void Package::setItemCapacity(int32_t max) 1286 { 1287 if(max<=itemMax) { 1288 return; 1289 } 1290 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1291 Item *oldItems = items; 1292 if(newItems == NULL) { 1293 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", 1294 (unsigned long)max*sizeof(items[0]), max); 1295 exit(U_MEMORY_ALLOCATION_ERROR); 1296 } 1297 if(items && itemCount>0) { 1298 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1299 } 1300 itemMax = max; 1301 items = newItems; 1302 uprv_free(oldItems); 1303 } 1304 1305 void Package::ensureItemCapacity() 1306 { 1307 if((itemCount+1)>itemMax) { 1308 setItemCapacity(itemCount+kItemsChunk); 1309 } 1310 } 1311 1312 U_NAMESPACE_END 1313