1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1999-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: package.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2005aug25 16 * created by: Markus W. Scherer 17 * 18 * Read, modify, and write ICU .dat data package files. 19 * This is an integral part of the icupkg tool, moved to the toolutil library 20 * because parts of tool implementations tend to be later shared by 21 * other tools. 22 * Subsumes functionality and implementation code from 23 * gencmn, decmn, and icuswap tools. 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/putil.h" 28 #include "unicode/udata.h" 29 #include "cstring.h" 30 #include "uarrsort.h" 31 #include "ucmndata.h" 32 #include "udataswp.h" 33 #include "swapimpl.h" 34 #include "toolutil.h" 35 #include "package.h" 36 #include "cmemory.h" 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 43 static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 44 45 // general definitions ----------------------------------------------------- *** 46 47 /* UDataInfo cf. udata.h */ 48 static const UDataInfo dataInfo={ 49 (uint16_t)sizeof(UDataInfo), 50 0, 51 52 U_IS_BIG_ENDIAN, 53 U_CHARSET_FAMILY, 54 (uint8_t)sizeof(UChar), 55 0, 56 57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 58 {1, 0, 0, 0}, /* formatVersion */ 59 {3, 0, 0, 0} /* dataVersion */ 60 }; 61 62 U_CDECL_BEGIN 63 static void U_CALLCONV 64 printPackageError(void *context, const char *fmt, va_list args) { 65 vfprintf((FILE *)context, fmt, args); 66 } 67 U_CDECL_END 68 69 static uint16_t 70 readSwapUInt16(uint16_t x) { 71 return (uint16_t)((x<<8)|(x>>8)); 72 } 73 74 // platform types ---------------------------------------------------------- *** 75 76 static const char *types="lb?e"; 77 78 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 79 80 static inline int32_t 81 makeTypeEnum(uint8_t charset, UBool isBigEndian) { 82 return 2*(int32_t)charset+isBigEndian; 83 } 84 85 static inline int32_t 86 makeTypeEnum(char type) { 87 return 88 type == 'l' ? TYPE_L : 89 type == 'b' ? TYPE_B : 90 type == 'e' ? TYPE_E : 91 -1; 92 } 93 94 static inline char 95 makeTypeLetter(uint8_t charset, UBool isBigEndian) { 96 return types[makeTypeEnum(charset, isBigEndian)]; 97 } 98 99 static inline char 100 makeTypeLetter(int32_t typeEnum) { 101 return types[typeEnum]; 102 } 103 104 static void 105 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 106 int32_t typeEnum=makeTypeEnum(type); 107 charset=(uint8_t)(typeEnum>>1); 108 isBigEndian=(UBool)(typeEnum&1); 109 } 110 111 U_CFUNC const UDataInfo * 112 getDataInfo(const uint8_t *data, int32_t length, 113 int32_t &infoLength, int32_t &headerLength, 114 UErrorCode *pErrorCode) { 115 const DataHeader *pHeader; 116 const UDataInfo *pInfo; 117 118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 119 return NULL; 120 } 121 if( data==NULL || 122 (length>=0 && length<(int32_t)sizeof(DataHeader)) 123 ) { 124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 125 return NULL; 126 } 127 128 pHeader=(const DataHeader *)data; 129 pInfo=&pHeader->info; 130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 131 pHeader->dataHeader.magic1!=0xda || 132 pHeader->dataHeader.magic2!=0x27 || 133 pInfo->sizeofUChar!=2 134 ) { 135 *pErrorCode=U_UNSUPPORTED_ERROR; 136 return NULL; 137 } 138 139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 140 headerLength=pHeader->dataHeader.headerSize; 141 infoLength=pInfo->size; 142 } else { 143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 144 infoLength=readSwapUInt16(pInfo->size); 145 } 146 147 if( headerLength<(int32_t)sizeof(DataHeader) || 148 infoLength<(int32_t)sizeof(UDataInfo) || 149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 150 (length>=0 && length<headerLength) 151 ) { 152 *pErrorCode=U_UNSUPPORTED_ERROR; 153 return NULL; 154 } 155 156 return pInfo; 157 } 158 159 static int32_t 160 getTypeEnumForInputData(const uint8_t *data, int32_t length, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t infoLength, headerLength; 164 165 /* getDataInfo() checks for illegal arguments */ 166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 167 if(pInfo==NULL) { 168 return -1; 169 } 170 171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 172 } 173 174 // file handling ----------------------------------------------------------- *** 175 176 static void 177 extractPackageName(const char *filename, 178 char pkg[], int32_t capacity) { 179 const char *basename; 180 int32_t len; 181 182 basename=findBasename(filename); 183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 184 185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 187 basename); 188 exit(U_ILLEGAL_ARGUMENT_ERROR); 189 } 190 191 if(len>=capacity) { 192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 193 basename, (long)capacity); 194 exit(U_ILLEGAL_ARGUMENT_ERROR); 195 } 196 197 memcpy(pkg, basename, len); 198 pkg[len]=0; 199 } 200 201 static int32_t 202 getFileLength(FILE *f) { 203 int32_t length; 204 205 fseek(f, 0, SEEK_END); 206 length=(int32_t)ftell(f); 207 fseek(f, 0, SEEK_SET); 208 return length; 209 } 210 211 /* 212 * Turn tree separators and alternate file separators into normal file separators. 213 */ 214 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 215 #define treeToPath(s) 216 #else 217 static void 218 treeToPath(char *s) { 219 char *t; 220 221 for(t=s; *t!=0; ++t) { 222 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 223 *t=U_FILE_SEP_CHAR; 224 } 225 } 226 } 227 #endif 228 229 /* 230 * Turn file separators into tree separators. 231 */ 232 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 233 #define pathToTree(s) 234 #else 235 static void 236 pathToTree(char *s) { 237 char *t; 238 239 for(t=s; *t!=0; ++t) { 240 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 241 *t=U_TREE_ENTRY_SEP_CHAR; 242 } 243 } 244 } 245 #endif 246 247 /* 248 * Prepend the path (if any) to the name and run the name through treeToName(). 249 */ 250 static void 251 makeFullFilename(const char *path, const char *name, 252 char *filename, int32_t capacity) { 253 char *s; 254 255 // prepend the path unless NULL or empty 256 if(path!=NULL && path[0]!=0) { 257 if((int32_t)(strlen(path)+1)>=capacity) { 258 fprintf(stderr, "pathname too long: \"%s\"\n", path); 259 exit(U_BUFFER_OVERFLOW_ERROR); 260 } 261 strcpy(filename, path); 262 263 // make sure the path ends with a file separator 264 s=strchr(filename, 0); 265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 266 *s++=U_FILE_SEP_CHAR; 267 } 268 } else { 269 s=filename; 270 } 271 272 // turn the name into a filename, turn tree separators into file separators 273 if((int32_t)((s-filename)+strlen(name))>=capacity) { 274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 275 exit(U_BUFFER_OVERFLOW_ERROR); 276 } 277 strcpy(s, name); 278 treeToPath(s); 279 } 280 281 static void 282 makeFullFilenameAndDirs(const char *path, const char *name, 283 char *filename, int32_t capacity) { 284 char *sep; 285 UErrorCode errorCode; 286 287 makeFullFilename(path, name, filename, capacity); 288 289 // make tree directories 290 errorCode=U_ZERO_ERROR; 291 sep=strchr(filename, 0)-strlen(name); 292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 293 if(sep!=filename) { 294 *sep=0; // truncate temporarily 295 uprv_mkdir(filename, &errorCode); 296 if(U_FAILURE(errorCode)) { 297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 298 exit(U_FILE_ACCESS_ERROR); 299 } 300 } 301 *sep++=U_FILE_SEP_CHAR; // restore file separator character 302 } 303 } 304 305 static uint8_t * 306 readFile(const char *path, const char *name, int32_t &length, char &type) { 307 char filename[1024]; 308 FILE *file; 309 UErrorCode errorCode; 310 int32_t fileLength, typeEnum; 311 312 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 313 314 /* open the input file, get its length, allocate memory for it, read the file */ 315 file=fopen(filename, "rb"); 316 if(file==NULL) { 317 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 318 exit(U_FILE_ACCESS_ERROR); 319 } 320 321 /* get the file length */ 322 fileLength=getFileLength(file); 323 if(ferror(file) || fileLength<=0) { 324 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 325 fclose(file); 326 exit(U_FILE_ACCESS_ERROR); 327 } 328 329 /* allocate the buffer, pad to multiple of 16 */ 330 length=(fileLength+0xf)&~0xf; 331 icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length)); 332 if(data.isNull()) { 333 fclose(file); 334 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 335 exit(U_MEMORY_ALLOCATION_ERROR); 336 } 337 338 /* read the file */ 339 if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) { 340 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 341 fclose(file); 342 exit(U_FILE_ACCESS_ERROR); 343 } 344 345 /* pad the file to a multiple of 16 using the usual padding byte */ 346 if(fileLength<length) { 347 memset(data.getAlias()+fileLength, 0xaa, length-fileLength); 348 } 349 350 fclose(file); 351 352 // minimum check for ICU-format data 353 errorCode=U_ZERO_ERROR; 354 typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode); 355 if(typeEnum<0 || U_FAILURE(errorCode)) { 356 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 357 #if !UCONFIG_NO_LEGACY_CONVERSION 358 exit(U_INVALID_FORMAT_ERROR); 359 #else 360 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 361 exit(0); 362 #endif 363 } 364 type=makeTypeLetter(typeEnum); 365 366 return data.orphan(); 367 } 368 369 // .dat package file representation ---------------------------------------- *** 370 371 U_CDECL_BEGIN 372 373 static int32_t U_CALLCONV 374 compareItems(const void * /*context*/, const void *left, const void *right) { 375 U_NAMESPACE_USE 376 377 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 378 } 379 380 U_CDECL_END 381 382 U_NAMESPACE_BEGIN 383 384 Package::Package() 385 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { 386 inPkgName[0]=0; 387 pkgPrefix[0]=0; 388 inData=NULL; 389 inLength=0; 390 inCharset=U_CHARSET_FAMILY; 391 inIsBigEndian=U_IS_BIG_ENDIAN; 392 393 itemCount=0; 394 itemMax=0; 395 items=NULL; 396 397 inStringTop=outStringTop=0; 398 399 matchMode=0; 400 findPrefix=findSuffix=NULL; 401 findPrefixLength=findSuffixLength=0; 402 findNextIndex=-1; 403 404 // create a header for an empty package 405 DataHeader *pHeader; 406 pHeader=(DataHeader *)header; 407 pHeader->dataHeader.magic1=0xda; 408 pHeader->dataHeader.magic2=0x27; 409 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 410 headerLength=(int32_t)(4+sizeof(dataInfo)); 411 if(headerLength&0xf) { 412 /* NUL-pad the header to a multiple of 16 */ 413 int32_t length=(headerLength+0xf)&~0xf; 414 memset(header+headerLength, 0, length-headerLength); 415 headerLength=length; 416 } 417 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 418 } 419 420 Package::~Package() { 421 int32_t idx; 422 423 uprv_free(inData); 424 425 for(idx=0; idx<itemCount; ++idx) { 426 if(items[idx].isDataOwned) { 427 uprv_free(items[idx].data); 428 } 429 } 430 431 uprv_free((void*)items); 432 } 433 434 void 435 Package::setPrefix(const char *p) { 436 if(strlen(p)>=sizeof(pkgPrefix)) { 437 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); 438 exit(U_ILLEGAL_ARGUMENT_ERROR); 439 } 440 strcpy(pkgPrefix, p); 441 } 442 443 void 444 Package::readPackage(const char *filename) { 445 UDataSwapper *ds; 446 const UDataInfo *pInfo; 447 UErrorCode errorCode; 448 449 const uint8_t *inBytes; 450 451 int32_t length, offset, i; 452 int32_t itemLength, typeEnum; 453 char type; 454 455 const UDataOffsetTOCEntry *inEntries; 456 457 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 458 459 /* read the file */ 460 inData=readFile(NULL, filename, inLength, type); 461 length=inLength; 462 463 /* 464 * swap the header - even if the swapping itself is a no-op 465 * because it tells us the header length 466 */ 467 errorCode=U_ZERO_ERROR; 468 makeTypeProps(type, inCharset, inIsBigEndian); 469 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 470 if(U_FAILURE(errorCode)) { 471 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 472 filename, u_errorName(errorCode)); 473 exit(errorCode); 474 } 475 476 ds->printError=printPackageError; 477 ds->printErrorContext=stderr; 478 479 headerLength=sizeof(header); 480 if(length<headerLength) { 481 headerLength=length; 482 } 483 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 484 if(U_FAILURE(errorCode)) { 485 exit(errorCode); 486 } 487 488 /* check data format and format version */ 489 pInfo=(const UDataInfo *)((const char *)inData+4); 490 if(!( 491 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 492 pInfo->dataFormat[1]==0x6d && 493 pInfo->dataFormat[2]==0x6e && 494 pInfo->dataFormat[3]==0x44 && 495 pInfo->formatVersion[0]==1 496 )) { 497 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 498 pInfo->dataFormat[0], pInfo->dataFormat[1], 499 pInfo->dataFormat[2], pInfo->dataFormat[3], 500 pInfo->formatVersion[0]); 501 exit(U_UNSUPPORTED_ERROR); 502 } 503 inIsBigEndian=(UBool)pInfo->isBigEndian; 504 inCharset=pInfo->charsetFamily; 505 506 inBytes=(const uint8_t *)inData+headerLength; 507 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 508 509 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 510 length-=headerLength; 511 if(length<4) { 512 /* itemCount does not fit */ 513 offset=0x7fffffff; 514 } else { 515 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 516 setItemCapacity(itemCount); /* resize so there's space */ 517 if(itemCount==0) { 518 offset=4; 519 } else if(length<(4+8*itemCount)) { 520 /* ToC table does not fit */ 521 offset=0x7fffffff; 522 } else { 523 /* offset of the last item plus at least 20 bytes for its header */ 524 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 525 } 526 } 527 if(length<offset) { 528 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 529 (long)length); 530 exit(U_INDEX_OUTOFBOUNDS_ERROR); 531 } 532 /* do not modify the package length variable until the last item's length is set */ 533 534 if(itemCount<=0) { 535 if(doAutoPrefix) { 536 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); 537 exit(U_INVALID_FORMAT_ERROR); 538 } 539 } else { 540 char prefix[MAX_PKG_NAME_LENGTH+4]; 541 char *s, *inItemStrings; 542 543 if(itemCount>itemMax) { 544 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 545 exit(U_BUFFER_OVERFLOW_ERROR); 546 } 547 548 /* swap the item name strings */ 549 int32_t stringsOffset=4+8*itemCount; 550 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 551 552 // don't include padding bytes at the end of the item names 553 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 554 --itemLength; 555 } 556 557 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 558 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 559 exit(U_BUFFER_OVERFLOW_ERROR); 560 } 561 562 inItemStrings=inStrings+inStringTop; 563 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 564 if(U_FAILURE(errorCode)) { 565 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 566 exit(U_INVALID_FORMAT_ERROR); 567 } 568 inStringTop+=itemLength; 569 570 // reset the Item entries 571 memset(items, 0, itemCount*sizeof(Item)); 572 573 /* 574 * Get the common prefix of the items. 575 * New-style ICU .dat packages use tree separators ('/') between package names, 576 * tree names, and item names, 577 * while old-style ICU .dat packages (before multi-tree support) 578 * use an underscore ('_') between package and item names. 579 */ 580 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 581 s=inItemStrings+offset; // name of the first entry 582 int32_t prefixLength; 583 if(doAutoPrefix) { 584 // Use the first entry's prefix. Must be a new-style package. 585 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); 586 if(prefixLimit==NULL) { 587 fprintf(stderr, 588 "icupkg: --auto_toc_prefix[_with_type] but " 589 "the first entry \"%s\" does not contain a '%c'\n", 590 s, U_TREE_ENTRY_SEP_CHAR); 591 exit(U_INVALID_FORMAT_ERROR); 592 } 593 prefixLength=(int32_t)(prefixLimit-s); 594 if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) { 595 fprintf(stderr, 596 "icupkg: --auto_toc_prefix[_with_type] but " 597 "the prefix of the first entry \"%s\" is empty or too long\n", 598 s); 599 exit(U_INVALID_FORMAT_ERROR); 600 } 601 if(prefixEndsWithType && s[prefixLength-1]!=type) { 602 fprintf(stderr, 603 "icupkg: --auto_toc_prefix_with_type but " 604 "the prefix of the first entry \"%s\" does not end with '%c'\n", 605 s, type); 606 exit(U_INVALID_FORMAT_ERROR); 607 } 608 memcpy(pkgPrefix, s, prefixLength); 609 pkgPrefix[prefixLength]=0; 610 memcpy(prefix, s, ++prefixLength); // include the / 611 } else { 612 // Use the package basename as prefix. 613 int32_t inPkgNameLength= static_cast<int32_t>(strlen(inPkgName)); 614 memcpy(prefix, inPkgName, inPkgNameLength); 615 prefixLength=inPkgNameLength; 616 617 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 618 0==memcmp(s, inPkgName, inPkgNameLength) && 619 s[inPkgNameLength]=='_' 620 ) { 621 // old-style .dat package 622 prefix[prefixLength++]='_'; 623 } else { 624 // new-style .dat package 625 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 626 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 627 // then the test in the loop below will fail 628 } 629 } 630 prefix[prefixLength]=0; 631 632 /* read the ToC table */ 633 for(i=0; i<itemCount; ++i) { 634 // skip the package part of the item name, error if it does not match the actual package name 635 // or if nothing follows the package name 636 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 637 s=inItemStrings+offset; 638 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 639 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 640 s, prefix); 641 exit(U_INVALID_FORMAT_ERROR); 642 } 643 items[i].name=s+prefixLength; 644 645 // set the item's data 646 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 647 if(i>0) { 648 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 649 650 // set the previous item's platform type 651 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 652 if(typeEnum<0 || U_FAILURE(errorCode)) { 653 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 654 exit(U_INVALID_FORMAT_ERROR); 655 } 656 items[i-1].type=makeTypeLetter(typeEnum); 657 } 658 items[i].isDataOwned=FALSE; 659 } 660 // set the last item's length 661 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 662 663 // set the last item's platform type 664 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 665 if(typeEnum<0 || U_FAILURE(errorCode)) { 666 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[itemCount-1].name, filename); 667 exit(U_INVALID_FORMAT_ERROR); 668 } 669 items[itemCount-1].type=makeTypeLetter(typeEnum); 670 671 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 672 // sort the item names for the local charset 673 sortItems(); 674 } 675 } 676 677 udata_closeSwapper(ds); 678 } 679 680 char 681 Package::getInType() { 682 return makeTypeLetter(inCharset, inIsBigEndian); 683 } 684 685 void 686 Package::writePackage(const char *filename, char outType, const char *comment) { 687 char prefix[MAX_PKG_NAME_LENGTH+4]; 688 UDataOffsetTOCEntry entry; 689 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 690 FILE *file; 691 Item *pItem; 692 char *name; 693 UErrorCode errorCode; 694 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 695 uint8_t outCharset; 696 UBool outIsBigEndian; 697 698 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 699 700 // if there is an explicit comment, then use it, else use what's in the current header 701 if(comment!=NULL) { 702 /* get the header size minus the current comment */ 703 DataHeader *pHeader; 704 int32_t length; 705 706 pHeader=(DataHeader *)header; 707 headerLength=4+pHeader->info.size; 708 length=(int32_t)strlen(comment); 709 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 710 fprintf(stderr, "icupkg: comment too long\n"); 711 exit(U_BUFFER_OVERFLOW_ERROR); 712 } 713 memcpy(header+headerLength, comment, length+1); 714 headerLength+=length; 715 if(headerLength&0xf) { 716 /* NUL-pad the header to a multiple of 16 */ 717 length=(headerLength+0xf)&~0xf; 718 memset(header+headerLength, 0, length-headerLength); 719 headerLength=length; 720 } 721 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 722 } 723 724 makeTypeProps(outType, outCharset, outIsBigEndian); 725 726 // open (TYPE_COUNT-2) swappers 727 // one is a no-op for local type==outType 728 // one type (TYPE_LE) is bogus 729 errorCode=U_ZERO_ERROR; 730 i=makeTypeEnum(outType); 731 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 732 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 733 ds[TYPE_LE]=NULL; 734 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 735 if(U_FAILURE(errorCode)) { 736 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 737 exit(errorCode); 738 } 739 for(i=0; i<TYPE_COUNT; ++i) { 740 if(ds[i]!=NULL) { 741 ds[i]->printError=printPackageError; 742 ds[i]->printErrorContext=stderr; 743 } 744 } 745 746 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 747 748 // create the file and write its contents 749 file=fopen(filename, "wb"); 750 if(file==NULL) { 751 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 752 exit(U_FILE_ACCESS_ERROR); 753 } 754 755 // swap and write the header 756 if(dsLocalToOut!=NULL) { 757 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 758 if(U_FAILURE(errorCode)) { 759 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 760 exit(errorCode); 761 } 762 } 763 length=(int32_t)fwrite(header, 1, headerLength, file); 764 if(length!=headerLength) { 765 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 766 exit(U_FILE_ACCESS_ERROR); 767 } 768 769 // prepare and swap the package name with a tree separator 770 // for prepending to item names 771 if(pkgPrefix[0]==0) { 772 prefixLength=(int32_t)strlen(prefix); 773 } else { 774 prefixLength=(int32_t)strlen(pkgPrefix); 775 memcpy(prefix, pkgPrefix, prefixLength); 776 if(prefixEndsWithType) { 777 prefix[prefixLength-1]=outType; 778 } 779 } 780 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 781 prefix[prefixLength]=0; 782 if(dsLocalToOut!=NULL) { 783 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 784 if(U_FAILURE(errorCode)) { 785 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 786 exit(errorCode); 787 } 788 789 // swap and sort the item names (sorting needs to be done in the output charset) 790 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 791 if(U_FAILURE(errorCode)) { 792 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 793 exit(errorCode); 794 } 795 sortItems(); 796 } 797 798 // create the output item names in sorted order, with the package name prepended to each 799 for(i=0; i<itemCount; ++i) { 800 length=(int32_t)strlen(items[i].name); 801 name=allocString(FALSE, length+prefixLength); 802 memcpy(name, prefix, prefixLength); 803 memcpy(name+prefixLength, items[i].name, length+1); 804 items[i].name=name; 805 } 806 807 // calculate offsets for item names and items, pad to 16-align items 808 // align only the first item; each item's length is a multiple of 16 809 basenameOffset=4+8*itemCount; 810 offset=basenameOffset+outStringTop; 811 if((length=(offset&15))!=0) { 812 length=16-length; 813 memset(allocString(FALSE, length-1), 0xaa, length); 814 offset+=length; 815 } 816 817 // write the table of contents 818 // first the itemCount 819 outInt32=itemCount; 820 if(dsLocalToOut!=NULL) { 821 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 822 if(U_FAILURE(errorCode)) { 823 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 824 exit(errorCode); 825 } 826 } 827 length=(int32_t)fwrite(&outInt32, 1, 4, file); 828 if(length!=4) { 829 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 830 exit(U_FILE_ACCESS_ERROR); 831 } 832 833 // then write the item entries (and collect the maxItemLength) 834 maxItemLength=0; 835 for(i=0; i<itemCount; ++i) { 836 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 837 entry.dataOffset=(uint32_t)offset; 838 if(dsLocalToOut!=NULL) { 839 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 840 if(U_FAILURE(errorCode)) { 841 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 842 exit(errorCode); 843 } 844 } 845 length=(int32_t)fwrite(&entry, 1, 8, file); 846 if(length!=8) { 847 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 848 exit(U_FILE_ACCESS_ERROR); 849 } 850 851 length=items[i].length; 852 if(length>maxItemLength) { 853 maxItemLength=length; 854 } 855 offset+=length; 856 } 857 858 // write the item names 859 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 860 if(length!=outStringTop) { 861 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 862 exit(U_FILE_ACCESS_ERROR); 863 } 864 865 // write the items 866 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 867 int32_t type=makeTypeEnum(pItem->type); 868 if(ds[type]!=NULL) { 869 // swap each item from its platform properties to the desired ones 870 udata_swap( 871 ds[type], 872 pItem->data, pItem->length, pItem->data, 873 &errorCode); 874 if(U_FAILURE(errorCode)) { 875 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 876 exit(errorCode); 877 } 878 } 879 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 880 if(length!=pItem->length) { 881 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 882 exit(U_FILE_ACCESS_ERROR); 883 } 884 } 885 886 if(ferror(file)) { 887 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 888 exit(U_FILE_ACCESS_ERROR); 889 } 890 891 fclose(file); 892 for(i=0; i<TYPE_COUNT; ++i) { 893 udata_closeSwapper(ds[i]); 894 } 895 } 896 897 int32_t 898 Package::findItem(const char *name, int32_t length) const { 899 int32_t i, start, limit; 900 int result; 901 902 /* do a binary search for the string */ 903 start=0; 904 limit=itemCount; 905 while(start<limit) { 906 i=(start+limit)/2; 907 if(length>=0) { 908 result=strncmp(name, items[i].name, length); 909 } else { 910 result=strcmp(name, items[i].name); 911 } 912 913 if(result==0) { 914 /* found */ 915 if(length>=0) { 916 /* 917 * if we compared just prefixes, then we may need to back up 918 * to the first item with this prefix 919 */ 920 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 921 --i; 922 } 923 } 924 return i; 925 } else if(result<0) { 926 limit=i; 927 } else /* result>0 */ { 928 start=i+1; 929 } 930 } 931 932 return ~start; /* not found, return binary-not of the insertion point */ 933 } 934 935 void 936 Package::findItems(const char *pattern) { 937 const char *wild; 938 939 if(pattern==NULL || *pattern==0) { 940 findNextIndex=-1; 941 return; 942 } 943 944 findPrefix=pattern; 945 findSuffix=NULL; 946 findSuffixLength=0; 947 948 wild=strchr(pattern, '*'); 949 if(wild==NULL) { 950 // no wildcard 951 findPrefixLength=(int32_t)strlen(pattern); 952 } else { 953 // one wildcard 954 findPrefixLength=(int32_t)(wild-pattern); 955 findSuffix=wild+1; 956 findSuffixLength=(int32_t)strlen(findSuffix); 957 if(NULL!=strchr(findSuffix, '*')) { 958 // two or more wildcards 959 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 960 exit(U_PARSE_ERROR); 961 } 962 } 963 964 if(findPrefixLength==0) { 965 findNextIndex=0; 966 } else { 967 findNextIndex=findItem(findPrefix, findPrefixLength); 968 } 969 } 970 971 int32_t 972 Package::findNextItem() { 973 const char *name, *middle, *treeSep; 974 int32_t idx, nameLength, middleLength; 975 976 if(findNextIndex<0) { 977 return -1; 978 } 979 980 while(findNextIndex<itemCount) { 981 idx=findNextIndex++; 982 name=items[idx].name; 983 nameLength=(int32_t)strlen(name); 984 if(nameLength<(findPrefixLength+findSuffixLength)) { 985 // item name too short for prefix & suffix 986 continue; 987 } 988 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 989 // left the range of names with this prefix 990 break; 991 } 992 middle=name+findPrefixLength; 993 middleLength=nameLength-findPrefixLength-findSuffixLength; 994 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 995 // suffix does not match 996 continue; 997 } 998 // prefix & suffix match 999 1000 if(matchMode&MATCH_NOSLASH) { 1001 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 1002 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 1003 // the middle (matching the * wildcard) contains a tree separator / 1004 continue; 1005 } 1006 } 1007 1008 // found a matching item 1009 return idx; 1010 } 1011 1012 // no more items 1013 findNextIndex=-1; 1014 return -1; 1015 } 1016 1017 void 1018 Package::setMatchMode(uint32_t mode) { 1019 matchMode=mode; 1020 } 1021 1022 void 1023 Package::addItem(const char *name) { 1024 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 1025 } 1026 1027 void 1028 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 1029 int32_t idx; 1030 1031 idx=findItem(name); 1032 if(idx<0) { 1033 // new item, make space at the insertion point 1034 ensureItemCapacity(); 1035 // move the following items down 1036 idx=~idx; 1037 if(idx<itemCount) { 1038 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 1039 } 1040 ++itemCount; 1041 1042 // reset this Item entry 1043 memset(items+idx, 0, sizeof(Item)); 1044 1045 // copy the item's name 1046 items[idx].name=allocString(TRUE, static_cast<int32_t>(strlen(name))); 1047 strcpy(items[idx].name, name); 1048 pathToTree(items[idx].name); 1049 } else { 1050 // same-name item found, replace it 1051 if(items[idx].isDataOwned) { 1052 uprv_free(items[idx].data); 1053 } 1054 1055 // keep the item's name since it is the same 1056 } 1057 1058 // set the item's data 1059 items[idx].data=data; 1060 items[idx].length=length; 1061 items[idx].isDataOwned=isDataOwned; 1062 items[idx].type=type; 1063 } 1064 1065 void 1066 Package::addFile(const char *filesPath, const char *name) { 1067 uint8_t *data; 1068 int32_t length; 1069 char type; 1070 1071 data=readFile(filesPath, name, length, type); 1072 // readFile() exits the tool if it fails 1073 addItem(name, data, length, TRUE, type); 1074 } 1075 1076 void 1077 Package::addItems(const Package &listPkg) { 1078 const Item *pItem; 1079 int32_t i; 1080 1081 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1082 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1083 } 1084 } 1085 1086 void 1087 Package::removeItem(int32_t idx) { 1088 if(idx>=0) { 1089 // remove the item 1090 if(items[idx].isDataOwned) { 1091 uprv_free(items[idx].data); 1092 } 1093 1094 // move the following items up 1095 if((idx+1)<itemCount) { 1096 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1097 } 1098 --itemCount; 1099 1100 if(idx<=findNextIndex) { 1101 --findNextIndex; 1102 } 1103 } 1104 } 1105 1106 void 1107 Package::removeItems(const char *pattern) { 1108 int32_t idx; 1109 1110 findItems(pattern); 1111 while((idx=findNextItem())>=0) { 1112 removeItem(idx); 1113 } 1114 } 1115 1116 void 1117 Package::removeItems(const Package &listPkg) { 1118 const Item *pItem; 1119 int32_t i; 1120 1121 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1122 removeItems(pItem->name); 1123 } 1124 } 1125 1126 void 1127 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1128 char filename[1024]; 1129 UDataSwapper *ds; 1130 FILE *file; 1131 Item *pItem; 1132 int32_t fileLength; 1133 uint8_t itemCharset, outCharset; 1134 UBool itemIsBigEndian, outIsBigEndian; 1135 1136 if(idx<0 || itemCount<=idx) { 1137 return; 1138 } 1139 pItem=items+idx; 1140 1141 // swap the data to the outType 1142 // outType==0: don't swap 1143 if(outType!=0 && pItem->type!=outType) { 1144 // open the swapper 1145 UErrorCode errorCode=U_ZERO_ERROR; 1146 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1147 makeTypeProps(outType, outCharset, outIsBigEndian); 1148 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1149 if(U_FAILURE(errorCode)) { 1150 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1151 (long)idx, u_errorName(errorCode)); 1152 exit(errorCode); 1153 } 1154 1155 ds->printError=printPackageError; 1156 ds->printErrorContext=stderr; 1157 1158 // swap the item from its platform properties to the desired ones 1159 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1160 if(U_FAILURE(errorCode)) { 1161 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1162 exit(errorCode); 1163 } 1164 udata_closeSwapper(ds); 1165 pItem->type=outType; 1166 } 1167 1168 // create the file and write its contents 1169 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1170 file=fopen(filename, "wb"); 1171 if(file==NULL) { 1172 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1173 exit(U_FILE_ACCESS_ERROR); 1174 } 1175 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1176 1177 if(ferror(file) || fileLength!=pItem->length) { 1178 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1179 exit(U_FILE_ACCESS_ERROR); 1180 } 1181 fclose(file); 1182 } 1183 1184 void 1185 Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1186 extractItem(filesPath, items[idx].name, idx, outType); 1187 } 1188 1189 void 1190 Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1191 int32_t idx; 1192 1193 findItems(pattern); 1194 while((idx=findNextItem())>=0) { 1195 extractItem(filesPath, idx, outType); 1196 } 1197 } 1198 1199 void 1200 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1201 const Item *pItem; 1202 int32_t i; 1203 1204 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1205 extractItems(filesPath, pItem->name, outType); 1206 } 1207 } 1208 1209 int32_t 1210 Package::getItemCount() const { 1211 return itemCount; 1212 } 1213 1214 const Item * 1215 Package::getItem(int32_t idx) const { 1216 if (0 <= idx && idx < itemCount) { 1217 return &items[idx]; 1218 } 1219 return NULL; 1220 } 1221 1222 void 1223 Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1224 // check dependency: make sure the target item is in the package 1225 Package *me=(Package *)context; 1226 if(me->findItem(targetName)<0) { 1227 me->isMissingItems=TRUE; 1228 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1229 } 1230 } 1231 1232 UBool 1233 Package::checkDependencies() { 1234 isMissingItems=FALSE; 1235 enumDependencies(this, checkDependency); 1236 return (UBool)!isMissingItems; 1237 } 1238 1239 void 1240 Package::enumDependencies(void *context, CheckDependency check) { 1241 int32_t i; 1242 1243 for(i=0; i<itemCount; ++i) { 1244 enumDependencies(items+i, context, check); 1245 } 1246 } 1247 1248 char * 1249 Package::allocString(UBool in, int32_t length) { 1250 char *p; 1251 int32_t top; 1252 1253 if(in) { 1254 top=inStringTop; 1255 p=inStrings+top; 1256 } else { 1257 top=outStringTop; 1258 p=outStrings+top; 1259 } 1260 top+=length+1; 1261 1262 if(top>STRING_STORE_SIZE) { 1263 fprintf(stderr, "icupkg: string storage overflow\n"); 1264 exit(U_BUFFER_OVERFLOW_ERROR); 1265 } 1266 if(in) { 1267 inStringTop=top; 1268 } else { 1269 outStringTop=top; 1270 } 1271 return p; 1272 } 1273 1274 void 1275 Package::sortItems() { 1276 UErrorCode errorCode=U_ZERO_ERROR; 1277 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1278 if(U_FAILURE(errorCode)) { 1279 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1280 exit(errorCode); 1281 } 1282 } 1283 1284 void Package::setItemCapacity(int32_t max) 1285 { 1286 if(max<=itemMax) { 1287 return; 1288 } 1289 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1290 Item *oldItems = items; 1291 if(newItems == NULL) { 1292 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", 1293 (unsigned long)(max*sizeof(items[0])), max); 1294 exit(U_MEMORY_ALLOCATION_ERROR); 1295 } 1296 if(items && itemCount>0) { 1297 uprv_memcpy(newItems, items, (size_t)itemCount*sizeof(items[0])); 1298 } 1299 itemMax = max; 1300 items = newItems; 1301 uprv_free(oldItems); 1302 } 1303 1304 void Package::ensureItemCapacity() 1305 { 1306 if((itemCount+1)>itemMax) { 1307 setItemCapacity(itemCount+kItemsChunk); 1308 } 1309 } 1310 1311 U_NAMESPACE_END 1312