Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2011, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  package.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2005aug25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Read, modify, and write ICU .dat data package files.
     17 *   This is an integral part of the icupkg tool, moved to the toolutil library
     18 *   because parts of tool implementations tend to be later shared by
     19 *   other tools.
     20 *   Subsumes functionality and implementation code from
     21 *   gencmn, decmn, and icuswap tools.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/putil.h"
     26 #include "unicode/udata.h"
     27 #include "cstring.h"
     28 #include "uarrsort.h"
     29 #include "ucmndata.h"
     30 #include "udataswp.h"
     31 #include "swapimpl.h"
     32 #include "toolutil.h"
     33 #include "package.h"
     34 #include "cmemory.h"
     35 
     36 #include <stdio.h>
     37 #include <stdlib.h>
     38 #include <string.h>
     39 
     40 
     41 static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */
     42 
     43 // general definitions ----------------------------------------------------- ***
     44 
     45 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     46 
     47 /* UDataInfo cf. udata.h */
     48 static const UDataInfo dataInfo={
     49     (uint16_t)sizeof(UDataInfo),
     50     0,
     51 
     52     U_IS_BIG_ENDIAN,
     53     U_CHARSET_FAMILY,
     54     (uint8_t)sizeof(UChar),
     55     0,
     56 
     57     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
     58     {1, 0, 0, 0},                 /* formatVersion */
     59     {3, 0, 0, 0}                  /* dataVersion */
     60 };
     61 
     62 U_CDECL_BEGIN
     63 static void U_CALLCONV
     64 printPackageError(void *context, const char *fmt, va_list args) {
     65     vfprintf((FILE *)context, fmt, args);
     66 }
     67 U_CDECL_END
     68 
     69 static uint16_t
     70 readSwapUInt16(uint16_t x) {
     71     return (uint16_t)((x<<8)|(x>>8));
     72 }
     73 
     74 // platform types ---------------------------------------------------------- ***
     75 
     76 static const char *types="lb?e";
     77 
     78 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
     79 
     80 static inline int32_t
     81 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
     82     return 2*(int32_t)charset+isBigEndian;
     83 }
     84 
     85 static inline int32_t
     86 makeTypeEnum(char type) {
     87     return
     88         type == 'l' ? TYPE_L :
     89         type == 'b' ? TYPE_B :
     90         type == 'e' ? TYPE_E :
     91                -1;
     92 }
     93 
     94 static inline char
     95 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
     96     return types[makeTypeEnum(charset, isBigEndian)];
     97 }
     98 
     99 static inline char
    100 makeTypeLetter(int32_t typeEnum) {
    101     return types[typeEnum];
    102 }
    103 
    104 static void
    105 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
    106     int32_t typeEnum=makeTypeEnum(type);
    107     charset=(uint8_t)(typeEnum>>1);
    108     isBigEndian=(UBool)(typeEnum&1);
    109 }
    110 
    111 U_CFUNC const UDataInfo *
    112 getDataInfo(const uint8_t *data, int32_t length,
    113             int32_t &infoLength, int32_t &headerLength,
    114             UErrorCode *pErrorCode) {
    115     const DataHeader *pHeader;
    116     const UDataInfo *pInfo;
    117 
    118     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    119         return NULL;
    120     }
    121     if( data==NULL ||
    122         (length>=0 && length<(int32_t)sizeof(DataHeader))
    123     ) {
    124         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    125         return NULL;
    126     }
    127 
    128     pHeader=(const DataHeader *)data;
    129     pInfo=&pHeader->info;
    130     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
    131         pHeader->dataHeader.magic1!=0xda ||
    132         pHeader->dataHeader.magic2!=0x27 ||
    133         pInfo->sizeofUChar!=2
    134     ) {
    135         *pErrorCode=U_UNSUPPORTED_ERROR;
    136         return NULL;
    137     }
    138 
    139     if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
    140         headerLength=pHeader->dataHeader.headerSize;
    141         infoLength=pInfo->size;
    142     } else {
    143         headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
    144         infoLength=readSwapUInt16(pInfo->size);
    145     }
    146 
    147     if( headerLength<(int32_t)sizeof(DataHeader) ||
    148         infoLength<(int32_t)sizeof(UDataInfo) ||
    149         headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
    150         (length>=0 && length<headerLength)
    151     ) {
    152         *pErrorCode=U_UNSUPPORTED_ERROR;
    153         return NULL;
    154     }
    155 
    156     return pInfo;
    157 }
    158 
    159 static int32_t
    160 getTypeEnumForInputData(const uint8_t *data, int32_t length,
    161                         UErrorCode *pErrorCode) {
    162     const UDataInfo *pInfo;
    163     int32_t infoLength, headerLength;
    164 
    165     /* getDataInfo() checks for illegal arguments */
    166     pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
    167     if(pInfo==NULL) {
    168         return -1;
    169     }
    170 
    171     return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
    172 }
    173 
    174 // file handling ----------------------------------------------------------- ***
    175 
    176 static void
    177 extractPackageName(const char *filename,
    178                    char pkg[], int32_t capacity) {
    179     const char *basename;
    180     int32_t len;
    181 
    182     basename=findBasename(filename);
    183     len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
    184 
    185     if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
    186         fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
    187                          basename);
    188         exit(U_ILLEGAL_ARGUMENT_ERROR);
    189     }
    190 
    191     if(len>=capacity) {
    192         fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
    193                          basename, (long)capacity);
    194         exit(U_ILLEGAL_ARGUMENT_ERROR);
    195     }
    196 
    197     memcpy(pkg, basename, len);
    198     pkg[len]=0;
    199 }
    200 
    201 static int32_t
    202 getFileLength(FILE *f) {
    203     int32_t length;
    204 
    205     fseek(f, 0, SEEK_END);
    206     length=(int32_t)ftell(f);
    207     fseek(f, 0, SEEK_SET);
    208     return length;
    209 }
    210 
    211 /*
    212  * Turn tree separators and alternate file separators into normal file separators.
    213  */
    214 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
    215 #define treeToPath(s)
    216 #else
    217 static void
    218 treeToPath(char *s) {
    219     char *t;
    220 
    221     for(t=s; *t!=0; ++t) {
    222         if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
    223             *t=U_FILE_SEP_CHAR;
    224         }
    225     }
    226 }
    227 #endif
    228 
    229 /*
    230  * Turn file separators into tree separators.
    231  */
    232 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
    233 #define pathToTree(s)
    234 #else
    235 static void
    236 pathToTree(char *s) {
    237     char *t;
    238 
    239     for(t=s; *t!=0; ++t) {
    240         if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
    241             *t=U_TREE_ENTRY_SEP_CHAR;
    242         }
    243     }
    244 }
    245 #endif
    246 
    247 /*
 * Prepend the path (if any) to the name and run the name through treeToPath().
    249  */
    250 static void
    251 makeFullFilename(const char *path, const char *name,
    252                  char *filename, int32_t capacity) {
    253     char *s;
    254 
    255     // prepend the path unless NULL or empty
    256     if(path!=NULL && path[0]!=0) {
    257         if((int32_t)(strlen(path)+1)>=capacity) {
    258             fprintf(stderr, "pathname too long: \"%s\"\n", path);
    259             exit(U_BUFFER_OVERFLOW_ERROR);
    260         }
    261         strcpy(filename, path);
    262 
    263         // make sure the path ends with a file separator
    264         s=strchr(filename, 0);
    265         if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
    266             *s++=U_FILE_SEP_CHAR;
    267         }
    268     } else {
    269         s=filename;
    270     }
    271 
    272     // turn the name into a filename, turn tree separators into file separators
    273     if((int32_t)((s-filename)+strlen(name))>=capacity) {
    274         fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
    275         exit(U_BUFFER_OVERFLOW_ERROR);
    276     }
    277     strcpy(s, name);
    278     treeToPath(s);
    279 }
    280 
    281 static void
    282 makeFullFilenameAndDirs(const char *path, const char *name,
    283                         char *filename, int32_t capacity) {
    284     char *sep;
    285     UErrorCode errorCode;
    286 
    287     makeFullFilename(path, name, filename, capacity);
    288 
    289     // make tree directories
    290     errorCode=U_ZERO_ERROR;
    291     sep=strchr(filename, 0)-strlen(name);
    292     while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
    293         if(sep!=filename) {
    294             *sep=0;                 // truncate temporarily
    295             uprv_mkdir(filename, &errorCode);
    296             if(U_FAILURE(errorCode)) {
    297                 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
    298                 exit(U_FILE_ACCESS_ERROR);
    299             }
    300         }
    301         *sep++=U_FILE_SEP_CHAR; // restore file separator character
    302     }
    303 }
    304 
    305 static uint8_t *
    306 readFile(const char *path, const char *name, int32_t &length, char &type) {
    307     char filename[1024];
    308     FILE *file;
    309     uint8_t *data;
    310     UErrorCode errorCode;
    311     int32_t fileLength, typeEnum;
    312 
    313     makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
    314 
    315     /* open the input file, get its length, allocate memory for it, read the file */
    316     file=fopen(filename, "rb");
    317     if(file==NULL) {
    318         fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
    319         exit(U_FILE_ACCESS_ERROR);
    320     }
    321 
    322     /* get the file length */
    323     fileLength=getFileLength(file);
    324     if(ferror(file) || fileLength<=0) {
    325         fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
    326         fclose(file);
    327         exit(U_FILE_ACCESS_ERROR);
    328     }
    329 
    330     /* allocate the buffer, pad to multiple of 16 */
    331     length=(fileLength+0xf)&~0xf;
    332     data=(uint8_t *)uprv_malloc(length);
    333     if(data==NULL) {
    334         fclose(file);
    335         fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
    336         exit(U_MEMORY_ALLOCATION_ERROR);
    337     }
    338 
    339     /* read the file */
    340     if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
    341         fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
    342         fclose(file);
    343         free(data);
    344         exit(U_FILE_ACCESS_ERROR);
    345     }
    346 
    347     /* pad the file to a multiple of 16 using the usual padding byte */
    348     if(fileLength<length) {
    349         memset(data+fileLength, 0xaa, length-fileLength);
    350     }
    351 
    352     fclose(file);
    353 
    354     // minimum check for ICU-format data
    355     errorCode=U_ZERO_ERROR;
    356     typeEnum=getTypeEnumForInputData(data, length, &errorCode);
    357     if(typeEnum<0 || U_FAILURE(errorCode)) {
    358         fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
    359         free(data);
    360 #if !UCONFIG_NO_LEGACY_CONVERSION
    361         exit(U_INVALID_FORMAT_ERROR);
    362 #else
    363         fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
    364         exit(0);
    365 #endif
    366     }
    367     type=makeTypeLetter(typeEnum);
    368 
    369     return data;
    370 }
    371 
    372 // .dat package file representation ---------------------------------------- ***
    373 
    374 U_CDECL_BEGIN
    375 
    376 static int32_t U_CALLCONV
    377 compareItems(const void * /*context*/, const void *left, const void *right) {
    378     U_NAMESPACE_USE
    379 
    380     return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
    381 }
    382 
    383 U_CDECL_END
    384 
    385 U_NAMESPACE_BEGIN
    386 
    387 Package::Package() {
    388     inPkgName[0]=0;
    389     inData=NULL;
    390     inLength=0;
    391     inCharset=U_CHARSET_FAMILY;
    392     inIsBigEndian=U_IS_BIG_ENDIAN;
    393 
    394     itemCount=0;
    395     itemMax=0;
    396     items=NULL;
    397 
    398     inStringTop=outStringTop=0;
    399 
    400     matchMode=0;
    401     findPrefix=findSuffix=NULL;
    402     findPrefixLength=findSuffixLength=0;
    403     findNextIndex=-1;
    404 
    405     // create a header for an empty package
    406     DataHeader *pHeader;
    407     pHeader=(DataHeader *)header;
    408     pHeader->dataHeader.magic1=0xda;
    409     pHeader->dataHeader.magic2=0x27;
    410     memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
    411     headerLength=(int32_t)(4+sizeof(dataInfo));
    412     if(headerLength&0xf) {
    413         /* NUL-pad the header to a multiple of 16 */
    414         int32_t length=(headerLength+0xf)&~0xf;
    415         memset(header+headerLength, 0, length-headerLength);
    416         headerLength=length;
    417     }
    418     pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    419 }
    420 
    421 Package::~Package() {
    422     int32_t idx;
    423 
    424     free(inData);
    425 
    426     for(idx=0; idx<itemCount; ++idx) {
    427         if(items[idx].isDataOwned) {
    428             free(items[idx].data);
    429         }
    430     }
    431 
    432     uprv_free((void*)items);
    433 }
    434 
    435 void
    436 Package::readPackage(const char *filename) {
    437     UDataSwapper *ds;
    438     const UDataInfo *pInfo;
    439     UErrorCode errorCode;
    440 
    441     const uint8_t *inBytes;
    442 
    443     int32_t length, offset, i;
    444     int32_t itemLength, typeEnum;
    445     char type;
    446 
    447     const UDataOffsetTOCEntry *inEntries;
    448 
    449     extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
    450 
    451     /* read the file */
    452     inData=readFile(NULL, filename, inLength, type);
    453     length=inLength;
    454 
    455     /*
    456      * swap the header - even if the swapping itself is a no-op
    457      * because it tells us the header length
    458      */
    459     errorCode=U_ZERO_ERROR;
    460     makeTypeProps(type, inCharset, inIsBigEndian);
    461     ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    462     if(U_FAILURE(errorCode)) {
    463         fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
    464                 filename, u_errorName(errorCode));
    465         exit(errorCode);
    466     }
    467 
    468     ds->printError=printPackageError;
    469     ds->printErrorContext=stderr;
    470 
    471     headerLength=sizeof(header);
    472     if(length<headerLength) {
    473         headerLength=length;
    474     }
    475     headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    476     if(U_FAILURE(errorCode)) {
    477         exit(errorCode);
    478     }
    479 
    480     /* check data format and format version */
    481     pInfo=(const UDataInfo *)((const char *)inData+4);
    482     if(!(
    483         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
    484         pInfo->dataFormat[1]==0x6d &&
    485         pInfo->dataFormat[2]==0x6e &&
    486         pInfo->dataFormat[3]==0x44 &&
    487         pInfo->formatVersion[0]==1
    488     )) {
    489         fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
    490                 pInfo->dataFormat[0], pInfo->dataFormat[1],
    491                 pInfo->dataFormat[2], pInfo->dataFormat[3],
    492                 pInfo->formatVersion[0]);
    493         exit(U_UNSUPPORTED_ERROR);
    494     }
    495     inIsBigEndian=(UBool)pInfo->isBigEndian;
    496     inCharset=pInfo->charsetFamily;
    497 
    498     inBytes=(const uint8_t *)inData+headerLength;
    499     inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
    500 
    501     /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    502     length-=headerLength;
    503     if(length<4) {
    504         /* itemCount does not fit */
    505         offset=0x7fffffff;
    506     } else {
    507         itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
    508         setItemCapacity(itemCount); /* resize so there's space */
    509         if(itemCount==0) {
    510             offset=4;
    511         } else if(length<(4+8*itemCount)) {
    512             /* ToC table does not fit */
    513             offset=0x7fffffff;
    514         } else {
    515             /* offset of the last item plus at least 20 bytes for its header */
    516             offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
    517         }
    518     }
    519     if(length<offset) {
    520         fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
    521                         (long)length);
    522         exit(U_INDEX_OUTOFBOUNDS_ERROR);
    523     }
    524     /* do not modify the package length variable until the last item's length is set */
    525 
    526     if(itemCount>0) {
    527         char prefix[MAX_PKG_NAME_LENGTH+4];
    528         char *s, *inItemStrings;
    529         int32_t inPkgNameLength, prefixLength, stringsOffset;
    530 
    531         if(itemCount>itemMax) {
    532             fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
    533             exit(U_BUFFER_OVERFLOW_ERROR);
    534         }
    535 
    536         /* swap the item name strings */
    537         stringsOffset=4+8*itemCount;
    538         itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
    539 
    540         // don't include padding bytes at the end of the item names
    541         while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
    542             --itemLength;
    543         }
    544 
    545         if((inStringTop+itemLength)>STRING_STORE_SIZE) {
    546             fprintf(stderr, "icupkg: total length of item name strings too long\n");
    547             exit(U_BUFFER_OVERFLOW_ERROR);
    548         }
    549 
    550         inItemStrings=inStrings+inStringTop;
    551         ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
    552         if(U_FAILURE(errorCode)) {
    553             fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
    554             exit(U_INVALID_FORMAT_ERROR);
    555         }
    556         inStringTop+=itemLength;
    557 
    558         // reset the Item entries
    559         memset(items, 0, itemCount*sizeof(Item));
    560 
    561         inPkgNameLength=strlen(inPkgName);
    562         memcpy(prefix, inPkgName, inPkgNameLength);
    563         prefixLength=inPkgNameLength;
    564 
    565         /*
    566          * Get the common prefix of the items.
    567          * New-style ICU .dat packages use tree separators ('/') between package names,
    568          * tree names, and item names,
    569          * while old-style ICU .dat packages (before multi-tree support)
    570          * use an underscore ('_') between package and item names.
    571          */
    572         offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
    573         s=inItemStrings+offset;
    574         if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
    575             0==memcmp(s, inPkgName, inPkgNameLength) &&
    576             s[inPkgNameLength]=='_'
    577         ) {
    578             // old-style .dat package
    579             prefix[prefixLength++]='_';
    580         } else {
    581             // new-style .dat package
    582             prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
    583             // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
    584             // then the test in the loop below will fail
    585         }
    586         prefix[prefixLength]=0;
    587 
    588         /* read the ToC table */
    589         for(i=0; i<itemCount; ++i) {
    590             // skip the package part of the item name, error if it does not match the actual package name
    591             // or if nothing follows the package name
    592             offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
    593             s=inItemStrings+offset;
    594             if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
    595                 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
    596                         s, prefix);
    597                 exit(U_UNSUPPORTED_ERROR);
    598             }
    599             items[i].name=s+prefixLength;
    600 
    601             // set the item's data
    602             items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
    603             if(i>0) {
    604                 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
    605 
    606                 // set the previous item's platform type
    607                 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
    608                 if(typeEnum<0 || U_FAILURE(errorCode)) {
    609                     fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
    610                     exit(U_INVALID_FORMAT_ERROR);
    611                 }
    612                 items[i-1].type=makeTypeLetter(typeEnum);
    613             }
    614             items[i].isDataOwned=FALSE;
    615         }
    616         // set the last item's length
    617         items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
    618 
    619         // set the last item's platform type
    620         typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
    621         if(typeEnum<0 || U_FAILURE(errorCode)) {
    622             fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
    623             exit(U_INVALID_FORMAT_ERROR);
    624         }
    625         items[itemCount-1].type=makeTypeLetter(typeEnum);
    626 
    627         if(type!=U_ICUDATA_TYPE_LETTER[0]) {
    628             // sort the item names for the local charset
    629             sortItems();
    630         }
    631     }
    632 
    633     udata_closeSwapper(ds);
    634 }
    635 
    636 char
    637 Package::getInType() {
    638     return makeTypeLetter(inCharset, inIsBigEndian);
    639 }
    640 
    641 void
    642 Package::writePackage(const char *filename, char outType, const char *comment) {
    643     char prefix[MAX_PKG_NAME_LENGTH+4];
    644     UDataOffsetTOCEntry entry;
    645     UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
    646     FILE *file;
    647     Item *pItem;
    648     char *name;
    649     UErrorCode errorCode;
    650     int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
    651     uint8_t outCharset;
    652     UBool outIsBigEndian;
    653 
    654     extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
    655 
    656     // if there is an explicit comment, then use it, else use what's in the current header
    657     if(comment!=NULL) {
    658         /* get the header size minus the current comment */
    659         DataHeader *pHeader;
    660         int32_t length;
    661 
    662         pHeader=(DataHeader *)header;
    663         headerLength=4+pHeader->info.size;
    664         length=(int32_t)strlen(comment);
    665         if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
    666             fprintf(stderr, "icupkg: comment too long\n");
    667             exit(U_BUFFER_OVERFLOW_ERROR);
    668         }
    669         memcpy(header+headerLength, comment, length+1);
    670         headerLength+=length;
    671         if(headerLength&0xf) {
    672             /* NUL-pad the header to a multiple of 16 */
    673             length=(headerLength+0xf)&~0xf;
    674             memset(header+headerLength, 0, length-headerLength);
    675             headerLength=length;
    676         }
    677         pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    678     }
    679 
    680     makeTypeProps(outType, outCharset, outIsBigEndian);
    681 
    682     // open (TYPE_COUNT-2) swappers
    683     // one is a no-op for local type==outType
    684     // one type (TYPE_LE) is bogus
    685     errorCode=U_ZERO_ERROR;
    686     i=makeTypeEnum(outType);
    687     ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    688     ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    689     ds[TYPE_LE]=NULL;
    690     ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
    691     if(U_FAILURE(errorCode)) {
    692         fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
    693         exit(errorCode);
    694     }
    695     for(i=0; i<TYPE_COUNT; ++i) {
    696         if(ds[i]!=NULL) {
    697             ds[i]->printError=printPackageError;
    698             ds[i]->printErrorContext=stderr;
    699         }
    700     }
    701 
    702     dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
    703 
    704     // create the file and write its contents
    705     file=fopen(filename, "wb");
    706     if(file==NULL) {
    707         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
    708         exit(U_FILE_ACCESS_ERROR);
    709     }
    710 
    711     // swap and write the header
    712     if(dsLocalToOut!=NULL) {
    713         udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
    714         if(U_FAILURE(errorCode)) {
    715             fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
    716             exit(errorCode);
    717         }
    718     }
    719     length=(int32_t)fwrite(header, 1, headerLength, file);
    720     if(length!=headerLength) {
    721         fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
    722         exit(U_FILE_ACCESS_ERROR);
    723     }
    724 
    725     // prepare and swap the package name with a tree separator
    726     // for prepending to item names
    727     strcat(prefix, U_TREE_ENTRY_SEP_STRING);
    728     prefixLength=(int32_t)strlen(prefix);
    729     if(dsLocalToOut!=NULL) {
    730         dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
    731         if(U_FAILURE(errorCode)) {
    732             fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
    733             exit(errorCode);
    734         }
    735 
    736         // swap and sort the item names (sorting needs to be done in the output charset)
    737         dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
    738         if(U_FAILURE(errorCode)) {
    739             fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
    740             exit(errorCode);
    741         }
    742         sortItems();
    743     }
    744 
    745     // create the output item names in sorted order, with the package name prepended to each
    746     for(i=0; i<itemCount; ++i) {
    747         length=(int32_t)strlen(items[i].name);
    748         name=allocString(FALSE, length+prefixLength);
    749         memcpy(name, prefix, prefixLength);
    750         memcpy(name+prefixLength, items[i].name, length+1);
    751         items[i].name=name;
    752     }
    753 
    754     // calculate offsets for item names and items, pad to 16-align items
    755     // align only the first item; each item's length is a multiple of 16
    756     basenameOffset=4+8*itemCount;
    757     offset=basenameOffset+outStringTop;
    758     if((length=(offset&15))!=0) {
    759         length=16-length;
    760         memset(allocString(FALSE, length-1), 0xaa, length);
    761         offset+=length;
    762     }
    763 
    764     // write the table of contents
    765     // first the itemCount
    766     outInt32=itemCount;
    767     if(dsLocalToOut!=NULL) {
    768         dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
    769         if(U_FAILURE(errorCode)) {
    770             fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
    771             exit(errorCode);
    772         }
    773     }
    774     length=(int32_t)fwrite(&outInt32, 1, 4, file);
    775     if(length!=4) {
    776         fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
    777         exit(U_FILE_ACCESS_ERROR);
    778     }
    779 
    780     // then write the item entries (and collect the maxItemLength)
    781     maxItemLength=0;
    782     for(i=0; i<itemCount; ++i) {
    783         entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
    784         entry.dataOffset=(uint32_t)offset;
    785         if(dsLocalToOut!=NULL) {
    786             dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
    787             if(U_FAILURE(errorCode)) {
    788                 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
    789                 exit(errorCode);
    790             }
    791         }
    792         length=(int32_t)fwrite(&entry, 1, 8, file);
    793         if(length!=8) {
    794             fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
    795             exit(U_FILE_ACCESS_ERROR);
    796         }
    797 
    798         length=items[i].length;
    799         if(length>maxItemLength) {
    800             maxItemLength=length;
    801         }
    802         offset+=length;
    803     }
    804 
    805     // write the item names
    806     length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
    807     if(length!=outStringTop) {
    808         fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
    809         exit(U_FILE_ACCESS_ERROR);
    810     }
    811 
    812     // write the items
    813     for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
    814         int32_t type=makeTypeEnum(pItem->type);
    815         if(ds[type]!=NULL) {
    816             // swap each item from its platform properties to the desired ones
    817             udata_swap(
    818                 ds[type],
    819                 pItem->data, pItem->length, pItem->data,
    820                 &errorCode);
    821             if(U_FAILURE(errorCode)) {
    822                 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
    823                 exit(errorCode);
    824             }
    825         }
    826         length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
    827         if(length!=pItem->length) {
    828             fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
    829             exit(U_FILE_ACCESS_ERROR);
    830         }
    831     }
    832 
    833     if(ferror(file)) {
    834         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
    835         exit(U_FILE_ACCESS_ERROR);
    836     }
    837 
    838     fclose(file);
    839     for(i=0; i<TYPE_COUNT; ++i) {
    840         udata_closeSwapper(ds[i]);
    841     }
    842 }
    843 
    844 int32_t
    845 Package::findItem(const char *name, int32_t length) const {
    846     int32_t i, start, limit;
    847     int result;
    848 
    849     /* do a binary search for the string */
    850     start=0;
    851     limit=itemCount;
    852     while(start<limit) {
    853         i=(start+limit)/2;
    854         if(length>=0) {
    855             result=strncmp(name, items[i].name, length);
    856         } else {
    857             result=strcmp(name, items[i].name);
    858         }
    859 
    860         if(result==0) {
    861             /* found */
    862             if(length>=0) {
    863                 /*
    864                  * if we compared just prefixes, then we may need to back up
    865                  * to the first item with this prefix
    866                  */
    867                 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
    868                     --i;
    869                 }
    870             }
    871             return i;
    872         } else if(result<0) {
    873             limit=i;
    874         } else /* result>0 */ {
    875             start=i+1;
    876         }
    877     }
    878 
    879     return ~start; /* not found, return binary-not of the insertion point */
    880 }
    881 
    882 void
    883 Package::findItems(const char *pattern) {
    884     const char *wild;
    885 
    886     if(pattern==NULL || *pattern==0) {
    887         findNextIndex=-1;
    888         return;
    889     }
    890 
    891     findPrefix=pattern;
    892     findSuffix=NULL;
    893     findSuffixLength=0;
    894 
    895     wild=strchr(pattern, '*');
    896     if(wild==NULL) {
    897         // no wildcard
    898         findPrefixLength=(int32_t)strlen(pattern);
    899     } else {
    900         // one wildcard
    901         findPrefixLength=(int32_t)(wild-pattern);
    902         findSuffix=wild+1;
    903         findSuffixLength=(int32_t)strlen(findSuffix);
    904         if(NULL!=strchr(findSuffix, '*')) {
    905             // two or more wildcards
    906             fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
    907             exit(U_PARSE_ERROR);
    908         }
    909     }
    910 
    911     if(findPrefixLength==0) {
    912         findNextIndex=0;
    913     } else {
    914         findNextIndex=findItem(findPrefix, findPrefixLength);
    915     }
    916 }
    917 
    918 int32_t
    919 Package::findNextItem() {
    920     const char *name, *middle, *treeSep;
    921     int32_t idx, nameLength, middleLength;
    922 
    923     if(findNextIndex<0) {
    924         return -1;
    925     }
    926 
    927     while(findNextIndex<itemCount) {
    928         idx=findNextIndex++;
    929         name=items[idx].name;
    930         nameLength=(int32_t)strlen(name);
    931         if(nameLength<(findPrefixLength+findSuffixLength)) {
    932             // item name too short for prefix & suffix
    933             continue;
    934         }
    935         if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
    936             // left the range of names with this prefix
    937             break;
    938         }
    939         middle=name+findPrefixLength;
    940         middleLength=nameLength-findPrefixLength-findSuffixLength;
    941         if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
    942             // suffix does not match
    943             continue;
    944         }
    945         // prefix & suffix match
    946 
    947         if(matchMode&MATCH_NOSLASH) {
    948             treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
    949             if(treeSep!=NULL && (treeSep-middle)<middleLength) {
    950                 // the middle (matching the * wildcard) contains a tree separator /
    951                 continue;
    952             }
    953         }
    954 
    955         // found a matching item
    956         return idx;
    957     }
    958 
    959     // no more items
    960     findNextIndex=-1;
    961     return -1;
    962 }
    963 
    964 void
    965 Package::setMatchMode(uint32_t mode) {
    966     matchMode=mode;
    967 }
    968 
    969 void
    970 Package::addItem(const char *name) {
    971     addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
    972 }
    973 
    974 void
    975 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
    976     int32_t idx;
    977 
    978     idx=findItem(name);
    979     if(idx<0) {
    980         // new item, make space at the insertion point
    981         ensureItemCapacity();
    982         // move the following items down
    983         idx=~idx;
    984         if(idx<itemCount) {
    985             memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
    986         }
    987         ++itemCount;
    988 
    989         // reset this Item entry
    990         memset(items+idx, 0, sizeof(Item));
    991 
    992         // copy the item's name
    993         items[idx].name=allocString(TRUE, strlen(name));
    994         strcpy(items[idx].name, name);
    995         pathToTree(items[idx].name);
    996     } else {
    997         // same-name item found, replace it
    998         if(items[idx].isDataOwned) {
    999             free(items[idx].data);
   1000         }
   1001 
   1002         // keep the item's name since it is the same
   1003     }
   1004 
   1005     // set the item's data
   1006     items[idx].data=data;
   1007     items[idx].length=length;
   1008     items[idx].isDataOwned=isDataOwned;
   1009     items[idx].type=type;
   1010 }
   1011 
   1012 void
   1013 Package::addFile(const char *filesPath, const char *name) {
   1014     uint8_t *data;
   1015     int32_t length;
   1016     char type;
   1017 
   1018     data=readFile(filesPath, name, length, type);
   1019     // readFile() exits the tool if it fails
   1020     addItem(name, data, length, TRUE, type);
   1021 }
   1022 
   1023 void
   1024 Package::addItems(const Package &listPkg) {
   1025     const Item *pItem;
   1026     int32_t i;
   1027 
   1028     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1029         addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
   1030     }
   1031 }
   1032 
   1033 void
   1034 Package::removeItem(int32_t idx) {
   1035     if(idx>=0) {
   1036         // remove the item
   1037         if(items[idx].isDataOwned) {
   1038             free(items[idx].data);
   1039         }
   1040 
   1041         // move the following items up
   1042         if((idx+1)<itemCount) {
   1043             memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
   1044         }
   1045         --itemCount;
   1046 
   1047         if(idx<=findNextIndex) {
   1048             --findNextIndex;
   1049         }
   1050     }
   1051 }
   1052 
   1053 void
   1054 Package::removeItems(const char *pattern) {
   1055     int32_t idx;
   1056 
   1057     findItems(pattern);
   1058     while((idx=findNextItem())>=0) {
   1059         removeItem(idx);
   1060     }
   1061 }
   1062 
   1063 void
   1064 Package::removeItems(const Package &listPkg) {
   1065     const Item *pItem;
   1066     int32_t i;
   1067 
   1068     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1069         removeItems(pItem->name);
   1070     }
   1071 }
   1072 
   1073 void
   1074 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
   1075     char filename[1024];
   1076     UDataSwapper *ds;
   1077     FILE *file;
   1078     Item *pItem;
   1079     int32_t fileLength;
   1080     uint8_t itemCharset, outCharset;
   1081     UBool itemIsBigEndian, outIsBigEndian;
   1082 
   1083     if(idx<0 || itemCount<=idx) {
   1084         return;
   1085     }
   1086     pItem=items+idx;
   1087 
   1088     // swap the data to the outType
   1089     // outType==0: don't swap
   1090     if(outType!=0 && pItem->type!=outType) {
   1091         // open the swapper
   1092         UErrorCode errorCode=U_ZERO_ERROR;
   1093         makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
   1094         makeTypeProps(outType, outCharset, outIsBigEndian);
   1095         ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
   1096         if(U_FAILURE(errorCode)) {
   1097             fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
   1098                     (long)idx, u_errorName(errorCode));
   1099             exit(errorCode);
   1100         }
   1101 
   1102         ds->printError=printPackageError;
   1103         ds->printErrorContext=stderr;
   1104 
   1105         // swap the item from its platform properties to the desired ones
   1106         udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
   1107         if(U_FAILURE(errorCode)) {
   1108             fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
   1109             exit(errorCode);
   1110         }
   1111         udata_closeSwapper(ds);
   1112         pItem->type=outType;
   1113     }
   1114 
   1115     // create the file and write its contents
   1116     makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
   1117     file=fopen(filename, "wb");
   1118     if(file==NULL) {
   1119         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
   1120         exit(U_FILE_ACCESS_ERROR);
   1121     }
   1122     fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
   1123 
   1124     if(ferror(file) || fileLength!=pItem->length) {
   1125         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
   1126         exit(U_FILE_ACCESS_ERROR);
   1127     }
   1128     fclose(file);
   1129 }
   1130 
   1131 void
   1132 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
   1133     extractItem(filesPath, items[idx].name, idx, outType);
   1134 }
   1135 
   1136 void
   1137 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
   1138     int32_t idx;
   1139 
   1140     findItems(pattern);
   1141     while((idx=findNextItem())>=0) {
   1142         extractItem(filesPath, idx, outType);
   1143     }
   1144 }
   1145 
   1146 void
   1147 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
   1148     const Item *pItem;
   1149     int32_t i;
   1150 
   1151     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1152         extractItems(filesPath, pItem->name, outType);
   1153     }
   1154 }
   1155 
   1156 int32_t
   1157 Package::getItemCount() const {
   1158     return itemCount;
   1159 }
   1160 
   1161 const Item *
   1162 Package::getItem(int32_t idx) const {
   1163     if (0 <= idx && idx < itemCount) {
   1164         return &items[idx];
   1165     }
   1166     return NULL;
   1167 }
   1168 
   1169 void
   1170 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
   1171     // check dependency: make sure the target item is in the package
   1172     Package *me=(Package *)context;
   1173     if(me->findItem(targetName)<0) {
   1174         me->isMissingItems=TRUE;
   1175         fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
   1176     }
   1177 }
   1178 
   1179 UBool
   1180 Package::checkDependencies() {
   1181     isMissingItems=FALSE;
   1182     enumDependencies(this, checkDependency);
   1183     return (UBool)!isMissingItems;
   1184 }
   1185 
   1186 void
   1187 Package::enumDependencies(void *context, CheckDependency check) {
   1188     int32_t i;
   1189 
   1190     for(i=0; i<itemCount; ++i) {
   1191         enumDependencies(items+i, context, check);
   1192     }
   1193 }
   1194 
   1195 char *
   1196 Package::allocString(UBool in, int32_t length) {
   1197     char *p;
   1198     int32_t top;
   1199 
   1200     if(in) {
   1201         top=inStringTop;
   1202         p=inStrings+top;
   1203     } else {
   1204         top=outStringTop;
   1205         p=outStrings+top;
   1206     }
   1207     top+=length+1;
   1208 
   1209     if(top>STRING_STORE_SIZE) {
   1210         fprintf(stderr, "icupkg: string storage overflow\n");
   1211         exit(U_BUFFER_OVERFLOW_ERROR);
   1212     }
   1213     if(in) {
   1214         inStringTop=top;
   1215     } else {
   1216         outStringTop=top;
   1217     }
   1218     return p;
   1219 }
   1220 
   1221 void
   1222 Package::sortItems() {
   1223     UErrorCode errorCode=U_ZERO_ERROR;
   1224     uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
   1225     if(U_FAILURE(errorCode)) {
   1226         fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
   1227         exit(errorCode);
   1228     }
   1229 }
   1230 
   1231 void Package::setItemCapacity(int32_t max)
   1232 {
   1233   if(max<=itemMax) {
   1234     return;
   1235   }
   1236   Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
   1237   Item *oldItems = items;
   1238   if(newItems == NULL) {
   1239     fprintf(stderr, "icupkg: Out of memory trying to allocate %ld bytes for %d items\n", max*sizeof(items[0]), max);
   1240     exit(U_MEMORY_ALLOCATION_ERROR);
   1241   }
   1242   if(items && itemCount>0) {
   1243     uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
   1244   }
   1245   itemMax = max;
   1246   items = newItems;
   1247   uprv_free(oldItems);
   1248 }
   1249 
   1250 void Package::ensureItemCapacity()
   1251 {
   1252   if((itemCount+1)>itemMax) {
   1253     setItemCapacity(itemCount+kItemsChunk);
   1254   }
   1255 }
   1256 
   1257 U_NAMESPACE_END
   1258