Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  package.cpp
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2005aug25
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Read, modify, and write ICU .dat data package files.
     17 *   This is an integral part of the icupkg tool, moved to the toolutil library
     18 *   because parts of tool implementations tend to be later shared by
     19 *   other tools.
     20 *   Subsumes functionality and implementation code from
     21 *   gencmn, decmn, and icuswap tools.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/putil.h"
     26 #include "unicode/udata.h"
     27 #include "cstring.h"
     28 #include "uarrsort.h"
     29 #include "ucmndata.h"
     30 #include "udataswp.h"
     31 #include "swapimpl.h"
     32 #include "toolutil.h"
     33 #include "package.h"
     34 
     35 #include <stdio.h>
     36 #include <stdlib.h>
     37 #include <string.h>
     38 
     39 // general definitions ----------------------------------------------------- ***
     40 
     41 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     42 
     43 /* UDataInfo cf. udata.h */
     44 static const UDataInfo dataInfo={
     45     (uint16_t)sizeof(UDataInfo),
     46     0,
     47 
     48     U_IS_BIG_ENDIAN,
     49     U_CHARSET_FAMILY,
     50     (uint8_t)sizeof(UChar),
     51     0,
     52 
     53     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
     54     {1, 0, 0, 0},                 /* formatVersion */
     55     {3, 0, 0, 0}                  /* dataVersion */
     56 };
     57 
     58 U_CDECL_BEGIN
     59 static void U_CALLCONV
     60 printPackageError(void *context, const char *fmt, va_list args) {
     61     vfprintf((FILE *)context, fmt, args);
     62 }
     63 U_CDECL_END
     64 
     65 static uint16_t
     66 readSwapUInt16(uint16_t x) {
     67     return (uint16_t)((x<<8)|(x>>8));
     68 }
     69 
     70 // platform types ---------------------------------------------------------- ***
     71 
     72 static const char *types="lb?e";
     73 
     74 enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
     75 
     76 static inline int32_t
     77 makeTypeEnum(uint8_t charset, UBool isBigEndian) {
     78     return 2*(int32_t)charset+isBigEndian;
     79 }
     80 
     81 static inline int32_t
     82 makeTypeEnum(char type) {
     83     return
     84         type == 'l' ? TYPE_L :
     85         type == 'b' ? TYPE_B :
     86         type == 'e' ? TYPE_E :
     87                -1;
     88 }
     89 
     90 static inline char
     91 makeTypeLetter(uint8_t charset, UBool isBigEndian) {
     92     return types[makeTypeEnum(charset, isBigEndian)];
     93 }
     94 
     95 static inline char
     96 makeTypeLetter(int32_t typeEnum) {
     97     return types[typeEnum];
     98 }
     99 
    100 static void
    101 makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
    102     int32_t typeEnum=makeTypeEnum(type);
    103     charset=(uint8_t)(typeEnum>>1);
    104     isBigEndian=(UBool)(typeEnum&1);
    105 }
    106 
    107 U_CFUNC const UDataInfo *
    108 getDataInfo(const uint8_t *data, int32_t length,
    109             int32_t &infoLength, int32_t &headerLength,
    110             UErrorCode *pErrorCode) {
    111     const DataHeader *pHeader;
    112     const UDataInfo *pInfo;
    113 
    114     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    115         return NULL;
    116     }
    117     if( data==NULL ||
    118         (length>=0 && length<(int32_t)sizeof(DataHeader))
    119     ) {
    120         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    121         return NULL;
    122     }
    123 
    124     pHeader=(const DataHeader *)data;
    125     pInfo=&pHeader->info;
    126     if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
    127         pHeader->dataHeader.magic1!=0xda ||
    128         pHeader->dataHeader.magic2!=0x27 ||
    129         pInfo->sizeofUChar!=2
    130     ) {
    131         *pErrorCode=U_UNSUPPORTED_ERROR;
    132         return NULL;
    133     }
    134 
    135     if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
    136         headerLength=pHeader->dataHeader.headerSize;
    137         infoLength=pInfo->size;
    138     } else {
    139         headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
    140         infoLength=readSwapUInt16(pInfo->size);
    141     }
    142 
    143     if( headerLength<(int32_t)sizeof(DataHeader) ||
    144         infoLength<(int32_t)sizeof(UDataInfo) ||
    145         headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
    146         (length>=0 && length<headerLength)
    147     ) {
    148         *pErrorCode=U_UNSUPPORTED_ERROR;
    149         return NULL;
    150     }
    151 
    152     return pInfo;
    153 }
    154 
    155 static int32_t
    156 getTypeEnumForInputData(const uint8_t *data, int32_t length,
    157                         UErrorCode *pErrorCode) {
    158     const UDataInfo *pInfo;
    159     int32_t infoLength, headerLength;
    160 
    161     /* getDataInfo() checks for illegal arguments */
    162     pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
    163     if(pInfo==NULL) {
    164         return -1;
    165     }
    166 
    167     return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
    168 }
    169 
    170 // file handling ----------------------------------------------------------- ***
    171 
    172 static void
    173 extractPackageName(const char *filename,
    174                    char pkg[], int32_t capacity) {
    175     const char *basename;
    176     int32_t len;
    177 
    178     basename=findBasename(filename);
    179     len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
    180 
    181     if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
    182         fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
    183                          basename);
    184         exit(U_ILLEGAL_ARGUMENT_ERROR);
    185     }
    186 
    187     if(len>=capacity) {
    188         fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
    189                          basename, (long)capacity);
    190         exit(U_ILLEGAL_ARGUMENT_ERROR);
    191     }
    192 
    193     memcpy(pkg, basename, len);
    194     pkg[len]=0;
    195 }
    196 
    197 static int32_t
    198 getFileLength(FILE *f) {
    199     int32_t length;
    200 
    201     fseek(f, 0, SEEK_END);
    202     length=(int32_t)ftell(f);
    203     fseek(f, 0, SEEK_SET);
    204     return length;
    205 }
    206 
    207 /*
    208  * Turn tree separators and alternate file separators into normal file separators.
    209  */
    210 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
    211 #define treeToPath(s)
    212 #else
    213 static void
    214 treeToPath(char *s) {
    215     char *t;
    216 
    217     for(t=s; *t!=0; ++t) {
    218         if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
    219             *t=U_FILE_SEP_CHAR;
    220         }
    221     }
    222 }
    223 #endif
    224 
    225 /*
    226  * Turn file separators into tree separators.
    227  */
    228 #if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
    229 #define pathToTree(s)
    230 #else
    231 static void
    232 pathToTree(char *s) {
    233     char *t;
    234 
    235     for(t=s; *t!=0; ++t) {
    236         if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) {
    237             *t=U_TREE_ENTRY_SEP_CHAR;
    238         }
    239     }
    240 }
    241 #endif
    242 
    243 /*
    244  * Prepend the path (if any) to the name and run the name through treeToName().
    245  */
    246 static void
    247 makeFullFilename(const char *path, const char *name,
    248                  char *filename, int32_t capacity) {
    249     char *s;
    250 
    251     // prepend the path unless NULL or empty
    252     if(path!=NULL && path[0]!=0) {
    253         if((int32_t)(strlen(path)+1)>=capacity) {
    254             fprintf(stderr, "pathname too long: \"%s\"\n", path);
    255             exit(U_BUFFER_OVERFLOW_ERROR);
    256         }
    257         strcpy(filename, path);
    258 
    259         // make sure the path ends with a file separator
    260         s=strchr(filename, 0);
    261         if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
    262             *s++=U_FILE_SEP_CHAR;
    263         }
    264     } else {
    265         s=filename;
    266     }
    267 
    268     // turn the name into a filename, turn tree separators into file separators
    269     if((int32_t)((s-filename)+strlen(name))>=capacity) {
    270         fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
    271         exit(U_BUFFER_OVERFLOW_ERROR);
    272     }
    273     strcpy(s, name);
    274     treeToPath(s);
    275 }
    276 
    277 static void
    278 makeFullFilenameAndDirs(const char *path, const char *name,
    279                         char *filename, int32_t capacity) {
    280     char *sep;
    281     UErrorCode errorCode;
    282 
    283     makeFullFilename(path, name, filename, capacity);
    284 
    285     // make tree directories
    286     errorCode=U_ZERO_ERROR;
    287     sep=strchr(filename, 0)-strlen(name);
    288     while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
    289         if(sep!=filename) {
    290             *sep=0;                 // truncate temporarily
    291             uprv_mkdir(filename, &errorCode);
    292             if(U_FAILURE(errorCode)) {
    293                 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
    294                 exit(U_FILE_ACCESS_ERROR);
    295             }
    296         }
    297         *sep++=U_FILE_SEP_CHAR; // restore file separator character
    298     }
    299 }
    300 
    301 static uint8_t *
    302 readFile(const char *path, const char *name, int32_t &length, char &type) {
    303     char filename[1024];
    304     FILE *file;
    305     uint8_t *data;
    306     UErrorCode errorCode;
    307     int32_t fileLength, typeEnum;
    308 
    309     makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
    310 
    311     /* open the input file, get its length, allocate memory for it, read the file */
    312     file=fopen(filename, "rb");
    313     if(file==NULL) {
    314         fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
    315         exit(U_FILE_ACCESS_ERROR);
    316     }
    317 
    318     /* get the file length */
    319     fileLength=getFileLength(file);
    320     if(ferror(file) || fileLength<=0) {
    321         fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
    322         fclose(file);
    323         exit(U_FILE_ACCESS_ERROR);
    324     }
    325 
    326     /* allocate the buffer, pad to multiple of 16 */
    327     length=(fileLength+0xf)&~0xf;
    328     data=(uint8_t *)malloc(length);
    329     if(data==NULL) {
    330         fclose(file);
    331         exit(U_MEMORY_ALLOCATION_ERROR);
    332     }
    333 
    334     /* read the file */
    335     if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
    336         fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
    337         fclose(file);
    338         free(data);
    339         exit(U_FILE_ACCESS_ERROR);
    340     }
    341 
    342     /* pad the file to a multiple of 16 using the usual padding byte */
    343     if(fileLength<length) {
    344         memset(data+fileLength, 0xaa, length-fileLength);
    345     }
    346 
    347     fclose(file);
    348 
    349     // minimum check for ICU-format data
    350     errorCode=U_ZERO_ERROR;
    351     typeEnum=getTypeEnumForInputData(data, length, &errorCode);
    352     if(typeEnum<0 || U_FAILURE(errorCode)) {
    353         fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
    354         free(data);
    355 #if !UCONFIG_NO_LEGACY_CONVERSION
    356         exit(U_INVALID_FORMAT_ERROR);
    357 #else
    358         fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
    359         exit(0);
    360 #endif
    361     }
    362     type=makeTypeLetter(typeEnum);
    363 
    364     return data;
    365 }
    366 
    367 // .dat package file representation ---------------------------------------- ***
    368 
    369 U_CDECL_BEGIN
    370 
    371 static int32_t U_CALLCONV
    372 compareItems(const void * /*context*/, const void *left, const void *right) {
    373     U_NAMESPACE_USE
    374 
    375     return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
    376 }
    377 
    378 U_CDECL_END
    379 
    380 U_NAMESPACE_BEGIN
    381 
    382 Package::Package() {
    383     inPkgName[0]=0;
    384     inData=NULL;
    385     inLength=0;
    386     inCharset=U_CHARSET_FAMILY;
    387     inIsBigEndian=U_IS_BIG_ENDIAN;
    388 
    389     itemCount=0;
    390     inStringTop=outStringTop=0;
    391 
    392     matchMode=0;
    393     findPrefix=findSuffix=NULL;
    394     findPrefixLength=findSuffixLength=0;
    395     findNextIndex=-1;
    396 
    397     // create a header for an empty package
    398     DataHeader *pHeader;
    399     pHeader=(DataHeader *)header;
    400     pHeader->dataHeader.magic1=0xda;
    401     pHeader->dataHeader.magic2=0x27;
    402     memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
    403     headerLength=(int32_t)(4+sizeof(dataInfo));
    404     if(headerLength&0xf) {
    405         /* NUL-pad the header to a multiple of 16 */
    406         int32_t length=(headerLength+0xf)&~0xf;
    407         memset(header+headerLength, 0, length-headerLength);
    408         headerLength=length;
    409     }
    410     pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    411 }
    412 
    413 Package::~Package() {
    414     int32_t idx;
    415 
    416     free(inData);
    417 
    418     for(idx=0; idx<itemCount; ++idx) {
    419         if(items[idx].isDataOwned) {
    420             free(items[idx].data);
    421         }
    422     }
    423 }
    424 
    425 void
    426 Package::readPackage(const char *filename) {
    427     UDataSwapper *ds;
    428     const UDataInfo *pInfo;
    429     UErrorCode errorCode;
    430 
    431     const uint8_t *inBytes;
    432 
    433     int32_t length, offset, i;
    434     int32_t itemLength, typeEnum;
    435     char type;
    436 
    437     const UDataOffsetTOCEntry *inEntries;
    438 
    439     extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
    440 
    441     /* read the file */
    442     inData=readFile(NULL, filename, inLength, type);
    443     length=inLength;
    444 
    445     /*
    446      * swap the header - even if the swapping itself is a no-op
    447      * because it tells us the header length
    448      */
    449     errorCode=U_ZERO_ERROR;
    450     makeTypeProps(type, inCharset, inIsBigEndian);
    451     ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
    452     if(U_FAILURE(errorCode)) {
    453         fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
    454                 filename, u_errorName(errorCode));
    455         exit(errorCode);
    456     }
    457 
    458     ds->printError=printPackageError;
    459     ds->printErrorContext=stderr;
    460 
    461     headerLength=sizeof(header);
    462     if(length<headerLength) {
    463         headerLength=length;
    464     }
    465     headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
    466     if(U_FAILURE(errorCode)) {
    467         exit(errorCode);
    468     }
    469 
    470     /* check data format and format version */
    471     pInfo=(const UDataInfo *)((const char *)inData+4);
    472     if(!(
    473         pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
    474         pInfo->dataFormat[1]==0x6d &&
    475         pInfo->dataFormat[2]==0x6e &&
    476         pInfo->dataFormat[3]==0x44 &&
    477         pInfo->formatVersion[0]==1
    478     )) {
    479         fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
    480                 pInfo->dataFormat[0], pInfo->dataFormat[1],
    481                 pInfo->dataFormat[2], pInfo->dataFormat[3],
    482                 pInfo->formatVersion[0]);
    483         exit(U_UNSUPPORTED_ERROR);
    484     }
    485     inIsBigEndian=(UBool)pInfo->isBigEndian;
    486     inCharset=pInfo->charsetFamily;
    487 
    488     inBytes=(const uint8_t *)inData+headerLength;
    489     inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
    490 
    491     /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
    492     length-=headerLength;
    493     if(length<4) {
    494         /* itemCount does not fit */
    495         offset=0x7fffffff;
    496     } else {
    497         itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
    498         if(itemCount==0) {
    499             offset=4;
    500         } else if(length<(4+8*itemCount)) {
    501             /* ToC table does not fit */
    502             offset=0x7fffffff;
    503         } else {
    504             /* offset of the last item plus at least 20 bytes for its header */
    505             offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
    506         }
    507     }
    508     if(length<offset) {
    509         fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
    510                         (long)length);
    511         exit(U_INDEX_OUTOFBOUNDS_ERROR);
    512     }
    513     /* do not modify the package length variable until the last item's length is set */
    514 
    515     if(itemCount>0) {
    516         char prefix[MAX_PKG_NAME_LENGTH+4];
    517         char *s, *inItemStrings;
    518         int32_t inPkgNameLength, prefixLength, stringsOffset;
    519 
    520         if(itemCount>MAX_FILE_COUNT) {
    521             fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
    522             exit(U_BUFFER_OVERFLOW_ERROR);
    523         }
    524 
    525         /* swap the item name strings */
    526         stringsOffset=4+8*itemCount;
    527         itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
    528 
    529         // don't include padding bytes at the end of the item names
    530         while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
    531             --itemLength;
    532         }
    533 
    534         if((inStringTop+itemLength)>STRING_STORE_SIZE) {
    535             fprintf(stderr, "icupkg: total length of item name strings too long\n");
    536             exit(U_BUFFER_OVERFLOW_ERROR);
    537         }
    538 
    539         inItemStrings=inStrings+inStringTop;
    540         ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
    541         if(U_FAILURE(errorCode)) {
    542             fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
    543             exit(U_INVALID_FORMAT_ERROR);
    544         }
    545         inStringTop+=itemLength;
    546 
    547         // reset the Item entries
    548         memset(items, 0, itemCount*sizeof(Item));
    549 
    550         inPkgNameLength=strlen(inPkgName);
    551         memcpy(prefix, inPkgName, inPkgNameLength);
    552         prefixLength=inPkgNameLength;
    553 
    554         /*
    555          * Get the common prefix of the items.
    556          * New-style ICU .dat packages use tree separators ('/') between package names,
    557          * tree names, and item names,
    558          * while old-style ICU .dat packages (before multi-tree support)
    559          * use an underscore ('_') between package and item names.
    560          */
    561         offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
    562         s=inItemStrings+offset;
    563         if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
    564             0==memcmp(s, inPkgName, inPkgNameLength) &&
    565             s[inPkgNameLength]=='_'
    566         ) {
    567             // old-style .dat package
    568             prefix[prefixLength++]='_';
    569         } else {
    570             // new-style .dat package
    571             prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
    572             // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
    573             // then the test in the loop below will fail
    574         }
    575         prefix[prefixLength]=0;
    576 
    577         /* read the ToC table */
    578         for(i=0; i<itemCount; ++i) {
    579             // skip the package part of the item name, error if it does not match the actual package name
    580             // or if nothing follows the package name
    581             offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
    582             s=inItemStrings+offset;
    583             if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
    584                 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
    585                         s, prefix);
    586                 exit(U_UNSUPPORTED_ERROR);
    587             }
    588             items[i].name=s+prefixLength;
    589 
    590             // set the item's data
    591             items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
    592             if(i>0) {
    593                 items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
    594 
    595                 // set the previous item's platform type
    596                 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
    597                 if(typeEnum<0 || U_FAILURE(errorCode)) {
    598                     fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
    599                     exit(U_INVALID_FORMAT_ERROR);
    600                 }
    601                 items[i-1].type=makeTypeLetter(typeEnum);
    602             }
    603             items[i].isDataOwned=FALSE;
    604         }
    605         // set the last item's length
    606         items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
    607 
    608         // set the last item's platform type
    609         typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
    610         if(typeEnum<0 || U_FAILURE(errorCode)) {
    611             fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
    612             exit(U_INVALID_FORMAT_ERROR);
    613         }
    614         items[itemCount-1].type=makeTypeLetter(typeEnum);
    615 
    616         if(type!=U_ICUDATA_TYPE_LETTER[0]) {
    617             // sort the item names for the local charset
    618             sortItems();
    619         }
    620     }
    621 
    622     udata_closeSwapper(ds);
    623 }
    624 
    625 char
    626 Package::getInType() {
    627     return makeTypeLetter(inCharset, inIsBigEndian);
    628 }
    629 
    630 void
    631 Package::writePackage(const char *filename, char outType, const char *comment) {
    632     char prefix[MAX_PKG_NAME_LENGTH+4];
    633     UDataOffsetTOCEntry entry;
    634     UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
    635     FILE *file;
    636     Item *pItem;
    637     char *name;
    638     UErrorCode errorCode;
    639     int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
    640     uint8_t outCharset;
    641     UBool outIsBigEndian;
    642 
    643     extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
    644 
    645     // if there is an explicit comment, then use it, else use what's in the current header
    646     if(comment!=NULL) {
    647         /* get the header size minus the current comment */
    648         DataHeader *pHeader;
    649         int32_t length;
    650 
    651         pHeader=(DataHeader *)header;
    652         headerLength=4+pHeader->info.size;
    653         length=(int32_t)strlen(comment);
    654         if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
    655             fprintf(stderr, "icupkg: comment too long\n");
    656             exit(U_BUFFER_OVERFLOW_ERROR);
    657         }
    658         memcpy(header+headerLength, comment, length+1);
    659         headerLength+=length;
    660         if(headerLength&0xf) {
    661             /* NUL-pad the header to a multiple of 16 */
    662             length=(headerLength+0xf)&~0xf;
    663             memset(header+headerLength, 0, length-headerLength);
    664             headerLength=length;
    665         }
    666         pHeader->dataHeader.headerSize=(uint16_t)headerLength;
    667     }
    668 
    669     makeTypeProps(outType, outCharset, outIsBigEndian);
    670 
    671     // open (TYPE_COUNT-2) swappers
    672     // one is a no-op for local type==outType
    673     // one type (TYPE_LE) is bogus
    674     errorCode=U_ZERO_ERROR;
    675     i=makeTypeEnum(outType);
    676     ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    677     ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
    678     ds[TYPE_LE]=NULL;
    679     ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
    680     if(U_FAILURE(errorCode)) {
    681         fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
    682         exit(errorCode);
    683     }
    684     for(i=0; i<TYPE_COUNT; ++i) {
    685         if(ds[i]!=NULL) {
    686             ds[i]->printError=printPackageError;
    687             ds[i]->printErrorContext=stderr;
    688         }
    689     }
    690 
    691     dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
    692 
    693     // create the file and write its contents
    694     file=fopen(filename, "wb");
    695     if(file==NULL) {
    696         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
    697         exit(U_FILE_ACCESS_ERROR);
    698     }
    699 
    700     // swap and write the header
    701     if(dsLocalToOut!=NULL) {
    702         udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
    703         if(U_FAILURE(errorCode)) {
    704             fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
    705             exit(errorCode);
    706         }
    707     }
    708     length=(int32_t)fwrite(header, 1, headerLength, file);
    709     if(length!=headerLength) {
    710         fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
    711         exit(U_FILE_ACCESS_ERROR);
    712     }
    713 
    714     // prepare and swap the package name with a tree separator
    715     // for prepending to item names
    716     strcat(prefix, U_TREE_ENTRY_SEP_STRING);
    717     prefixLength=(int32_t)strlen(prefix);
    718     if(dsLocalToOut!=NULL) {
    719         dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
    720         if(U_FAILURE(errorCode)) {
    721             fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
    722             exit(errorCode);
    723         }
    724 
    725         // swap and sort the item names (sorting needs to be done in the output charset)
    726         dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
    727         if(U_FAILURE(errorCode)) {
    728             fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
    729             exit(errorCode);
    730         }
    731         sortItems();
    732     }
    733 
    734     // create the output item names in sorted order, with the package name prepended to each
    735     for(i=0; i<itemCount; ++i) {
    736         length=(int32_t)strlen(items[i].name);
    737         name=allocString(FALSE, length+prefixLength);
    738         memcpy(name, prefix, prefixLength);
    739         memcpy(name+prefixLength, items[i].name, length+1);
    740         items[i].name=name;
    741     }
    742 
    743     // calculate offsets for item names and items, pad to 16-align items
    744     // align only the first item; each item's length is a multiple of 16
    745     basenameOffset=4+8*itemCount;
    746     offset=basenameOffset+outStringTop;
    747     if((length=(offset&15))!=0) {
    748         length=16-length;
    749         memset(allocString(FALSE, length-1), 0xaa, length);
    750         offset+=length;
    751     }
    752 
    753     // write the table of contents
    754     // first the itemCount
    755     outInt32=itemCount;
    756     if(dsLocalToOut!=NULL) {
    757         dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
    758         if(U_FAILURE(errorCode)) {
    759             fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
    760             exit(errorCode);
    761         }
    762     }
    763     length=(int32_t)fwrite(&outInt32, 1, 4, file);
    764     if(length!=4) {
    765         fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
    766         exit(U_FILE_ACCESS_ERROR);
    767     }
    768 
    769     // then write the item entries (and collect the maxItemLength)
    770     maxItemLength=0;
    771     for(i=0; i<itemCount; ++i) {
    772         entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
    773         entry.dataOffset=(uint32_t)offset;
    774         if(dsLocalToOut!=NULL) {
    775             dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
    776             if(U_FAILURE(errorCode)) {
    777                 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
    778                 exit(errorCode);
    779             }
    780         }
    781         length=(int32_t)fwrite(&entry, 1, 8, file);
    782         if(length!=8) {
    783             fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
    784             exit(U_FILE_ACCESS_ERROR);
    785         }
    786 
    787         length=items[i].length;
    788         if(length>maxItemLength) {
    789             maxItemLength=length;
    790         }
    791         offset+=length;
    792     }
    793 
    794     // write the item names
    795     length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
    796     if(length!=outStringTop) {
    797         fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
    798         exit(U_FILE_ACCESS_ERROR);
    799     }
    800 
    801     // write the items
    802     for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
    803         int32_t type=makeTypeEnum(pItem->type);
    804         if(ds[type]!=NULL) {
    805             // swap each item from its platform properties to the desired ones
    806             udata_swap(
    807                 ds[type],
    808                 pItem->data, pItem->length, pItem->data,
    809                 &errorCode);
    810             if(U_FAILURE(errorCode)) {
    811                 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
    812                 exit(errorCode);
    813             }
    814         }
    815         length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
    816         if(length!=pItem->length) {
    817             fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
    818             exit(U_FILE_ACCESS_ERROR);
    819         }
    820     }
    821 
    822     if(ferror(file)) {
    823         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
    824         exit(U_FILE_ACCESS_ERROR);
    825     }
    826 
    827     fclose(file);
    828     for(i=0; i<TYPE_COUNT; ++i) {
    829         udata_closeSwapper(ds[i]);
    830     }
    831 }
    832 
    833 int32_t
    834 Package::findItem(const char *name, int32_t length) const {
    835     int32_t i, start, limit;
    836     int result;
    837 
    838     /* do a binary search for the string */
    839     start=0;
    840     limit=itemCount;
    841     while(start<limit) {
    842         i=(start+limit)/2;
    843         if(length>=0) {
    844             result=strncmp(name, items[i].name, length);
    845         } else {
    846             result=strcmp(name, items[i].name);
    847         }
    848 
    849         if(result==0) {
    850             /* found */
    851             if(length>=0) {
    852                 /*
    853                  * if we compared just prefixes, then we may need to back up
    854                  * to the first item with this prefix
    855                  */
    856                 while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
    857                     --i;
    858                 }
    859             }
    860             return i;
    861         } else if(result<0) {
    862             limit=i;
    863         } else /* result>0 */ {
    864             start=i+1;
    865         }
    866     }
    867 
    868     return ~start; /* not found, return binary-not of the insertion point */
    869 }
    870 
    871 void
    872 Package::findItems(const char *pattern) {
    873     const char *wild;
    874 
    875     if(pattern==NULL || *pattern==0) {
    876         findNextIndex=-1;
    877         return;
    878     }
    879 
    880     findPrefix=pattern;
    881     findSuffix=NULL;
    882     findSuffixLength=0;
    883 
    884     wild=strchr(pattern, '*');
    885     if(wild==NULL) {
    886         // no wildcard
    887         findPrefixLength=(int32_t)strlen(pattern);
    888     } else {
    889         // one wildcard
    890         findPrefixLength=(int32_t)(wild-pattern);
    891         findSuffix=wild+1;
    892         findSuffixLength=(int32_t)strlen(findSuffix);
    893         if(NULL!=strchr(findSuffix, '*')) {
    894             // two or more wildcards
    895             fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
    896             exit(U_PARSE_ERROR);
    897         }
    898     }
    899 
    900     if(findPrefixLength==0) {
    901         findNextIndex=0;
    902     } else {
    903         findNextIndex=findItem(findPrefix, findPrefixLength);
    904     }
    905 }
    906 
    907 int32_t
    908 Package::findNextItem() {
    909     const char *name, *middle, *treeSep;
    910     int32_t idx, nameLength, middleLength;
    911 
    912     if(findNextIndex<0) {
    913         return -1;
    914     }
    915 
    916     while(findNextIndex<itemCount) {
    917         idx=findNextIndex++;
    918         name=items[idx].name;
    919         nameLength=(int32_t)strlen(name);
    920         if(nameLength<(findPrefixLength+findSuffixLength)) {
    921             // item name too short for prefix & suffix
    922             continue;
    923         }
    924         if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
    925             // left the range of names with this prefix
    926             break;
    927         }
    928         middle=name+findPrefixLength;
    929         middleLength=nameLength-findPrefixLength-findSuffixLength;
    930         if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
    931             // suffix does not match
    932             continue;
    933         }
    934         // prefix & suffix match
    935 
    936         if(matchMode&MATCH_NOSLASH) {
    937             treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
    938             if(treeSep!=NULL && (treeSep-middle)<middleLength) {
    939                 // the middle (matching the * wildcard) contains a tree separator /
    940                 continue;
    941             }
    942         }
    943 
    944         // found a matching item
    945         return idx;
    946     }
    947 
    948     // no more items
    949     findNextIndex=-1;
    950     return -1;
    951 }
    952 
    953 void
    954 Package::setMatchMode(uint32_t mode) {
    955     matchMode=mode;
    956 }
    957 
    958 void
    959 Package::addItem(const char *name) {
    960     addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
    961 }
    962 
    963 void
    964 Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
    965     int32_t idx;
    966 
    967     idx=findItem(name);
    968     if(idx<0) {
    969         // new item, make space at the insertion point
    970         if(itemCount>=MAX_FILE_COUNT) {
    971             fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
    972             exit(U_BUFFER_OVERFLOW_ERROR);
    973         }
    974         // move the following items down
    975         idx=~idx;
    976         if(idx<itemCount) {
    977             memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
    978         }
    979         ++itemCount;
    980 
    981         // reset this Item entry
    982         memset(items+idx, 0, sizeof(Item));
    983 
    984         // copy the item's name
    985         items[idx].name=allocString(TRUE, strlen(name));
    986         strcpy(items[idx].name, name);
    987         pathToTree(items[idx].name);
    988     } else {
    989         // same-name item found, replace it
    990         if(items[idx].isDataOwned) {
    991             free(items[idx].data);
    992         }
    993 
    994         // keep the item's name since it is the same
    995     }
    996 
    997     // set the item's data
    998     items[idx].data=data;
    999     items[idx].length=length;
   1000     items[idx].isDataOwned=isDataOwned;
   1001     items[idx].type=type;
   1002 }
   1003 
   1004 void
   1005 Package::addFile(const char *filesPath, const char *name) {
   1006     uint8_t *data;
   1007     int32_t length;
   1008     char type;
   1009 
   1010     data=readFile(filesPath, name, length, type);
   1011     // readFile() exits the tool if it fails
   1012     addItem(name, data, length, TRUE, type);
   1013 }
   1014 
   1015 void
   1016 Package::addItems(const Package &listPkg) {
   1017     const Item *pItem;
   1018     int32_t i;
   1019 
   1020     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1021         addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
   1022     }
   1023 }
   1024 
   1025 void
   1026 Package::removeItem(int32_t idx) {
   1027     if(idx>=0) {
   1028         // remove the item
   1029         if(items[idx].isDataOwned) {
   1030             free(items[idx].data);
   1031         }
   1032 
   1033         // move the following items up
   1034         if((idx+1)<itemCount) {
   1035             memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
   1036         }
   1037         --itemCount;
   1038 
   1039         if(idx<=findNextIndex) {
   1040             --findNextIndex;
   1041         }
   1042     }
   1043 }
   1044 
   1045 void
   1046 Package::removeItems(const char *pattern) {
   1047     int32_t idx;
   1048 
   1049     findItems(pattern);
   1050     while((idx=findNextItem())>=0) {
   1051         removeItem(idx);
   1052     }
   1053 }
   1054 
   1055 void
   1056 Package::removeItems(const Package &listPkg) {
   1057     const Item *pItem;
   1058     int32_t i;
   1059 
   1060     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1061         removeItems(pItem->name);
   1062     }
   1063 }
   1064 
   1065 void
   1066 Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
   1067     char filename[1024];
   1068     UDataSwapper *ds;
   1069     FILE *file;
   1070     Item *pItem;
   1071     int32_t fileLength;
   1072     uint8_t itemCharset, outCharset;
   1073     UBool itemIsBigEndian, outIsBigEndian;
   1074 
   1075     if(idx<0 || itemCount<=idx) {
   1076         return;
   1077     }
   1078     pItem=items+idx;
   1079 
   1080     // swap the data to the outType
   1081     // outType==0: don't swap
   1082     if(outType!=0 && pItem->type!=outType) {
   1083         // open the swapper
   1084         UErrorCode errorCode=U_ZERO_ERROR;
   1085         makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
   1086         makeTypeProps(outType, outCharset, outIsBigEndian);
   1087         ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
   1088         if(U_FAILURE(errorCode)) {
   1089             fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
   1090                     (long)idx, u_errorName(errorCode));
   1091             exit(errorCode);
   1092         }
   1093 
   1094         ds->printError=printPackageError;
   1095         ds->printErrorContext=stderr;
   1096 
   1097         // swap the item from its platform properties to the desired ones
   1098         udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
   1099         if(U_FAILURE(errorCode)) {
   1100             fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
   1101             exit(errorCode);
   1102         }
   1103         udata_closeSwapper(ds);
   1104         pItem->type=outType;
   1105     }
   1106 
   1107     // create the file and write its contents
   1108     makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
   1109     file=fopen(filename, "wb");
   1110     if(file==NULL) {
   1111         fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
   1112         exit(U_FILE_ACCESS_ERROR);
   1113     }
   1114     fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
   1115 
   1116     if(ferror(file) || fileLength!=pItem->length) {
   1117         fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
   1118         exit(U_FILE_ACCESS_ERROR);
   1119     }
   1120     fclose(file);
   1121 }
   1122 
   1123 void
   1124 Package::extractItem(const char *filesPath, int32_t idx, char outType) {
   1125     extractItem(filesPath, items[idx].name, idx, outType);
   1126 }
   1127 
   1128 void
   1129 Package::extractItems(const char *filesPath, const char *pattern, char outType) {
   1130     int32_t idx;
   1131 
   1132     findItems(pattern);
   1133     while((idx=findNextItem())>=0) {
   1134         extractItem(filesPath, idx, outType);
   1135     }
   1136 }
   1137 
   1138 void
   1139 Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
   1140     const Item *pItem;
   1141     int32_t i;
   1142 
   1143     for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
   1144         extractItems(filesPath, pItem->name, outType);
   1145     }
   1146 }
   1147 
   1148 int32_t
   1149 Package::getItemCount() const {
   1150     return itemCount;
   1151 }
   1152 
   1153 const Item *
   1154 Package::getItem(int32_t idx) const {
   1155     if (0 <= idx && idx < itemCount) {
   1156         return &items[idx];
   1157     }
   1158     return NULL;
   1159 }
   1160 
   1161 void
   1162 Package::checkDependency(void *context, const char *itemName, const char *targetName) {
   1163     // check dependency: make sure the target item is in the package
   1164     Package *me=(Package *)context;
   1165     if(me->findItem(targetName)<0) {
   1166         me->isMissingItems=TRUE;
   1167         fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
   1168     }
   1169 }
   1170 
   1171 UBool
   1172 Package::checkDependencies() {
   1173     isMissingItems=FALSE;
   1174     enumDependencies(this, checkDependency);
   1175     return (UBool)!isMissingItems;
   1176 }
   1177 
   1178 void
   1179 Package::enumDependencies(void *context, CheckDependency check) {
   1180     int32_t i;
   1181 
   1182     for(i=0; i<itemCount; ++i) {
   1183         enumDependencies(items+i, context, check);
   1184     }
   1185 }
   1186 
   1187 char *
   1188 Package::allocString(UBool in, int32_t length) {
   1189     char *p;
   1190     int32_t top;
   1191 
   1192     if(in) {
   1193         top=inStringTop;
   1194         p=inStrings+top;
   1195     } else {
   1196         top=outStringTop;
   1197         p=outStrings+top;
   1198     }
   1199     top+=length+1;
   1200 
   1201     if(top>STRING_STORE_SIZE) {
   1202         fprintf(stderr, "icupkg: string storage overflow\n");
   1203         exit(U_BUFFER_OVERFLOW_ERROR);
   1204     }
   1205     if(in) {
   1206         inStringTop=top;
   1207     } else {
   1208         outStringTop=top;
   1209     }
   1210     return p;
   1211 }
   1212 
   1213 void
   1214 Package::sortItems() {
   1215     UErrorCode errorCode=U_ZERO_ERROR;
   1216     uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
   1217     if(U_FAILURE(errorCode)) {
   1218         fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
   1219         exit(errorCode);
   1220     }
   1221 }
   1222 
   1223 U_NAMESPACE_END
   1224