Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2008, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 #include "unicode/utypes.h"
     11 #include "unicode/putil.h"
     12 #include "cmemory.h"
     13 #include "cstring.h"
     14 #include "filestrm.h"
     15 #include "toolutil.h"
     16 #include "unicode/uclean.h"
     17 #include "unewdata.h"
     18 #include "putilimp.h"
     19 #include "pkg_gencmn.h"
     20 
     21 #define STRING_STORE_SIZE 100000
     22 #define MAX_FILE_COUNT 2000
     23 
     24 #define COMMON_DATA_NAME U_ICUDATA_NAME
     25 #define DATA_TYPE "dat"
     26 
     27 /* ICU package data file format (.dat files) ------------------------------- ***
     28 
     29 Description of the data format after the usual ICU data file header
     30 (UDataInfo etc.).
     31 
     32 Format version 1
     33 
     34 A .dat package file contains a simple Table of Contents of item names,
     35 followed by the items themselves:
     36 
     37 1. ToC table
     38 
     39 uint32_t count; - number of items
     40 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
     41     uint32_t nameOffset; - offset of the item name
     42     uint32_t dataOffset; - offset of the item data
     43 both are byte offsets from the beginning of the data
     44 
     45 2. item name strings
     46 
     47 All item names are stored as char * strings in one block between the ToC table
     48 and the data items.
     49 
     50 3. data items
     51 
     52 The data items are stored following the item names block.
     53 Each data item is 16-aligned.
     54 The data items are stored in the sorted order of their names.
     55 
     56 Therefore, the top of the name strings block is the offset of the first item,
     57 the length of the last item is the difference between its offset and
     58 the .dat file length, and the length of all previous items is the difference
     59 between its offset and the next one.
     60 
     61 ----------------------------------------------------------------------------- */
     62 
/*
 * UDataInfo cf. udata.h
 *
 * Header descriptor handed to udata_create() when the binary .dat package
 * is written by createCommonDataFile().
 * NOTE(review): the UDataInfo declaration is not visible in this file; the
 * per-field remarks marked "presumably" follow udata.h and should be
 * confirmed against it.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),            /* presumably the size of this structure */
    0,                            /* presumably a reserved field */

    U_IS_BIG_ENDIAN,              /* presumably the byte order of the build platform */
    U_CHARSET_FAMILY,             /* presumably the charset family of the build platform */
    sizeof(UChar),                /* presumably the size of a UChar */
    0,                            /* presumably a reserved field */

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
     77 
     78 static uint32_t maxSize;
     79 
     80 static char stringStore[STRING_STORE_SIZE];
     81 static uint32_t stringTop=0, basenameTotal=0;
     82 
     83 typedef struct {
     84     char *pathname, *basename;
     85     uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
     86 } File;
     87 
     88 static File files[MAX_FILE_COUNT];
     89 static uint32_t fileCount=0;
     90 
     91 static char *symPrefix = NULL;
     92 
     93 /* prototypes --------------------------------------------------------------- */
     94 
     95 static void
     96 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
     97 
     98 static char *
     99 allocString(uint32_t length);
    100 
    101 static int
    102 compareFiles(const void *file1, const void *file2);
    103 
    104 static char *
    105 pathToFullPath(const char *path, const char *source);
    106 
    107 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
    108 static void
    109 fixDirToTreePath(char *s);
    110 /* -------------------------------------------------------------------------- */
    111 
    112 U_CAPI void U_EXPORT2
    113 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
    114                      const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
    115     static char buffer[4096];
    116     char line[512];
    117     char *s;
    118     UErrorCode errorCode=U_ZERO_ERROR;
    119     uint32_t i, fileOffset, basenameOffset, length, nread;
    120     FileStream *in, *file;
    121 
    122     maxSize = max_size;
    123 
    124     if (destDir == NULL) {
    125         destDir = u_getDataDirectory();
    126     }
    127     if (name == NULL) {
    128         name = COMMON_DATA_NAME;
    129     }
    130     if (type == NULL) {
    131         type = DATA_TYPE;
    132     }
    133     if (source == NULL) {
    134         source = ".";
    135     }
    136 
    137     if (dataFile == NULL) {
    138         in = T_FileStream_stdin();
    139     } else {
    140         in = T_FileStream_open(dataFile, "r");
    141         if(in == NULL) {
    142             fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
    143             exit(U_FILE_ACCESS_ERROR);
    144         }
    145     }
    146 
    147     if (verbose) {
    148         if(sourceTOC) {
    149             printf("generating %s_%s.c (table of contents source file)\n", name, type);
    150         } else {
    151             printf("generating %s.%s (common data file with table of contents)\n", name, type);
    152         }
    153     }
    154 
    155     /* read the list of files and get their lengths */
    156     while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
    157         /* remove trailing newline characters */
    158         s=line;
    159         while(*s!=0) {
    160             if(*s=='\r' || *s=='\n') {
    161                 *s=0;
    162                 break;
    163             }
    164             ++s;
    165         }
    166 
    167         /* check for comment */
    168 
    169         if (*line == '#') {
    170             continue;
    171         }
    172 
    173         /* add the file */
    174 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
    175         {
    176           char *t;
    177           while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
    178             *t = U_FILE_SEP_CHAR;
    179           }
    180         }
    181 #endif
    182         addFile(getLongPathname(line), name, source, sourceTOC, verbose);
    183     }
    184 
    185     if(in!=T_FileStream_stdin()) {
    186         T_FileStream_close(in);
    187     }
    188 
    189     if(fileCount==0) {
    190         fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
    191         return;
    192     }
    193 
    194     /* sort the files by basename */
    195     qsort(files, fileCount, sizeof(File), compareFiles);
    196 
    197     if(!sourceTOC) {
    198         UNewDataMemory *out;
    199 
    200         /* determine the offsets of all basenames and files in this common one */
    201         basenameOffset=4+8*fileCount;
    202         fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
    203         for(i=0; i<fileCount; ++i) {
    204             files[i].fileOffset=fileOffset;
    205             fileOffset+=(files[i].fileSize+15)&~0xf;
    206             files[i].basenameOffset=basenameOffset;
    207             basenameOffset+=files[i].basenameLength;
    208         }
    209 
    210         /* create the output file */
    211         out=udata_create(destDir, type, name,
    212                          &dataInfo,
    213                          copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
    214                          &errorCode);
    215         if(U_FAILURE(errorCode)) {
    216             fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
    217                 destDir, name, type,
    218                 u_errorName(errorCode));
    219             exit(errorCode);
    220         }
    221 
    222         /* write the table of contents */
    223         udata_write32(out, fileCount);
    224         for(i=0; i<fileCount; ++i) {
    225             udata_write32(out, files[i].basenameOffset);
    226             udata_write32(out, files[i].fileOffset);
    227         }
    228 
    229         /* write the basenames */
    230         for(i=0; i<fileCount; ++i) {
    231             udata_writeString(out, files[i].basename, files[i].basenameLength);
    232         }
    233         length=4+8*fileCount+basenameTotal;
    234 
    235         /* copy the files */
    236         for(i=0; i<fileCount; ++i) {
    237             /* pad to 16-align the next file */
    238             length&=0xf;
    239             if(length!=0) {
    240                 udata_writePadding(out, 16-length);
    241             }
    242 
    243             if (verbose) {
    244                 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    245             }
    246 
    247             /* copy the next file */
    248             file=T_FileStream_open(files[i].pathname, "rb");
    249             if(file==NULL) {
    250                 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
    251                 exit(U_FILE_ACCESS_ERROR);
    252             }
    253             for(nread = 0;;) {
    254                 length=T_FileStream_read(file, buffer, sizeof(buffer));
    255                 if(length <= 0) {
    256                     break;
    257                 }
    258                 nread += length;
    259                 udata_writeBlock(out, buffer, length);
    260             }
    261             T_FileStream_close(file);
    262             length=files[i].fileSize;
    263 
    264             if (nread != files[i].fileSize) {
    265               fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    266                 exit(U_FILE_ACCESS_ERROR);
    267             }
    268         }
    269 
    270         /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
    271         length&=0xf;
    272         if(length!=0) {
    273             udata_writePadding(out, 16-length);
    274         }
    275 
    276         /* finish */
    277         udata_finish(out, &errorCode);
    278         if(U_FAILURE(errorCode)) {
    279             fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
    280             exit(errorCode);
    281         }
    282     } else {
    283         /* write a .c source file with the table of contents */
    284         char *filename;
    285         FileStream *out;
    286 
    287         /* create the output filename */
    288         filename=s=buffer;
    289         uprv_strcpy(filename, destDir);
    290         s=filename+uprv_strlen(filename);
    291         if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
    292             *s++=U_FILE_SEP_CHAR;
    293         }
    294         uprv_strcpy(s, name);
    295         if(*(type)!=0) {
    296             s+=uprv_strlen(s);
    297             *s++='_';
    298             uprv_strcpy(s, type);
    299         }
    300         s+=uprv_strlen(s);
    301         uprv_strcpy(s, ".c");
    302 
    303         /* open the output file */
    304         out=T_FileStream_open(filename, "w");
    305         if (gencmnFileName != NULL) {
    306             uprv_strcpy(gencmnFileName, filename);
    307         }
    308         if(out==NULL) {
    309             fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
    310             exit(U_FILE_ACCESS_ERROR);
    311         }
    312 
    313         /* write the source file */
    314         sprintf(buffer,
    315             "/*\n"
    316             " * ICU common data table of contents for %s.%s ,\n"
    317             " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
    318             " */\n\n"
    319             "#include \"unicode/utypes.h\"\n"
    320             "#include \"unicode/udata.h\"\n"
    321             "\n"
    322             "/* external symbol declarations for data */\n",
    323             name, type);
    324         T_FileStream_writeLine(out, buffer);
    325 
    326         sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
    327         T_FileStream_writeLine(out, buffer);
    328         for(i=1; i<fileCount; ++i) {
    329             sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
    330             T_FileStream_writeLine(out, buffer);
    331         }
    332         T_FileStream_writeLine(out, ";\n\n");
    333 
    334         sprintf(
    335             buffer,
    336             "U_EXPORT struct {\n"
    337             "    uint16_t headerSize;\n"
    338             "    uint8_t magic1, magic2;\n"
    339             "    UDataInfo info;\n"
    340             "    char padding[%lu];\n"
    341             "    uint32_t count, reserved;\n"
    342             "    struct {\n"
    343             "        const char *name;\n"
    344             "        const void *data;\n"
    345             "    } toc[%lu];\n"
    346             "} U_EXPORT2 %s_dat = {\n"
    347             "    32, 0xda, 0x27, {\n"
    348             "        %lu, 0,\n"
    349             "        %u, %u, %u, 0,\n"
    350             "        {0x54, 0x6f, 0x43, 0x50},\n"
    351             "        {1, 0, 0, 0},\n"
    352             "        {0, 0, 0, 0}\n"
    353             "    },\n"
    354             "    \"\", %lu, 0, {\n",
    355             (unsigned long)32-4-sizeof(UDataInfo),
    356             (unsigned long)fileCount,
    357             entrypointName,
    358             (unsigned long)sizeof(UDataInfo),
    359             U_IS_BIG_ENDIAN,
    360             U_CHARSET_FAMILY,
    361             U_SIZEOF_UCHAR,
    362             (unsigned long)fileCount
    363         );
    364         T_FileStream_writeLine(out, buffer);
    365 
    366         sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
    367         T_FileStream_writeLine(out, buffer);
    368         for(i=1; i<fileCount; ++i) {
    369             sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
    370             T_FileStream_writeLine(out, buffer);
    371         }
    372 
    373         T_FileStream_writeLine(out, "\n    }\n};\n");
    374         T_FileStream_close(out);
    375 
    376         uprv_free(symPrefix);
    377     }
    378 }
    379 
    380 static void
    381 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
    382     char *s;
    383     uint32_t length;
    384     char *fullPath = NULL;
    385 
    386     if(fileCount==MAX_FILE_COUNT) {
    387         fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
    388         exit(U_BUFFER_OVERFLOW_ERROR);
    389     }
    390 
    391     if(!sourceTOC) {
    392         FileStream *file;
    393 
    394         if(uprv_pathIsAbsolute(filename)) {
    395             fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
    396             exit(U_ILLEGAL_ARGUMENT_ERROR);
    397         }
    398         fullPath = pathToFullPath(filename, source);
    399 
    400         /* store the pathname */
    401         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    402         s=allocString(length);
    403         uprv_strcpy(s, name);
    404         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    405         uprv_strcat(s, filename);
    406 
    407         /* get the basename */
    408         fixDirToTreePath(s);
    409         files[fileCount].basename=s;
    410         files[fileCount].basenameLength=length;
    411 
    412         files[fileCount].pathname=fullPath;
    413 
    414         basenameTotal+=length;
    415 
    416         /* try to open the file */
    417         file=T_FileStream_open(fullPath, "rb");
    418         if(file==NULL) {
    419             fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
    420             exit(U_FILE_ACCESS_ERROR);
    421         }
    422 
    423         /* get the file length */
    424         length=T_FileStream_size(file);
    425         if(T_FileStream_error(file) || length<=20) {
    426             fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
    427             exit(U_FILE_ACCESS_ERROR);
    428         }
    429 
    430         T_FileStream_close(file);
    431 
    432         /* do not add files that are longer than maxSize */
    433         if(maxSize && length>maxSize) {
    434             if (verbose) {
    435                 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
    436             }
    437             return;
    438         }
    439         files[fileCount].fileSize=length;
    440     } else {
    441         char *t;
    442 
    443         /* get and store the basename */
    444         /* need to include the package name */
    445         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    446         s=allocString(length);
    447         uprv_strcpy(s, name);
    448         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    449         uprv_strcat(s, filename);
    450         fixDirToTreePath(s);
    451         files[fileCount].basename=s;
    452 
    453 
    454         /* turn the basename into an entry point name and store in the pathname field */
    455         t=files[fileCount].pathname=allocString(length);
    456         while(--length>0) {
    457             if(*s=='.' || *s=='-' || *s=='/') {
    458                 *t='_';
    459             } else {
    460                 *t=*s;
    461             }
    462             ++s;
    463             ++t;
    464         }
    465         *t=0;
    466     }
    467     ++fileCount;
    468 }
    469 
    470 static char *
    471 allocString(uint32_t length) {
    472     uint32_t top=stringTop+length;
    473     char *p;
    474 
    475     if(top>STRING_STORE_SIZE) {
    476         fprintf(stderr, "gencmn: out of memory\n");
    477         exit(U_MEMORY_ALLOCATION_ERROR);
    478     }
    479     p=stringStore+stringTop;
    480     stringTop=top;
    481     return p;
    482 }
    483 
    484 static char *
    485 pathToFullPath(const char *path, const char *source) {
    486     int32_t length;
    487     int32_t newLength;
    488     char *fullPath;
    489     int32_t n;
    490 
    491     length = (uint32_t)(uprv_strlen(path) + 1);
    492     newLength = (length + 1 + (int32_t)uprv_strlen(source));
    493     fullPath = uprv_malloc(newLength);
    494     if(source != NULL) {
    495         uprv_strcpy(fullPath, source);
    496         uprv_strcat(fullPath, U_FILE_SEP_STRING);
    497     } else {
    498         fullPath[0] = 0;
    499     }
    500     n = (int32_t)uprv_strlen(fullPath);
    501     uprv_strcat(fullPath, path);
    502 
    503 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    504 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
    505     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    506     for(;fullPath[n];n++) {
    507         if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
    508             fullPath[n] = U_FILE_SEP_CHAR;
    509         }
    510     }
    511 #endif
    512 #endif
    513 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    514     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    515     for(;fullPath[n];n++) {
    516         if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
    517             fullPath[n] = U_FILE_SEP_CHAR;
    518         }
    519     }
    520 #endif
    521     return fullPath;
    522 }
    523 
    524 static int
    525 compareFiles(const void *file1, const void *file2) {
    526     /* sort by basename */
    527     return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
    528 }
    529 
    530 static void
    531 fixDirToTreePath(char *s)
    532 {
    533 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    534     char *t;
    535 #endif
    536 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    537     for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) {
    538         *t = U_TREE_ENTRY_SEP_CHAR;
    539     }
    540 #endif
    541 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    542     for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) {
    543         *t = U_TREE_ENTRY_SEP_CHAR;
    544     }
    545 #endif
    546 }
    547