Home | History | Annotate | Download | only in toolutil
      1 /******************************************************************************
      2  *   Copyright (C) 2008-2010, International Business Machines
      3  *   Corporation and others.  All Rights Reserved.
      4  *******************************************************************************
      5  */
      6 #include "unicode/utypes.h"
      7 
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 #include "unicode/utypes.h"
     11 #include "unicode/putil.h"
     12 #include "cmemory.h"
     13 #include "cstring.h"
     14 #include "filestrm.h"
     15 #include "toolutil.h"
     16 #include "unicode/uclean.h"
     17 #include "unewdata.h"
     18 #include "putilimp.h"
     19 #include "pkg_gencmn.h"
     20 
     21 #define STRING_STORE_SIZE 100000
     22 
     23 #define COMMON_DATA_NAME U_ICUDATA_NAME
     24 #define DATA_TYPE "dat"
     25 
     26 /* ICU package data file format (.dat files) ------------------------------- ***
     27 
     28 Description of the data format after the usual ICU data file header
     29 (UDataInfo etc.).
     30 
     31 Format version 1
     32 
     33 A .dat package file contains a simple Table of Contents of item names,
     34 followed by the items themselves:
     35 
     36 1. ToC table
     37 
     38 uint32_t count; - number of items
     39 UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
     40     uint32_t nameOffset; - offset of the item name
     41     uint32_t dataOffset; - offset of the item data
     42 both are byte offsets from the beginning of the data
     43 
     44 2. item name strings
     45 
     46 All item names are stored as char * strings in one block between the ToC table
     47 and the data items.
     48 
     49 3. data items
     50 
     51 The data items are stored following the item names block.
     52 Each data item is 16-aligned.
     53 The data items are stored in the sorted order of their names.
     54 
Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of every other item is the difference
between its offset and the offset of the item that follows it.
     59 
     60 ----------------------------------------------------------------------------- */
     61 
     62 /* UDataInfo cf. udata.h */
     63 static const UDataInfo dataInfo={
     64     sizeof(UDataInfo),
     65     0,
     66 
     67     U_IS_BIG_ENDIAN,
     68     U_CHARSET_FAMILY,
     69     sizeof(UChar),
     70     0,
     71 
     72     {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
     73     {1, 0, 0, 0},                 /* formatVersion */
     74     {3, 0, 0, 0}                  /* dataVersion */
     75 };
     76 
     77 static uint32_t maxSize;
     78 
     79 static char stringStore[STRING_STORE_SIZE];
     80 static uint32_t stringTop=0, basenameTotal=0;
     81 
     82 typedef struct {
     83     char *pathname, *basename;
     84     uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
     85 } File;
     86 
     87 #define CHUNK_FILE_COUNT 256
     88 static File *files = NULL;
     89 static uint32_t fileCount=0;
     90 static uint32_t fileMax = 0;
     91 
     92 
     93 static char *symPrefix = NULL;
     94 
     95 /* prototypes --------------------------------------------------------------- */
     96 
     97 static void
     98 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
     99 
    100 static char *
    101 allocString(uint32_t length);
    102 
    103 static int
    104 compareFiles(const void *file1, const void *file2);
    105 
    106 static char *
    107 pathToFullPath(const char *path, const char *source);
    108 
    109 /* map non-tree separator (such as '\') to tree separator ('/') inplace. */
    110 static void
    111 fixDirToTreePath(char *s);
    112 /* -------------------------------------------------------------------------- */
    113 
    114 U_CAPI void U_EXPORT2
    115 createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
    116                      const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
    117     static char buffer[4096];
    118     char line[512];
    119     char *s;
    120     UErrorCode errorCode=U_ZERO_ERROR;
    121     uint32_t i, fileOffset, basenameOffset, length, nread;
    122     FileStream *in, *file;
    123 
    124     maxSize = max_size;
    125 
    126     if (destDir == NULL) {
    127         destDir = u_getDataDirectory();
    128     }
    129     if (name == NULL) {
    130         name = COMMON_DATA_NAME;
    131     }
    132     if (type == NULL) {
    133         type = DATA_TYPE;
    134     }
    135     if (source == NULL) {
    136         source = ".";
    137     }
    138 
    139     if (dataFile == NULL) {
    140         in = T_FileStream_stdin();
    141     } else {
    142         in = T_FileStream_open(dataFile, "r");
    143         if(in == NULL) {
    144             fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
    145             exit(U_FILE_ACCESS_ERROR);
    146         }
    147     }
    148 
    149     if (verbose) {
    150         if(sourceTOC) {
    151             printf("generating %s_%s.c (table of contents source file)\n", name, type);
    152         } else {
    153             printf("generating %s.%s (common data file with table of contents)\n", name, type);
    154         }
    155     }
    156 
    157     /* read the list of files and get their lengths */
    158     while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
    159         /* remove trailing newline characters */
    160         s=line;
    161         while(*s!=0) {
    162             if(*s=='\r' || *s=='\n') {
    163                 *s=0;
    164                 break;
    165             }
    166             ++s;
    167         }
    168 
    169         /* check for comment */
    170 
    171         if (*line == '#') {
    172             continue;
    173         }
    174 
    175         /* add the file */
    176 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
    177         {
    178           char *t;
    179           while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
    180             *t = U_FILE_SEP_CHAR;
    181           }
    182         }
    183 #endif
    184         addFile(getLongPathname(line), name, source, sourceTOC, verbose);
    185     }
    186 
    187     if(in!=T_FileStream_stdin()) {
    188         T_FileStream_close(in);
    189     }
    190 
    191     if(fileCount==0) {
    192         fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
    193         return;
    194     }
    195 
    196     /* sort the files by basename */
    197     qsort(files, fileCount, sizeof(File), compareFiles);
    198 
    199     if(!sourceTOC) {
    200         UNewDataMemory *out;
    201 
    202         /* determine the offsets of all basenames and files in this common one */
    203         basenameOffset=4+8*fileCount;
    204         fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
    205         for(i=0; i<fileCount; ++i) {
    206             files[i].fileOffset=fileOffset;
    207             fileOffset+=(files[i].fileSize+15)&~0xf;
    208             files[i].basenameOffset=basenameOffset;
    209             basenameOffset+=files[i].basenameLength;
    210         }
    211 
    212         /* create the output file */
    213         out=udata_create(destDir, type, name,
    214                          &dataInfo,
    215                          copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
    216                          &errorCode);
    217         if(U_FAILURE(errorCode)) {
    218             fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
    219                 destDir, name, type,
    220                 u_errorName(errorCode));
    221             exit(errorCode);
    222         }
    223 
    224         /* write the table of contents */
    225         udata_write32(out, fileCount);
    226         for(i=0; i<fileCount; ++i) {
    227             udata_write32(out, files[i].basenameOffset);
    228             udata_write32(out, files[i].fileOffset);
    229         }
    230 
    231         /* write the basenames */
    232         for(i=0; i<fileCount; ++i) {
    233             udata_writeString(out, files[i].basename, files[i].basenameLength);
    234         }
    235         length=4+8*fileCount+basenameTotal;
    236 
    237         /* copy the files */
    238         for(i=0; i<fileCount; ++i) {
    239             /* pad to 16-align the next file */
    240             length&=0xf;
    241             if(length!=0) {
    242                 udata_writePadding(out, 16-length);
    243             }
    244 
    245             if (verbose) {
    246                 printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    247             }
    248 
    249             /* copy the next file */
    250             file=T_FileStream_open(files[i].pathname, "rb");
    251             if(file==NULL) {
    252                 fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
    253                 exit(U_FILE_ACCESS_ERROR);
    254             }
    255             for(nread = 0;;) {
    256                 length=T_FileStream_read(file, buffer, sizeof(buffer));
    257                 if(length <= 0) {
    258                     break;
    259                 }
    260                 nread += length;
    261                 udata_writeBlock(out, buffer, length);
    262             }
    263             T_FileStream_close(file);
    264             length=files[i].fileSize;
    265 
    266             if (nread != files[i].fileSize) {
    267               fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
    268                 exit(U_FILE_ACCESS_ERROR);
    269             }
    270         }
    271 
    272         /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
    273         length&=0xf;
    274         if(length!=0) {
    275             udata_writePadding(out, 16-length);
    276         }
    277 
    278         /* finish */
    279         udata_finish(out, &errorCode);
    280         if(U_FAILURE(errorCode)) {
    281             fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
    282             exit(errorCode);
    283         }
    284     } else {
    285         /* write a .c source file with the table of contents */
    286         char *filename;
    287         FileStream *out;
    288 
    289         /* create the output filename */
    290         filename=s=buffer;
    291         uprv_strcpy(filename, destDir);
    292         s=filename+uprv_strlen(filename);
    293         if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
    294             *s++=U_FILE_SEP_CHAR;
    295         }
    296         uprv_strcpy(s, name);
    297         if(*(type)!=0) {
    298             s+=uprv_strlen(s);
    299             *s++='_';
    300             uprv_strcpy(s, type);
    301         }
    302         s+=uprv_strlen(s);
    303         uprv_strcpy(s, ".c");
    304 
    305         /* open the output file */
    306         out=T_FileStream_open(filename, "w");
    307         if (gencmnFileName != NULL) {
    308             uprv_strcpy(gencmnFileName, filename);
    309         }
    310         if(out==NULL) {
    311             fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
    312             exit(U_FILE_ACCESS_ERROR);
    313         }
    314 
    315         /* write the source file */
    316         sprintf(buffer,
    317             "/*\n"
    318             " * ICU common data table of contents for %s.%s ,\n"
    319             " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
    320             " */\n\n"
    321             "#include \"unicode/utypes.h\"\n"
    322             "#include \"unicode/udata.h\"\n"
    323             "\n"
    324             "/* external symbol declarations for data */\n",
    325             name, type);
    326         T_FileStream_writeLine(out, buffer);
    327 
    328         sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
    329         T_FileStream_writeLine(out, buffer);
    330         for(i=1; i<fileCount; ++i) {
    331             sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
    332             T_FileStream_writeLine(out, buffer);
    333         }
    334         T_FileStream_writeLine(out, ";\n\n");
    335 
    336         sprintf(
    337             buffer,
    338             "U_EXPORT struct {\n"
    339             "    uint16_t headerSize;\n"
    340             "    uint8_t magic1, magic2;\n"
    341             "    UDataInfo info;\n"
    342             "    char padding[%lu];\n"
    343             "    uint32_t count, reserved;\n"
    344             "    struct {\n"
    345             "        const char *name;\n"
    346             "        const void *data;\n"
    347             "    } toc[%lu];\n"
    348             "} U_EXPORT2 %s_dat = {\n"
    349             "    32, 0xda, 0x27, {\n"
    350             "        %lu, 0,\n"
    351             "        %u, %u, %u, 0,\n"
    352             "        {0x54, 0x6f, 0x43, 0x50},\n"
    353             "        {1, 0, 0, 0},\n"
    354             "        {0, 0, 0, 0}\n"
    355             "    },\n"
    356             "    \"\", %lu, 0, {\n",
    357             (unsigned long)32-4-sizeof(UDataInfo),
    358             (unsigned long)fileCount,
    359             entrypointName,
    360             (unsigned long)sizeof(UDataInfo),
    361             U_IS_BIG_ENDIAN,
    362             U_CHARSET_FAMILY,
    363             U_SIZEOF_UCHAR,
    364             (unsigned long)fileCount
    365         );
    366         T_FileStream_writeLine(out, buffer);
    367 
    368         sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
    369         T_FileStream_writeLine(out, buffer);
    370         for(i=1; i<fileCount; ++i) {
    371             sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
    372             T_FileStream_writeLine(out, buffer);
    373         }
    374 
    375         T_FileStream_writeLine(out, "\n    }\n};\n");
    376         T_FileStream_close(out);
    377 
    378         uprv_free(symPrefix);
    379     }
    380 }
    381 
    382 static void
    383 addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
    384     char *s;
    385     uint32_t length;
    386     char *fullPath = NULL;
    387 
    388     if(fileCount==fileMax) {
    389       fileMax += CHUNK_FILE_COUNT;
    390       files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
    391       if(files==NULL) {
    392         fprintf(stderr, "pkgdata/gencmn: Could not allocate %ld bytes for %d files\n", (fileMax*sizeof(files[0])), fileCount);
    393         exit(U_MEMORY_ALLOCATION_ERROR);
    394       }
    395     }
    396 
    397     if(!sourceTOC) {
    398         FileStream *file;
    399 
    400         if(uprv_pathIsAbsolute(filename)) {
    401             fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
    402             exit(U_ILLEGAL_ARGUMENT_ERROR);
    403         }
    404         fullPath = pathToFullPath(filename, source);
    405 
    406         /* store the pathname */
    407         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    408         s=allocString(length);
    409         uprv_strcpy(s, name);
    410         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    411         uprv_strcat(s, filename);
    412 
    413         /* get the basename */
    414         fixDirToTreePath(s);
    415         files[fileCount].basename=s;
    416         files[fileCount].basenameLength=length;
    417 
    418         files[fileCount].pathname=fullPath;
    419 
    420         basenameTotal+=length;
    421 
    422         /* try to open the file */
    423         file=T_FileStream_open(fullPath, "rb");
    424         if(file==NULL) {
    425             fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
    426             exit(U_FILE_ACCESS_ERROR);
    427         }
    428 
    429         /* get the file length */
    430         length=T_FileStream_size(file);
    431         if(T_FileStream_error(file) || length<=20) {
    432             fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
    433             exit(U_FILE_ACCESS_ERROR);
    434         }
    435 
    436         T_FileStream_close(file);
    437 
    438         /* do not add files that are longer than maxSize */
    439         if(maxSize && length>maxSize) {
    440             if (verbose) {
    441                 printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
    442             }
    443             return;
    444         }
    445         files[fileCount].fileSize=length;
    446     } else {
    447         char *t;
    448 
    449         /* get and store the basename */
    450         /* need to include the package name */
    451         length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
    452         s=allocString(length);
    453         uprv_strcpy(s, name);
    454         uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
    455         uprv_strcat(s, filename);
    456         fixDirToTreePath(s);
    457         files[fileCount].basename=s;
    458 
    459 
    460         /* turn the basename into an entry point name and store in the pathname field */
    461         t=files[fileCount].pathname=allocString(length);
    462         while(--length>0) {
    463             if(*s=='.' || *s=='-' || *s=='/') {
    464                 *t='_';
    465             } else {
    466                 *t=*s;
    467             }
    468             ++s;
    469             ++t;
    470         }
    471         *t=0;
    472     }
    473     ++fileCount;
    474 }
    475 
    476 static char *
    477 allocString(uint32_t length) {
    478     uint32_t top=stringTop+length;
    479     char *p;
    480 
    481     if(top>STRING_STORE_SIZE) {
    482         fprintf(stderr, "gencmn: out of memory\n");
    483         exit(U_MEMORY_ALLOCATION_ERROR);
    484     }
    485     p=stringStore+stringTop;
    486     stringTop=top;
    487     return p;
    488 }
    489 
    490 static char *
    491 pathToFullPath(const char *path, const char *source) {
    492     int32_t length;
    493     int32_t newLength;
    494     char *fullPath;
    495     int32_t n;
    496 
    497     length = (uint32_t)(uprv_strlen(path) + 1);
    498     newLength = (length + 1 + (int32_t)uprv_strlen(source));
    499     fullPath = uprv_malloc(newLength);
    500     if(source != NULL) {
    501         uprv_strcpy(fullPath, source);
    502         uprv_strcat(fullPath, U_FILE_SEP_STRING);
    503     } else {
    504         fullPath[0] = 0;
    505     }
    506     n = (int32_t)uprv_strlen(fullPath);
    507     uprv_strcat(fullPath, path);
    508 
    509 #if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    510 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
    511     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    512     for(;fullPath[n];n++) {
    513         if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
    514             fullPath[n] = U_FILE_SEP_CHAR;
    515         }
    516     }
    517 #endif
    518 #endif
    519 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    520     /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
    521     for(;fullPath[n];n++) {
    522         if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
    523             fullPath[n] = U_FILE_SEP_CHAR;
    524         }
    525     }
    526 #endif
    527     return fullPath;
    528 }
    529 
    530 static int
    531 compareFiles(const void *file1, const void *file2) {
    532     /* sort by basename */
    533     return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
    534 }
    535 
    536 static void
    537 fixDirToTreePath(char *s)
    538 {
    539 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    540     char *t;
    541 #endif
    542 #if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    543     for(t=s;t=uprv_strchr(t,U_FILE_SEP_CHAR);) {
    544         *t = U_TREE_ENTRY_SEP_CHAR;
    545     }
    546 #endif
    547 #if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    548     for(t=s;t=uprv_strchr(t,U_FILE_ALT_SEP_CHAR);) {
    549         *t = U_TREE_ENTRY_SEP_CHAR;
    550     }
    551 #endif
    552 }
    553