Home | History | Annotate | Download | only in makeconv
      1 /*
      2  ********************************************************************************
      3  *
      4  *   Copyright (C) 1998-2008, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  ********************************************************************************
      8  *
      9  *
     10  *  makeconv.c:
     11  *  tool creating a binary (compressed) representation of the conversion mapping
     12  *  table (IBM NLTC ucmap format).
     13  *
     14  *  05/04/2000    helena     Added fallback mapping into the picture...
     15  *  06/29/2000  helena      Major rewrite of the callback APIs.
     16  */
     17 
     18 #include <stdio.h>
     19 #include "unicode/putil.h"
     20 #include "unicode/ucnv_err.h"
     21 #include "ucnv_bld.h"
     22 #include "ucnv_imp.h"
     23 #include "ucnv_cnv.h"
     24 #include "cstring.h"
     25 #include "cmemory.h"
     26 #include "uinvchar.h"
     27 #include "filestrm.h"
     28 #include "toolutil.h"
     29 #include "uoptions.h"
     30 #include "unicode/udata.h"
     31 #include "unewdata.h"
     32 #include "uparse.h"
     33 #include "ucm.h"
     34 #include "makeconv.h"
     35 #include "genmbcs.h"
     36 
     37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     38 
     39 #define DEBUG 0
     40 
     41 typedef struct ConvData {
     42     UCMFile *ucm;
     43     NewConverter *cnvData, *extData;
     44     UConverterSharedData sharedData;
     45     UConverterStaticData staticData;
     46 } ConvData;
     47 
     48 static void
     49 initConvData(ConvData *data) {
     50     uprv_memset(data, 0, sizeof(ConvData));
     51     data->sharedData.structSize=sizeof(UConverterSharedData);
     52     data->staticData.structSize=sizeof(UConverterStaticData);
     53     data->sharedData.staticData=&data->staticData;
     54 }
     55 
     56 static void
     57 cleanupConvData(ConvData *data) {
     58     if(data!=NULL) {
     59         if(data->cnvData!=NULL) {
     60             data->cnvData->close(data->cnvData);
     61             data->cnvData=NULL;
     62         }
     63         if(data->extData!=NULL) {
     64             data->extData->close(data->extData);
     65             data->extData=NULL;
     66         }
     67         ucm_close(data->ucm);
     68         data->ucm=NULL;
     69     }
     70 }
     71 
     72 /*
     73  * from ucnvstat.c - static prototypes of data-based converters
     74  */
     75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
     76 
     77 /*
     78  * Global - verbosity
     79  */
     80 UBool VERBOSE = FALSE;
     81 UBool SMALL = FALSE;
     82 
     83 static void
     84 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
     85 
     86 /*
     87  * Set up the UNewData and write the converter..
     88  */
     89 static void
     90 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
     91 
     92 UBool haveCopyright=TRUE;
     93 
     94 static UDataInfo dataInfo={
     95     sizeof(UDataInfo),
     96     0,
     97 
     98     U_IS_BIG_ENDIAN,
     99     U_CHARSET_FAMILY,
    100     sizeof(UChar),
    101     0,
    102 
    103     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    104     {6, 2, 0, 0},                 /* formatVersion */
    105     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
    106 };
    107 
    108 static void
    109 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
    110 {
    111     UNewDataMemory *mem = NULL;
    112     uint32_t sz2;
    113     uint32_t size = 0;
    114     int32_t tableType;
    115 
    116     if(U_FAILURE(*status))
    117       {
    118         return;
    119       }
    120 
    121     tableType=TABLE_NONE;
    122     if(data->cnvData!=NULL) {
    123         tableType|=TABLE_BASE;
    124     }
    125     if(data->extData!=NULL) {
    126         tableType|=TABLE_EXT;
    127     }
    128 
    129     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
    130 
    131     if(U_FAILURE(*status))
    132       {
    133         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
    134                 cnvName,
    135                 "cnv",
    136                 u_errorName(*status));
    137         return;
    138       }
    139 
    140     if(VERBOSE)
    141       {
    142         printf("- Opened udata %s.%s\n", cnvName, "cnv");
    143       }
    144 
    145 
    146     /* all read only, clean, platform independent data.  Mmmm. :)  */
    147     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    148     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    149     /* Now, write the table */
    150     if(tableType&TABLE_BASE) {
    151         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    152     }
    153     if(tableType&TABLE_EXT) {
    154         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    155     }
    156 
    157     sz2 = udata_finish(mem, status);
    158     if(size != sz2)
    159     {
    160         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
    161         *status=U_INTERNAL_PROGRAM_ERROR;
    162     }
    163     if(VERBOSE)
    164     {
    165       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
    166     }
    167 }
    168 
    169 enum {
    170     OPT_HELP_H,
    171     OPT_HELP_QUESTION_MARK,
    172     OPT_COPYRIGHT,
    173     OPT_VERSION,
    174     OPT_DESTDIR,
    175     OPT_VERBOSE,
    176     OPT_SMALL,
    177     OPT_COUNT
    178 };
    179 
    180 static UOption options[]={
    181     UOPTION_HELP_H,
    182     UOPTION_HELP_QUESTION_MARK,
    183     UOPTION_COPYRIGHT,
    184     UOPTION_VERSION,
    185     UOPTION_DESTDIR,
    186     UOPTION_VERBOSE,
    187     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
    188 };
    189 
    190 int main(int argc, char* argv[])
    191 {
    192     ConvData data;
    193     UErrorCode err = U_ZERO_ERROR, localError;
    194     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    195     const char* destdir, *arg;
    196     size_t destdirlen;
    197     char* dot = NULL, *outBasename;
    198     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    199     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    200     UVersionInfo icuVersion;
    201     UBool printFilename;
    202 
    203     err = U_ZERO_ERROR;
    204 
    205     U_MAIN_INIT_ARGS(argc, argv);
    206 
    207     /* Set up the ICU version number */
    208     u_getVersion(icuVersion);
    209     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
    210 
    211     /* preset then read command line options */
    212     options[OPT_DESTDIR].value=u_getDataDirectory();
    213     argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
    214 
    215     /* error handling, printing usage message */
    216     if(argc<0) {
    217         fprintf(stderr,
    218             "error in command line argument \"%s\"\n",
    219             argv[-argc]);
    220     } else if(argc<2) {
    221         argc=-1;
    222     }
    223     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
    224         FILE *stdfile=argc<0 ? stderr : stdout;
    225         fprintf(stdfile,
    226             "usage: %s [-options] files...\n"
    227             "\tread .ucm codepage mapping files and write .cnv files\n"
    228             "options:\n"
    229             "\t-h or -? or --help  this usage text\n"
    230             "\t-V or --version     show a version message\n"
    231             "\t-c or --copyright   include a copyright notice\n"
    232             "\t-d or --destdir     destination directory, followed by the path\n"
    233             "\t-v or --verbose     Turn on verbose output\n",
    234             argv[0]);
    235         fprintf(stdfile,
    236             "\t      --small       Generate smaller .cnv files. They will be\n"
    237             "\t                    significantly smaller but may not be compatible with\n"
    238             "\t                    older versions of ICU and will require heap memory\n"
    239             "\t                    allocation when loaded.\n");
    240         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    241     }
    242 
    243     if(options[OPT_VERSION].doesOccur) {
    244         printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
    245                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
    246         printf("%s\n", U_COPYRIGHT_STRING);
    247         exit(0);
    248     }
    249 
    250     /* get the options values */
    251     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    252     destdir = options[OPT_DESTDIR].value;
    253     VERBOSE = options[OPT_VERBOSE].doesOccur;
    254     SMALL = options[OPT_SMALL].doesOccur;
    255 
    256     if (destdir != NULL && *destdir != 0) {
    257         uprv_strcpy(outFileName, destdir);
    258         destdirlen = uprv_strlen(destdir);
    259         outBasename = outFileName + destdirlen;
    260         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
    261             *outBasename++ = U_FILE_SEP_CHAR;
    262             ++destdirlen;
    263         }
    264     } else {
    265         destdirlen = 0;
    266         outBasename = outFileName;
    267     }
    268 
    269 #if DEBUG
    270     {
    271       int i;
    272       printf("makeconv: processing %d files...\n", argc - 1);
    273       for(i=1; i<argc; ++i) {
    274         printf("%s ", argv[i]);
    275       }
    276       printf("\n");
    277       fflush(stdout);
    278     }
    279 #endif
    280 
    281     err = U_ZERO_ERROR;
    282     printFilename = (UBool) (argc > 2 || VERBOSE);
    283     for (++argv; --argc; ++argv)
    284     {
    285         arg = getLongPathname(*argv);
    286 
    287         /* Check for potential buffer overflow */
    288         if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
    289         {
    290             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
    291             return U_BUFFER_OVERFLOW_ERROR;
    292         }
    293 
    294         /*produces the right destination path for display*/
    295         if (destdirlen != 0)
    296         {
    297             const char *basename;
    298 
    299             /* find the last file sepator */
    300             basename = findBasename(arg);
    301             uprv_strcpy(outBasename, basename);
    302         }
    303         else
    304         {
    305             uprv_strcpy(outFileName, arg);
    306         }
    307 
    308         /*removes the extension if any is found*/
    309         dot = uprv_strrchr(outBasename, '.');
    310         if (dot)
    311         {
    312             *dot = '\0';
    313         }
    314 
    315         /* the basename without extension is the converter name */
    316         uprv_strcpy(cnvName, outBasename);
    317 
    318         /*Adds the target extension*/
    319         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
    320 
    321 #if DEBUG
    322         printf("makeconv: processing %s  ...\n", arg);
    323         fflush(stdout);
    324 #endif
    325         localError = U_ZERO_ERROR;
    326         initConvData(&data);
    327         createConverter(&data, arg, &localError);
    328 
    329         if (U_FAILURE(localError))
    330         {
    331             /* if an error is found, print out an error msg and keep going */
    332             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    333                 u_errorName(localError));
    334             if(U_SUCCESS(err)) {
    335                 err = localError;
    336             }
    337         }
    338         else
    339         {
    340             /* Insure the static data name matches the  file name */
    341             /* Changed to ignore directory and only compare base name
    342              LDH 1/2/08*/
    343             char *p;
    344             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
    345 
    346             if(p == NULL)            /* OK, try alternate */
    347             {
    348                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
    349                 if(p == NULL)
    350                 {
    351                     p=cnvName; /* If no separators, no problem */
    352                 }
    353             }
    354             else
    355             {
    356                 p++;   /* If found separtor, don't include it in compare */
    357             }
    358             if(uprv_stricmp(p,data.staticData.name))
    359             {
    360                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
    361                     cnvName,  CONVERTER_FILE_EXTENSION,
    362                     data.staticData.name);
    363             }
    364 
    365             uprv_strcpy((char*)data.staticData.name, cnvName);
    366 
    367             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
    368                 fprintf(stderr,
    369                     "Error: A converter name must contain only invariant characters.\n"
    370                     "%s is not a valid converter name.\n",
    371                     data.staticData.name);
    372                 if(U_SUCCESS(err)) {
    373                     err = U_INVALID_TABLE_FORMAT;
    374                 }
    375             }
    376 
    377             uprv_strcpy(cnvNameWithPkg, cnvName);
    378 
    379             localError = U_ZERO_ERROR;
    380             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
    381 
    382             if(U_FAILURE(localError))
    383             {
    384                 /* if an error is found, print out an error msg and keep going*/
    385                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    386                     u_errorName(localError));
    387                 if(U_SUCCESS(err)) {
    388                     err = localError;
    389                 }
    390             }
    391             else if (printFilename)
    392             {
    393                 puts(outBasename);
    394             }
    395         }
    396         fflush(stdout);
    397         fflush(stderr);
    398 
    399         cleanupConvData(&data);
    400     }
    401 
    402     return err;
    403 }
    404 
    405 static void
    406 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    407     if( (name[0]=='i' || name[0]=='I') &&
    408         (name[1]=='b' || name[1]=='B') &&
    409         (name[2]=='m' || name[2]=='M')
    410     ) {
    411         name+=3;
    412         if(*name=='-') {
    413             ++name;
    414         }
    415         *pPlatform=UCNV_IBM;
    416         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
    417     } else {
    418         *pPlatform=UCNV_UNKNOWN;
    419         *pCCSID=0;
    420     }
    421 }
    422 
    423 static void
    424 readHeader(ConvData *data,
    425            FileStream* convFile,
    426            const char* converterName,
    427            UErrorCode *pErrorCode) {
    428     char line[200];
    429     char *s, *key, *value;
    430     const UConverterStaticData *prototype;
    431     UConverterStaticData *staticData;
    432 
    433     if(U_FAILURE(*pErrorCode)) {
    434         return;
    435     }
    436 
    437     staticData=&data->staticData;
    438     staticData->platform=UCNV_IBM;
    439     staticData->subCharLen=0;
    440 
    441     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    442         /* basic parsing and handling of state-related items */
    443         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
    444             continue;
    445         }
    446 
    447         /* stop at the beginning of the mapping section */
    448         if(uprv_strcmp(line, "CHARMAP")==0) {
    449             break;
    450         }
    451 
    452         /* collect the information from the header field, ignore unknown keys */
    453         if(uprv_strcmp(key, "code_set_name")==0) {
    454             if(*value!=0) {
    455                 uprv_strcpy((char *)staticData->name, value);
    456                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
    457             }
    458         } else if(uprv_strcmp(key, "subchar")==0) {
    459             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    460             int8_t length;
    461 
    462             s=value;
    463             length=ucm_parseBytes(bytes, line, (const char **)&s);
    464             if(1<=length && length<=4 && *s==0) {
    465                 staticData->subCharLen=length;
    466                 uprv_memcpy(staticData->subChar, bytes, length);
    467             } else {
    468                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
    469                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    470                 return;
    471             }
    472         } else if(uprv_strcmp(key, "subchar1")==0) {
    473             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    474 
    475             s=value;
    476             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
    477                 staticData->subChar1=bytes[0];
    478             } else {
    479                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
    480                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    481                 return;
    482             }
    483         }
    484     }
    485 
    486     /* copy values from the UCMFile to the static data */
    487     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    488     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    489     staticData->conversionType=data->ucm->states.conversionType;
    490 
    491     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
    492         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
    493         *pErrorCode=U_INVALID_TABLE_FORMAT;
    494         return;
    495     }
    496 
    497     /*
    498      * Now that we know the type, copy any 'default' values from the table.
    499      * We need not check the type any further because the parser only
    500      * recognizes what we have prototypes for.
    501      *
    502      * For delta (extension-only) tables, copy values from the base file
    503      * instead, see createConverter().
    504      */
    505     if(data->ucm->baseName[0]==0) {
    506         prototype=ucnv_converterStaticData[staticData->conversionType];
    507         if(prototype!=NULL) {
    508             if(staticData->name[0]==0) {
    509                 uprv_strcpy((char *)staticData->name, prototype->name);
    510             }
    511 
    512             if(staticData->codepage==0) {
    513                 staticData->codepage=prototype->codepage;
    514             }
    515 
    516             if(staticData->platform==0) {
    517                 staticData->platform=prototype->platform;
    518             }
    519 
    520             if(staticData->minBytesPerChar==0) {
    521                 staticData->minBytesPerChar=prototype->minBytesPerChar;
    522             }
    523 
    524             if(staticData->maxBytesPerChar==0) {
    525                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
    526             }
    527 
    528             if(staticData->subCharLen==0) {
    529                 staticData->subCharLen=prototype->subCharLen;
    530                 if(prototype->subCharLen>0) {
    531                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
    532                 }
    533             }
    534         }
    535     }
    536 
    537     if(data->ucm->states.outputType<0) {
    538         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    539     }
    540 
    541     if( staticData->subChar1!=0 &&
    542             (staticData->minBytesPerChar>1 ||
    543                 (staticData->conversionType!=UCNV_MBCS &&
    544                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    545     ) {
    546         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
    547         *pErrorCode=U_INVALID_TABLE_FORMAT;
    548     }
    549 }
    550 
    551 /* return TRUE if a base table was read, FALSE for an extension table */
    552 static UBool
    553 readFile(ConvData *data, const char* converterName,
    554          UErrorCode *pErrorCode) {
    555     char line[200];
    556     char *end;
    557     FileStream *convFile;
    558 
    559     UCMStates *baseStates;
    560     UBool dataIsBase;
    561 
    562     if(U_FAILURE(*pErrorCode)) {
    563         return FALSE;
    564     }
    565 
    566     data->ucm=ucm_open();
    567 
    568     convFile=T_FileStream_open(converterName, "r");
    569     if(convFile==NULL) {
    570         *pErrorCode=U_FILE_ACCESS_ERROR;
    571         return FALSE;
    572     }
    573 
    574     readHeader(data, convFile, converterName, pErrorCode);
    575     if(U_FAILURE(*pErrorCode)) {
    576         return FALSE;
    577     }
    578 
    579     if(data->ucm->baseName[0]==0) {
    580         dataIsBase=TRUE;
    581         baseStates=&data->ucm->states;
    582         ucm_processStates(baseStates);
    583     } else {
    584         dataIsBase=FALSE;
    585         baseStates=NULL;
    586     }
    587 
    588     /* read the base table */
    589     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    590     if(U_FAILURE(*pErrorCode)) {
    591         return FALSE;
    592     }
    593 
    594     /* read an extension table if there is one */
    595     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    596         end=uprv_strchr(line, 0);
    597         while(line<end &&
    598               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
    599             --end;
    600         }
    601         *end=0;
    602 
    603         if(line[0]=='#' || u_skipWhitespace(line)==end) {
    604             continue; /* ignore empty and comment lines */
    605         }
    606 
    607         if(0==uprv_strcmp(line, "CHARMAP")) {
    608             /* read the extension table */
    609             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
    610         } else {
    611             fprintf(stderr, "unexpected text after the base mapping table\n");
    612         }
    613         break;
    614     }
    615 
    616     T_FileStream_close(convFile);
    617 
    618     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
    619         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
    620         *pErrorCode=U_INVALID_TABLE_FORMAT;
    621     }
    622 
    623     return dataIsBase;
    624 }
    625 
    626 static void
    627 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    628     ConvData baseData;
    629     UBool dataIsBase;
    630 
    631     UConverterStaticData *staticData;
    632     UCMStates *states, *baseStates;
    633 
    634     if(U_FAILURE(*pErrorCode)) {
    635         return;
    636     }
    637 
    638     initConvData(data);
    639 
    640     dataIsBase=readFile(data, converterName, pErrorCode);
    641     if(U_FAILURE(*pErrorCode)) {
    642         return;
    643     }
    644 
    645     staticData=&data->staticData;
    646     states=&data->ucm->states;
    647 
    648     if(dataIsBase) {
    649         /*
    650          * Build a normal .cnv file with a base table
    651          * and an optional extension table.
    652          */
    653         data->cnvData=MBCSOpen(data->ucm);
    654         if(data->cnvData==NULL) {
    655             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    656 
    657         } else if(!data->cnvData->isValid(data->cnvData,
    658                             staticData->subChar, staticData->subCharLen)
    659         ) {
    660             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    661             *pErrorCode=U_INVALID_TABLE_FORMAT;
    662 
    663         } else if(staticData->subChar1!=0 &&
    664                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
    665         ) {
    666             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    667             *pErrorCode=U_INVALID_TABLE_FORMAT;
    668 
    669         } else if(
    670             data->ucm->ext->mappingsLength>0 &&
    671             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    672         ) {
    673             *pErrorCode=U_INVALID_TABLE_FORMAT;
    674         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
    675             /* sort the table so that it can be turned into UTF-8-friendly data */
    676             ucm_sortTable(data->ucm->base);
    677         }
    678 
    679         if(U_SUCCESS(*pErrorCode)) {
    680             if(
    681                 /* add the base table after ucm_checkBaseExt()! */
    682                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
    683             ) {
    684                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    685             } else {
    686                 /*
    687                  * addTable() may have requested moving more mappings to the extension table
    688                  * if they fit into the base toUnicode table but not into the
    689                  * base fromUnicode table.
    690                  * (Especially for UTF-8-friendly fromUnicode tables.)
    691                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
    692                  * to be excluded from the extension toUnicode data.
    693                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
    694                  * the base fromUnicode table.
    695                  */
    696                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
    697                 ucm_sortTable(data->ucm->ext);
    698                 if(data->ucm->ext->mappingsLength>0) {
    699                     /* prepare the extension table, if there is one */
    700                     data->extData=CnvExtOpen(data->ucm);
    701                     if(data->extData==NULL) {
    702                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    703                     } else if(
    704                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
    705                     ) {
    706                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    707                     }
    708                 }
    709             }
    710         }
    711     } else {
    712         /* Build an extension-only .cnv file. */
    713         char baseFilename[500];
    714         char *basename;
    715 
    716         initConvData(&baseData);
    717 
    718         /* assemble a path/filename for data->ucm->baseName */
    719         uprv_strcpy(baseFilename, converterName);
    720         basename=(char *)findBasename(baseFilename);
    721         uprv_strcpy(basename, data->ucm->baseName);
    722         uprv_strcat(basename, ".ucm");
    723 
    724         /* read the base table */
    725         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
    726         if(U_FAILURE(*pErrorCode)) {
    727             return;
    728         } else if(!dataIsBase) {
    729             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
    730             *pErrorCode=U_INVALID_TABLE_FORMAT;
    731         } else {
    732             /* prepare the extension table */
    733             data->extData=CnvExtOpen(data->ucm);
    734             if(data->extData==NULL) {
    735                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    736             } else {
    737                 /* fill in gaps in extension file header fields */
    738                 UCMapping *m, *mLimit;
    739                 uint8_t fallbackFlags;
    740 
    741                 baseStates=&baseData.ucm->states;
    742                 if(states->conversionType==UCNV_DBCS) {
    743                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
    744                 } else if(states->minCharLength==0) {
    745                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
    746                 }
    747                 if(states->maxCharLength<states->minCharLength) {
    748                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
    749                 }
    750 
    751                 if(staticData->subCharLen==0) {
    752                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
    753                     staticData->subCharLen=baseData.staticData.subCharLen;
    754                 }
    755                 /*
    756                  * do not copy subChar1 -
    757                  * only use what is explicitly specified
    758                  * because it cannot be unset in the extension file header
    759                  */
    760 
    761                 /* get the fallback flags */
    762                 fallbackFlags=0;
    763                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    764                     m<mLimit && fallbackFlags!=3;
    765                     ++m
    766                 ) {
    767                     if(m->f==1) {
    768                         fallbackFlags|=1;
    769                     } else if(m->f==3) {
    770                         fallbackFlags|=2;
    771                     }
    772                 }
    773 
    774                 if(fallbackFlags&1) {
    775                     staticData->hasFromUnicodeFallback=TRUE;
    776                 }
    777                 if(fallbackFlags&2) {
    778                     staticData->hasToUnicodeFallback=TRUE;
    779                 }
    780 
    781                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
    782                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    783                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    784 
    785                 } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
    786                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    787                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    788 
    789                 } else if(
    790                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
    791                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    792                 ) {
    793                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    794                 } else {
    795                     if(states->maxCharLength>1) {
    796                         /*
    797                          * When building a normal .cnv file with a base table
    798                          * for an MBCS (not SBCS) table with explicit precision flags,
    799                          * the MBCSAddTable() function marks some mappings for moving
    800                          * to the extension table.
    801                          * They fit into the base toUnicode table but not into the
    802                          * base fromUnicode table.
    803                          * (Note: We do have explicit precision flags because they are
    804                          * required for extension table generation, and
    805                          * ucm_checkBaseExt() verified it.)
    806                          *
    807                          * We do not call MBCSAddTable() here (we probably could)
    808                          * so we need to do the analysis before building the extension table.
    809                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
    810                          * Redundant mappings in the extension table are ok except they cost some size.
    811                          *
    812                          * Do this after ucm_checkBaseExt().
    813                          */
    814                         const MBCSData *mbcsData=MBCSGetDummy();
    815                         int32_t needsMove=0;
    816                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    817                             m<mLimit;
    818                             ++m
    819                         ) {
    820                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
    821                                 m->f|=MBCS_FROM_U_EXT_FLAG;
    822                                 m->moveFlag=UCM_MOVE_TO_EXT;
    823                                 ++needsMove;
    824                             }
    825                         }
    826 
    827                         if(needsMove!=0) {
    828                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
    829                             ucm_sortTable(data->ucm->ext);
    830                         }
    831                     }
    832                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
    833                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    834                     }
    835                 }
    836             }
    837         }
    838 
    839         cleanupConvData(&baseData);
    840     }
    841 }
    842 
    843 /*
    844  * Hey, Emacs, please set the following:
    845  *
    846  * Local Variables:
    847  * indent-tabs-mode: nil
    848  * End:
    849  *
    850  */
    851