Home | History | Annotate | Download | only in makeconv
      1 /*
      2  ********************************************************************************
      3  *
      4  *   Copyright (C) 1998-2010, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  ********************************************************************************
      8  *
      9  *
     10  *  makeconv.c:
     11  *  tool creating a binary (compressed) representation of the conversion mapping
     12  *  table (IBM NLTC ucmap format).
     13  *
     14  *  05/04/2000    helena     Added fallback mapping into the picture...
     15  *  06/29/2000  helena      Major rewrite of the callback APIs.
     16  */
     17 
     18 #include <stdio.h>
     19 #include "unicode/putil.h"
     20 #include "unicode/ucnv_err.h"
     21 #include "ucnv_bld.h"
     22 #include "ucnv_imp.h"
     23 #include "ucnv_cnv.h"
     24 #include "cstring.h"
     25 #include "cmemory.h"
     26 #include "uinvchar.h"
     27 #include "filestrm.h"
     28 #include "toolutil.h"
     29 #include "uoptions.h"
     30 #include "unicode/udata.h"
     31 #include "unewdata.h"
     32 #include "uparse.h"
     33 #include "ucm.h"
     34 #include "makeconv.h"
     35 #include "genmbcs.h"
     36 
     37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     38 
     39 #define DEBUG 0
     40 
     41 typedef struct ConvData {
     42     UCMFile *ucm;
     43     NewConverter *cnvData, *extData;
     44     UConverterSharedData sharedData;
     45     UConverterStaticData staticData;
     46 } ConvData;
     47 
     48 static void
     49 initConvData(ConvData *data) {
     50     uprv_memset(data, 0, sizeof(ConvData));
     51     data->sharedData.structSize=sizeof(UConverterSharedData);
     52     data->staticData.structSize=sizeof(UConverterStaticData);
     53     data->sharedData.staticData=&data->staticData;
     54 }
     55 
     56 static void
     57 cleanupConvData(ConvData *data) {
     58     if(data!=NULL) {
     59         if(data->cnvData!=NULL) {
     60             data->cnvData->close(data->cnvData);
     61             data->cnvData=NULL;
     62         }
     63         if(data->extData!=NULL) {
     64             data->extData->close(data->extData);
     65             data->extData=NULL;
     66         }
     67         ucm_close(data->ucm);
     68         data->ucm=NULL;
     69     }
     70 }
     71 
     72 /*
     73  * from ucnvstat.c - static prototypes of data-based converters
     74  */
     75 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
     76 
     77 /*
     78  * Global - verbosity
     79  */
     80 UBool VERBOSE = FALSE;
     81 UBool SMALL = FALSE;
     82 UBool IGNORE_SISO_CHECK = FALSE;
     83 
     84 static void
     85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
     86 
     87 /*
     88  * Set up the UNewData and write the converter..
     89  */
     90 static void
     91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
     92 
     93 UBool haveCopyright=TRUE;
     94 
     95 static UDataInfo dataInfo={
     96     sizeof(UDataInfo),
     97     0,
     98 
     99     U_IS_BIG_ENDIAN,
    100     U_CHARSET_FAMILY,
    101     sizeof(UChar),
    102     0,
    103 
    104     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    105     {6, 2, 0, 0},                 /* formatVersion */
    106     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
    107 };
    108 
    109 static void
    110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
    111 {
    112     UNewDataMemory *mem = NULL;
    113     uint32_t sz2;
    114     uint32_t size = 0;
    115     int32_t tableType;
    116 
    117     if(U_FAILURE(*status))
    118       {
    119         return;
    120       }
    121 
    122     tableType=TABLE_NONE;
    123     if(data->cnvData!=NULL) {
    124         tableType|=TABLE_BASE;
    125     }
    126     if(data->extData!=NULL) {
    127         tableType|=TABLE_EXT;
    128     }
    129 
    130     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
    131 
    132     if(U_FAILURE(*status))
    133       {
    134         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
    135                 cnvName,
    136                 "cnv",
    137                 u_errorName(*status));
    138         return;
    139       }
    140 
    141     if(VERBOSE)
    142       {
    143         printf("- Opened udata %s.%s\n", cnvName, "cnv");
    144       }
    145 
    146 
    147     /* all read only, clean, platform independent data.  Mmmm. :)  */
    148     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    149     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    150     /* Now, write the table */
    151     if(tableType&TABLE_BASE) {
    152         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    153     }
    154     if(tableType&TABLE_EXT) {
    155         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    156     }
    157 
    158     sz2 = udata_finish(mem, status);
    159     if(size != sz2)
    160     {
    161         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
    162         *status=U_INTERNAL_PROGRAM_ERROR;
    163     }
    164     if(VERBOSE)
    165     {
    166       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
    167     }
    168 }
    169 
    170 enum {
    171     OPT_HELP_H,
    172     OPT_HELP_QUESTION_MARK,
    173     OPT_COPYRIGHT,
    174     OPT_VERSION,
    175     OPT_DESTDIR,
    176     OPT_VERBOSE,
    177     OPT_SMALL,
    178     OPT_IGNORE_SISO_CHECK,
    179     OPT_COUNT
    180 };
    181 
    182 static UOption options[]={
    183     UOPTION_HELP_H,
    184     UOPTION_HELP_QUESTION_MARK,
    185     UOPTION_COPYRIGHT,
    186     UOPTION_VERSION,
    187     UOPTION_DESTDIR,
    188     UOPTION_VERBOSE,
    189     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    190     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
    191 };
    192 
    193 int main(int argc, char* argv[])
    194 {
    195     ConvData data;
    196     UErrorCode err = U_ZERO_ERROR, localError;
    197     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    198     const char* destdir, *arg;
    199     size_t destdirlen;
    200     char* dot = NULL, *outBasename;
    201     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    202     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    203     UVersionInfo icuVersion;
    204     UBool printFilename;
    205 
    206     err = U_ZERO_ERROR;
    207 
    208     U_MAIN_INIT_ARGS(argc, argv);
    209 
    210     /* Set up the ICU version number */
    211     u_getVersion(icuVersion);
    212     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
    213 
    214     /* preset then read command line options */
    215     options[OPT_DESTDIR].value=u_getDataDirectory();
    216     argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
    217 
    218     /* error handling, printing usage message */
    219     if(argc<0) {
    220         fprintf(stderr,
    221             "error in command line argument \"%s\"\n",
    222             argv[-argc]);
    223     } else if(argc<2) {
    224         argc=-1;
    225     }
    226     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
    227         FILE *stdfile=argc<0 ? stderr : stdout;
    228         fprintf(stdfile,
    229             "usage: %s [-options] files...\n"
    230             "\tread .ucm codepage mapping files and write .cnv files\n"
    231             "options:\n"
    232             "\t-h or -? or --help  this usage text\n"
    233             "\t-V or --version     show a version message\n"
    234             "\t-c or --copyright   include a copyright notice\n"
    235             "\t-d or --destdir     destination directory, followed by the path\n"
    236             "\t-v or --verbose     Turn on verbose output\n",
    237             argv[0]);
    238         fprintf(stdfile,
    239             "\t      --small       Generate smaller .cnv files. They will be\n"
    240             "\t                    significantly smaller but may not be compatible with\n"
    241             "\t                    older versions of ICU and will require heap memory\n"
    242             "\t                    allocation when loaded.\n"
    243             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
    244         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    245     }
    246 
    247     if(options[OPT_VERSION].doesOccur) {
    248         printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
    249                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
    250         printf("%s\n", U_COPYRIGHT_STRING);
    251         exit(0);
    252     }
    253 
    254     /* get the options values */
    255     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    256     destdir = options[OPT_DESTDIR].value;
    257     VERBOSE = options[OPT_VERBOSE].doesOccur;
    258     SMALL = options[OPT_SMALL].doesOccur;
    259 
    260     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
    261         IGNORE_SISO_CHECK = TRUE;
    262     }
    263 
    264     if (destdir != NULL && *destdir != 0) {
    265         uprv_strcpy(outFileName, destdir);
    266         destdirlen = uprv_strlen(destdir);
    267         outBasename = outFileName + destdirlen;
    268         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
    269             *outBasename++ = U_FILE_SEP_CHAR;
    270             ++destdirlen;
    271         }
    272     } else {
    273         destdirlen = 0;
    274         outBasename = outFileName;
    275     }
    276 
    277 #if DEBUG
    278     {
    279       int i;
    280       printf("makeconv: processing %d files...\n", argc - 1);
    281       for(i=1; i<argc; ++i) {
    282         printf("%s ", argv[i]);
    283       }
    284       printf("\n");
    285       fflush(stdout);
    286     }
    287 #endif
    288 
    289     err = U_ZERO_ERROR;
    290     printFilename = (UBool) (argc > 2 || VERBOSE);
    291     for (++argv; --argc; ++argv)
    292     {
    293         arg = getLongPathname(*argv);
    294 
    295         /* Check for potential buffer overflow */
    296         if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
    297         {
    298             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
    299             return U_BUFFER_OVERFLOW_ERROR;
    300         }
    301 
    302         /*produces the right destination path for display*/
    303         if (destdirlen != 0)
    304         {
    305             const char *basename;
    306 
    307             /* find the last file sepator */
    308             basename = findBasename(arg);
    309             uprv_strcpy(outBasename, basename);
    310         }
    311         else
    312         {
    313             uprv_strcpy(outFileName, arg);
    314         }
    315 
    316         /*removes the extension if any is found*/
    317         dot = uprv_strrchr(outBasename, '.');
    318         if (dot)
    319         {
    320             *dot = '\0';
    321         }
    322 
    323         /* the basename without extension is the converter name */
    324         uprv_strcpy(cnvName, outBasename);
    325 
    326         /*Adds the target extension*/
    327         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
    328 
    329 #if DEBUG
    330         printf("makeconv: processing %s  ...\n", arg);
    331         fflush(stdout);
    332 #endif
    333         localError = U_ZERO_ERROR;
    334         initConvData(&data);
    335         createConverter(&data, arg, &localError);
    336 
    337         if (U_FAILURE(localError))
    338         {
    339             /* if an error is found, print out an error msg and keep going */
    340             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    341                 u_errorName(localError));
    342             if(U_SUCCESS(err)) {
    343                 err = localError;
    344             }
    345         }
    346         else
    347         {
    348             /* Insure the static data name matches the  file name */
    349             /* Changed to ignore directory and only compare base name
    350              LDH 1/2/08*/
    351             char *p;
    352             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
    353 
    354             if(p == NULL)            /* OK, try alternate */
    355             {
    356                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
    357                 if(p == NULL)
    358                 {
    359                     p=cnvName; /* If no separators, no problem */
    360                 }
    361             }
    362             else
    363             {
    364                 p++;   /* If found separtor, don't include it in compare */
    365             }
    366             if(uprv_stricmp(p,data.staticData.name))
    367             {
    368                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
    369                     cnvName,  CONVERTER_FILE_EXTENSION,
    370                     data.staticData.name);
    371             }
    372 
    373             uprv_strcpy((char*)data.staticData.name, cnvName);
    374 
    375             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
    376                 fprintf(stderr,
    377                     "Error: A converter name must contain only invariant characters.\n"
    378                     "%s is not a valid converter name.\n",
    379                     data.staticData.name);
    380                 if(U_SUCCESS(err)) {
    381                     err = U_INVALID_TABLE_FORMAT;
    382                 }
    383             }
    384 
    385             uprv_strcpy(cnvNameWithPkg, cnvName);
    386 
    387             localError = U_ZERO_ERROR;
    388             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
    389 
    390             if(U_FAILURE(localError))
    391             {
    392                 /* if an error is found, print out an error msg and keep going*/
    393                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    394                     u_errorName(localError));
    395                 if(U_SUCCESS(err)) {
    396                     err = localError;
    397                 }
    398             }
    399             else if (printFilename)
    400             {
    401                 puts(outBasename);
    402             }
    403         }
    404         fflush(stdout);
    405         fflush(stderr);
    406 
    407         cleanupConvData(&data);
    408     }
    409 
    410     return err;
    411 }
    412 
    413 static void
    414 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    415     if( (name[0]=='i' || name[0]=='I') &&
    416         (name[1]=='b' || name[1]=='B') &&
    417         (name[2]=='m' || name[2]=='M')
    418     ) {
    419         name+=3;
    420         if(*name=='-') {
    421             ++name;
    422         }
    423         *pPlatform=UCNV_IBM;
    424         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
    425     } else {
    426         *pPlatform=UCNV_UNKNOWN;
    427         *pCCSID=0;
    428     }
    429 }
    430 
    431 static void
    432 readHeader(ConvData *data,
    433            FileStream* convFile,
    434            const char* converterName,
    435            UErrorCode *pErrorCode) {
    436     char line[200];
    437     char *s, *key, *value;
    438     const UConverterStaticData *prototype;
    439     UConverterStaticData *staticData;
    440 
    441     if(U_FAILURE(*pErrorCode)) {
    442         return;
    443     }
    444 
    445     staticData=&data->staticData;
    446     staticData->platform=UCNV_IBM;
    447     staticData->subCharLen=0;
    448 
    449     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    450         /* basic parsing and handling of state-related items */
    451         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
    452             continue;
    453         }
    454 
    455         /* stop at the beginning of the mapping section */
    456         if(uprv_strcmp(line, "CHARMAP")==0) {
    457             break;
    458         }
    459 
    460         /* collect the information from the header field, ignore unknown keys */
    461         if(uprv_strcmp(key, "code_set_name")==0) {
    462             if(*value!=0) {
    463                 uprv_strcpy((char *)staticData->name, value);
    464                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
    465             }
    466         } else if(uprv_strcmp(key, "subchar")==0) {
    467             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    468             int8_t length;
    469 
    470             s=value;
    471             length=ucm_parseBytes(bytes, line, (const char **)&s);
    472             if(1<=length && length<=4 && *s==0) {
    473                 staticData->subCharLen=length;
    474                 uprv_memcpy(staticData->subChar, bytes, length);
    475             } else {
    476                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
    477                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    478                 return;
    479             }
    480         } else if(uprv_strcmp(key, "subchar1")==0) {
    481             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    482 
    483             s=value;
    484             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
    485                 staticData->subChar1=bytes[0];
    486             } else {
    487                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
    488                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    489                 return;
    490             }
    491         }
    492     }
    493 
    494     /* copy values from the UCMFile to the static data */
    495     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    496     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    497     staticData->conversionType=data->ucm->states.conversionType;
    498 
    499     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
    500         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
    501         *pErrorCode=U_INVALID_TABLE_FORMAT;
    502         return;
    503     }
    504 
    505     /*
    506      * Now that we know the type, copy any 'default' values from the table.
    507      * We need not check the type any further because the parser only
    508      * recognizes what we have prototypes for.
    509      *
    510      * For delta (extension-only) tables, copy values from the base file
    511      * instead, see createConverter().
    512      */
    513     if(data->ucm->baseName[0]==0) {
    514         prototype=ucnv_converterStaticData[staticData->conversionType];
    515         if(prototype!=NULL) {
    516             if(staticData->name[0]==0) {
    517                 uprv_strcpy((char *)staticData->name, prototype->name);
    518             }
    519 
    520             if(staticData->codepage==0) {
    521                 staticData->codepage=prototype->codepage;
    522             }
    523 
    524             if(staticData->platform==0) {
    525                 staticData->platform=prototype->platform;
    526             }
    527 
    528             if(staticData->minBytesPerChar==0) {
    529                 staticData->minBytesPerChar=prototype->minBytesPerChar;
    530             }
    531 
    532             if(staticData->maxBytesPerChar==0) {
    533                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
    534             }
    535 
    536             if(staticData->subCharLen==0) {
    537                 staticData->subCharLen=prototype->subCharLen;
    538                 if(prototype->subCharLen>0) {
    539                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
    540                 }
    541             }
    542         }
    543     }
    544 
    545     if(data->ucm->states.outputType<0) {
    546         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    547     }
    548 
    549     if( staticData->subChar1!=0 &&
    550             (staticData->minBytesPerChar>1 ||
    551                 (staticData->conversionType!=UCNV_MBCS &&
    552                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    553     ) {
    554         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
    555         *pErrorCode=U_INVALID_TABLE_FORMAT;
    556     }
    557 }
    558 
    559 /* return TRUE if a base table was read, FALSE for an extension table */
    560 static UBool
    561 readFile(ConvData *data, const char* converterName,
    562          UErrorCode *pErrorCode) {
    563     char line[200];
    564     char *end;
    565     FileStream *convFile;
    566 
    567     UCMStates *baseStates;
    568     UBool dataIsBase;
    569 
    570     if(U_FAILURE(*pErrorCode)) {
    571         return FALSE;
    572     }
    573 
    574     data->ucm=ucm_open();
    575 
    576     convFile=T_FileStream_open(converterName, "r");
    577     if(convFile==NULL) {
    578         *pErrorCode=U_FILE_ACCESS_ERROR;
    579         return FALSE;
    580     }
    581 
    582     readHeader(data, convFile, converterName, pErrorCode);
    583     if(U_FAILURE(*pErrorCode)) {
    584         return FALSE;
    585     }
    586 
    587     if(data->ucm->baseName[0]==0) {
    588         dataIsBase=TRUE;
    589         baseStates=&data->ucm->states;
    590         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
    591     } else {
    592         dataIsBase=FALSE;
    593         baseStates=NULL;
    594     }
    595 
    596     /* read the base table */
    597     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    598     if(U_FAILURE(*pErrorCode)) {
    599         return FALSE;
    600     }
    601 
    602     /* read an extension table if there is one */
    603     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    604         end=uprv_strchr(line, 0);
    605         while(line<end &&
    606               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
    607             --end;
    608         }
    609         *end=0;
    610 
    611         if(line[0]=='#' || u_skipWhitespace(line)==end) {
    612             continue; /* ignore empty and comment lines */
    613         }
    614 
    615         if(0==uprv_strcmp(line, "CHARMAP")) {
    616             /* read the extension table */
    617             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
    618         } else {
    619             fprintf(stderr, "unexpected text after the base mapping table\n");
    620         }
    621         break;
    622     }
    623 
    624     T_FileStream_close(convFile);
    625 
    626     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
    627         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
    628         *pErrorCode=U_INVALID_TABLE_FORMAT;
    629     }
    630 
    631     return dataIsBase;
    632 }
    633 
    634 static void
    635 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    636     ConvData baseData;
    637     UBool dataIsBase;
    638 
    639     UConverterStaticData *staticData;
    640     UCMStates *states, *baseStates;
    641 
    642     if(U_FAILURE(*pErrorCode)) {
    643         return;
    644     }
    645 
    646     initConvData(data);
    647 
    648     dataIsBase=readFile(data, converterName, pErrorCode);
    649     if(U_FAILURE(*pErrorCode)) {
    650         return;
    651     }
    652 
    653     staticData=&data->staticData;
    654     states=&data->ucm->states;
    655 
    656     if(dataIsBase) {
    657         /*
    658          * Build a normal .cnv file with a base table
    659          * and an optional extension table.
    660          */
    661         data->cnvData=MBCSOpen(data->ucm);
    662         if(data->cnvData==NULL) {
    663             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    664 
    665         } else if(!data->cnvData->isValid(data->cnvData,
    666                             staticData->subChar, staticData->subCharLen)
    667         ) {
    668             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    669             *pErrorCode=U_INVALID_TABLE_FORMAT;
    670 
    671         } else if(staticData->subChar1!=0 &&
    672                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
    673         ) {
    674             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    675             *pErrorCode=U_INVALID_TABLE_FORMAT;
    676 
    677         } else if(
    678             data->ucm->ext->mappingsLength>0 &&
    679             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    680         ) {
    681             *pErrorCode=U_INVALID_TABLE_FORMAT;
    682         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
    683             /* sort the table so that it can be turned into UTF-8-friendly data */
    684             ucm_sortTable(data->ucm->base);
    685         }
    686 
    687         if(U_SUCCESS(*pErrorCode)) {
    688             if(
    689                 /* add the base table after ucm_checkBaseExt()! */
    690                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
    691             ) {
    692                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    693             } else {
    694                 /*
    695                  * addTable() may have requested moving more mappings to the extension table
    696                  * if they fit into the base toUnicode table but not into the
    697                  * base fromUnicode table.
    698                  * (Especially for UTF-8-friendly fromUnicode tables.)
    699                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
    700                  * to be excluded from the extension toUnicode data.
    701                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
    702                  * the base fromUnicode table.
    703                  */
    704                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
    705                 ucm_sortTable(data->ucm->ext);
    706                 if(data->ucm->ext->mappingsLength>0) {
    707                     /* prepare the extension table, if there is one */
    708                     data->extData=CnvExtOpen(data->ucm);
    709                     if(data->extData==NULL) {
    710                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    711                     } else if(
    712                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
    713                     ) {
    714                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    715                     }
    716                 }
    717             }
    718         }
    719     } else {
    720         /* Build an extension-only .cnv file. */
    721         char baseFilename[500];
    722         char *basename;
    723 
    724         initConvData(&baseData);
    725 
    726         /* assemble a path/filename for data->ucm->baseName */
    727         uprv_strcpy(baseFilename, converterName);
    728         basename=(char *)findBasename(baseFilename);
    729         uprv_strcpy(basename, data->ucm->baseName);
    730         uprv_strcat(basename, ".ucm");
    731 
    732         /* read the base table */
    733         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
    734         if(U_FAILURE(*pErrorCode)) {
    735             return;
    736         } else if(!dataIsBase) {
    737             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
    738             *pErrorCode=U_INVALID_TABLE_FORMAT;
    739         } else {
    740             /* prepare the extension table */
    741             data->extData=CnvExtOpen(data->ucm);
    742             if(data->extData==NULL) {
    743                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    744             } else {
    745                 /* fill in gaps in extension file header fields */
    746                 UCMapping *m, *mLimit;
    747                 uint8_t fallbackFlags;
    748 
    749                 baseStates=&baseData.ucm->states;
    750                 if(states->conversionType==UCNV_DBCS) {
    751                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
    752                 } else if(states->minCharLength==0) {
    753                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
    754                 }
    755                 if(states->maxCharLength<states->minCharLength) {
    756                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
    757                 }
    758 
    759                 if(staticData->subCharLen==0) {
    760                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
    761                     staticData->subCharLen=baseData.staticData.subCharLen;
    762                 }
    763                 /*
    764                  * do not copy subChar1 -
    765                  * only use what is explicitly specified
    766                  * because it cannot be unset in the extension file header
    767                  */
    768 
    769                 /* get the fallback flags */
    770                 fallbackFlags=0;
    771                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    772                     m<mLimit && fallbackFlags!=3;
    773                     ++m
    774                 ) {
    775                     if(m->f==1) {
    776                         fallbackFlags|=1;
    777                     } else if(m->f==3) {
    778                         fallbackFlags|=2;
    779                     }
    780                 }
    781 
    782                 if(fallbackFlags&1) {
    783                     staticData->hasFromUnicodeFallback=TRUE;
    784                 }
    785                 if(fallbackFlags&2) {
    786                     staticData->hasToUnicodeFallback=TRUE;
    787                 }
    788 
    789                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
    790                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    791                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    792 
    793                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
    794                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    795                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    796 
    797                 } else if(
    798                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
    799                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    800                 ) {
    801                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    802                 } else {
    803                     if(states->maxCharLength>1) {
    804                         /*
    805                          * When building a normal .cnv file with a base table
    806                          * for an MBCS (not SBCS) table with explicit precision flags,
    807                          * the MBCSAddTable() function marks some mappings for moving
    808                          * to the extension table.
    809                          * They fit into the base toUnicode table but not into the
    810                          * base fromUnicode table.
    811                          * (Note: We do have explicit precision flags because they are
    812                          * required for extension table generation, and
    813                          * ucm_checkBaseExt() verified it.)
    814                          *
    815                          * We do not call MBCSAddTable() here (we probably could)
    816                          * so we need to do the analysis before building the extension table.
    817                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
    818                          * Redundant mappings in the extension table are ok except they cost some size.
    819                          *
    820                          * Do this after ucm_checkBaseExt().
    821                          */
    822                         const MBCSData *mbcsData=MBCSGetDummy();
    823                         int32_t needsMove=0;
    824                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    825                             m<mLimit;
    826                             ++m
    827                         ) {
    828                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
    829                                 m->f|=MBCS_FROM_U_EXT_FLAG;
    830                                 m->moveFlag=UCM_MOVE_TO_EXT;
    831                                 ++needsMove;
    832                             }
    833                         }
    834 
    835                         if(needsMove!=0) {
    836                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
    837                             ucm_sortTable(data->ucm->ext);
    838                         }
    839                     }
    840                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
    841                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    842                     }
    843                 }
    844             }
    845         }
    846 
    847         cleanupConvData(&baseData);
    848     }
    849 }
    850 
    851 /*
    852  * Hey, Emacs, please set the following:
    853  *
    854  * Local Variables:
    855  * indent-tabs-mode: nil
    856  * End:
    857  *
    858  */
    859