Home | History | Annotate | Download | only in makeconv
      1 /*
      2  ********************************************************************************
      3  *
      4  *   Copyright (C) 1998-2015, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  ********************************************************************************
      8  *
      9  *
     10  *  makeconv.cpp:
     11  *  tool creating a binary (compressed) representation of the conversion mapping
     12  *  table (IBM NLTC ucmap format).
     13  *
     14  *  05/04/2000    helena     Added fallback mapping into the picture...
     15  *  06/29/2000  helena      Major rewrite of the callback APIs.
     16  */
     17 
#include <stdio.h>
#include <string.h>
#include "unicode/putil.h"
#include "unicode/ucnv_err.h"
#include "charstr.h"
#include "ucnv_bld.h"
#include "ucnv_imp.h"
#include "ucnv_cnv.h"
#include "cstring.h"
#include "cmemory.h"
#include "uinvchar.h"
#include "filestrm.h"
#include "toolutil.h"
#include "uoptions.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "uparse.h"
#include "ucm.h"
#include "makeconv.h"
#include "genmbcs.h"
     37 
     38 #define DEBUG 0
     39 
     40 typedef struct ConvData {
     41     UCMFile *ucm;
     42     NewConverter *cnvData, *extData;
     43     UConverterSharedData sharedData;
     44     UConverterStaticData staticData;
     45 } ConvData;
     46 
     47 static void
     48 initConvData(ConvData *data) {
     49     uprv_memset(data, 0, sizeof(ConvData));
     50     data->sharedData.structSize=sizeof(UConverterSharedData);
     51     data->staticData.structSize=sizeof(UConverterStaticData);
     52     data->sharedData.staticData=&data->staticData;
     53 }
     54 
     55 static void
     56 cleanupConvData(ConvData *data) {
     57     if(data!=NULL) {
     58         if(data->cnvData!=NULL) {
     59             data->cnvData->close(data->cnvData);
     60             data->cnvData=NULL;
     61         }
     62         if(data->extData!=NULL) {
     63             data->extData->close(data->extData);
     64             data->extData=NULL;
     65         }
     66         ucm_close(data->ucm);
     67         data->ucm=NULL;
     68     }
     69 }
     70 
     71 /*
     72  * from ucnvstat.c - static prototypes of data-based converters
     73  */
     74 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
     75 
     76 /*
     77  * Global - verbosity
     78  */
     79 UBool VERBOSE = FALSE;
     80 UBool QUIET = FALSE;
     81 UBool SMALL = FALSE;
     82 UBool IGNORE_SISO_CHECK = FALSE;
     83 
     84 static void
     85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
     86 
     87 /*
     88  * Set up the UNewData and write the converter..
     89  */
     90 static void
     91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
     92 
     93 UBool haveCopyright=TRUE;
     94 
     95 static UDataInfo dataInfo={
     96     sizeof(UDataInfo),
     97     0,
     98 
     99     U_IS_BIG_ENDIAN,
    100     U_CHARSET_FAMILY,
    101     sizeof(UChar),
    102     0,
    103 
    104     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    105     {6, 2, 0, 0},                 /* formatVersion */
    106     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
    107 };
    108 
    109 static void
    110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
    111 {
    112     UNewDataMemory *mem = NULL;
    113     uint32_t sz2;
    114     uint32_t size = 0;
    115     int32_t tableType;
    116 
    117     if(U_FAILURE(*status))
    118       {
    119         return;
    120       }
    121 
    122     tableType=TABLE_NONE;
    123     if(data->cnvData!=NULL) {
    124         tableType|=TABLE_BASE;
    125     }
    126     if(data->extData!=NULL) {
    127         tableType|=TABLE_EXT;
    128     }
    129 
    130     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
    131 
    132     if(U_FAILURE(*status))
    133       {
    134         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
    135                 cnvName,
    136                 "cnv",
    137                 u_errorName(*status));
    138         return;
    139       }
    140 
    141     if(VERBOSE)
    142       {
    143         printf("- Opened udata %s.%s\n", cnvName, "cnv");
    144       }
    145 
    146 
    147     /* all read only, clean, platform independent data.  Mmmm. :)  */
    148     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    149     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    150     /* Now, write the table */
    151     if(tableType&TABLE_BASE) {
    152         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    153     }
    154     if(tableType&TABLE_EXT) {
    155         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    156     }
    157 
    158     sz2 = udata_finish(mem, status);
    159     if(size != sz2)
    160     {
    161         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
    162         *status=U_INTERNAL_PROGRAM_ERROR;
    163     }
    164     if(VERBOSE)
    165     {
    166       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
    167     }
    168 }
    169 
    170 enum {
    171     OPT_HELP_H,
    172     OPT_HELP_QUESTION_MARK,
    173     OPT_COPYRIGHT,
    174     OPT_VERSION,
    175     OPT_DESTDIR,
    176     OPT_VERBOSE,
    177     OPT_SMALL,
    178     OPT_IGNORE_SISO_CHECK,
    179     OPT_QUIET,
    180 
    181     OPT_COUNT
    182 };
    183 
    184 static UOption options[]={
    185     UOPTION_HELP_H,
    186     UOPTION_HELP_QUESTION_MARK,
    187     UOPTION_COPYRIGHT,
    188     UOPTION_VERSION,
    189     UOPTION_DESTDIR,
    190     UOPTION_VERBOSE,
    191     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    192     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    193     UOPTION_QUIET,
    194 };
    195 
    196 int main(int argc, char* argv[])
    197 {
    198     ConvData data;
    199     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    200 
    201     U_MAIN_INIT_ARGS(argc, argv);
    202 
    203     /* Set up the ICU version number */
    204     UVersionInfo icuVersion;
    205     u_getVersion(icuVersion);
    206     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
    207 
    208     /* preset then read command line options */
    209     options[OPT_DESTDIR].value=u_getDataDirectory();
    210     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    211 
    212     /* error handling, printing usage message */
    213     if(argc<0) {
    214         fprintf(stderr,
    215             "error in command line argument \"%s\"\n",
    216             argv[-argc]);
    217     } else if(argc<2) {
    218         argc=-1;
    219     }
    220     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
    221         FILE *stdfile=argc<0 ? stderr : stdout;
    222         fprintf(stdfile,
    223             "usage: %s [-options] files...\n"
    224             "\tread .ucm codepage mapping files and write .cnv files\n"
    225             "options:\n"
    226             "\t-h or -? or --help  this usage text\n"
    227             "\t-V or --version     show a version message\n"
    228             "\t-c or --copyright   include a copyright notice\n"
    229             "\t-d or --destdir     destination directory, followed by the path\n"
    230             "\t-v or --verbose     Turn on verbose output\n"
    231             "\t-q or --quiet       do not display warnings and progress\n",
    232             argv[0]);
    233         fprintf(stdfile,
    234             "\t      --small       Generate smaller .cnv files. They will be\n"
    235             "\t                    significantly smaller but may not be compatible with\n"
    236             "\t                    older versions of ICU and will require heap memory\n"
    237             "\t                    allocation when loaded.\n"
    238             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
    239         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    240     }
    241 
    242     if(options[OPT_VERSION].doesOccur) {
    243         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
    244                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
    245         printf("%s\n", U_COPYRIGHT_STRING);
    246         exit(0);
    247     }
    248 
    249     /* get the options values */
    250     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    251     const char *destdir = options[OPT_DESTDIR].value;
    252     VERBOSE = options[OPT_VERBOSE].doesOccur;
    253     QUIET = options[OPT_QUIET].doesOccur;
    254     SMALL = options[OPT_SMALL].doesOccur;
    255 
    256     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
    257         IGNORE_SISO_CHECK = TRUE;
    258     }
    259 
    260     icu::CharString outFileName;
    261     UErrorCode err = U_ZERO_ERROR;
    262     if (destdir != NULL && *destdir != 0) {
    263         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
    264         if (U_FAILURE(err)) {
    265             return err;
    266         }
    267     }
    268     int32_t outBasenameStart = outFileName.length();
    269 
    270 #if DEBUG
    271     {
    272       int i;
    273       printf("makeconv: processing %d files...\n", argc - 1);
    274       for(i=1; i<argc; ++i) {
    275         printf("%s ", argv[i]);
    276       }
    277       printf("\n");
    278       fflush(stdout);
    279     }
    280 #endif
    281 
    282     UBool printFilename = (UBool) (argc > 2 || VERBOSE);
    283     for (++argv; --argc; ++argv)
    284     {
    285         UErrorCode localError = U_ZERO_ERROR;
    286         const char *arg = getLongPathname(*argv);
    287 
    288         /*produces the right destination path for display*/
    289         outFileName.truncate(outBasenameStart);
    290         if (outBasenameStart != 0)
    291         {
    292             /* find the last file sepator */
    293             const char *basename = findBasename(arg);
    294             outFileName.append(basename, localError);
    295         }
    296         else
    297         {
    298             outFileName.append(arg, localError);
    299         }
    300         if (U_FAILURE(localError)) {
    301             return localError;
    302         }
    303 
    304         /*removes the extension if any is found*/
    305         int32_t lastDotIndex = outFileName.lastIndexOf('.');
    306         if (lastDotIndex >= outBasenameStart) {
    307             outFileName.truncate(lastDotIndex);
    308         }
    309 
    310         /* the basename without extension is the converter name */
    311         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
    312             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
    313             return U_BUFFER_OVERFLOW_ERROR;
    314         }
    315         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
    316 
    317         /*Adds the target extension*/
    318         outFileName.append(CONVERTER_FILE_EXTENSION, localError);
    319         if (U_FAILURE(localError)) {
    320             return localError;
    321         }
    322 
    323 #if DEBUG
    324         printf("makeconv: processing %s  ...\n", arg);
    325         fflush(stdout);
    326 #endif
    327         initConvData(&data);
    328         createConverter(&data, arg, &localError);
    329 
    330         if (U_FAILURE(localError))
    331         {
    332             /* if an error is found, print out an error msg and keep going */
    333             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
    334                     outFileName.data(), arg, u_errorName(localError));
    335             if(U_SUCCESS(err)) {
    336                 err = localError;
    337             }
    338         }
    339         else
    340         {
    341             /* Insure the static data name matches the  file name */
    342             /* Changed to ignore directory and only compare base name
    343              LDH 1/2/08*/
    344             char *p;
    345             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
    346 
    347             if(p == NULL)            /* OK, try alternate */
    348             {
    349                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
    350                 if(p == NULL)
    351                 {
    352                     p=cnvName; /* If no separators, no problem */
    353                 }
    354             }
    355             else
    356             {
    357                 p++;   /* If found separator, don't include it in compare */
    358             }
    359             if(uprv_stricmp(p,data.staticData.name) && !QUIET)
    360             {
    361                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
    362                     cnvName,  CONVERTER_FILE_EXTENSION,
    363                     data.staticData.name);
    364             }
    365 
    366             uprv_strcpy((char*)data.staticData.name, cnvName);
    367 
    368             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
    369                 fprintf(stderr,
    370                     "Error: A converter name must contain only invariant characters.\n"
    371                     "%s is not a valid converter name.\n",
    372                     data.staticData.name);
    373                 if(U_SUCCESS(err)) {
    374                     err = U_INVALID_TABLE_FORMAT;
    375                 }
    376             }
    377 
    378             localError = U_ZERO_ERROR;
    379             writeConverterData(&data, cnvName, destdir, &localError);
    380 
    381             if(U_FAILURE(localError))
    382             {
    383                 /* if an error is found, print out an error msg and keep going*/
    384                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
    385                     u_errorName(localError));
    386                 if(U_SUCCESS(err)) {
    387                     err = localError;
    388                 }
    389             }
    390             else if (printFilename)
    391             {
    392                 puts(outFileName.data() + outBasenameStart);
    393             }
    394         }
    395         fflush(stdout);
    396         fflush(stderr);
    397 
    398         cleanupConvData(&data);
    399     }
    400 
    401     return err;
    402 }
    403 
    404 static void
    405 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    406     if( (name[0]=='i' || name[0]=='I') &&
    407         (name[1]=='b' || name[1]=='B') &&
    408         (name[2]=='m' || name[2]=='M')
    409     ) {
    410         name+=3;
    411         if(*name=='-') {
    412             ++name;
    413         }
    414         *pPlatform=UCNV_IBM;
    415         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
    416     } else {
    417         *pPlatform=UCNV_UNKNOWN;
    418         *pCCSID=0;
    419     }
    420 }
    421 
    422 static void
    423 readHeader(ConvData *data,
    424            FileStream* convFile,
    425            UErrorCode *pErrorCode) {
    426     char line[1024];
    427     char *s, *key, *value;
    428     const UConverterStaticData *prototype;
    429     UConverterStaticData *staticData;
    430 
    431     if(U_FAILURE(*pErrorCode)) {
    432         return;
    433     }
    434 
    435     staticData=&data->staticData;
    436     staticData->platform=UCNV_IBM;
    437     staticData->subCharLen=0;
    438 
    439     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    440         /* basic parsing and handling of state-related items */
    441         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
    442             continue;
    443         }
    444 
    445         /* stop at the beginning of the mapping section */
    446         if(uprv_strcmp(line, "CHARMAP")==0) {
    447             break;
    448         }
    449 
    450         /* collect the information from the header field, ignore unknown keys */
    451         if(uprv_strcmp(key, "code_set_name")==0) {
    452             if(*value!=0) {
    453                 uprv_strcpy((char *)staticData->name, value);
    454                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
    455             }
    456         } else if(uprv_strcmp(key, "subchar")==0) {
    457             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    458             int8_t length;
    459 
    460             s=value;
    461             length=ucm_parseBytes(bytes, line, (const char **)&s);
    462             if(1<=length && length<=4 && *s==0) {
    463                 staticData->subCharLen=length;
    464                 uprv_memcpy(staticData->subChar, bytes, length);
    465             } else {
    466                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
    467                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    468                 return;
    469             }
    470         } else if(uprv_strcmp(key, "subchar1")==0) {
    471             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    472 
    473             s=value;
    474             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
    475                 staticData->subChar1=bytes[0];
    476             } else {
    477                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
    478                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    479                 return;
    480             }
    481         }
    482     }
    483 
    484     /* copy values from the UCMFile to the static data */
    485     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    486     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    487     staticData->conversionType=data->ucm->states.conversionType;
    488 
    489     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
    490         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
    491         *pErrorCode=U_INVALID_TABLE_FORMAT;
    492         return;
    493     }
    494 
    495     /*
    496      * Now that we know the type, copy any 'default' values from the table.
    497      * We need not check the type any further because the parser only
    498      * recognizes what we have prototypes for.
    499      *
    500      * For delta (extension-only) tables, copy values from the base file
    501      * instead, see createConverter().
    502      */
    503     if(data->ucm->baseName[0]==0) {
    504         prototype=ucnv_converterStaticData[staticData->conversionType];
    505         if(prototype!=NULL) {
    506             if(staticData->name[0]==0) {
    507                 uprv_strcpy((char *)staticData->name, prototype->name);
    508             }
    509 
    510             if(staticData->codepage==0) {
    511                 staticData->codepage=prototype->codepage;
    512             }
    513 
    514             if(staticData->platform==0) {
    515                 staticData->platform=prototype->platform;
    516             }
    517 
    518             if(staticData->minBytesPerChar==0) {
    519                 staticData->minBytesPerChar=prototype->minBytesPerChar;
    520             }
    521 
    522             if(staticData->maxBytesPerChar==0) {
    523                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
    524             }
    525 
    526             if(staticData->subCharLen==0) {
    527                 staticData->subCharLen=prototype->subCharLen;
    528                 if(prototype->subCharLen>0) {
    529                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
    530                 }
    531             }
    532         }
    533     }
    534 
    535     if(data->ucm->states.outputType<0) {
    536         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    537     }
    538 
    539     if( staticData->subChar1!=0 &&
    540             (staticData->minBytesPerChar>1 ||
    541                 (staticData->conversionType!=UCNV_MBCS &&
    542                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    543     ) {
    544         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
    545         *pErrorCode=U_INVALID_TABLE_FORMAT;
    546     }
    547 }
    548 
    549 /* return TRUE if a base table was read, FALSE for an extension table */
    550 static UBool
    551 readFile(ConvData *data, const char* converterName,
    552          UErrorCode *pErrorCode) {
    553     char line[1024];
    554     char *end;
    555     FileStream *convFile;
    556 
    557     UCMStates *baseStates;
    558     UBool dataIsBase;
    559 
    560     if(U_FAILURE(*pErrorCode)) {
    561         return FALSE;
    562     }
    563 
    564     data->ucm=ucm_open();
    565 
    566     convFile=T_FileStream_open(converterName, "r");
    567     if(convFile==NULL) {
    568         *pErrorCode=U_FILE_ACCESS_ERROR;
    569         return FALSE;
    570     }
    571 
    572     readHeader(data, convFile, pErrorCode);
    573     if(U_FAILURE(*pErrorCode)) {
    574         return FALSE;
    575     }
    576 
    577     if(data->ucm->baseName[0]==0) {
    578         dataIsBase=TRUE;
    579         baseStates=&data->ucm->states;
    580         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
    581     } else {
    582         dataIsBase=FALSE;
    583         baseStates=NULL;
    584     }
    585 
    586     /* read the base table */
    587     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    588     if(U_FAILURE(*pErrorCode)) {
    589         return FALSE;
    590     }
    591 
    592     /* read an extension table if there is one */
    593     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    594         end=uprv_strchr(line, 0);
    595         while(line<end &&
    596               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
    597             --end;
    598         }
    599         *end=0;
    600 
    601         if(line[0]=='#' || u_skipWhitespace(line)==end) {
    602             continue; /* ignore empty and comment lines */
    603         }
    604 
    605         if(0==uprv_strcmp(line, "CHARMAP")) {
    606             /* read the extension table */
    607             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
    608         } else {
    609             fprintf(stderr, "unexpected text after the base mapping table\n");
    610         }
    611         break;
    612     }
    613 
    614     T_FileStream_close(convFile);
    615 
    616     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
    617         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
    618         *pErrorCode=U_INVALID_TABLE_FORMAT;
    619     }
    620 
    621     return dataIsBase;
    622 }
    623 
    624 static void
    625 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    626     ConvData baseData;
    627     UBool dataIsBase;
    628 
    629     UConverterStaticData *staticData;
    630     UCMStates *states, *baseStates;
    631 
    632     if(U_FAILURE(*pErrorCode)) {
    633         return;
    634     }
    635 
    636     initConvData(data);
    637 
    638     dataIsBase=readFile(data, converterName, pErrorCode);
    639     if(U_FAILURE(*pErrorCode)) {
    640         return;
    641     }
    642 
    643     staticData=&data->staticData;
    644     states=&data->ucm->states;
    645 
    646     if(dataIsBase) {
    647         /*
    648          * Build a normal .cnv file with a base table
    649          * and an optional extension table.
    650          */
    651         data->cnvData=MBCSOpen(data->ucm);
    652         if(data->cnvData==NULL) {
    653             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    654 
    655         } else if(!data->cnvData->isValid(data->cnvData,
    656                             staticData->subChar, staticData->subCharLen)
    657         ) {
    658             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    659             *pErrorCode=U_INVALID_TABLE_FORMAT;
    660 
    661         } else if(staticData->subChar1!=0 &&
    662                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
    663         ) {
    664             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    665             *pErrorCode=U_INVALID_TABLE_FORMAT;
    666 
    667         } else if(
    668             data->ucm->ext->mappingsLength>0 &&
    669             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    670         ) {
    671             *pErrorCode=U_INVALID_TABLE_FORMAT;
    672         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
    673             /* sort the table so that it can be turned into UTF-8-friendly data */
    674             ucm_sortTable(data->ucm->base);
    675         }
    676 
    677         if(U_SUCCESS(*pErrorCode)) {
    678             if(
    679                 /* add the base table after ucm_checkBaseExt()! */
    680                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
    681             ) {
    682                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    683             } else {
    684                 /*
    685                  * addTable() may have requested moving more mappings to the extension table
    686                  * if they fit into the base toUnicode table but not into the
    687                  * base fromUnicode table.
    688                  * (Especially for UTF-8-friendly fromUnicode tables.)
    689                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
    690                  * to be excluded from the extension toUnicode data.
    691                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
    692                  * the base fromUnicode table.
    693                  */
    694                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
    695                 ucm_sortTable(data->ucm->ext);
    696                 if(data->ucm->ext->mappingsLength>0) {
    697                     /* prepare the extension table, if there is one */
    698                     data->extData=CnvExtOpen(data->ucm);
    699                     if(data->extData==NULL) {
    700                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    701                     } else if(
    702                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
    703                     ) {
    704                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    705                     }
    706                 }
    707             }
    708         }
    709     } else {
    710         /* Build an extension-only .cnv file. */
    711         char baseFilename[500];
    712         char *basename;
    713 
    714         initConvData(&baseData);
    715 
    716         /* assemble a path/filename for data->ucm->baseName */
    717         uprv_strcpy(baseFilename, converterName);
    718         basename=(char *)findBasename(baseFilename);
    719         uprv_strcpy(basename, data->ucm->baseName);
    720         uprv_strcat(basename, ".ucm");
    721 
    722         /* read the base table */
    723         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
    724         if(U_FAILURE(*pErrorCode)) {
    725             return;
    726         } else if(!dataIsBase) {
    727             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
    728             *pErrorCode=U_INVALID_TABLE_FORMAT;
    729         } else {
    730             /* prepare the extension table */
    731             data->extData=CnvExtOpen(data->ucm);
    732             if(data->extData==NULL) {
    733                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    734             } else {
    735                 /* fill in gaps in extension file header fields */
    736                 UCMapping *m, *mLimit;
    737                 uint8_t fallbackFlags;
    738 
    739                 baseStates=&baseData.ucm->states;
    740                 if(states->conversionType==UCNV_DBCS) {
    741                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
    742                 } else if(states->minCharLength==0) {
    743                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
    744                 }
    745                 if(states->maxCharLength<states->minCharLength) {
    746                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
    747                 }
    748 
    749                 if(staticData->subCharLen==0) {
    750                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
    751                     staticData->subCharLen=baseData.staticData.subCharLen;
    752                 }
    753                 /*
    754                  * do not copy subChar1 -
    755                  * only use what is explicitly specified
    756                  * because it cannot be unset in the extension file header
    757                  */
    758 
    759                 /* get the fallback flags */
    760                 fallbackFlags=0;
    761                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    762                     m<mLimit && fallbackFlags!=3;
    763                     ++m
    764                 ) {
    765                     if(m->f==1) {
    766                         fallbackFlags|=1;
    767                     } else if(m->f==3) {
    768                         fallbackFlags|=2;
    769                     }
    770                 }
    771 
    772                 if(fallbackFlags&1) {
    773                     staticData->hasFromUnicodeFallback=TRUE;
    774                 }
    775                 if(fallbackFlags&2) {
    776                     staticData->hasToUnicodeFallback=TRUE;
    777                 }
    778 
    779                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
    780                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    781                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    782 
    783                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
    784                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    785                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    786 
    787                 } else if(
    788                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
    789                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    790                 ) {
    791                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    792                 } else {
    793                     if(states->maxCharLength>1) {
    794                         /*
    795                          * When building a normal .cnv file with a base table
    796                          * for an MBCS (not SBCS) table with explicit precision flags,
    797                          * the MBCSAddTable() function marks some mappings for moving
    798                          * to the extension table.
    799                          * They fit into the base toUnicode table but not into the
    800                          * base fromUnicode table.
    801                          * (Note: We do have explicit precision flags because they are
    802                          * required for extension table generation, and
    803                          * ucm_checkBaseExt() verified it.)
    804                          *
    805                          * We do not call MBCSAddTable() here (we probably could)
    806                          * so we need to do the analysis before building the extension table.
    807                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
    808                          * Redundant mappings in the extension table are ok except they cost some size.
    809                          *
    810                          * Do this after ucm_checkBaseExt().
    811                          */
    812                         const MBCSData *mbcsData=MBCSGetDummy();
    813                         int32_t needsMove=0;
    814                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    815                             m<mLimit;
    816                             ++m
    817                         ) {
    818                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
    819                                 m->f|=MBCS_FROM_U_EXT_FLAG;
    820                                 m->moveFlag=UCM_MOVE_TO_EXT;
    821                                 ++needsMove;
    822                             }
    823                         }
    824 
    825                         if(needsMove!=0) {
    826                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
    827                             ucm_sortTable(data->ucm->ext);
    828                         }
    829                     }
    830                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
    831                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    832                     }
    833                 }
    834             }
    835         }
    836 
    837         cleanupConvData(&baseData);
    838     }
    839 }
    840 
    841 /*
    842  * Hey, Emacs, please set the following:
    843  *
    844  * Local Variables:
    845  * indent-tabs-mode: nil
    846  * End:
    847  *
    848  */
    849