Home | History | Annotate | Download | only in makeconv
      1 /*
      2  ********************************************************************************
      3  *
      4  *   Copyright (C) 1998-2014, International Business Machines
      5  *   Corporation and others.  All Rights Reserved.
      6  *
      7  ********************************************************************************
      8  *
      9  *
     10  *  makeconv.c:
     11  *  tool creating a binary (compressed) representation of the conversion mapping
     12  *  table (IBM NLTC ucmap format).
     13  *
     14  *  05/04/2000    helena     Added fallback mapping into the picture...
     15  *  06/29/2000  helena      Major rewrite of the callback APIs.
     16  */
     17 
     18 #include <stdio.h>
     19 #include "unicode/putil.h"
     20 #include "unicode/ucnv_err.h"
     21 #include "ucnv_bld.h"
     22 #include "ucnv_imp.h"
     23 #include "ucnv_cnv.h"
     24 #include "cstring.h"
     25 #include "cmemory.h"
     26 #include "uinvchar.h"
     27 #include "filestrm.h"
     28 #include "toolutil.h"
     29 #include "uoptions.h"
     30 #include "unicode/udata.h"
     31 #include "unewdata.h"
     32 #include "uparse.h"
     33 #include "ucm.h"
     34 #include "makeconv.h"
     35 #include "genmbcs.h"
     36 
     37 #define DEBUG 0
     38 
     39 typedef struct ConvData {
     40     UCMFile *ucm;
     41     NewConverter *cnvData, *extData;
     42     UConverterSharedData sharedData;
     43     UConverterStaticData staticData;
     44 } ConvData;
     45 
     46 static void
     47 initConvData(ConvData *data) {
     48     uprv_memset(data, 0, sizeof(ConvData));
     49     data->sharedData.structSize=sizeof(UConverterSharedData);
     50     data->staticData.structSize=sizeof(UConverterStaticData);
     51     data->sharedData.staticData=&data->staticData;
     52 }
     53 
     54 static void
     55 cleanupConvData(ConvData *data) {
     56     if(data!=NULL) {
     57         if(data->cnvData!=NULL) {
     58             data->cnvData->close(data->cnvData);
     59             data->cnvData=NULL;
     60         }
     61         if(data->extData!=NULL) {
     62             data->extData->close(data->extData);
     63             data->extData=NULL;
     64         }
     65         ucm_close(data->ucm);
     66         data->ucm=NULL;
     67     }
     68 }
     69 
/*
 * from ucnvstat.c - static prototypes of data-based converters.
 * readHeader() indexes this array by conversion type to fill in
 * header fields that the .ucm file left unset.
 */
extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];

/*
 * Global option flags, set from the command line in main().
 *
 * VERBOSE           - print progress messages (see writeConverterData()).
 * SMALL             - set by --small; not read in this file.
 * IGNORE_SISO_CHECK - set by --ignore-siso-check; passed to ucm_processStates().
 */
UBool VERBOSE = FALSE;
UBool SMALL = FALSE;
UBool IGNORE_SISO_CHECK = FALSE;

/*
 * Read the .ucm file converterName and build the table data in *data.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

/*
 * When TRUE, writeConverterData() passes U_COPYRIGHT_STRING to udata_create();
 * main() overwrites this with the state of the copyright option.
 */
UBool haveCopyright=TRUE;
     92 
/*
 * Data header description handed to udata_create() for every .cnv file
 * written by writeConverterData().
 * The dataVersion field is overwritten with the ICU version at the start of main().
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion; major.minor is printed by the version option */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
    106 
    107 static void
    108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
    109 {
    110     UNewDataMemory *mem = NULL;
    111     uint32_t sz2;
    112     uint32_t size = 0;
    113     int32_t tableType;
    114 
    115     if(U_FAILURE(*status))
    116       {
    117         return;
    118       }
    119 
    120     tableType=TABLE_NONE;
    121     if(data->cnvData!=NULL) {
    122         tableType|=TABLE_BASE;
    123     }
    124     if(data->extData!=NULL) {
    125         tableType|=TABLE_EXT;
    126     }
    127 
    128     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
    129 
    130     if(U_FAILURE(*status))
    131       {
    132         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
    133                 cnvName,
    134                 "cnv",
    135                 u_errorName(*status));
    136         return;
    137       }
    138 
    139     if(VERBOSE)
    140       {
    141         printf("- Opened udata %s.%s\n", cnvName, "cnv");
    142       }
    143 
    144 
    145     /* all read only, clean, platform independent data.  Mmmm. :)  */
    146     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
    147     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
    148     /* Now, write the table */
    149     if(tableType&TABLE_BASE) {
    150         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
    151     }
    152     if(tableType&TABLE_EXT) {
    153         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
    154     }
    155 
    156     sz2 = udata_finish(mem, status);
    157     if(size != sz2)
    158     {
    159         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
    160         *status=U_INTERNAL_PROGRAM_ERROR;
    161     }
    162     if(VERBOSE)
    163     {
    164       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
    165     }
    166 }
    167 
/*
 * Indexes into options[] below.
 * The enumerators must stay in the same order as the initializers of options[].
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_COUNT
};

/* Command-line option table handed to u_parseArgs() in main(). */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /* the usage text lists only the long forms of the next two options */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
    190 
    191 int main(int argc, char* argv[])
    192 {
    193     ConvData data;
    194     UErrorCode err = U_ZERO_ERROR, localError;
    195     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    196     const char* destdir, *arg;
    197     size_t destdirlen;
    198     char* dot = NULL, *outBasename;
    199     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    200     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
    201     UVersionInfo icuVersion;
    202     UBool printFilename;
    203 
    204     err = U_ZERO_ERROR;
    205 
    206     U_MAIN_INIT_ARGS(argc, argv);
    207 
    208     /* Set up the ICU version number */
    209     u_getVersion(icuVersion);
    210     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
    211 
    212     /* preset then read command line options */
    213     options[OPT_DESTDIR].value=u_getDataDirectory();
    214     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    215 
    216     /* error handling, printing usage message */
    217     if(argc<0) {
    218         fprintf(stderr,
    219             "error in command line argument \"%s\"\n",
    220             argv[-argc]);
    221     } else if(argc<2) {
    222         argc=-1;
    223     }
    224     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
    225         FILE *stdfile=argc<0 ? stderr : stdout;
    226         fprintf(stdfile,
    227             "usage: %s [-options] files...\n"
    228             "\tread .ucm codepage mapping files and write .cnv files\n"
    229             "options:\n"
    230             "\t-h or -? or --help  this usage text\n"
    231             "\t-V or --version     show a version message\n"
    232             "\t-c or --copyright   include a copyright notice\n"
    233             "\t-d or --destdir     destination directory, followed by the path\n"
    234             "\t-v or --verbose     Turn on verbose output\n",
    235             argv[0]);
    236         fprintf(stdfile,
    237             "\t      --small       Generate smaller .cnv files. They will be\n"
    238             "\t                    significantly smaller but may not be compatible with\n"
    239             "\t                    older versions of ICU and will require heap memory\n"
    240             "\t                    allocation when loaded.\n"
    241             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
    242         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    243     }
    244 
    245     if(options[OPT_VERSION].doesOccur) {
    246         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
    247                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
    248         printf("%s\n", U_COPYRIGHT_STRING);
    249         exit(0);
    250     }
    251 
    252     /* get the options values */
    253     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
    254     destdir = options[OPT_DESTDIR].value;
    255     VERBOSE = options[OPT_VERBOSE].doesOccur;
    256     SMALL = options[OPT_SMALL].doesOccur;
    257 
    258     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
    259         IGNORE_SISO_CHECK = TRUE;
    260     }
    261 
    262     if (destdir != NULL && *destdir != 0) {
    263         uprv_strcpy(outFileName, destdir);
    264         destdirlen = uprv_strlen(destdir);
    265         outBasename = outFileName + destdirlen;
    266         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
    267             *outBasename++ = U_FILE_SEP_CHAR;
    268             ++destdirlen;
    269         }
    270     } else {
    271         destdirlen = 0;
    272         outBasename = outFileName;
    273     }
    274 
    275 #if DEBUG
    276     {
    277       int i;
    278       printf("makeconv: processing %d files...\n", argc - 1);
    279       for(i=1; i<argc; ++i) {
    280         printf("%s ", argv[i]);
    281       }
    282       printf("\n");
    283       fflush(stdout);
    284     }
    285 #endif
    286 
    287     err = U_ZERO_ERROR;
    288     printFilename = (UBool) (argc > 2 || VERBOSE);
    289     for (++argv; --argc; ++argv)
    290     {
    291         arg = getLongPathname(*argv);
    292 
    293         /* Check for potential buffer overflow */
    294         if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
    295         {
    296             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
    297             return U_BUFFER_OVERFLOW_ERROR;
    298         }
    299 
    300         /*produces the right destination path for display*/
    301         if (destdirlen != 0)
    302         {
    303             const char *basename;
    304 
    305             /* find the last file sepator */
    306             basename = findBasename(arg);
    307             uprv_strcpy(outBasename, basename);
    308         }
    309         else
    310         {
    311             uprv_strcpy(outFileName, arg);
    312         }
    313 
    314         /*removes the extension if any is found*/
    315         dot = uprv_strrchr(outBasename, '.');
    316         if (dot)
    317         {
    318             *dot = '\0';
    319         }
    320 
    321         /* the basename without extension is the converter name */
    322         uprv_strcpy(cnvName, outBasename);
    323 
    324         /*Adds the target extension*/
    325         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
    326 
    327 #if DEBUG
    328         printf("makeconv: processing %s  ...\n", arg);
    329         fflush(stdout);
    330 #endif
    331         localError = U_ZERO_ERROR;
    332         initConvData(&data);
    333         createConverter(&data, arg, &localError);
    334 
    335         if (U_FAILURE(localError))
    336         {
    337             /* if an error is found, print out an error msg and keep going */
    338             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    339                 u_errorName(localError));
    340             if(U_SUCCESS(err)) {
    341                 err = localError;
    342             }
    343         }
    344         else
    345         {
    346             /* Insure the static data name matches the  file name */
    347             /* Changed to ignore directory and only compare base name
    348              LDH 1/2/08*/
    349             char *p;
    350             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
    351 
    352             if(p == NULL)            /* OK, try alternate */
    353             {
    354                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
    355                 if(p == NULL)
    356                 {
    357                     p=cnvName; /* If no separators, no problem */
    358                 }
    359             }
    360             else
    361             {
    362                 p++;   /* If found separtor, don't include it in compare */
    363             }
    364             if(uprv_stricmp(p,data.staticData.name))
    365             {
    366                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
    367                     cnvName,  CONVERTER_FILE_EXTENSION,
    368                     data.staticData.name);
    369             }
    370 
    371             uprv_strcpy((char*)data.staticData.name, cnvName);
    372 
    373             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
    374                 fprintf(stderr,
    375                     "Error: A converter name must contain only invariant characters.\n"
    376                     "%s is not a valid converter name.\n",
    377                     data.staticData.name);
    378                 if(U_SUCCESS(err)) {
    379                     err = U_INVALID_TABLE_FORMAT;
    380                 }
    381             }
    382 
    383             uprv_strcpy(cnvNameWithPkg, cnvName);
    384 
    385             localError = U_ZERO_ERROR;
    386             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
    387 
    388             if(U_FAILURE(localError))
    389             {
    390                 /* if an error is found, print out an error msg and keep going*/
    391                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
    392                     u_errorName(localError));
    393                 if(U_SUCCESS(err)) {
    394                     err = localError;
    395                 }
    396             }
    397             else if (printFilename)
    398             {
    399                 puts(outBasename);
    400             }
    401         }
    402         fflush(stdout);
    403         fflush(stderr);
    404 
    405         cleanupConvData(&data);
    406     }
    407 
    408     return err;
    409 }
    410 
    411 static void
    412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
    413     if( (name[0]=='i' || name[0]=='I') &&
    414         (name[1]=='b' || name[1]=='B') &&
    415         (name[2]=='m' || name[2]=='M')
    416     ) {
    417         name+=3;
    418         if(*name=='-') {
    419             ++name;
    420         }
    421         *pPlatform=UCNV_IBM;
    422         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
    423     } else {
    424         *pPlatform=UCNV_UNKNOWN;
    425         *pCCSID=0;
    426     }
    427 }
    428 
    429 static void
    430 readHeader(ConvData *data,
    431            FileStream* convFile,
    432            const char* converterName,
    433            UErrorCode *pErrorCode) {
    434     char line[1024];
    435     char *s, *key, *value;
    436     const UConverterStaticData *prototype;
    437     UConverterStaticData *staticData;
    438 
    439     if(U_FAILURE(*pErrorCode)) {
    440         return;
    441     }
    442 
    443     staticData=&data->staticData;
    444     staticData->platform=UCNV_IBM;
    445     staticData->subCharLen=0;
    446 
    447     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    448         /* basic parsing and handling of state-related items */
    449         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
    450             continue;
    451         }
    452 
    453         /* stop at the beginning of the mapping section */
    454         if(uprv_strcmp(line, "CHARMAP")==0) {
    455             break;
    456         }
    457 
    458         /* collect the information from the header field, ignore unknown keys */
    459         if(uprv_strcmp(key, "code_set_name")==0) {
    460             if(*value!=0) {
    461                 uprv_strcpy((char *)staticData->name, value);
    462                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
    463             }
    464         } else if(uprv_strcmp(key, "subchar")==0) {
    465             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    466             int8_t length;
    467 
    468             s=value;
    469             length=ucm_parseBytes(bytes, line, (const char **)&s);
    470             if(1<=length && length<=4 && *s==0) {
    471                 staticData->subCharLen=length;
    472                 uprv_memcpy(staticData->subChar, bytes, length);
    473             } else {
    474                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
    475                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    476                 return;
    477             }
    478         } else if(uprv_strcmp(key, "subchar1")==0) {
    479             uint8_t bytes[UCNV_EXT_MAX_BYTES];
    480 
    481             s=value;
    482             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
    483                 staticData->subChar1=bytes[0];
    484             } else {
    485                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
    486                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    487                 return;
    488             }
    489         }
    490     }
    491 
    492     /* copy values from the UCMFile to the static data */
    493     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
    494     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
    495     staticData->conversionType=data->ucm->states.conversionType;
    496 
    497     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
    498         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
    499         *pErrorCode=U_INVALID_TABLE_FORMAT;
    500         return;
    501     }
    502 
    503     /*
    504      * Now that we know the type, copy any 'default' values from the table.
    505      * We need not check the type any further because the parser only
    506      * recognizes what we have prototypes for.
    507      *
    508      * For delta (extension-only) tables, copy values from the base file
    509      * instead, see createConverter().
    510      */
    511     if(data->ucm->baseName[0]==0) {
    512         prototype=ucnv_converterStaticData[staticData->conversionType];
    513         if(prototype!=NULL) {
    514             if(staticData->name[0]==0) {
    515                 uprv_strcpy((char *)staticData->name, prototype->name);
    516             }
    517 
    518             if(staticData->codepage==0) {
    519                 staticData->codepage=prototype->codepage;
    520             }
    521 
    522             if(staticData->platform==0) {
    523                 staticData->platform=prototype->platform;
    524             }
    525 
    526             if(staticData->minBytesPerChar==0) {
    527                 staticData->minBytesPerChar=prototype->minBytesPerChar;
    528             }
    529 
    530             if(staticData->maxBytesPerChar==0) {
    531                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
    532             }
    533 
    534             if(staticData->subCharLen==0) {
    535                 staticData->subCharLen=prototype->subCharLen;
    536                 if(prototype->subCharLen>0) {
    537                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
    538                 }
    539             }
    540         }
    541     }
    542 
    543     if(data->ucm->states.outputType<0) {
    544         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
    545     }
    546 
    547     if( staticData->subChar1!=0 &&
    548             (staticData->minBytesPerChar>1 ||
    549                 (staticData->conversionType!=UCNV_MBCS &&
    550                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
    551     ) {
    552         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
    553         *pErrorCode=U_INVALID_TABLE_FORMAT;
    554     }
    555 }
    556 
    557 /* return TRUE if a base table was read, FALSE for an extension table */
    558 static UBool
    559 readFile(ConvData *data, const char* converterName,
    560          UErrorCode *pErrorCode) {
    561     char line[1024];
    562     char *end;
    563     FileStream *convFile;
    564 
    565     UCMStates *baseStates;
    566     UBool dataIsBase;
    567 
    568     if(U_FAILURE(*pErrorCode)) {
    569         return FALSE;
    570     }
    571 
    572     data->ucm=ucm_open();
    573 
    574     convFile=T_FileStream_open(converterName, "r");
    575     if(convFile==NULL) {
    576         *pErrorCode=U_FILE_ACCESS_ERROR;
    577         return FALSE;
    578     }
    579 
    580     readHeader(data, convFile, converterName, pErrorCode);
    581     if(U_FAILURE(*pErrorCode)) {
    582         return FALSE;
    583     }
    584 
    585     if(data->ucm->baseName[0]==0) {
    586         dataIsBase=TRUE;
    587         baseStates=&data->ucm->states;
    588         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
    589     } else {
    590         dataIsBase=FALSE;
    591         baseStates=NULL;
    592     }
    593 
    594     /* read the base table */
    595     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
    596     if(U_FAILURE(*pErrorCode)) {
    597         return FALSE;
    598     }
    599 
    600     /* read an extension table if there is one */
    601     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
    602         end=uprv_strchr(line, 0);
    603         while(line<end &&
    604               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
    605             --end;
    606         }
    607         *end=0;
    608 
    609         if(line[0]=='#' || u_skipWhitespace(line)==end) {
    610             continue; /* ignore empty and comment lines */
    611         }
    612 
    613         if(0==uprv_strcmp(line, "CHARMAP")) {
    614             /* read the extension table */
    615             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
    616         } else {
    617             fprintf(stderr, "unexpected text after the base mapping table\n");
    618         }
    619         break;
    620     }
    621 
    622     T_FileStream_close(convFile);
    623 
    624     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
    625         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
    626         *pErrorCode=U_INVALID_TABLE_FORMAT;
    627     }
    628 
    629     return dataIsBase;
    630 }
    631 
    632 static void
    633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
    634     ConvData baseData;
    635     UBool dataIsBase;
    636 
    637     UConverterStaticData *staticData;
    638     UCMStates *states, *baseStates;
    639 
    640     if(U_FAILURE(*pErrorCode)) {
    641         return;
    642     }
    643 
    644     initConvData(data);
    645 
    646     dataIsBase=readFile(data, converterName, pErrorCode);
    647     if(U_FAILURE(*pErrorCode)) {
    648         return;
    649     }
    650 
    651     staticData=&data->staticData;
    652     states=&data->ucm->states;
    653 
    654     if(dataIsBase) {
    655         /*
    656          * Build a normal .cnv file with a base table
    657          * and an optional extension table.
    658          */
    659         data->cnvData=MBCSOpen(data->ucm);
    660         if(data->cnvData==NULL) {
    661             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    662 
    663         } else if(!data->cnvData->isValid(data->cnvData,
    664                             staticData->subChar, staticData->subCharLen)
    665         ) {
    666             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    667             *pErrorCode=U_INVALID_TABLE_FORMAT;
    668 
    669         } else if(staticData->subChar1!=0 &&
    670                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
    671         ) {
    672             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    673             *pErrorCode=U_INVALID_TABLE_FORMAT;
    674 
    675         } else if(
    676             data->ucm->ext->mappingsLength>0 &&
    677             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    678         ) {
    679             *pErrorCode=U_INVALID_TABLE_FORMAT;
    680         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
    681             /* sort the table so that it can be turned into UTF-8-friendly data */
    682             ucm_sortTable(data->ucm->base);
    683         }
    684 
    685         if(U_SUCCESS(*pErrorCode)) {
    686             if(
    687                 /* add the base table after ucm_checkBaseExt()! */
    688                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
    689             ) {
    690                 *pErrorCode=U_INVALID_TABLE_FORMAT;
    691             } else {
    692                 /*
    693                  * addTable() may have requested moving more mappings to the extension table
    694                  * if they fit into the base toUnicode table but not into the
    695                  * base fromUnicode table.
    696                  * (Especially for UTF-8-friendly fromUnicode tables.)
    697                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
    698                  * to be excluded from the extension toUnicode data.
    699                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
    700                  * the base fromUnicode table.
    701                  */
    702                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
    703                 ucm_sortTable(data->ucm->ext);
    704                 if(data->ucm->ext->mappingsLength>0) {
    705                     /* prepare the extension table, if there is one */
    706                     data->extData=CnvExtOpen(data->ucm);
    707                     if(data->extData==NULL) {
    708                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    709                     } else if(
    710                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
    711                     ) {
    712                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    713                     }
    714                 }
    715             }
    716         }
    717     } else {
    718         /* Build an extension-only .cnv file. */
    719         char baseFilename[500];
    720         char *basename;
    721 
    722         initConvData(&baseData);
    723 
    724         /* assemble a path/filename for data->ucm->baseName */
    725         uprv_strcpy(baseFilename, converterName);
    726         basename=(char *)findBasename(baseFilename);
    727         uprv_strcpy(basename, data->ucm->baseName);
    728         uprv_strcat(basename, ".ucm");
    729 
    730         /* read the base table */
    731         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
    732         if(U_FAILURE(*pErrorCode)) {
    733             return;
    734         } else if(!dataIsBase) {
    735             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
    736             *pErrorCode=U_INVALID_TABLE_FORMAT;
    737         } else {
    738             /* prepare the extension table */
    739             data->extData=CnvExtOpen(data->ucm);
    740             if(data->extData==NULL) {
    741                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    742             } else {
    743                 /* fill in gaps in extension file header fields */
    744                 UCMapping *m, *mLimit;
    745                 uint8_t fallbackFlags;
    746 
    747                 baseStates=&baseData.ucm->states;
    748                 if(states->conversionType==UCNV_DBCS) {
    749                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
    750                 } else if(states->minCharLength==0) {
    751                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
    752                 }
    753                 if(states->maxCharLength<states->minCharLength) {
    754                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
    755                 }
    756 
    757                 if(staticData->subCharLen==0) {
    758                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
    759                     staticData->subCharLen=baseData.staticData.subCharLen;
    760                 }
    761                 /*
    762                  * do not copy subChar1 -
    763                  * only use what is explicitly specified
    764                  * because it cannot be unset in the extension file header
    765                  */
    766 
    767                 /* get the fallback flags */
    768                 fallbackFlags=0;
    769                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    770                     m<mLimit && fallbackFlags!=3;
    771                     ++m
    772                 ) {
    773                     if(m->f==1) {
    774                         fallbackFlags|=1;
    775                     } else if(m->f==3) {
    776                         fallbackFlags|=2;
    777                     }
    778                 }
    779 
    780                 if(fallbackFlags&1) {
    781                     staticData->hasFromUnicodeFallback=TRUE;
    782                 }
    783                 if(fallbackFlags&2) {
    784                     staticData->hasToUnicodeFallback=TRUE;
    785                 }
    786 
    787                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
    788                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
    789                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    790 
    791                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
    792                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
    793                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    794 
    795                 } else if(
    796                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
    797                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
    798                 ) {
    799                     *pErrorCode=U_INVALID_TABLE_FORMAT;
    800                 } else {
    801                     if(states->maxCharLength>1) {
    802                         /*
    803                          * When building a normal .cnv file with a base table
    804                          * for an MBCS (not SBCS) table with explicit precision flags,
    805                          * the MBCSAddTable() function marks some mappings for moving
    806                          * to the extension table.
    807                          * They fit into the base toUnicode table but not into the
    808                          * base fromUnicode table.
    809                          * (Note: We do have explicit precision flags because they are
    810                          * required for extension table generation, and
    811                          * ucm_checkBaseExt() verified it.)
    812                          *
    813                          * We do not call MBCSAddTable() here (we probably could)
    814                          * so we need to do the analysis before building the extension table.
    815                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
    816                          * Redundant mappings in the extension table are ok except they cost some size.
    817                          *
    818                          * Do this after ucm_checkBaseExt().
    819                          */
    820                         const MBCSData *mbcsData=MBCSGetDummy();
    821                         int32_t needsMove=0;
    822                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
    823                             m<mLimit;
    824                             ++m
    825                         ) {
    826                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
    827                                 m->f|=MBCS_FROM_U_EXT_FLAG;
    828                                 m->moveFlag=UCM_MOVE_TO_EXT;
    829                                 ++needsMove;
    830                             }
    831                         }
    832 
    833                         if(needsMove!=0) {
    834                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
    835                             ucm_sortTable(data->ucm->ext);
    836                         }
    837                     }
    838                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
    839                         *pErrorCode=U_INVALID_TABLE_FORMAT;
    840                     }
    841                 }
    842             }
    843         }
    844 
    845         cleanupConvData(&baseData);
    846     }
    847 }
    848 
    849 /*
    850  * Hey, Emacs, please set the following:
    851  *
    852  * Local Variables:
    853  * indent-tabs-mode: nil
    854  * End:
    855  *
    856  */
    857