Home | History | Annotate | Download | only in gencnval
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1999-2015 International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  gencnval.c
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 1999nov05
     14 *   created by: Markus W. Scherer
     15 *
     16 *   This program reads convrtrs.txt and writes a memory-mappable
     17 *   converter name alias table to cnvalias.dat .
     18 *
     19 *   This program currently writes version 2.1 of the data format. See
     20 *   ucnv_io.c for more details on the format. Note that version 2.1
     21 *   is written in such a way that a 2.0 reader will be able to use it,
     22 *   and a 2.1 reader will be able to read 2.0.
     23 */
     24 
     25 #include "unicode/utypes.h"
     26 #include "unicode/putil.h"
     27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
     28 #include "ucnv_io.h"
     29 #include "cmemory.h"
     30 #include "cstring.h"
     31 #include "uinvchar.h"
     32 #include "filestrm.h"
     33 #include "unicode/uclean.h"
     34 #include "unewdata.h"
     35 #include "uoptions.h"
     36 
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <ctype.h>
     40 
     41 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
     42 
/* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
 That is the maximum size for the string stores combined
 because the strings are indexed at 16-bit boundaries by a
 16-bit index, and there is only one section for the
 strings.
 */
     49 #define STRING_STORE_SIZE 0x1FBFE   /* 130046 */
     50 #define TAG_STORE_SIZE      0x400   /* 1024 */
     51 
     52 /* The combined tag and converter count can affect the number of lists
     53  created.  The size of all lists must be less than (2^17 - 1)
     54  because the lists are indexed as a 16-bit array with a 16-bit index.
     55  */
     56 #define MAX_TAG_COUNT 0x3F      /* 63 */
     57 #define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
     58 #define MAX_ALIAS_COUNT 0xFFFF  /* 65535 */
     59 
     60 /* The maximum number of aliases that a standard tag/converter combination can have.
     61  At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
     62  this value. I don't recommend more than 31 for this value.
     63  */
     64 #define MAX_TC_ALIAS_COUNT 0x1F    /* 31 */
     65 
     66 #define MAX_LINE_SIZE 0x7FFF    /* 32767 */
     67 #define MAX_LIST_SIZE 0xFFFF    /* 65535 */
     68 
     69 #define DATA_NAME "cnvalias"
     70 #define DATA_TYPE "icu" /* ICU alias table */
     71 
     72 #define ALL_TAG_STR "ALL"
     73 #define ALL_TAG_NUM 1
     74 #define EMPTY_TAG_NUM 0
     75 
/*
 * UDataInfo cf. udata.h
 * Header description for the generated data file; main() passes its address
 * to udata_create().
 * NOTE(review): the comment at the top of this file says the tool writes
 * data format version 2.1, while formatVersion below is {3, 0, 1, 0} -
 * confirm which statement is current.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x76, 0x41, 0x6c},     /* dataFormat="CvAl" */
    {3, 0, 1, 0},                 /* formatVersion */
    {1, 4, 2, 0}                  /* dataVersion */
};
     90 
/*
 * A fixed-size character store handed to allocString().
 * The instances in this file initialize top to 0 and max to the size of the
 * backing array; presumably top is the current fill level and max the
 * capacity (allocString() is not visible here - confirm).
 */
typedef struct {
    char *store;
    uint32_t top;
    uint32_t max;
} StringBlock;

/* Backing storage for converter and alias names; see GET_ALIAS_STR(). */
static char stringStore[STRING_STORE_SIZE];
static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
     99 
/*
 * The aliases recorded for one (tag, converter) pair.
 * aliases is grown with uprv_realloc() in addAlias() and released in main().
 * addAlias() reserves aliases[0] for the default alias and stores 0 there
 * until a default name is supplied.
 */
typedef struct {
    uint16_t    aliasCount;
    uint16_t    *aliases;     /* Index into stringStore */
} AliasList;

/* One converter (a column of the tag/converter table). */
typedef struct {
    uint16_t converter;     /* Index into stringStore */
    uint16_t totalAliasCount;    /* Total aliases in this column */
} Converter;

/* Converter table; entries are appended by addConverter(). */
static Converter converters[MAX_CONV_COUNT];
static uint16_t converterCount=0;
    112 
/* Backing storage for standard tag names; see GET_TAG_STR(). */
static char tagStore[TAG_STORE_SIZE];
static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };

/* One standard tag (a row of the tag/converter table) with one alias list per converter. */
typedef struct {
    uint16_t    tag;        /* Index into tagStore */
    uint16_t    totalAliasCount; /* Total aliases in this row */
    AliasList   aliasList[MAX_CONV_COUNT];
} Tag;

/* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
/* Index EMPTY_TAG_NUM and ALL_TAG_NUM are created first by parseFile(). */
static Tag tags[MAX_TAG_COUNT];
static uint16_t tagCount = 0;
    125 
/* Used for storing all aliases  */
/* Each entry is a stringStore index appended by addToKnownAliases(). */
static uint16_t knownAliases[MAX_ALIAS_COUNT];
static uint16_t knownAliasesCount = 0;
/*static uint16_t duplicateKnownAliasesCount = 0;*/

/* Used for storing the lists section that point to aliases */
/* Not written by any function visible in this part of the file except the memset in main(). */
static uint16_t aliasLists[MAX_LIST_SIZE];
static uint16_t aliasListsSize = 0;
    134 
/* Were the standard tags declared before the aliases. */
/* Set to TRUE by parseFile() after a line starting with '{' has been handled. */
static UBool standardTagsUsed = FALSE;
/* Set from the --verbose command line option in main(). */
static UBool verbose = FALSE;
/* Set from the --quiet command line option in main(). */
static UBool quiet = FALSE;
/* Line counter used in diagnostics; parseFile() resets it to 0 before reading. */
static int lineNum = 1;

/* NOTE(review): presumably the options section written into the alias table
   by writeAliasTable(), which is not visible here - confirm. */
static UConverterAliasOptions tableOptions = {
    UCNV_IO_STD_NORMALIZED,
    1 /* containsCnvOptionInfo */
};


/**
 * path to convrtrs.txt
 * Assigned in main(); also used as the file name prefix of diagnostics.
 */
const char *path;
    151 
    152 /* prototypes --------------------------------------------------------------- */
    153 
    154 static void
    155 parseLine(const char *line);
    156 
    157 static void
    158 parseFile(FileStream *in);
    159 
    160 static int32_t
    161 chomp(char *line);
    162 
    163 static void
    164 addOfficialTaggedStandards(char *line, int32_t lineLen);
    165 
    166 static uint16_t
    167 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);
    168 
    169 static uint16_t
    170 addConverter(const char *converter);
    171 
    172 static char *
    173 allocString(StringBlock *block, const char *s, int32_t length);
    174 
    175 static uint16_t
    176 addToKnownAliases(const char *alias);
    177 
    178 static int
    179 compareAliases(const void *alias1, const void *alias2);
    180 
    181 static uint16_t
    182 getTagNumber(const char *tag, uint16_t tagLen);
    183 
    184 /*static void
    185 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
    186 
    187 static void
    188 writeAliasTable(UNewDataMemory *out);
    189 
    190 /* -------------------------------------------------------------------------- */
    191 
    192 /* Presumes that you used allocString() */
    193 #define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
    194 #define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))
    195 
    196 /* Presumes that you used allocString() */
    197 #define GET_ALIAS_NUM(str) ((uint16_t)((str - stringStore) >> 1))
    198 #define GET_TAG_NUM(str) ((uint16_t)((str - tagStore) >> 1))
    199 
/* Indexes into options[] below; the order of the two lists must stay in sync. */
enum
{
    HELP1,
    HELP2,
    VERBOSE,
    COPYRIGHT,
    DESTDIR,
    SOURCEDIR,
    QUIET
};

/* Command line options recognized by main(), parsed with u_parseArgs(). */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_VERBOSE,
    UOPTION_COPYRIGHT,
    UOPTION_DESTDIR,
    UOPTION_SOURCEDIR,
    UOPTION_QUIET
};
    220 
    221 extern int
    222 main(int argc, char* argv[]) {
    223     int i, n;
    224     char pathBuf[512];
    225     FileStream *in;
    226     UNewDataMemory *out;
    227     UErrorCode errorCode=U_ZERO_ERROR;
    228 
    229     U_MAIN_INIT_ARGS(argc, argv);
    230 
    231     /* preset then read command line options */
    232     options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
    233     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
    234 
    235     /* error handling, printing usage message */
    236     if(argc<0) {
    237         fprintf(stderr,
    238             "error in command line argument \"%s\"\n",
    239             argv[-argc]);
    240     }
    241     if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
    242         fprintf(stderr,
    243             "usage: %s [-options] [convrtrs.txt]\n"
    244             "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
    245             "options:\n"
    246             "\t-h or -? or --help  this usage text\n"
    247             "\t-v or --verbose     prints out extra information about the alias table\n"
    248             "\t-q or --quiet       do not display warnings and progress\n"
    249             "\t-c or --copyright   include a copyright notice\n"
    250             "\t-d or --destdir     destination directory, followed by the path\n"
    251             "\t-s or --sourcedir   source directory, followed by the path\n",
    252             argv[0]);
    253         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    254     }
    255 
    256     if(options[VERBOSE].doesOccur) {
    257         verbose = TRUE;
    258     }
    259 
    260     if(options[QUIET].doesOccur) {
    261         quiet = TRUE;
    262     }
    263 
    264     if(argc>=2) {
    265         path=argv[1];
    266     } else {
    267         path=options[SOURCEDIR].value;
    268         if(path!=NULL && *path!=0) {
    269             char *end;
    270 
    271             uprv_strcpy(pathBuf, path);
    272             end = uprv_strchr(pathBuf, 0);
    273             if(*(end-1)!=U_FILE_SEP_CHAR) {
    274                 *(end++)=U_FILE_SEP_CHAR;
    275             }
    276             uprv_strcpy(end, "convrtrs.txt");
    277             path=pathBuf;
    278         } else {
    279             path = "convrtrs.txt";
    280         }
    281     }
    282 
    283     uprv_memset(stringStore, 0, sizeof(stringStore));
    284     uprv_memset(tagStore, 0, sizeof(tagStore));
    285     uprv_memset(converters, 0, sizeof(converters));
    286     uprv_memset(tags, 0, sizeof(tags));
    287     uprv_memset(aliasLists, 0, sizeof(aliasLists));
    288     uprv_memset(knownAliases, 0, sizeof(aliasLists));
    289 
    290 
    291     in=T_FileStream_open(path, "r");
    292     if(in==NULL) {
    293         fprintf(stderr, "gencnval: unable to open input file %s\n", path);
    294         exit(U_FILE_ACCESS_ERROR);
    295     }
    296     parseFile(in);
    297     T_FileStream_close(in);
    298 
    299     /* create the output file */
    300     out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
    301                      options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
    302     if(U_FAILURE(errorCode)) {
    303         fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
    304         exit(errorCode);
    305     }
    306 
    307     /* write the table of aliases based on a tag/converter name combination */
    308     writeAliasTable(out);
    309 
    310     /* finish */
    311     udata_finish(out, &errorCode);
    312     if(U_FAILURE(errorCode)) {
    313         fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
    314         exit(errorCode);
    315     }
    316 
    317     /* clean up tags */
    318     for (i = 0; i < MAX_TAG_COUNT; i++) {
    319         for (n = 0; n < MAX_CONV_COUNT; n++) {
    320             if (tags[i].aliasList[n].aliases!=NULL) {
    321                 uprv_free(tags[i].aliasList[n].aliases);
    322             }
    323         }
    324     }
    325 
    326     return 0;
    327 }
    328 
    329 static void
    330 parseFile(FileStream *in) {
    331     char line[MAX_LINE_SIZE];
    332     char lastLine[MAX_LINE_SIZE];
    333     int32_t lineSize = 0;
    334     int32_t lastLineSize = 0;
    335     UBool validParse = TRUE;
    336 
    337     lineNum = 0;
    338 
    339     /* Add the empty tag, which is for untagged aliases */
    340     getTagNumber("", 0);
    341     getTagNumber(ALL_TAG_STR, 3);
    342     allocString(&stringBlock, "", 0);
    343 
    344     /* read the list of aliases */
    345     while (validParse) {
    346         validParse = FALSE;
    347 
    348         /* Read non-empty lines that don't start with a space character. */
    349         while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
    350             lastLineSize = chomp(lastLine);
    351             if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) {
    352                 uprv_strcpy(line + lineSize, lastLine);
    353                 lineSize += lastLineSize;
    354             } else if (lineSize > 0) {
    355                 validParse = TRUE;
    356                 break;
    357             }
    358             lineNum++;
    359         }
    360 
    361         if (validParse || lineSize > 0) {
    362             if (isspace((int)*line)) {
    363                 fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1);
    364                 exit(U_PARSE_ERROR);
    365             } else if (line[0] == '{') {
    366                 if (!standardTagsUsed && line[lineSize - 1] != '}') {
    367                     fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum);
    368                     exit(U_PARSE_ERROR);
    369                 }
    370                 addOfficialTaggedStandards(line, lineSize);
    371                 standardTagsUsed = TRUE;
    372             } else {
    373                 if (standardTagsUsed) {
    374                     parseLine(line);
    375                 }
    376                 else {
    377                     fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum);
    378                     exit(U_PARSE_ERROR);
    379                 }
    380             }
    381             /* Was the last line consumed */
    382             if (lastLineSize > 0) {
    383                 uprv_strcpy(line, lastLine);
    384                 lineSize = lastLineSize;
    385             }
    386             else {
    387                 lineSize = 0;
    388             }
    389         }
    390         lineNum++;
    391     }
    392 }
    393 
    394 /* This works almost like the Perl chomp.
    395  It removes the newlines, comments and trailing whitespace (not preceding whitespace).
    396 */
    397 static int32_t
    398 chomp(char *line) {
    399     char *s = line;
    400     char *lastNonSpace = line;
    401     while(*s!=0) {
    402         /* truncate at a newline or a comment */
    403         if(*s == '\r' || *s == '\n' || *s == '#') {
    404             *s = 0;
    405             break;
    406         }
    407         if (!isspace((int)*s)) {
    408             lastNonSpace = s;
    409         }
    410         ++s;
    411     }
    412     if (lastNonSpace++ > line) {
    413         *lastNonSpace = 0;
    414         s = lastNonSpace;
    415     }
    416     return (int32_t)(s - line);
    417 }
    418 
    419 static void
    420 parseLine(const char *line) {
    421     uint16_t pos=0, start, limit, length, cnv;
    422     char *converter, *alias;
    423 
    424     /* skip leading white space */
    425     /* There is no whitespace at the beginning anymore */
    426 /*    while(line[pos]!=0 && isspace(line[pos])) {
    427         ++pos;
    428     }
    429 */
    430 
    431     /* is there nothing on this line? */
    432     if(line[pos]==0) {
    433         return;
    434     }
    435 
    436     /* get the converter name */
    437     start=pos;
    438     while(line[pos]!=0 && !isspace((int)line[pos])) {
    439         ++pos;
    440     }
    441     limit=pos;
    442 
    443     /* store the converter name */
    444     length=(uint16_t)(limit-start);
    445     converter=allocString(&stringBlock, line+start, length);
    446 
    447     /* add the converter to the converter table */
    448     cnv=addConverter(converter);
    449 
    450     /* The name itself may be tagged, so let's added it to the aliases list properly */
    451     pos = start;
    452 
    453     /* get all the real aliases */
    454     for(;;) {
    455 
    456         /* skip white space */
    457         while(line[pos]!=0 && isspace((int)line[pos])) {
    458             ++pos;
    459         }
    460 
    461         /* is there no more alias name on this line? */
    462         if(line[pos]==0) {
    463             break;
    464         }
    465 
    466         /* get an alias name */
    467         start=pos;
    468         while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) {
    469             ++pos;
    470         }
    471         limit=pos;
    472 
    473         /* store the alias name */
    474         length=(uint16_t)(limit-start);
    475         if (start == 0) {
    476             /* add the converter as its own alias to the alias table */
    477             alias = converter;
    478             addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
    479         }
    480         else {
    481             alias=allocString(&stringBlock, line+start, length);
    482             addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
    483         }
    484         addToKnownAliases(alias);
    485 
    486         /* add the alias/converter pair to the alias table */
    487         /* addAlias(alias, 0, cnv, FALSE);*/
    488 
    489         /* skip whitespace */
    490         while (line[pos] && isspace((int)line[pos])) {
    491             ++pos;
    492         }
    493 
    494         /* handle tags if they are present */
    495         if (line[pos] == '{') {
    496             ++pos;
    497             do {
    498                 start = pos;
    499                 while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) {
    500                     ++pos;
    501                 }
    502                 limit = pos;
    503 
    504                 if (start != limit) {
    505                     /* add the tag to the tag table */
    506                     uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
    507                     addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
    508                 }
    509 
    510                 while (line[pos] && isspace((int)line[pos])) {
    511                     ++pos;
    512                 }
    513             } while (line[pos] && line[pos] != '}');
    514 
    515             if (line[pos] == '}') {
    516                 ++pos;
    517             } else {
    518                 fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum);
    519                 exit(U_UNMATCHED_BRACES);
    520             }
    521         } else {
    522             addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
    523         }
    524     }
    525 }
    526 
    527 static uint16_t
    528 getTagNumber(const char *tag, uint16_t tagLen) {
    529     char *atag;
    530     uint16_t t;
    531     UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
    532 
    533     if (tagCount >= MAX_TAG_COUNT) {
    534         fprintf(stderr, "%s:%d: too many tags\n", path, lineNum);
    535         exit(U_BUFFER_OVERFLOW_ERROR);
    536     }
    537 
    538     if (preferredName) {
    539 /*        puts(tag);*/
    540         tagLen--;
    541     }
    542 
    543     for (t = 0; t < tagCount; ++t) {
    544         const char *currTag = GET_TAG_STR(tags[t].tag);
    545         if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
    546             return t;
    547         }
    548     }
    549 
    550     /* we need to add this tag */
    551     if (tagCount >= MAX_TAG_COUNT) {
    552         fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum);
    553         exit(U_BUFFER_OVERFLOW_ERROR);
    554     }
    555 
    556     /* allocate a new entry in the tag table */
    557     atag = allocString(&tagBlock, tag, tagLen);
    558 
    559     if (standardTagsUsed) {
    560         fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
    561             path, lineNum, atag);
    562         exit(1);
    563     }
    564     else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
    565         fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
    566             path, lineNum, atag);
    567     }
    568 
    569     /* add the tag to the tag table */
    570     tags[tagCount].tag = GET_TAG_NUM(atag);
    571     /* The aliasList should be set to 0's already */
    572 
    573     return tagCount++;
    574 }
    575 
    576 /*static void
    577 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
    578     tags[tag].aliases[converter] = alias;
    579 }
    580 */
    581 
    582 static void
    583 addOfficialTaggedStandards(char *line, int32_t lineLen) {
    584     char *atag;
    585     char *endTagExp;
    586     char *tag;
    587     static const char WHITESPACE[] = " \t";
    588 
    589     if (tagCount > UCNV_NUM_RESERVED_TAGS) {
    590         fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum);
    591         exit(U_BUFFER_OVERFLOW_ERROR);
    592     }
    593     tag = strchr(line, '{');
    594     if (tag == NULL) {
    595         /* Why were we called? */
    596         fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum);
    597         exit(U_PARSE_ERROR);
    598     }
    599     tag++;
    600     endTagExp = strchr(tag, '}');
    601     if (endTagExp == NULL) {
    602         fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum);
    603         exit(U_PARSE_ERROR);
    604     }
    605     endTagExp[0] = 0;
    606 
    607     tag = strtok(tag, WHITESPACE);
    608     while (tag != NULL) {
    609 /*        printf("Adding original tag \"%s\"\n", tag);*/
    610 
    611         /* allocate a new entry in the tag table */
    612         atag = allocString(&tagBlock, tag, -1);
    613 
    614         /* add the tag to the tag table */
    615         tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
    616 
    617         /* The aliasList should already be set to 0's */
    618 
    619         /* Get next tag */
    620         tag = strtok(NULL, WHITESPACE);
    621     }
    622 }
    623 
    624 static uint16_t
    625 addToKnownAliases(const char *alias) {
    626 /*    uint32_t idx; */
    627     /* strict matching */
    628 /*    for (idx = 0; idx < knownAliasesCount; idx++) {
    629         uint16_t num = GET_ALIAS_NUM(alias);
    630         if (knownAliases[idx] != num
    631             && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
    632         {
    633             fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path,
    634                 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
    635             duplicateKnownAliasesCount++;
    636             break;
    637         }
    638         else if (knownAliases[idx] != num
    639             && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
    640         {
    641             if (verbose) {
    642                 fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path,
    643                     lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
    644             }
    645             duplicateKnownAliasesCount++;
    646             break;
    647         }
    648     }
    649 */
    650     if (knownAliasesCount >= MAX_ALIAS_COUNT) {
    651         fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n",
    652             path, lineNum);
    653         exit(U_BUFFER_OVERFLOW_ERROR);
    654     }
    655     /* TODO: We could try to unlist exact duplicates. */
    656     return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
    657 }
    658 
    659 /*
    660 @param standard When standard is 0, then it's the "empty" tag.
    661 */
    662 static uint16_t
    663 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
    664     uint32_t idx, idx2;
    665     UBool startEmptyWithoutDefault = FALSE;
    666     AliasList *aliasList;
    667 
    668     if(standard>=MAX_TAG_COUNT) {
    669         fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum);
    670         exit(U_BUFFER_OVERFLOW_ERROR);
    671     }
    672     if(converter>=MAX_CONV_COUNT) {
    673         fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum);
    674         exit(U_BUFFER_OVERFLOW_ERROR);
    675     }
    676     aliasList = &tags[standard].aliasList[converter];
    677 
    678     if (strchr(alias, '}')) {
    679         fprintf(stderr, "%s:%d: error: unmatched } found\n", path,
    680             lineNum);
    681     }
    682 
    683     if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
    684         fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path,
    685             lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
    686         exit(U_BUFFER_OVERFLOW_ERROR);
    687     }
    688 
    689     /* Show this warning only once. All aliases are added to the "ALL" tag. */
    690     if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
    691         /* Normally these option values are parsed at runtime, and they can
    692            be discarded when the alias is a default converter. Options should
    693            only be on a converter and not an alias. */
    694         if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
    695         {
    696             fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
    697                 lineNum, alias);
    698         }
    699         if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
    700         {
    701             fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
    702                 lineNum, alias);
    703         }
    704     }
    705 
    706     if (standard != ALL_TAG_NUM) {
    707         /* Check for duplicate aliases for this tag on all converters */
    708         for (idx = 0; idx < converterCount; idx++) {
    709             for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
    710                 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
    711                 if (aliasNum
    712                     && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
    713                 {
    714                     if (idx == converter) {
    715                         /*
    716                          * (alias, standard) duplicates are harmless if they map to the same converter.
    717                          * Only print a warning in verbose mode, or if the alias is a precise duplicate,
    718                          * not just a lenient-match duplicate.
    719                          */
    720                         if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
    721                             fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path,
    722                                 lineNum, alias, GET_ALIAS_STR(aliasNum),
    723                                 GET_TAG_STR(tags[standard].tag),
    724                                 GET_ALIAS_STR(converters[converter].converter));
    725                         }
    726                     } else {
    727                         fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path,
    728                             lineNum, alias, GET_ALIAS_STR(aliasNum),
    729                             GET_TAG_STR(tags[standard].tag),
    730                             GET_ALIAS_STR(converters[converter].converter),
    731                             GET_ALIAS_STR(converters[idx].converter));
    732                     }
    733                     break;
    734                 }
    735             }
    736         }
    737 
    738         /* Check for duplicate default aliases for this converter on all tags */
    739         /* It's okay to have multiple standards prefer the same name */
    740 /*        if (verbose && !dupFound) {
    741             for (idx = 0; idx < tagCount; idx++) {
    742                 if (tags[idx].aliasList[converter].aliases) {
    743                     uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
    744                     if (aliasNum
    745                         && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
    746                     {
    747                         fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path,
    748                             lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
    749                         break;
    750                     }
    751                 }
    752             }
    753         }*/
    754     }
    755 
    756     if (aliasList->aliasCount <= 0) {
    757         aliasList->aliasCount++;
    758         startEmptyWithoutDefault = TRUE;
    759     }
    760     aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
    761     if (startEmptyWithoutDefault) {
    762         aliasList->aliases[0] = 0;
    763     }
    764     if (defaultName) {
    765         if (aliasList->aliases[0] != 0) {
    766             fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path,
    767                 lineNum,
    768                 alias,
    769                 GET_ALIAS_STR(aliasList->aliases[0]),
    770                 GET_TAG_STR(tags[standard].tag),
    771                 GET_ALIAS_STR(converters[converter].converter));
    772             exit(U_PARSE_ERROR);
    773         }
    774         aliasList->aliases[0] = GET_ALIAS_NUM(alias);
    775     } else {
    776         aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
    777     }
    778 /*    aliasList->converter = converter;*/
    779 
    780     converters[converter].totalAliasCount++; /* One more to the column */
    781     tags[standard].totalAliasCount++; /* One more to the row */
    782 
    783     return aliasList->aliasCount;
    784 }
    785 
    786 static uint16_t
    787 addConverter(const char *converter) {
    788     uint32_t idx;
    789     if(converterCount>=MAX_CONV_COUNT) {
    790         fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum);
    791         exit(U_BUFFER_OVERFLOW_ERROR);
    792     }
    793 
    794     for (idx = 0; idx < converterCount; idx++) {
    795         if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
    796             fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter);
    797             exit(U_PARSE_ERROR);
    798             break;
    799         }
    800     }
    801 
    802     converters[converterCount].converter = GET_ALIAS_NUM(converter);
    803     converters[converterCount].totalAliasCount = 0;
    804 
    805     return converterCount++;
    806 }
    807 
    808 /* resolve this alias based on the prioritization of the standard tags. */
    809 static void
    810 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
    811     uint16_t idx, idx2, idx3;
    812 
    813     for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
    814         for (idx2 = 0; idx2 < converterCount; idx2++) {
    815             for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
    816                 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
    817                 if (aliasNum == alias) {
    818                     *tagNum = idx;
    819                     *converterNum = idx2;
    820                     return;
    821                 }
    822             }
    823         }
    824     }
    825     /* Do the leftovers last, just in case */
    826     /* There is no need to do the ALL tag */
    827     idx = 0;
    828     for (idx2 = 0; idx2 < converterCount; idx2++) {
    829         for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
    830             uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
    831             if (aliasNum == alias) {
    832                 *tagNum = idx;
    833                 *converterNum = idx2;
    834                 return;
    835             }
    836         }
    837     }
    838     *tagNum = UINT16_MAX;
    839     *converterNum = UINT16_MAX;
    840     fprintf(stderr, "%s: warning: alias %s not found\n",
    841         path,
    842         GET_ALIAS_STR(alias));
    843     return;
    844 }
    845 
    846 /* The knownAliases should be sorted before calling this function */
    847 static uint32_t
    848 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
    849     uint32_t uniqueAliasIdx = 0;
    850     uint32_t idx;
    851     uint16_t currTagNum, oldTagNum;
    852     uint16_t currConvNum, oldConvNum;
    853     const char *lastName;
    854 
    855     resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
    856     uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
    857     oldConvNum = currConvNum;
    858     uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
    859     uniqueAliasIdx++;
    860     lastName = GET_ALIAS_STR(knownAliases[0]);
    861 
    862     for (idx = 1; idx < knownAliasesCount; idx++) {
    863         resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
    864         if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
    865             /* duplicate found */
    866             if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
    867                 || oldTagNum == 0) {
    868                 oldTagNum = currTagNum;
    869                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
    870                 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
    871                 if (verbose) {
    872                     printf("using %s instead of %s -> %s",
    873                         GET_ALIAS_STR(knownAliases[idx]),
    874                         lastName,
    875                         GET_ALIAS_STR(converters[currConvNum].converter));
    876                     if (oldConvNum != currConvNum) {
    877                         printf(" (alias conflict)");
    878                     }
    879                     puts("");
    880                 }
    881             }
    882             else {
    883                 /* else ignore it */
    884                 if (verbose) {
    885                     printf("folding %s into %s -> %s",
    886                         GET_ALIAS_STR(knownAliases[idx]),
    887                         lastName,
    888                         GET_ALIAS_STR(converters[oldConvNum].converter));
    889                     if (oldConvNum != currConvNum) {
    890                         printf(" (alias conflict)");
    891                     }
    892                     puts("");
    893                 }
    894             }
    895             if (oldConvNum != currConvNum) {
    896                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
    897             }
    898         }
    899         else {
    900             uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
    901             oldConvNum = currConvNum;
    902             uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
    903             uniqueAliasIdx++;
    904             lastName = GET_ALIAS_STR(knownAliases[idx]);
    905             oldTagNum = currTagNum;
    906             /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
    907         }
    908         if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
    909             uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
    910         }
    911     }
    912     return uniqueAliasIdx;
    913 }
    914 
    915 static void
    916 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
    917     uint32_t aliasNum;
    918     AliasList *aliasList = &tags[tag].aliasList[converter];
    919 
    920     if (aliasList->aliasCount == 0) {
    921         aliasArrLists[tag*converterCount + converter] = 0;
    922     }
    923     else {
    924         aliasLists[aliasListsSize++] = aliasList->aliasCount;
    925 
    926         /* write into the array area a 1's based index. */
    927         aliasArrLists[tag*converterCount + converter] = aliasListsSize;
    928 
    929 /*        printf("tag %s converter %s\n",
    930             GET_TAG_STR(tags[tag].tag),
    931             GET_ALIAS_STR(converters[converter].converter));*/
    932         for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
    933             uint16_t value;
    934 /*            printf("   %s\n",
    935                 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
    936             if (aliasList->aliases[aliasNum]) {
    937                 value = aliasList->aliases[aliasNum] + offset;
    938             } else {
    939                 value = 0;
    940                 if (tag != 0 && !quiet) { /* Only show the warning when it's not the leftover tag. */
    941                     fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n",
    942                             path,
    943                             GET_TAG_STR(tags[tag].tag),
    944                             GET_ALIAS_STR(converters[converter].converter));
    945                 }
    946             }
    947             aliasLists[aliasListsSize++] = value;
    948             if (aliasListsSize >= MAX_LIST_SIZE) {
    949                 fprintf(stderr, "%s: error: Too many alias lists\n", path);
    950                 exit(U_BUFFER_OVERFLOW_ERROR);
    951             }
    952 
    953         }
    954     }
    955 }
    956 
    957 static void
    958 createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    959     int32_t currStrLen;
    960     uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);
    961     while ((currStrLen = (int32_t)uprv_strlen(origStringBlock)) < stringBlockLength) {
    962         int32_t currStrSize = currStrLen + 1;
    963         if (currStrLen > 0) {
    964             int32_t normStrLen;
    965             ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
    966             normStrLen = uprv_strlen(normalizedStrings);
    967             if (normStrLen > 0) {
    968                 uprv_memset(normalizedStrings + normStrLen, 0, currStrSize - normStrLen);
    969             }
    970         }
    971         stringBlockLength -= currStrSize;
    972         normalizedStrings += currStrSize;
    973         origStringBlock += currStrSize;
    974     }
    975 }
    976 
    977 static void
    978 writeAliasTable(UNewDataMemory *out) {
    979     uint32_t i, j;
    980     uint32_t uniqueAliasesSize;
    981     uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
    982     uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
    983     uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
    984     uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
    985 
    986     qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
    987     uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
    988 
    989     /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
    990     aliasListsSize = 0;
    991 
    992     /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
    993     for (i = 0; i < tagCount; ++i) {
    994         for (j = 0; j < converterCount; ++j) {
    995             createOneAliasList(aliasArrLists, i, j, aliasOffset);
    996         }
    997     }
    998 
    999     /* Write the size of the TOC */
   1000     if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
   1001         udata_write32(out, 8);
   1002     }
   1003     else {
   1004         udata_write32(out, 9);
   1005     }
   1006 
   1007     /* Write the sizes of each section */
   1008     /* All sizes are the number of uint16_t units, not bytes */
   1009     udata_write32(out, converterCount);
   1010     udata_write32(out, tagCount);
   1011     udata_write32(out, uniqueAliasesSize);  /* list of aliases */
   1012     udata_write32(out, uniqueAliasesSize);  /* The preresolved form of mapping an untagged the alias to a converter */
   1013     udata_write32(out, tagCount * converterCount);
   1014     udata_write32(out, aliasListsSize + 1);
   1015     udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
   1016     udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
   1017     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
   1018         udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
   1019     }
   1020 
   1021     /* write the table of converters */
   1022     /* Think of this as the column headers */
   1023     for(i=0; i<converterCount; ++i) {
   1024         udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
   1025     }
   1026 
   1027     /* write the table of tags */
   1028     /* Think of this as the row headers */
   1029     for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
   1030         udata_write16(out, tags[i].tag);
   1031     }
   1032     /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
   1033     udata_write16(out, tags[EMPTY_TAG_NUM].tag);
   1034     udata_write16(out, tags[ALL_TAG_NUM].tag);
   1035 
   1036     /* Write the unique list of aliases */
   1037     udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
   1038 
   1039     /* Write the unique list of aliases */
   1040     udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
   1041 
   1042     /* Write the array to the lists */
   1043     udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
   1044     /* Now write the leftover part of the array for the EMPTY and ALL lists */
   1045     udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
   1046 
   1047     /* Offset the next array to make the index start at 1. */
   1048     udata_write16(out, 0xDEAD);
   1049 
   1050     /* Write the lists */
   1051     udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
   1052 
   1053     /* Write any options for the alias table. */
   1054     udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
   1055 
   1056     /* write the tags strings */
   1057     udata_writeString(out, tagBlock.store, tagBlock.top);
   1058 
   1059     /* write the aliases strings */
   1060     udata_writeString(out, stringBlock.store, stringBlock.top);
   1061 
   1062     /* write the normalized aliases strings */
   1063     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
   1064         char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
   1065         createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
   1066         createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
   1067 
   1068         /* Write out the complete normalized array. */
   1069         udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
   1070         uprv_free(normalizedStrings);
   1071     }
   1072 
   1073     uprv_free(uniqueAliasesToConverter);
   1074     uprv_free(uniqueAliases);
   1075     uprv_free(aliasArrLists);
   1076 }
   1077 
   1078 static char *
   1079 allocString(StringBlock *block, const char *s, int32_t length) {
   1080     uint32_t top;
   1081     char *p;
   1082 
   1083     if(length<0) {
   1084         length=(int32_t)uprv_strlen(s);
   1085     }
   1086 
   1087     /*
   1088      * add 1 for the terminating NUL
   1089      * and round up (+1 &~1)
   1090      * to keep the addresses on a 16-bit boundary
   1091      */
   1092     top=block->top + (uint32_t)((length + 1 + 1) & ~1);
   1093 
   1094     if(top >= block->max) {
   1095         fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
   1096         exit(U_MEMORY_ALLOCATION_ERROR);
   1097     }
   1098 
   1099     /* get the pointer and copy the string */
   1100     p = block->store + block->top;
   1101     uprv_memcpy(p, s, length);
   1102     p[length] = 0; /* NUL-terminate it */
   1103     if((length & 1) == 0) {
   1104         p[length + 1] = 0; /* set the padding byte */
   1105     }
   1106 
   1107     /* check for invariant characters now that we have a NUL-terminated string for easy output */
   1108     if(!uprv_isInvariantString(p, length)) {
   1109         fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p);
   1110         exit(U_INVALID_TABLE_FORMAT);
   1111     }
   1112 
   1113     block->top = top;
   1114     return p;
   1115 }
   1116 
   1117 static int
   1118 compareAliases(const void *alias1, const void *alias2) {
   1119     /* Names like IBM850 and ibm-850 need to be sorted together */
   1120     int result = ucnv_compareNames(GET_ALIAS_STR(*(uint16_t*)alias1), GET_ALIAS_STR(*(uint16_t*)alias2));
   1121     if (!result) {
   1122         /* Sort the shortest first */
   1123         return (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias1)) - (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias2));
   1124     }
   1125     return result;
   1126 }
   1127 
   1128 /*
   1129  * Hey, Emacs, please set the following:
   1130  *
   1131  * Local Variables:
   1132  * indent-tabs-mode: nil
   1133  * End:
   1134  *
   1135  */
   1136 
   1137