Home | History | Annotate | Download | only in gencnval
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2016 International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  gencnval.c
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999nov05
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This program reads convrtrs.txt and writes a memory-mappable
     19 *   converter name alias table to cnvalias.dat .
     20 *
     21 *   This program currently writes version 2.1 of the data format. See
     22 *   ucnv_io.c for more details on the format. Note that version 2.1
     23 *   is written in such a way that a 2.0 reader will be able to use it,
     24 *   and a 2.1 reader will be able to read 2.0.
     25 */
     26 
     27 #include "unicode/utypes.h"
     28 #include "unicode/putil.h"
     29 #include "unicode/ucnv.h" /* ucnv_compareNames() */
     30 #include "ucnv_io.h"
     31 #include "cmemory.h"
     32 #include "cstring.h"
     33 #include "uinvchar.h"
     34 #include "filestrm.h"
     35 #include "unicode/uclean.h"
     36 #include "unewdata.h"
     37 #include "uoptions.h"
     38 
     39 #include <stdio.h>
     40 #include <stdlib.h>
     41 #include <ctype.h>
     42 
/* TODO: Need to check that each alias name is shorter than UCNV_MAX_CONVERTER_NAME_LENGTH */

/* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
 That is the maximum combined size of the string stores,
 because the strings are indexed at 16-bit boundaries by a
 16-bit index, and there is only one section for the
 strings.
 With the values below the sum is exactly 0x1FFFE, i.e. the limit.
 */
#define STRING_STORE_SIZE 0x1FBFE   /* 130046 */
#define TAG_STORE_SIZE      0x400   /* 1024 */

/* The combined tag and converter count can affect the number of lists
 created.  The size of all lists must be less than (2^17 - 1)
 because the lists are indexed as a 16-bit array with a 16-bit index.
 */
#define MAX_TAG_COUNT 0x3F      /* 63 */
/* UCNV_CONVERTER_INDEX_MASK is not defined in this file (presumably ucnv_io.h). */
#define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
#define MAX_ALIAS_COUNT 0xFFFF  /* 65535 */

/* The maximum number of aliases that a standard tag/converter combination can have.
 As of 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
 this value. More than 31 is not recommended.
 */
#define MAX_TC_ALIAS_COUNT 0x1F    /* 31 */

/* Size in bytes of the line buffers used by parseFile(). */
#define MAX_LINE_SIZE 0x7FFF    /* 32767 */
/* Number of uint16_t entries in aliasLists[]. */
#define MAX_LIST_SIZE 0xFFFF    /* 65535 */

/* Name and type handed to udata_create() for the output data item. */
#define DATA_NAME "cnvalias"
#define DATA_TYPE "icu" /* ICU alias table */

/* parseFile() registers the empty tag first and "ALL" second, so they get
   tag numbers 0 and 1 respectively. */
#define ALL_TAG_STR "ALL"
#define ALL_TAG_NUM 1
#define EMPTY_TAG_NUM 0
     77 
/* UDataInfo cf. udata.h
 * Header descriptor passed to udata_create() in main().
 * The member names are declared in udata.h, which is not visible here; the
 * per-line labels on the first six entries are assumptions -- TODO confirm.
 * NOTE(review): the file header comment says this tool writes format
 * version 2.1, but formatVersion below is {3, 0, 1, 0}; the header comment
 * looks stale -- confirm against ucnv_io.c.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),            /* presumably the structure size */
    0,                            /* presumably reserved */

    U_IS_BIG_ENDIAN,              /* byte order of the build platform */
    U_CHARSET_FAMILY,             /* charset family of the build platform */
    sizeof(UChar),
    0,                            /* presumably reserved */

    {0x43, 0x76, 0x41, 0x6c},     /* dataFormat="CvAl" */
    {3, 0, 1, 0},                 /* formatVersion */
    {1, 4, 2, 0}                  /* dataVersion */
};
     92 
/*
 * Bookkeeping for one string pool that allocString() carves strings from.
 * NOTE(review): allocString() is defined outside this view; judging by the
 * initializers below, `store` is the backing buffer, `top` the number of
 * bytes already used and `max` the capacity in bytes -- confirm there.
 */
typedef struct {
    char *store;
    uint32_t top;
    uint32_t max;
} StringBlock;

/* Pool for converter and alias names. */
static char stringStore[STRING_STORE_SIZE];
static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };

/*
 * The aliases of one (tag, converter) pair.
 * The array is grown by addAlias() with uprv_realloc(). Slot 0 is reserved
 * for the default alias of the pair and holds 0 when there is none.
 */
typedef struct {
    uint16_t    aliasCount;
    uint16_t    *aliases;     /* Indexes into stringStore, in 2-byte units (see GET_ALIAS_STR) */
} AliasList;

/* One converter, i.e. one column of the tag-by-converter table. */
typedef struct {
    uint16_t converter;     /* Index into stringStore, in 2-byte units */
    uint16_t totalAliasCount;    /* Total aliases in this column; incremented by addAlias() */
} Converter;

/* Converters in the order addConverter() registered them. */
static Converter converters[MAX_CONV_COUNT];
static uint16_t converterCount=0;

/* Pool for standard tag names. */
static char tagStore[TAG_STORE_SIZE];
static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };

/* One standard tag, i.e. one row of the tag-by-converter table. */
typedef struct {
    uint16_t    tag;        /* Index into tagStore, in 2-byte units (see GET_TAG_STR) */
    uint16_t    totalAliasCount; /* Total aliases in this row; incremented by addAlias() */
    AliasList   aliasList[MAX_CONV_COUNT];  /* indexed by converter number */
} Tag;

/* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
static Tag tags[MAX_TAG_COUNT];
static uint16_t tagCount = 0;

/* Every alias seen so far, as stringStore indexes; appended to by addToKnownAliases() */
static uint16_t knownAliases[MAX_ALIAS_COUNT];
static uint16_t knownAliasesCount = 0;
/*static uint16_t duplicateKnownAliasesCount = 0;*/

/* Used for storing the lists section that points to aliases */
static uint16_t aliasLists[MAX_LIST_SIZE];
static uint16_t aliasListsSize = 0;

/* Set by parseFile() once a "{ ... }" standard-tag declaration line has been
   processed; alias lines are rejected before that. */
static UBool standardTagsUsed = FALSE;
/* Command line switches, set in main(). */
static UBool verbose = FALSE;
static UBool quiet = FALSE;
/* Current input line number used in diagnostics; parseFile() resets it to 0. */
static int lineNum = 1;

/* NOTE(review): not referenced in the visible part of the file; presumably
   written to the output by writeAliasTable() -- confirm. */
static UConverterAliasOptions tableOptions = {
    UCNV_IO_STD_NORMALIZED,
    1 /* containsCnvOptionInfo */
};


/**
 * Path to convrtrs.txt. Set in main() and used as the file name in
 * diagnostics printed by the parsing functions.
 */
const char *path;
    153 
/* prototypes --------------------------------------------------------------- */

/* Parses one logical line: a converter name followed by its aliases and
   optional "{tag ...}" lists. */
static void
parseLine(const char *line);

/* Reads the whole input, joins continuation lines and dispatches each
   logical line to addOfficialTaggedStandards() or parseLine(). */
static void
parseFile(FileStream *in);

/* Cuts a line at the newline or '#' comment and trims trailing whitespace;
   returns the resulting length. */
static int32_t
chomp(char *line);

/* Registers the standard tags listed in a "{ ... }" declaration line. */
static void
addOfficialTaggedStandards(char *line, int32_t lineLen);

/* Adds an alias to the list of a (standard tag, converter) pair and returns
   that list's alias count. */
static uint16_t
addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);

/* Registers a new converter name and returns its index in converters[]. */
static uint16_t
addConverter(const char *converter);

/* Defined outside this view; callers treat the result as a string stored in
   the given block (a length of -1 is passed for NUL-terminated input). */
static char *
allocString(StringBlock *block, const char *s, int32_t length);

/* Appends the alias's stringStore index to knownAliases[] and returns it. */
static uint16_t
addToKnownAliases(const char *alias);

/* Defined outside this view; the signature matches a qsort()-style
   comparator -- TODO confirm usage. */
static int
compareAliases(const void *alias1, const void *alias2);

/* Returns the number of an already known tag, or registers a new one. */
static uint16_t
getTagNumber(const char *tag, uint16_t tagLen);

/*static void
addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/

/* Defined outside this view; main() calls it to write the table to `out`. */
static void
writeAliasTable(UNewDataMemory *out);
    191 
    192 /* -------------------------------------------------------------------------- */
    193 
    194 /* Presumes that you used allocString() */
    195 #define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
    196 #define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))
    197 
    198 /* Presumes that you used allocString() */
    199 #define GET_ALIAS_NUM(str) ((uint16_t)((str - stringStore) >> 1))
    200 #define GET_TAG_NUM(str) ((uint16_t)((str - tagStore) >> 1))
    201 
/* Indexes into options[] below; the two lists must stay in the same order. */
enum
{
    HELP1,
    HELP2,
    VERBOSE,
    COPYRIGHT,
    DESTDIR,
    SOURCEDIR,
    QUIET
};

/* Command line options handed to u_parseArgs() in main(). */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_VERBOSE,
    UOPTION_COPYRIGHT,
    UOPTION_DESTDIR,
    UOPTION_SOURCEDIR,
    UOPTION_QUIET
};
    222 
    223 extern int
    224 main(int argc, char* argv[]) {
    225     int i, n;
    226     char pathBuf[512];
    227     FileStream *in;
    228     UNewDataMemory *out;
    229     UErrorCode errorCode=U_ZERO_ERROR;
    230 
    231     U_MAIN_INIT_ARGS(argc, argv);
    232 
    233     /* preset then read command line options */
    234     options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
    235     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
    236 
    237     /* error handling, printing usage message */
    238     if(argc<0) {
    239         fprintf(stderr,
    240             "error in command line argument \"%s\"\n",
    241             argv[-argc]);
    242     }
    243     if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
    244         fprintf(stderr,
    245             "usage: %s [-options] [convrtrs.txt]\n"
    246             "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
    247             "options:\n"
    248             "\t-h or -? or --help  this usage text\n"
    249             "\t-v or --verbose     prints out extra information about the alias table\n"
    250             "\t-q or --quiet       do not display warnings and progress\n"
    251             "\t-c or --copyright   include a copyright notice\n"
    252             "\t-d or --destdir     destination directory, followed by the path\n"
    253             "\t-s or --sourcedir   source directory, followed by the path\n",
    254             argv[0]);
    255         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    256     }
    257 
    258     if(options[VERBOSE].doesOccur) {
    259         verbose = TRUE;
    260     }
    261 
    262     if(options[QUIET].doesOccur) {
    263         quiet = TRUE;
    264     }
    265 
    266     if(argc>=2) {
    267         path=argv[1];
    268     } else {
    269         path=options[SOURCEDIR].value;
    270         if(path!=NULL && *path!=0) {
    271             char *end;
    272 
    273             uprv_strcpy(pathBuf, path);
    274             end = uprv_strchr(pathBuf, 0);
    275             if(*(end-1)!=U_FILE_SEP_CHAR) {
    276                 *(end++)=U_FILE_SEP_CHAR;
    277             }
    278             uprv_strcpy(end, "convrtrs.txt");
    279             path=pathBuf;
    280         } else {
    281             path = "convrtrs.txt";
    282         }
    283     }
    284 
    285     uprv_memset(stringStore, 0, sizeof(stringStore));
    286     uprv_memset(tagStore, 0, sizeof(tagStore));
    287     uprv_memset(converters, 0, sizeof(converters));
    288     uprv_memset(tags, 0, sizeof(tags));
    289     uprv_memset(aliasLists, 0, sizeof(aliasLists));
    290     uprv_memset(knownAliases, 0, sizeof(aliasLists));
    291 
    292 
    293     in=T_FileStream_open(path, "r");
    294     if(in==NULL) {
    295         fprintf(stderr, "gencnval: unable to open input file %s\n", path);
    296         exit(U_FILE_ACCESS_ERROR);
    297     }
    298     parseFile(in);
    299     T_FileStream_close(in);
    300 
    301     /* create the output file */
    302     out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
    303                      options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
    304     if(U_FAILURE(errorCode)) {
    305         fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
    306         exit(errorCode);
    307     }
    308 
    309     /* write the table of aliases based on a tag/converter name combination */
    310     writeAliasTable(out);
    311 
    312     /* finish */
    313     udata_finish(out, &errorCode);
    314     if(U_FAILURE(errorCode)) {
    315         fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
    316         exit(errorCode);
    317     }
    318 
    319     /* clean up tags */
    320     for (i = 0; i < MAX_TAG_COUNT; i++) {
    321         for (n = 0; n < MAX_CONV_COUNT; n++) {
    322             if (tags[i].aliasList[n].aliases!=NULL) {
    323                 uprv_free(tags[i].aliasList[n].aliases);
    324             }
    325         }
    326     }
    327 
    328     return 0;
    329 }
    330 
    331 static void
    332 parseFile(FileStream *in) {
    333     char line[MAX_LINE_SIZE];
    334     char lastLine[MAX_LINE_SIZE];
    335     int32_t lineSize = 0;
    336     int32_t lastLineSize = 0;
    337     UBool validParse = TRUE;
    338 
    339     lineNum = 0;
    340 
    341     /* Add the empty tag, which is for untagged aliases */
    342     getTagNumber("", 0);
    343     getTagNumber(ALL_TAG_STR, 3);
    344     allocString(&stringBlock, "", 0);
    345 
    346     /* read the list of aliases */
    347     while (validParse) {
    348         validParse = FALSE;
    349 
    350         /* Read non-empty lines that don't start with a space character. */
    351         while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
    352             lastLineSize = chomp(lastLine);
    353             if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) {
    354                 uprv_strcpy(line + lineSize, lastLine);
    355                 lineSize += lastLineSize;
    356             } else if (lineSize > 0) {
    357                 validParse = TRUE;
    358                 break;
    359             }
    360             lineNum++;
    361         }
    362 
    363         if (validParse || lineSize > 0) {
    364             if (isspace((int)*line)) {
    365                 fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1);
    366                 exit(U_PARSE_ERROR);
    367             } else if (line[0] == '{') {
    368                 if (!standardTagsUsed && line[lineSize - 1] != '}') {
    369                     fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum);
    370                     exit(U_PARSE_ERROR);
    371                 }
    372                 addOfficialTaggedStandards(line, lineSize);
    373                 standardTagsUsed = TRUE;
    374             } else {
    375                 if (standardTagsUsed) {
    376                     parseLine(line);
    377                 }
    378                 else {
    379                     fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum);
    380                     exit(U_PARSE_ERROR);
    381                 }
    382             }
    383             /* Was the last line consumed */
    384             if (lastLineSize > 0) {
    385                 uprv_strcpy(line, lastLine);
    386                 lineSize = lastLineSize;
    387             }
    388             else {
    389                 lineSize = 0;
    390             }
    391         }
    392         lineNum++;
    393     }
    394 }
    395 
    396 /* This works almost like the Perl chomp.
    397  It removes the newlines, comments and trailing whitespace (not preceding whitespace).
    398 */
    399 static int32_t
    400 chomp(char *line) {
    401     char *s = line;
    402     char *lastNonSpace = line;
    403     while(*s!=0) {
    404         /* truncate at a newline or a comment */
    405         if(*s == '\r' || *s == '\n' || *s == '#') {
    406             *s = 0;
    407             break;
    408         }
    409         if (!isspace((int)*s)) {
    410             lastNonSpace = s;
    411         }
    412         ++s;
    413     }
    414     if (lastNonSpace++ > line) {
    415         *lastNonSpace = 0;
    416         s = lastNonSpace;
    417     }
    418     return (int32_t)(s - line);
    419 }
    420 
    421 static void
    422 parseLine(const char *line) {
    423     uint16_t pos=0, start, limit, length, cnv;
    424     char *converter, *alias;
    425 
    426     /* skip leading white space */
    427     /* There is no whitespace at the beginning anymore */
    428 /*    while(line[pos]!=0 && isspace(line[pos])) {
    429         ++pos;
    430     }
    431 */
    432 
    433     /* is there nothing on this line? */
    434     if(line[pos]==0) {
    435         return;
    436     }
    437 
    438     /* get the converter name */
    439     start=pos;
    440     while(line[pos]!=0 && !isspace((int)line[pos])) {
    441         ++pos;
    442     }
    443     limit=pos;
    444 
    445     /* store the converter name */
    446     length=(uint16_t)(limit-start);
    447     converter=allocString(&stringBlock, line+start, length);
    448 
    449     /* add the converter to the converter table */
    450     cnv=addConverter(converter);
    451 
    452     /* The name itself may be tagged, so let's added it to the aliases list properly */
    453     pos = start;
    454 
    455     /* get all the real aliases */
    456     for(;;) {
    457 
    458         /* skip white space */
    459         while(line[pos]!=0 && isspace((int)line[pos])) {
    460             ++pos;
    461         }
    462 
    463         /* is there no more alias name on this line? */
    464         if(line[pos]==0) {
    465             break;
    466         }
    467 
    468         /* get an alias name */
    469         start=pos;
    470         while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) {
    471             ++pos;
    472         }
    473         limit=pos;
    474 
    475         /* store the alias name */
    476         length=(uint16_t)(limit-start);
    477         if (start == 0) {
    478             /* add the converter as its own alias to the alias table */
    479             alias = converter;
    480             addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
    481         }
    482         else {
    483             alias=allocString(&stringBlock, line+start, length);
    484             addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
    485         }
    486         addToKnownAliases(alias);
    487 
    488         /* add the alias/converter pair to the alias table */
    489         /* addAlias(alias, 0, cnv, FALSE);*/
    490 
    491         /* skip whitespace */
    492         while (line[pos] && isspace((int)line[pos])) {
    493             ++pos;
    494         }
    495 
    496         /* handle tags if they are present */
    497         if (line[pos] == '{') {
    498             ++pos;
    499             do {
    500                 start = pos;
    501                 while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) {
    502                     ++pos;
    503                 }
    504                 limit = pos;
    505 
    506                 if (start != limit) {
    507                     /* add the tag to the tag table */
    508                     uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
    509                     addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
    510                 }
    511 
    512                 while (line[pos] && isspace((int)line[pos])) {
    513                     ++pos;
    514                 }
    515             } while (line[pos] && line[pos] != '}');
    516 
    517             if (line[pos] == '}') {
    518                 ++pos;
    519             } else {
    520                 fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum);
    521                 exit(U_UNMATCHED_BRACES);
    522             }
    523         } else {
    524             addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
    525         }
    526     }
    527 }
    528 
    529 static uint16_t
    530 getTagNumber(const char *tag, uint16_t tagLen) {
    531     char *atag;
    532     uint16_t t;
    533     UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
    534 
    535     if (tagCount >= MAX_TAG_COUNT) {
    536         fprintf(stderr, "%s:%d: too many tags\n", path, lineNum);
    537         exit(U_BUFFER_OVERFLOW_ERROR);
    538     }
    539 
    540     if (preferredName) {
    541 /*        puts(tag);*/
    542         tagLen--;
    543     }
    544 
    545     for (t = 0; t < tagCount; ++t) {
    546         const char *currTag = GET_TAG_STR(tags[t].tag);
    547         if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
    548             return t;
    549         }
    550     }
    551 
    552     /* we need to add this tag */
    553     if (tagCount >= MAX_TAG_COUNT) {
    554         fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum);
    555         exit(U_BUFFER_OVERFLOW_ERROR);
    556     }
    557 
    558     /* allocate a new entry in the tag table */
    559     atag = allocString(&tagBlock, tag, tagLen);
    560 
    561     if (standardTagsUsed) {
    562         fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
    563             path, lineNum, atag);
    564         exit(1);
    565     }
    566     else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
    567         fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
    568             path, lineNum, atag);
    569     }
    570 
    571     /* add the tag to the tag table */
    572     tags[tagCount].tag = GET_TAG_NUM(atag);
    573     /* The aliasList should be set to 0's already */
    574 
    575     return tagCount++;
    576 }
    577 
    578 /*static void
    579 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
    580     tags[tag].aliases[converter] = alias;
    581 }
    582 */
    583 
    584 static void
    585 addOfficialTaggedStandards(char *line, int32_t lineLen) {
    586     char *atag;
    587     char *endTagExp;
    588     char *tag;
    589     static const char WHITESPACE[] = " \t";
    590 
    591     if (tagCount > UCNV_NUM_RESERVED_TAGS) {
    592         fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum);
    593         exit(U_BUFFER_OVERFLOW_ERROR);
    594     }
    595     tag = strchr(line, '{');
    596     if (tag == NULL) {
    597         /* Why were we called? */
    598         fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum);
    599         exit(U_PARSE_ERROR);
    600     }
    601     tag++;
    602     endTagExp = strchr(tag, '}');
    603     if (endTagExp == NULL) {
    604         fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum);
    605         exit(U_PARSE_ERROR);
    606     }
    607     endTagExp[0] = 0;
    608 
    609     tag = strtok(tag, WHITESPACE);
    610     while (tag != NULL) {
    611 /*        printf("Adding original tag \"%s\"\n", tag);*/
    612 
    613         /* allocate a new entry in the tag table */
    614         atag = allocString(&tagBlock, tag, -1);
    615 
    616         /* add the tag to the tag table */
    617         tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
    618 
    619         /* The aliasList should already be set to 0's */
    620 
    621         /* Get next tag */
    622         tag = strtok(NULL, WHITESPACE);
    623     }
    624 }
    625 
    626 static uint16_t
    627 addToKnownAliases(const char *alias) {
    628 /*    uint32_t idx; */
    629     /* strict matching */
    630 /*    for (idx = 0; idx < knownAliasesCount; idx++) {
    631         uint16_t num = GET_ALIAS_NUM(alias);
    632         if (knownAliases[idx] != num
    633             && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
    634         {
    635             fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path,
    636                 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
    637             duplicateKnownAliasesCount++;
    638             break;
    639         }
    640         else if (knownAliases[idx] != num
    641             && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
    642         {
    643             if (verbose) {
    644                 fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path,
    645                     lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
    646             }
    647             duplicateKnownAliasesCount++;
    648             break;
    649         }
    650     }
    651 */
    652     if (knownAliasesCount >= MAX_ALIAS_COUNT) {
    653         fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n",
    654             path, lineNum);
    655         exit(U_BUFFER_OVERFLOW_ERROR);
    656     }
    657     /* TODO: We could try to unlist exact duplicates. */
    658     return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
    659 }
    660 
    661 /*
    662 @param standard When standard is 0, then it's the "empty" tag.
    663 */
    664 static uint16_t
    665 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
    666     uint32_t idx, idx2;
    667     UBool startEmptyWithoutDefault = FALSE;
    668     AliasList *aliasList;
    669 
    670     if(standard>=MAX_TAG_COUNT) {
    671         fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum);
    672         exit(U_BUFFER_OVERFLOW_ERROR);
    673     }
    674     if(converter>=MAX_CONV_COUNT) {
    675         fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum);
    676         exit(U_BUFFER_OVERFLOW_ERROR);
    677     }
    678     aliasList = &tags[standard].aliasList[converter];
    679 
    680     if (strchr(alias, '}')) {
    681         fprintf(stderr, "%s:%d: error: unmatched } found\n", path,
    682             lineNum);
    683     }
    684 
    685     if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
    686         fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path,
    687             lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
    688         exit(U_BUFFER_OVERFLOW_ERROR);
    689     }
    690 
    691     /* Show this warning only once. All aliases are added to the "ALL" tag. */
    692     if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
    693         /* Normally these option values are parsed at runtime, and they can
    694            be discarded when the alias is a default converter. Options should
    695            only be on a converter and not an alias. */
    696         if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
    697         {
    698             fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
    699                 lineNum, alias);
    700         }
    701         if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
    702         {
    703             fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
    704                 lineNum, alias);
    705         }
    706     }
    707 
    708     if (standard != ALL_TAG_NUM) {
    709         /* Check for duplicate aliases for this tag on all converters */
    710         for (idx = 0; idx < converterCount; idx++) {
    711             for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
    712                 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
    713                 if (aliasNum
    714                     && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
    715                 {
    716                     if (idx == converter) {
    717                         /*
    718                          * (alias, standard) duplicates are harmless if they map to the same converter.
    719                          * Only print a warning in verbose mode, or if the alias is a precise duplicate,
    720                          * not just a lenient-match duplicate.
    721                          */
    722                         if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
    723                             fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path,
    724                                 lineNum, alias, GET_ALIAS_STR(aliasNum),
    725                                 GET_TAG_STR(tags[standard].tag),
    726                                 GET_ALIAS_STR(converters[converter].converter));
    727                         }
    728                     } else {
    729                         fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path,
    730                             lineNum, alias, GET_ALIAS_STR(aliasNum),
    731                             GET_TAG_STR(tags[standard].tag),
    732                             GET_ALIAS_STR(converters[converter].converter),
    733                             GET_ALIAS_STR(converters[idx].converter));
    734                     }
    735                     break;
    736                 }
    737             }
    738         }
    739 
    740         /* Check for duplicate default aliases for this converter on all tags */
    741         /* It's okay to have multiple standards prefer the same name */
    742 /*        if (verbose && !dupFound) {
    743             for (idx = 0; idx < tagCount; idx++) {
    744                 if (tags[idx].aliasList[converter].aliases) {
    745                     uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
    746                     if (aliasNum
    747                         && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
    748                     {
    749                         fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path,
    750                             lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
    751                         break;
    752                     }
    753                 }
    754             }
    755         }*/
    756     }
    757 
    758     if (aliasList->aliasCount <= 0) {
    759         aliasList->aliasCount++;
    760         startEmptyWithoutDefault = TRUE;
    761     }
    762     aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
    763     if (startEmptyWithoutDefault) {
    764         aliasList->aliases[0] = 0;
    765     }
    766     if (defaultName) {
    767         if (aliasList->aliases[0] != 0) {
    768             fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path,
    769                 lineNum,
    770                 alias,
    771                 GET_ALIAS_STR(aliasList->aliases[0]),
    772                 GET_TAG_STR(tags[standard].tag),
    773                 GET_ALIAS_STR(converters[converter].converter));
    774             exit(U_PARSE_ERROR);
    775         }
    776         aliasList->aliases[0] = GET_ALIAS_NUM(alias);
    777     } else {
    778         aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
    779     }
    780 /*    aliasList->converter = converter;*/
    781 
    782     converters[converter].totalAliasCount++; /* One more to the column */
    783     tags[standard].totalAliasCount++; /* One more to the row */
    784 
    785     return aliasList->aliasCount;
    786 }
    787 
    788 static uint16_t
    789 addConverter(const char *converter) {
    790     uint32_t idx;
    791     if(converterCount>=MAX_CONV_COUNT) {
    792         fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum);
    793         exit(U_BUFFER_OVERFLOW_ERROR);
    794     }
    795 
    796     for (idx = 0; idx < converterCount; idx++) {
    797         if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
    798             fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter);
    799             exit(U_PARSE_ERROR);
    800             break;
    801         }
    802     }
    803 
    804     converters[converterCount].converter = GET_ALIAS_NUM(converter);
    805     converters[converterCount].totalAliasCount = 0;
    806 
    807     return converterCount++;
    808 }
    809 
    810 /* resolve this alias based on the prioritization of the standard tags. */
    811 static void
    812 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
    813     uint16_t idx, idx2, idx3;
    814 
    815     for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
    816         for (idx2 = 0; idx2 < converterCount; idx2++) {
    817             for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
    818                 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
    819                 if (aliasNum == alias) {
    820                     *tagNum = idx;
    821                     *converterNum = idx2;
    822                     return;
    823                 }
    824             }
    825         }
    826     }
    827     /* Do the leftovers last, just in case */
    828     /* There is no need to do the ALL tag */
    829     idx = 0;
    830     for (idx2 = 0; idx2 < converterCount; idx2++) {
    831         for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
    832             uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
    833             if (aliasNum == alias) {
    834                 *tagNum = idx;
    835                 *converterNum = idx2;
    836                 return;
    837             }
    838         }
    839     }
    840     *tagNum = UINT16_MAX;
    841     *converterNum = UINT16_MAX;
    842     fprintf(stderr, "%s: warning: alias %s not found\n",
    843         path,
    844         GET_ALIAS_STR(alias));
    845     return;
    846 }
    847 
    848 /* The knownAliases should be sorted before calling this function */
    849 static uint32_t
    850 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
    851     uint32_t uniqueAliasIdx = 0;
    852     uint32_t idx;
    853     uint16_t currTagNum, oldTagNum;
    854     uint16_t currConvNum, oldConvNum;
    855     const char *lastName;
    856 
    857     resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
    858     uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
    859     oldConvNum = currConvNum;
    860     uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
    861     uniqueAliasIdx++;
    862     lastName = GET_ALIAS_STR(knownAliases[0]);
    863 
    864     for (idx = 1; idx < knownAliasesCount; idx++) {
    865         resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
    866         if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
    867             /* duplicate found */
    868             if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
    869                 || oldTagNum == 0) {
    870                 oldTagNum = currTagNum;
    871                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
    872                 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
    873                 if (verbose) {
    874                     printf("using %s instead of %s -> %s",
    875                         GET_ALIAS_STR(knownAliases[idx]),
    876                         lastName,
    877                         GET_ALIAS_STR(converters[currConvNum].converter));
    878                     if (oldConvNum != currConvNum) {
    879                         printf(" (alias conflict)");
    880                     }
    881                     puts("");
    882                 }
    883             }
    884             else {
    885                 /* else ignore it */
    886                 if (verbose) {
    887                     printf("folding %s into %s -> %s",
    888                         GET_ALIAS_STR(knownAliases[idx]),
    889                         lastName,
    890                         GET_ALIAS_STR(converters[oldConvNum].converter));
    891                     if (oldConvNum != currConvNum) {
    892                         printf(" (alias conflict)");
    893                     }
    894                     puts("");
    895                 }
    896             }
    897             if (oldConvNum != currConvNum) {
    898                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
    899             }
    900         }
    901         else {
    902             uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
    903             oldConvNum = currConvNum;
    904             uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
    905             uniqueAliasIdx++;
    906             lastName = GET_ALIAS_STR(knownAliases[idx]);
    907             oldTagNum = currTagNum;
    908             /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
    909         }
    910         if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
    911             uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
    912         }
    913     }
    914     return uniqueAliasIdx;
    915 }
    916 
    917 static void
    918 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
    919     uint32_t aliasNum;
    920     AliasList *aliasList = &tags[tag].aliasList[converter];
    921 
    922     if (aliasList->aliasCount == 0) {
    923         aliasArrLists[tag*converterCount + converter] = 0;
    924     }
    925     else {
    926         aliasLists[aliasListsSize++] = aliasList->aliasCount;
    927 
    928         /* write into the array area a 1's based index. */
    929         aliasArrLists[tag*converterCount + converter] = aliasListsSize;
    930 
    931 /*        printf("tag %s converter %s\n",
    932             GET_TAG_STR(tags[tag].tag),
    933             GET_ALIAS_STR(converters[converter].converter));*/
    934         for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
    935             uint16_t value;
    936 /*            printf("   %s\n",
    937                 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
    938             if (aliasList->aliases[aliasNum]) {
    939                 value = aliasList->aliases[aliasNum] + offset;
    940             } else {
    941                 value = 0;
    942                 if (tag != 0 && !quiet) { /* Only show the warning when it's not the leftover tag. */
    943                     fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n",
    944                             path,
    945                             GET_TAG_STR(tags[tag].tag),
    946                             GET_ALIAS_STR(converters[converter].converter));
    947                 }
    948             }
    949             aliasLists[aliasListsSize++] = value;
    950             if (aliasListsSize >= MAX_LIST_SIZE) {
    951                 fprintf(stderr, "%s: error: Too many alias lists\n", path);
    952                 exit(U_BUFFER_OVERFLOW_ERROR);
    953             }
    954 
    955         }
    956     }
    957 }
    958 
    959 static void
    960 createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    961     int32_t currStrLen;
    962     uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);
    963     while ((currStrLen = (int32_t)uprv_strlen(origStringBlock)) < stringBlockLength) {
    964         int32_t currStrSize = currStrLen + 1;
    965         if (currStrLen > 0) {
    966             int32_t normStrLen;
    967             ucnv_io_stripForCompare(normalizedStrings, origStringBlock);
    968             normStrLen = uprv_strlen(normalizedStrings);
    969             if (normStrLen > 0) {
    970                 uprv_memset(normalizedStrings + normStrLen, 0, currStrSize - normStrLen);
    971             }
    972         }
    973         stringBlockLength -= currStrSize;
    974         normalizedStrings += currStrSize;
    975         origStringBlock += currStrSize;
    976     }
    977 }
    978 
    979 static void
    980 writeAliasTable(UNewDataMemory *out) {
    981     uint32_t i, j;
    982     uint32_t uniqueAliasesSize;
    983     uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
    984     uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
    985     uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
    986     uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
    987 
    988     qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
    989     uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
    990 
    991     /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
    992     aliasListsSize = 0;
    993 
    994     /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
    995     for (i = 0; i < tagCount; ++i) {
    996         for (j = 0; j < converterCount; ++j) {
    997             createOneAliasList(aliasArrLists, i, j, aliasOffset);
    998         }
    999     }
   1000 
   1001     /* Write the size of the TOC */
   1002     if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
   1003         udata_write32(out, 8);
   1004     }
   1005     else {
   1006         udata_write32(out, 9);
   1007     }
   1008 
   1009     /* Write the sizes of each section */
   1010     /* All sizes are the number of uint16_t units, not bytes */
   1011     udata_write32(out, converterCount);
   1012     udata_write32(out, tagCount);
   1013     udata_write32(out, uniqueAliasesSize);  /* list of aliases */
   1014     udata_write32(out, uniqueAliasesSize);  /* The preresolved form of mapping an untagged the alias to a converter */
   1015     udata_write32(out, tagCount * converterCount);
   1016     udata_write32(out, aliasListsSize + 1);
   1017     udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
   1018     udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
   1019     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
   1020         udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
   1021     }
   1022 
   1023     /* write the table of converters */
   1024     /* Think of this as the column headers */
   1025     for(i=0; i<converterCount; ++i) {
   1026         udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
   1027     }
   1028 
   1029     /* write the table of tags */
   1030     /* Think of this as the row headers */
   1031     for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
   1032         udata_write16(out, tags[i].tag);
   1033     }
   1034     /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
   1035     udata_write16(out, tags[EMPTY_TAG_NUM].tag);
   1036     udata_write16(out, tags[ALL_TAG_NUM].tag);
   1037 
   1038     /* Write the unique list of aliases */
   1039     udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
   1040 
   1041     /* Write the unique list of aliases */
   1042     udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
   1043 
   1044     /* Write the array to the lists */
   1045     udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
   1046     /* Now write the leftover part of the array for the EMPTY and ALL lists */
   1047     udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
   1048 
   1049     /* Offset the next array to make the index start at 1. */
   1050     udata_write16(out, 0xDEAD);
   1051 
   1052     /* Write the lists */
   1053     udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
   1054 
   1055     /* Write any options for the alias table. */
   1056     udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
   1057 
   1058     /* write the tags strings */
   1059     udata_writeString(out, tagBlock.store, tagBlock.top);
   1060 
   1061     /* write the aliases strings */
   1062     udata_writeString(out, stringBlock.store, stringBlock.top);
   1063 
   1064     /* write the normalized aliases strings */
   1065     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
   1066         char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
   1067         createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
   1068         createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
   1069 
   1070         /* Write out the complete normalized array. */
   1071         udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
   1072         uprv_free(normalizedStrings);
   1073     }
   1074 
   1075     uprv_free(uniqueAliasesToConverter);
   1076     uprv_free(uniqueAliases);
   1077     uprv_free(aliasArrLists);
   1078 }
   1079 
   1080 static char *
   1081 allocString(StringBlock *block, const char *s, int32_t length) {
   1082     uint32_t top;
   1083     char *p;
   1084 
   1085     if(length<0) {
   1086         length=(int32_t)uprv_strlen(s);
   1087     }
   1088 
   1089     /*
   1090      * add 1 for the terminating NUL
   1091      * and round up (+1 &~1)
   1092      * to keep the addresses on a 16-bit boundary
   1093      */
   1094     top=block->top + (uint32_t)((length + 1 + 1) & ~1);
   1095 
   1096     if(top >= block->max) {
   1097         fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
   1098         exit(U_MEMORY_ALLOCATION_ERROR);
   1099     }
   1100 
   1101     /* get the pointer and copy the string */
   1102     p = block->store + block->top;
   1103     uprv_memcpy(p, s, length);
   1104     p[length] = 0; /* NUL-terminate it */
   1105     if((length & 1) == 0) {
   1106         p[length + 1] = 0; /* set the padding byte */
   1107     }
   1108 
   1109     /* check for invariant characters now that we have a NUL-terminated string for easy output */
   1110     if(!uprv_isInvariantString(p, length)) {
   1111         fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p);
   1112         exit(U_INVALID_TABLE_FORMAT);
   1113     }
   1114 
   1115     block->top = top;
   1116     return p;
   1117 }
   1118 
   1119 static int
   1120 compareAliases(const void *alias1, const void *alias2) {
   1121     /* Names like IBM850 and ibm-850 need to be sorted together */
   1122     int result = ucnv_compareNames(GET_ALIAS_STR(*(uint16_t*)alias1), GET_ALIAS_STR(*(uint16_t*)alias2));
   1123     if (!result) {
   1124         /* Sort the shortest first */
   1125         return (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias1)) - (int)uprv_strlen(GET_ALIAS_STR(*(uint16_t*)alias2));
   1126     }
   1127     return result;
   1128 }
   1129 
   1130 /*
   1131  * Hey, Emacs, please set the following:
   1132  *
   1133  * Local Variables:
   1134  * indent-tabs-mode: nil
   1135  * End:
   1136  *
   1137  */
   1138 
   1139