Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1998-2010, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File parse.c
     10 *
     11 * Modification History:
     12 *
     13 *   Date          Name          Description
     14 *   05/26/99     stephen       Creation.
     15 *   02/25/00     weiv          Overhaul to write udata
     16 *   5/10/01      Ram           removed ustdio dependency
     17 *   06/10/2001  Dominic Ludlam <dom (at) recoil.org> Rewritten
     18 *******************************************************************************
     19 */
     20 
     21 #include "ucol_imp.h"
     22 #include "parse.h"
     23 #include "errmsg.h"
     24 #include "uhash.h"
     25 #include "cmemory.h"
     26 #include "cstring.h"
     27 #include "uinvchar.h"
     28 #include "read.h"
     29 #include "ustr.h"
     30 #include "reslist.h"
     31 #include "rbt_pars.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/putil.h"
     34 #include <stdio.h>
     35 
     36 /* Number of tokens to read ahead of the current stream position */
     37 #define MAX_LOOKAHEAD   3
     38 
     39 #define CR               0x000D
     40 #define LF               0x000A
     41 #define SPACE            0x0020
     42 #define TAB              0x0009
     43 #define ESCAPE           0x005C
     44 #define HASH             0x0023
     45 #define QUOTE            0x0027
     46 #define ZERO             0x0030
     47 #define STARTCOMMAND     0x005B
     48 #define ENDCOMMAND       0x005D
     49 #define OPENSQBRACKET    0x005B
     50 #define CLOSESQBRACKET   0x005D
     51 
     52 typedef struct SResource *
     53 ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
     54 
     55 struct Lookahead
     56 {
     57      enum   ETokenType type;
     58      struct UString    value;
     59      struct UString    comment;
     60      uint32_t          line;
     61 };
     62 
     63 /* keep in sync with token defines in read.h */
     64 const char *tokenNames[TOK_TOKEN_COUNT] =
     65 {
     66      "string",             /* A string token, such as "MonthNames" */
     67      "'{'",                 /* An opening brace character */
     68      "'}'",                 /* A closing brace character */
     69      "','",                 /* A comma */
     70      "':'",                 /* A colon */
     71 
     72      "<end of file>",     /* End of the file has been reached successfully */
     73      "<end of line>"
     74 };
     75 
     76 /* Just to store "TRUE" */
     77 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
     78 
     79 static struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
     80 static uint32_t          lookaheadPosition;
     81 static UCHARBUF         *buffer;
     82 
     83 static struct SRBRoot *bundle;
     84 static const char     *inputdir;
     85 static uint32_t        inputdirLength;
     86 static const char     *outputdir;
     87 static uint32_t        outputdirLength;
     88 
     89 static UBool gMakeBinaryCollation = TRUE;
     90 static UBool gOmitCollationRules  = FALSE;
     91 
     92 static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
     93 
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   slot that held the previously returned token is refilled with the next token
   from the reader by calling getNextToken.
   The token values are stored in the slots, which means that a token value does
   not survive a later call to getToken, i.e.

   struct UString *value;

   getToken(&value, NULL, NULL, status);
   getToken(NULL,   NULL, NULL, status);       bad - value is now a different string
*/
    107 static void
    108 initLookahead(UCHARBUF *buf, UErrorCode *status)
    109 {
    110     static uint32_t initTypeStrings = 0;
    111     uint32_t i;
    112 
    113     if (!initTypeStrings)
    114     {
    115         initTypeStrings = 1;
    116     }
    117 
    118     lookaheadPosition   = 0;
    119     buffer              = buf;
    120 
    121     resetLineNumber();
    122 
    123     for (i = 0; i < MAX_LOOKAHEAD; i++)
    124     {
    125         lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
    126         if (U_FAILURE(*status))
    127         {
    128             return;
    129         }
    130     }
    131 
    132     *status = U_ZERO_ERROR;
    133 }
    134 
    135 static void
    136 cleanupLookahead()
    137 {
    138     uint32_t i;
    139     for (i = 0; i < MAX_LOOKAHEAD; i++)
    140     {
    141         ustr_deinit(&lookahead[i].value);
    142         ustr_deinit(&lookahead[i].comment);
    143     }
    144 
    145 }
    146 
    147 static enum ETokenType
    148 getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
    149 {
    150     enum ETokenType result;
    151     uint32_t          i;
    152 
    153     result = lookahead[lookaheadPosition].type;
    154 
    155     if (tokenValue != NULL)
    156     {
    157         *tokenValue = &lookahead[lookaheadPosition].value;
    158     }
    159 
    160     if (linenumber != NULL)
    161     {
    162         *linenumber = lookahead[lookaheadPosition].line;
    163     }
    164 
    165     if (comment != NULL)
    166     {
    167         ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
    168     }
    169 
    170     i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
    171     lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
    172     ustr_setlen(&lookahead[i].comment, 0, status);
    173     ustr_setlen(&lookahead[i].value, 0, status);
    174     lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
    175 
    176     /* printf("getToken, returning %s\n", tokenNames[result]); */
    177 
    178     return result;
    179 }
    180 
    181 static enum ETokenType
    182 peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
    183 {
    184     uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
    185 
    186     if (U_FAILURE(*status))
    187     {
    188         return TOK_ERROR;
    189     }
    190 
    191     if (lookaheadCount >= MAX_LOOKAHEAD)
    192     {
    193         *status = U_INTERNAL_PROGRAM_ERROR;
    194         return TOK_ERROR;
    195     }
    196 
    197     if (tokenValue != NULL)
    198     {
    199         *tokenValue = &lookahead[i].value;
    200     }
    201 
    202     if (linenumber != NULL)
    203     {
    204         *linenumber = lookahead[i].line;
    205     }
    206 
    207     if(comment != NULL){
    208         ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
    209     }
    210 
    211     return lookahead[i].type;
    212 }
    213 
    214 static void
    215 expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
    216 {
    217     uint32_t        line;
    218 
    219     enum ETokenType token = getToken(tokenValue, comment, &line, status);
    220 
    221     if (linenumber != NULL)
    222     {
    223         *linenumber = line;
    224     }
    225 
    226     if (U_FAILURE(*status))
    227     {
    228         return;
    229     }
    230 
    231     if (token != expectedToken)
    232     {
    233         *status = U_INVALID_FORMAT_ERROR;
    234         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
    235     }
    236     else
    237     {
    238         *status = U_ZERO_ERROR;
    239     }
    240 }
    241 
    242 static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
    243 {
    244     struct UString *tokenValue;
    245     char           *result;
    246     uint32_t        count;
    247 
    248     expect(TOK_STRING, &tokenValue, comment, line, status);
    249 
    250     if (U_FAILURE(*status))
    251     {
    252         return NULL;
    253     }
    254 
    255     count = u_strlen(tokenValue->fChars);
    256     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
    257         *status = U_INVALID_FORMAT_ERROR;
    258         error(*line, "invariant characters required for table keys, binary data, etc.");
    259         return NULL;
    260     }
    261 
    262     result = uprv_malloc(count+1);
    263 
    264     if (result == NULL)
    265     {
    266         *status = U_MEMORY_ALLOCATION_ERROR;
    267         return NULL;
    268     }
    269 
    270     u_UCharsToChars(tokenValue->fChars, result, count+1);
    271     return result;
    272 }
    273 
    274 static struct SResource *
    275 parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    276 {
    277     struct SResource *result = NULL;
    278     struct UString   *tokenValue;
    279     FileStream       *file          = NULL;
    280     char              filename[256] = { '\0' };
    281     char              cs[128]       = { '\0' };
    282     uint32_t          line;
    283     int               len=0;
    284     UBool quoted = FALSE;
    285     UCHARBUF *ucbuf=NULL;
    286     UChar32   c     = 0;
    287     const char* cp  = NULL;
    288     UChar *pTarget     = NULL;
    289     UChar *target      = NULL;
    290     UChar *targetLimit = NULL;
    291     int32_t size = 0;
    292 
    293     expect(TOK_STRING, &tokenValue, NULL, &line, status);
    294 
    295     if(isVerbose()){
    296         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    297     }
    298 
    299     if (U_FAILURE(*status))
    300     {
    301         return NULL;
    302     }
    303     /* make the filename including the directory */
    304     if (inputdir != NULL)
    305     {
    306         uprv_strcat(filename, inputdir);
    307 
    308         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
    309         {
    310             uprv_strcat(filename, U_FILE_SEP_STRING);
    311         }
    312     }
    313 
    314     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    315 
    316     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    317 
    318     if (U_FAILURE(*status))
    319     {
    320         return NULL;
    321     }
    322     uprv_strcat(filename, cs);
    323 
    324     if(gOmitCollationRules) {
    325         return res_none();
    326     }
    327 
    328     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    329 
    330     if (U_FAILURE(*status)) {
    331         error(line, "An error occured while opening the input file %s\n", filename);
    332         return NULL;
    333     }
    334 
    335     /* We allocate more space than actually required
    336     * since the actual size needed for storing UChars
    337     * is not known in UTF-8 byte stream
    338     */
    339     size        = ucbuf_size(ucbuf) + 1;
    340     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
    341     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    342     target      = pTarget;
    343     targetLimit = pTarget+size;
    344 
    345     /* read the rules into the buffer */
    346     while (target < targetLimit)
    347     {
    348         c = ucbuf_getc(ucbuf, status);
    349         if(c == QUOTE) {
    350             quoted = (UBool)!quoted;
    351         }
    352         /* weiv (06/26/2002): adding the following:
    353          * - preserving spaces in commands [...]
    354          * - # comments until the end of line
    355          */
    356         if (c == STARTCOMMAND && !quoted)
    357         {
    358             /* preserve commands
    359              * closing bracket will be handled by the
    360              * append at the end of the loop
    361              */
    362             while(c != ENDCOMMAND) {
    363                 U_APPEND_CHAR32(c, target,len);
    364                 c = ucbuf_getc(ucbuf, status);
    365             }
    366         }
    367         else if (c == HASH && !quoted) {
    368             /* skip comments */
    369             while(c != CR && c != LF) {
    370                 c = ucbuf_getc(ucbuf, status);
    371             }
    372             continue;
    373         }
    374         else if (c == ESCAPE)
    375         {
    376             c = unescape(ucbuf, status);
    377 
    378             if (c == U_ERR)
    379             {
    380                 uprv_free(pTarget);
    381                 T_FileStream_close(file);
    382                 return NULL;
    383             }
    384         }
    385         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
    386         {
    387             /* ignore spaces carriage returns
    388             * and line feed unless in the form \uXXXX
    389             */
    390             continue;
    391         }
    392 
    393         /* Append UChar * after dissembling if c > 0xffff*/
    394         if (c != U_EOF)
    395         {
    396             U_APPEND_CHAR32(c, target,len);
    397         }
    398         else
    399         {
    400             break;
    401         }
    402     }
    403 
    404     /* terminate the string */
    405     if(target < targetLimit){
    406         *target = 0x0000;
    407     }
    408 
    409     result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
    410 
    411 
    412     ucbuf_close(ucbuf);
    413     uprv_free(pTarget);
    414     T_FileStream_close(file);
    415 
    416     return result;
    417 }
    418 
    419 static struct SResource *
    420 parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    421 {
    422     struct SResource *result = NULL;
    423     struct UString   *tokenValue;
    424     FileStream       *file          = NULL;
    425     char              filename[256] = { '\0' };
    426     char              cs[128]       = { '\0' };
    427     uint32_t          line;
    428     UCHARBUF *ucbuf=NULL;
    429     const char* cp  = NULL;
    430     UChar *pTarget     = NULL;
    431     const UChar *pSource     = NULL;
    432     int32_t size = 0;
    433 
    434     expect(TOK_STRING, &tokenValue, NULL, &line, status);
    435 
    436     if(isVerbose()){
    437         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    438     }
    439 
    440     if (U_FAILURE(*status))
    441     {
    442         return NULL;
    443     }
    444     /* make the filename including the directory */
    445     if (inputdir != NULL)
    446     {
    447         uprv_strcat(filename, inputdir);
    448 
    449         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
    450         {
    451             uprv_strcat(filename, U_FILE_SEP_STRING);
    452         }
    453     }
    454 
    455     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    456 
    457     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    458 
    459     if (U_FAILURE(*status))
    460     {
    461         return NULL;
    462     }
    463     uprv_strcat(filename, cs);
    464 
    465 
    466     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    467 
    468     if (U_FAILURE(*status)) {
    469         error(line, "An error occured while opening the input file %s\n", filename);
    470         return NULL;
    471     }
    472 
    473     /* We allocate more space than actually required
    474     * since the actual size needed for storing UChars
    475     * is not known in UTF-8 byte stream
    476     */
    477     pSource = ucbuf_getBuffer(ucbuf, &size, status);
    478     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
    479     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    480 
    481 #if !UCONFIG_NO_TRANSLITERATION
    482     size = utrans_stripRules(pSource, size, pTarget, status);
    483 #else
    484     size = 0;
    485     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
    486 #endif
    487     result = string_open(bundle, tag, pTarget, size, NULL, status);
    488 
    489     ucbuf_close(ucbuf);
    490     uprv_free(pTarget);
    491     T_FileStream_close(file);
    492 
    493     return result;
    494 }
    495 static struct SResource* dependencyArray = NULL;
    496 
    497 static struct SResource *
    498 parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    499 {
    500     struct SResource *result = NULL;
    501     struct SResource *elem = NULL;
    502     struct UString   *tokenValue;
    503     uint32_t          line;
    504     char              filename[256] = { '\0' };
    505     char              cs[128]       = { '\0' };
    506 
    507     expect(TOK_STRING, &tokenValue, NULL, &line, status);
    508 
    509     if(isVerbose()){
    510         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    511     }
    512 
    513     if (U_FAILURE(*status))
    514     {
    515         return NULL;
    516     }
    517     /* make the filename including the directory */
    518     if (outputdir != NULL)
    519     {
    520         uprv_strcat(filename, outputdir);
    521 
    522         if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
    523         {
    524             uprv_strcat(filename, U_FILE_SEP_STRING);
    525         }
    526     }
    527 
    528     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    529 
    530     if (U_FAILURE(*status))
    531     {
    532         return NULL;
    533     }
    534     uprv_strcat(filename, cs);
    535     if(!T_FileStream_file_exists(filename)){
    536         if(isStrict()){
    537             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    538         }else{
    539             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    540         }
    541     }
    542     if(dependencyArray==NULL){
    543         dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
    544     }
    545     if(tag!=NULL){
    546         result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    547     }
    548     elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
    549 
    550     array_add(dependencyArray, elem, status);
    551 
    552     if (U_FAILURE(*status))
    553     {
    554         return NULL;
    555     }
    556     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    557     return result;
    558 }
    559 static struct SResource *
    560 parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    561 {
    562     struct UString   *tokenValue;
    563     struct SResource *result = NULL;
    564 
    565 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
    566     {
    567         return parseUCARules(tag, startline, status);
    568     }*/
    569     if(isVerbose()){
    570         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    571     }
    572     expect(TOK_STRING, &tokenValue, NULL, NULL, status);
    573 
    574     if (U_SUCCESS(*status))
    575     {
    576         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    577         doesn't survive expect either) */
    578 
    579         result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    580         if(U_SUCCESS(*status) && result) {
    581             expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    582 
    583             if (U_FAILURE(*status))
    584             {
    585                 res_close(result);
    586                 return NULL;
    587             }
    588         }
    589     }
    590 
    591     return result;
    592 }
    593 
    594 static struct SResource *
    595 parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
    596 {
    597     struct UString   *tokenValue;
    598     struct SResource *result  = NULL;
    599 
    600     expect(TOK_STRING, &tokenValue, NULL, NULL, status);
    601 
    602     if(isVerbose()){
    603         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    604     }
    605 
    606     if (U_SUCCESS(*status))
    607     {
    608         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    609         doesn't survive expect either) */
    610 
    611         result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    612 
    613         expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    614 
    615         if (U_FAILURE(*status))
    616         {
    617             res_close(result);
    618             return NULL;
    619         }
    620     }
    621 
    622     return result;
    623 }
    624 
    625 static struct SResource *
    626 addCollation(struct SResource  *result, uint32_t startline, UErrorCode *status)
    627 {
    628     struct SResource  *member = NULL;
    629     struct UString    *tokenValue;
    630     struct UString     comment;
    631     enum   ETokenType  token;
    632     char               subtag[1024];
    633     UVersionInfo       version;
    634     UBool              override = FALSE;
    635     uint32_t           line;
    636     /* '{' . (name resource)* '}' */
    637     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
    638 
    639     for (;;)
    640     {
    641         ustr_init(&comment);
    642         token = getToken(&tokenValue, &comment, &line, status);
    643 
    644         if (token == TOK_CLOSE_BRACE)
    645         {
    646             return result;
    647         }
    648 
    649         if (token != TOK_STRING)
    650         {
    651             res_close(result);
    652             *status = U_INVALID_FORMAT_ERROR;
    653 
    654             if (token == TOK_EOF)
    655             {
    656                 error(startline, "unterminated table");
    657             }
    658             else
    659             {
    660                 error(line, "Unexpected token %s", tokenNames[token]);
    661             }
    662 
    663             return NULL;
    664         }
    665 
    666         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    667 
    668         if (U_FAILURE(*status))
    669         {
    670             res_close(result);
    671             return NULL;
    672         }
    673 
    674         member = parseResource(subtag, NULL, status);
    675 
    676         if (U_FAILURE(*status))
    677         {
    678             res_close(result);
    679             return NULL;
    680         }
    681 
    682         if (uprv_strcmp(subtag, "Version") == 0)
    683         {
    684             char     ver[40];
    685             int32_t length = member->u.fString.fLength;
    686 
    687             if (length >= (int32_t) sizeof(ver))
    688             {
    689                 length = (int32_t) sizeof(ver) - 1;
    690             }
    691 
    692             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
    693             u_versionFromString(version, ver);
    694 
    695             table_add(result, member, line, status);
    696 
    697         }
    698         else if (uprv_strcmp(subtag, "Override") == 0)
    699         {
    700             override = FALSE;
    701 
    702             if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
    703             {
    704                 override = TRUE;
    705             }
    706             table_add(result, member, line, status);
    707 
    708         }
    709         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
    710         {
    711             /* discard duplicate %%CollationBin if any*/
    712         }
    713         else if (uprv_strcmp(subtag, "Sequence") == 0)
    714         {
    715 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
    716             warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
    717 #else
    718             if(gMakeBinaryCollation) {
    719                 UErrorCode intStatus = U_ZERO_ERROR;
    720 
    721                 /* do the collation elements */
    722                 int32_t     len   = 0;
    723                 uint8_t   *data  = NULL;
    724                 UCollator *coll  = NULL;
    725                 UParseError parseError;
    726 
    727                 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
    728                     UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
    729 
    730                 if (U_SUCCESS(intStatus) && coll != NULL)
    731                 {
    732                     len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
    733                     data = (uint8_t *)uprv_malloc(len);
    734                     intStatus = U_ZERO_ERROR;
    735                     len = ucol_cloneBinary(coll, data, len, &intStatus);
    736                     /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
    737 
    738                     /* tailoring rules version */
    739                     /* This is wrong! */
    740                     /*coll->dataInfo.dataVersion[1] = version[0];*/
    741                     /* Copy tailoring version. Builder version already */
    742                     /* set in ucol_openRules */
    743                     ((UCATableHeader *)data)->version[1] = version[0];
    744                     ((UCATableHeader *)data)->version[2] = version[1];
    745                     ((UCATableHeader *)data)->version[3] = version[2];
    746 
    747                     if (U_SUCCESS(intStatus) && data != NULL)
    748                     {
    749                         struct SResource *collationBin = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
    750                         table_add(result, collationBin, line, status);
    751                         uprv_free(data);
    752                     }
    753                     else
    754                     {
    755                         warning(line, "could not obtain rules from collator");
    756                         if(isStrict()){
    757                             *status = U_INVALID_FORMAT_ERROR;
    758                             return NULL;
    759                         }
    760                     }
    761 
    762                     ucol_close(coll);
    763                 }
    764                 else
    765                 {
    766                     if(intStatus == U_FILE_ACCESS_ERROR) {
    767                       error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
    768                       *status = intStatus;
    769                       return NULL;
    770                     }
    771                     warning(line, "%%Collation could not be constructed from CollationElements - check context!");
    772                     if(isStrict()){
    773                         *status = intStatus;
    774                         return NULL;
    775                     }
    776                 }
    777             } else {
    778                 if(isVerbose()) {
    779                     printf("Not building Collation binary\n");
    780                 }
    781             }
    782 #endif
    783             /* in order to achieve smaller data files, we can direct genrb */
    784             /* to omit collation rules */
    785             if(gOmitCollationRules) {
    786                 bundle_closeString(bundle, member);
    787             } else {
    788                 table_add(result, member, line, status);
    789             }
    790         }
    791 
    792         /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
    793 
    794         /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
    795 
    796         if (U_FAILURE(*status))
    797         {
    798             res_close(result);
    799             return NULL;
    800         }
    801     }
    802 
    803     /* not reached */
    804     /* A compiler warning will appear if all paths don't contain a return statement. */
    805 /*    *status = U_INTERNAL_PROGRAM_ERROR;
    806     return NULL;*/
    807 }
    808 
    809 static struct SResource *
    810 parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
    811 {
    812     struct SResource  *result = NULL;
    813     struct SResource  *member = NULL;
    814     struct SResource  *collationRes = NULL;
    815     struct UString    *tokenValue;
    816     struct UString     comment;
    817     enum   ETokenType  token;
    818     char               subtag[1024], typeKeyword[1024];
    819     uint32_t           line;
    820 
    821     result = table_open(bundle, tag, NULL, status);
    822 
    823     if (result == NULL || U_FAILURE(*status))
    824     {
    825         return NULL;
    826     }
    827     if(isVerbose()){
    828         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    829     }
    830     if(!newCollation) {
    831         return addCollation(result, startline, status);
    832     }
    833     else {
    834         for(;;) {
    835             ustr_init(&comment);
    836             token = getToken(&tokenValue, &comment, &line, status);
    837 
    838             if (token == TOK_CLOSE_BRACE)
    839             {
    840                 return result;
    841             }
    842 
    843             if (token != TOK_STRING)
    844             {
    845                 res_close(result);
    846                 *status = U_INVALID_FORMAT_ERROR;
    847 
    848                 if (token == TOK_EOF)
    849                 {
    850                     error(startline, "unterminated table");
    851                 }
    852                 else
    853                 {
    854                     error(line, "Unexpected token %s", tokenNames[token]);
    855                 }
    856 
    857                 return NULL;
    858             }
    859 
    860             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    861 
    862             if (U_FAILURE(*status))
    863             {
    864                 res_close(result);
    865                 return NULL;
    866             }
    867 
    868             if (uprv_strcmp(subtag, "default") == 0)
    869             {
    870                 member = parseResource(subtag, NULL, status);
    871 
    872                 if (U_FAILURE(*status))
    873                 {
    874                     res_close(result);
    875                     return NULL;
    876                 }
    877 
    878                 table_add(result, member, line, status);
    879             }
    880             else
    881             {
    882                 token = peekToken(0, &tokenValue, &line, &comment, status);
    883                 /* this probably needs to be refactored or recursively use the parser */
    884                 /* first we assume that our collation table won't have the explicit type */
    885                 /* then, we cannot handle aliases */
    886                 if(token == TOK_OPEN_BRACE) {
    887                     token = getToken(&tokenValue, &comment, &line, status);
    888                     collationRes = table_open(bundle, subtag, NULL, status);
    889                     table_add(result, addCollation(collationRes, startline, status), startline, status);
    890                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
    891                     /* we could have a table too */
    892                     token = peekToken(1, &tokenValue, &line, &comment, status);
    893                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
    894                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
    895                         member = parseResource(subtag, NULL, status);
    896 
    897                         if (U_FAILURE(*status))
    898                         {
    899                             res_close(result);
    900                             return NULL;
    901                         }
    902 
    903                         table_add(result, member, line, status);
    904                     } else {
    905                         res_close(result);
    906                         *status = U_INVALID_FORMAT_ERROR;
    907                         return NULL;
    908                     }
    909                 } else {
    910                     res_close(result);
    911                     *status = U_INVALID_FORMAT_ERROR;
    912                     return NULL;
    913                 }
    914             }
    915 
    916             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
    917 
    918             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
    919 
    920             if (U_FAILURE(*status))
    921             {
    922                 res_close(result);
    923                 return NULL;
    924             }
    925         }
    926     }
    927 }
    928 
    929 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
    930    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
    931 static struct SResource *
    932 realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
    933 {
    934     struct SResource  *member = NULL;
    935     struct UString    *tokenValue=NULL;
    936     struct UString    comment;
    937     enum   ETokenType token;
    938     char              subtag[1024];
    939     uint32_t          line;
    940     UBool             readToken = FALSE;
    941 
    942     /* '{' . (name resource)* '}' */
    943     if(isVerbose()){
    944         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
    945     }
    946     for (;;)
    947     {
    948         ustr_init(&comment);
    949         token = getToken(&tokenValue, &comment, &line, status);
    950 
    951         if (token == TOK_CLOSE_BRACE)
    952         {
    953             if (!readToken) {
    954                 warning(startline, "Encountered empty table");
    955             }
    956             return table;
    957         }
    958 
    959         if (token != TOK_STRING)
    960         {
    961             *status = U_INVALID_FORMAT_ERROR;
    962 
    963             if (token == TOK_EOF)
    964             {
    965                 error(startline, "unterminated table");
    966             }
    967             else
    968             {
    969                 error(line, "unexpected token %s", tokenNames[token]);
    970             }
    971 
    972             return NULL;
    973         }
    974 
    975         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
    976             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    977         } else {
    978             *status = U_INVALID_FORMAT_ERROR;
    979             error(line, "invariant characters required for table keys");
    980             return NULL;
    981         }
    982 
    983         if (U_FAILURE(*status))
    984         {
    985             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
    986             return NULL;
    987         }
    988 
    989         member = parseResource(subtag, &comment, status);
    990 
    991         if (member == NULL || U_FAILURE(*status))
    992         {
    993             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
    994             return NULL;
    995         }
    996 
    997         table_add(table, member, line, status);
    998 
    999         if (U_FAILURE(*status))
   1000         {
   1001             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
   1002             return NULL;
   1003         }
   1004         readToken = TRUE;
   1005         ustr_deinit(&comment);
   1006     }
   1007 
   1008     /* not reached */
   1009     /* A compiler warning will appear if all paths don't contain a return statement. */
   1010 /*     *status = U_INTERNAL_PROGRAM_ERROR;
   1011      return NULL;*/
   1012 }
   1013 
   1014 static struct SResource *
   1015 parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1016 {
   1017     struct SResource *result;
   1018 
   1019     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
   1020     {
   1021         return parseCollationElements(tag, startline, FALSE, status);
   1022     }
   1023     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
   1024     {
   1025         return parseCollationElements(tag, startline, TRUE, status);
   1026     }
   1027     if(isVerbose()){
   1028         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1029     }
   1030 
   1031     result = table_open(bundle, tag, comment, status);
   1032 
   1033     if (result == NULL || U_FAILURE(*status))
   1034     {
   1035         return NULL;
   1036     }
   1037 
   1038     return realParseTable(result, tag, startline,  status);
   1039 }
   1040 
   1041 static struct SResource *
   1042 parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1043 {
   1044     struct SResource  *result = NULL;
   1045     struct SResource  *member = NULL;
   1046     struct UString    *tokenValue;
   1047     struct UString    memberComments;
   1048     enum   ETokenType token;
   1049     UBool             readToken = FALSE;
   1050 
   1051     result = array_open(bundle, tag, comment, status);
   1052 
   1053     if (result == NULL || U_FAILURE(*status))
   1054     {
   1055         return NULL;
   1056     }
   1057     if(isVerbose()){
   1058         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1059     }
   1060 
   1061     ustr_init(&memberComments);
   1062 
   1063     /* '{' . resource [','] '}' */
   1064     for (;;)
   1065     {
   1066         /* reset length */
   1067         ustr_setlen(&memberComments, 0, status);
   1068 
   1069         /* check for end of array, but don't consume next token unless it really is the end */
   1070         token = peekToken(0, &tokenValue, NULL, &memberComments, status);
   1071 
   1072 
   1073         if (token == TOK_CLOSE_BRACE)
   1074         {
   1075             getToken(NULL, NULL, NULL, status);
   1076             if (!readToken) {
   1077                 warning(startline, "Encountered empty array");
   1078             }
   1079             break;
   1080         }
   1081 
   1082         if (token == TOK_EOF)
   1083         {
   1084             res_close(result);
   1085             *status = U_INVALID_FORMAT_ERROR;
   1086             error(startline, "unterminated array");
   1087             return NULL;
   1088         }
   1089 
   1090         /* string arrays are a special case */
   1091         if (token == TOK_STRING)
   1092         {
   1093             getToken(&tokenValue, &memberComments, NULL, status);
   1094             member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
   1095         }
   1096         else
   1097         {
   1098             member = parseResource(NULL, &memberComments, status);
   1099         }
   1100 
   1101         if (member == NULL || U_FAILURE(*status))
   1102         {
   1103             res_close(result);
   1104             return NULL;
   1105         }
   1106 
   1107         array_add(result, member, status);
   1108 
   1109         if (U_FAILURE(*status))
   1110         {
   1111             res_close(result);
   1112             return NULL;
   1113         }
   1114 
   1115         /* eat optional comma if present */
   1116         token = peekToken(0, NULL, NULL, NULL, status);
   1117 
   1118         if (token == TOK_COMMA)
   1119         {
   1120             getToken(NULL, NULL, NULL, status);
   1121         }
   1122 
   1123         if (U_FAILURE(*status))
   1124         {
   1125             res_close(result);
   1126             return NULL;
   1127         }
   1128         readToken = TRUE;
   1129     }
   1130 
   1131     ustr_deinit(&memberComments);
   1132     return result;
   1133 }
   1134 
   1135 static struct SResource *
   1136 parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1137 {
   1138     struct SResource  *result = NULL;
   1139     enum   ETokenType  token;
   1140     char              *string;
   1141     int32_t            value;
   1142     UBool              readToken = FALSE;
   1143     char              *stopstring;
   1144     uint32_t           len;
   1145     struct UString     memberComments;
   1146 
   1147     result = intvector_open(bundle, tag, comment, status);
   1148 
   1149     if (result == NULL || U_FAILURE(*status))
   1150     {
   1151         return NULL;
   1152     }
   1153 
   1154     if(isVerbose()){
   1155         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1156     }
   1157     ustr_init(&memberComments);
   1158     /* '{' . string [','] '}' */
   1159     for (;;)
   1160     {
   1161         ustr_setlen(&memberComments, 0, status);
   1162 
   1163         /* check for end of array, but don't consume next token unless it really is the end */
   1164         token = peekToken(0, NULL, NULL,&memberComments, status);
   1165 
   1166         if (token == TOK_CLOSE_BRACE)
   1167         {
   1168             /* it's the end, consume the close brace */
   1169             getToken(NULL, NULL, NULL, status);
   1170             if (!readToken) {
   1171                 warning(startline, "Encountered empty int vector");
   1172             }
   1173             ustr_deinit(&memberComments);
   1174             return result;
   1175         }
   1176 
   1177         string = getInvariantString(NULL, NULL, status);
   1178 
   1179         if (U_FAILURE(*status))
   1180         {
   1181             res_close(result);
   1182             return NULL;
   1183         }
   1184 
   1185         /* For handling illegal char in the Intvector */
   1186         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
   1187         len=(uint32_t)(stopstring-string);
   1188 
   1189         if(len==uprv_strlen(string))
   1190         {
   1191             intvector_add(result, value, status);
   1192             uprv_free(string);
   1193             token = peekToken(0, NULL, NULL, NULL, status);
   1194         }
   1195         else
   1196         {
   1197             uprv_free(string);
   1198             *status=U_INVALID_CHAR_FOUND;
   1199         }
   1200 
   1201         if (U_FAILURE(*status))
   1202         {
   1203             res_close(result);
   1204             return NULL;
   1205         }
   1206 
   1207         /* the comma is optional (even though it is required to prevent the reader from concatenating
   1208         consecutive entries) so that a missing comma on the last entry isn't an error */
   1209         if (token == TOK_COMMA)
   1210         {
   1211             getToken(NULL, NULL, NULL, status);
   1212         }
   1213         readToken = TRUE;
   1214     }
   1215 
   1216     /* not reached */
   1217     /* A compiler warning will appear if all paths don't contain a return statement. */
   1218 /*    intvector_close(result, status);
   1219     *status = U_INTERNAL_PROGRAM_ERROR;
   1220     return NULL;*/
   1221 }
   1222 
   1223 static struct SResource *
   1224 parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1225 {
   1226     struct SResource *result = NULL;
   1227     uint8_t          *value;
   1228     char             *string;
   1229     char              toConv[3] = {'\0', '\0', '\0'};
   1230     uint32_t          count;
   1231     uint32_t          i;
   1232     uint32_t          line;
   1233     char             *stopstring;
   1234     uint32_t          len;
   1235 
   1236     string = getInvariantString(&line, NULL, status);
   1237 
   1238     if (string == NULL || U_FAILURE(*status))
   1239     {
   1240         return NULL;
   1241     }
   1242 
   1243     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1244 
   1245     if (U_FAILURE(*status))
   1246     {
   1247         uprv_free(string);
   1248         return NULL;
   1249     }
   1250 
   1251     if(isVerbose()){
   1252         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1253     }
   1254 
   1255     count = (uint32_t)uprv_strlen(string);
   1256     if (count > 0){
   1257         if((count % 2)==0){
   1258             value = uprv_malloc(sizeof(uint8_t) * count);
   1259 
   1260             if (value == NULL)
   1261             {
   1262                 uprv_free(string);
   1263                 *status = U_MEMORY_ALLOCATION_ERROR;
   1264                 return NULL;
   1265             }
   1266 
   1267             for (i = 0; i < count; i += 2)
   1268             {
   1269                 toConv[0] = string[i];
   1270                 toConv[1] = string[i + 1];
   1271 
   1272                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
   1273                 len=(uint32_t)(stopstring-toConv);
   1274 
   1275                 if(len!=uprv_strlen(toConv))
   1276                 {
   1277                     uprv_free(string);
   1278                     *status=U_INVALID_CHAR_FOUND;
   1279                     return NULL;
   1280                 }
   1281             }
   1282 
   1283             result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
   1284 
   1285             uprv_free(value);
   1286         }
   1287         else
   1288         {
   1289             *status = U_INVALID_CHAR_FOUND;
   1290             uprv_free(string);
   1291             error(line, "Encountered invalid binary string");
   1292             return NULL;
   1293         }
   1294     }
   1295     else
   1296     {
   1297         result = bin_open(bundle, tag, 0, NULL, "",comment,status);
   1298         warning(startline, "Encountered empty binary tag");
   1299     }
   1300     uprv_free(string);
   1301 
   1302     return result;
   1303 }
   1304 
   1305 static struct SResource *
   1306 parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1307 {
   1308     struct SResource *result = NULL;
   1309     int32_t           value;
   1310     char             *string;
   1311     char             *stopstring;
   1312     uint32_t          len;
   1313 
   1314     string = getInvariantString(NULL, NULL, status);
   1315 
   1316     if (string == NULL || U_FAILURE(*status))
   1317     {
   1318         return NULL;
   1319     }
   1320 
   1321     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1322 
   1323     if (U_FAILURE(*status))
   1324     {
   1325         uprv_free(string);
   1326         return NULL;
   1327     }
   1328 
   1329     if(isVerbose()){
   1330         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1331     }
   1332 
   1333     if (uprv_strlen(string) <= 0)
   1334     {
   1335         warning(startline, "Encountered empty integer. Default value is 0.");
   1336     }
   1337 
   1338     /* Allow integer support for hexdecimal, octal digit and decimal*/
   1339     /* and handle illegal char in the integer*/
   1340     value = uprv_strtoul(string, &stopstring, 0);
   1341     len=(uint32_t)(stopstring-string);
   1342     if(len==uprv_strlen(string))
   1343     {
   1344         result = int_open(bundle, tag, value, comment, status);
   1345     }
   1346     else
   1347     {
   1348         *status=U_INVALID_CHAR_FOUND;
   1349     }
   1350     uprv_free(string);
   1351 
   1352     return result;
   1353 }
   1354 
   1355 static struct SResource *
   1356 parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1357 {
   1358     struct SResource *result;
   1359     FileStream       *file;
   1360     int32_t           len;
   1361     uint8_t          *data;
   1362     char             *filename;
   1363     uint32_t          line;
   1364     char     *fullname = NULL;
   1365     int32_t numRead = 0;
   1366     filename = getInvariantString(&line, NULL, status);
   1367 
   1368     if (U_FAILURE(*status))
   1369     {
   1370         return NULL;
   1371     }
   1372 
   1373     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1374 
   1375     if (U_FAILURE(*status))
   1376     {
   1377         uprv_free(filename);
   1378         return NULL;
   1379     }
   1380 
   1381     if(isVerbose()){
   1382         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1383     }
   1384 
   1385     /* Open the input file for reading */
   1386     if (inputdir == NULL)
   1387     {
   1388 #if 1
   1389         /*
   1390          * Always save file file name, even if there's
   1391          * no input directory specified. MIGHT BREAK SOMETHING
   1392          */
   1393         int32_t filenameLength = uprv_strlen(filename);
   1394 
   1395         fullname = (char *) uprv_malloc(filenameLength + 1);
   1396         uprv_strcpy(fullname, filename);
   1397 #endif
   1398 
   1399         file = T_FileStream_open(filename, "rb");
   1400     }
   1401     else
   1402     {
   1403 
   1404         int32_t  count     = (int32_t)uprv_strlen(filename);
   1405 
   1406         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
   1407         {
   1408             fullname = (char *) uprv_malloc(inputdirLength + count + 2);
   1409 
   1410             /* test for NULL */
   1411             if(fullname == NULL)
   1412             {
   1413                 *status = U_MEMORY_ALLOCATION_ERROR;
   1414                 return NULL;
   1415             }
   1416 
   1417             uprv_strcpy(fullname, inputdir);
   1418 
   1419             fullname[inputdirLength]      = U_FILE_SEP_CHAR;
   1420             fullname[inputdirLength + 1] = '\0';
   1421 
   1422             uprv_strcat(fullname, filename);
   1423         }
   1424         else
   1425         {
   1426             fullname = (char *) uprv_malloc(inputdirLength + count + 1);
   1427 
   1428             /* test for NULL */
   1429             if(fullname == NULL)
   1430             {
   1431                 *status = U_MEMORY_ALLOCATION_ERROR;
   1432                 return NULL;
   1433             }
   1434 
   1435             uprv_strcpy(fullname, inputdir);
   1436             uprv_strcat(fullname, filename);
   1437         }
   1438 
   1439         file = T_FileStream_open(fullname, "rb");
   1440 
   1441     }
   1442 
   1443     if (file == NULL)
   1444     {
   1445         error(line, "couldn't open input file %s", filename);
   1446         *status = U_FILE_ACCESS_ERROR;
   1447         return NULL;
   1448     }
   1449 
   1450     len  = T_FileStream_size(file);
   1451     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
   1452     /* test for NULL */
   1453     if(data == NULL)
   1454     {
   1455         *status = U_MEMORY_ALLOCATION_ERROR;
   1456         T_FileStream_close (file);
   1457         return NULL;
   1458     }
   1459 
   1460     numRead = T_FileStream_read  (file, data, len);
   1461     T_FileStream_close (file);
   1462 
   1463     result = bin_open(bundle, tag, len, data, fullname, comment, status);
   1464 
   1465     uprv_free(data);
   1466     uprv_free(filename);
   1467     uprv_free(fullname);
   1468 
   1469     return result;
   1470 }
   1471 
   1472 static struct SResource *
   1473 parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1474 {
   1475     struct SResource *result;
   1476     int32_t           len=0;
   1477     char             *filename;
   1478     uint32_t          line;
   1479     UChar *pTarget     = NULL;
   1480 
   1481     UCHARBUF *ucbuf;
   1482     char     *fullname = NULL;
   1483     int32_t  count     = 0;
   1484     const char* cp = NULL;
   1485     const UChar* uBuffer = NULL;
   1486 
   1487     filename = getInvariantString(&line, NULL, status);
   1488     count     = (int32_t)uprv_strlen(filename);
   1489 
   1490     if (U_FAILURE(*status))
   1491     {
   1492         return NULL;
   1493     }
   1494 
   1495     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1496 
   1497     if (U_FAILURE(*status))
   1498     {
   1499         uprv_free(filename);
   1500         return NULL;
   1501     }
   1502 
   1503     if(isVerbose()){
   1504         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1505     }
   1506 
   1507     fullname = (char *) uprv_malloc(inputdirLength + count + 2);
   1508     /* test for NULL */
   1509     if(fullname == NULL)
   1510     {
   1511         *status = U_MEMORY_ALLOCATION_ERROR;
   1512         uprv_free(filename);
   1513         return NULL;
   1514     }
   1515 
   1516     if(inputdir!=NULL){
   1517         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
   1518         {
   1519 
   1520             uprv_strcpy(fullname, inputdir);
   1521 
   1522             fullname[inputdirLength]      = U_FILE_SEP_CHAR;
   1523             fullname[inputdirLength + 1] = '\0';
   1524 
   1525             uprv_strcat(fullname, filename);
   1526         }
   1527         else
   1528         {
   1529             uprv_strcpy(fullname, inputdir);
   1530             uprv_strcat(fullname, filename);
   1531         }
   1532     }else{
   1533         uprv_strcpy(fullname,filename);
   1534     }
   1535 
   1536     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
   1537 
   1538     if (U_FAILURE(*status)) {
   1539         error(line, "couldn't open input file %s\n", filename);
   1540         return NULL;
   1541     }
   1542 
   1543     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
   1544     result = string_open(bundle, tag, uBuffer, len, comment, status);
   1545 
   1546     uprv_free(pTarget);
   1547 
   1548     uprv_free(filename);
   1549     uprv_free(fullname);
   1550 
   1551     return result;
   1552 }
   1553 
   1554 
   1555 
   1556 
   1557 
   1558 U_STRING_DECL(k_type_string,    "string",    6);
   1559 U_STRING_DECL(k_type_binary,    "binary",    6);
   1560 U_STRING_DECL(k_type_bin,       "bin",       3);
   1561 U_STRING_DECL(k_type_table,     "table",     5);
   1562 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
   1563 U_STRING_DECL(k_type_int,       "int",       3);
   1564 U_STRING_DECL(k_type_integer,   "integer",   7);
   1565 U_STRING_DECL(k_type_array,     "array",     5);
   1566 U_STRING_DECL(k_type_alias,     "alias",     5);
   1567 U_STRING_DECL(k_type_intvector, "intvector", 9);
   1568 U_STRING_DECL(k_type_import,    "import",    6);
   1569 U_STRING_DECL(k_type_include,   "include",   7);
   1570 U_STRING_DECL(k_type_reserved,  "reserved",  8);
   1571 
   1572 /* Various non-standard processing plugins that create one or more special resources. */
   1573 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1574 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
   1575 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
   1576 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
   1577 
   1578 typedef enum EResourceType
   1579 {
   1580     RT_UNKNOWN,
   1581     RT_STRING,
   1582     RT_BINARY,
   1583     RT_TABLE,
   1584     RT_TABLE_NO_FALLBACK,
   1585     RT_INTEGER,
   1586     RT_ARRAY,
   1587     RT_ALIAS,
   1588     RT_INTVECTOR,
   1589     RT_IMPORT,
   1590     RT_INCLUDE,
   1591     RT_PROCESS_UCA_RULES,
   1592     RT_PROCESS_COLLATION,
   1593     RT_PROCESS_TRANSLITERATOR,
   1594     RT_PROCESS_DEPENDENCY,
   1595     RT_RESERVED
   1596 } EResourceType;
   1597 
   1598 static struct {
   1599     const char *nameChars;   /* only used for debugging */
   1600     const UChar *nameUChars;
   1601     ParseResourceFunction *parseFunction;
   1602 } gResourceTypes[] = {
   1603     {"Unknown", NULL, NULL},
   1604     {"string", k_type_string, parseString},
   1605     {"binary", k_type_binary, parseBinary},
   1606     {"table", k_type_table, parseTable},
   1607     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
   1608     {"integer", k_type_integer, parseInteger},
   1609     {"array", k_type_array, parseArray},
   1610     {"alias", k_type_alias, parseAlias},
   1611     {"intvector", k_type_intvector, parseIntVector},
   1612     {"import", k_type_import, parseImport},
   1613     {"include", k_type_include, parseInclude},
   1614     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
   1615     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
   1616     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
   1617     {"process(dependency)", k_type_plugin_dependency, parseDependency},
   1618     {"reserved", NULL, NULL}
   1619 };
   1620 
   1621 void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
   1622 {
   1623     uint32_t i;
   1624 
   1625     U_STRING_INIT(k_type_string,    "string",    6);
   1626     U_STRING_INIT(k_type_binary,    "binary",    6);
   1627     U_STRING_INIT(k_type_bin,       "bin",       3);
   1628     U_STRING_INIT(k_type_table,     "table",     5);
   1629     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
   1630     U_STRING_INIT(k_type_int,       "int",       3);
   1631     U_STRING_INIT(k_type_integer,   "integer",   7);
   1632     U_STRING_INIT(k_type_array,     "array",     5);
   1633     U_STRING_INIT(k_type_alias,     "alias",     5);
   1634     U_STRING_INIT(k_type_intvector, "intvector", 9);
   1635     U_STRING_INIT(k_type_import,    "import",    6);
   1636     U_STRING_INIT(k_type_reserved,  "reserved",  8);
   1637     U_STRING_INIT(k_type_include,   "include",   7);
   1638 
   1639     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1640     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
   1641     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
   1642     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
   1643 
   1644     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
   1645     {
   1646         ustr_init(&lookahead[i].value);
   1647     }
   1648     gMakeBinaryCollation = !omitBinaryCollation;
   1649     gOmitCollationRules = omitCollationRules;
   1650 }
   1651 
   1652 static U_INLINE UBool isTable(enum EResourceType type) {
   1653     return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
   1654 }
   1655 
   1656 static enum EResourceType
   1657 parseResourceType(UErrorCode *status)
   1658 {
   1659     struct UString        *tokenValue;
   1660     struct UString        comment;
   1661     enum   EResourceType  result = RT_UNKNOWN;
   1662     uint32_t              line=0;
   1663     ustr_init(&comment);
   1664     expect(TOK_STRING, &tokenValue, &comment, &line, status);
   1665 
   1666     if (U_FAILURE(*status))
   1667     {
   1668         return RT_UNKNOWN;
   1669     }
   1670 
   1671     *status = U_ZERO_ERROR;
   1672 
   1673     /* Search for normal types */
   1674     result=RT_UNKNOWN;
   1675     while (++result < RT_RESERVED) {
   1676         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
   1677             break;
   1678         }
   1679     }
   1680     /* Now search for the aliases */
   1681     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
   1682         result = RT_INTEGER;
   1683     }
   1684     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
   1685         result = RT_BINARY;
   1686     }
   1687     else if (result == RT_RESERVED) {
   1688         char tokenBuffer[1024];
   1689         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
   1690         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
   1691         *status = U_INVALID_FORMAT_ERROR;
   1692         error(line, "unknown resource type '%s'", tokenBuffer);
   1693     }
   1694 
   1695     return result;
   1696 }
   1697 
   1698 /* parse a non-top-level resource */
   1699 static struct SResource *
   1700 parseResource(char *tag, const struct UString *comment, UErrorCode *status)
   1701 {
   1702     enum   ETokenType      token;
   1703     enum   EResourceType  resType = RT_UNKNOWN;
   1704     ParseResourceFunction *parseFunction = NULL;
   1705     struct UString        *tokenValue;
   1706     uint32_t                 startline;
   1707     uint32_t                 line;
   1708 
   1709     token = getToken(&tokenValue, NULL, &startline, status);
   1710 
   1711     if(isVerbose()){
   1712         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1713     }
   1714 
   1715     /* name . [ ':' type ] '{' resource '}' */
   1716     /* This function parses from the colon onwards.  If the colon is present, parse the
   1717     type then try to parse a resource of that type.  If there is no explicit type,
   1718     work it out using the lookahead tokens. */
   1719     switch (token)
   1720     {
   1721     case TOK_EOF:
   1722         *status = U_INVALID_FORMAT_ERROR;
   1723         error(startline, "Unexpected EOF encountered");
   1724         return NULL;
   1725 
   1726     case TOK_ERROR:
   1727         *status = U_INVALID_FORMAT_ERROR;
   1728         return NULL;
   1729 
   1730     case TOK_COLON:
   1731         resType = parseResourceType(status);
   1732         expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
   1733 
   1734         if (U_FAILURE(*status))
   1735         {
   1736             return NULL;
   1737         }
   1738 
   1739         break;
   1740 
   1741     case TOK_OPEN_BRACE:
   1742         break;
   1743 
   1744     default:
   1745         *status = U_INVALID_FORMAT_ERROR;
   1746         error(startline, "syntax error while reading a resource, expected '{' or ':'");
   1747         return NULL;
   1748     }
   1749 
   1750     if (resType == RT_UNKNOWN)
   1751     {
   1752         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
   1753         We could have any of the following:
   1754         { {         => array (nested)
   1755         { :/}       => array
   1756         { string ,  => string array
   1757 
   1758         { string {  => table
   1759 
   1760         { string :/{    => table
   1761         { string }      => string
   1762         */
   1763 
   1764         token = peekToken(0, NULL, &line, NULL,status);
   1765 
   1766         if (U_FAILURE(*status))
   1767         {
   1768             return NULL;
   1769         }
   1770 
   1771         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
   1772         {
   1773             resType = RT_ARRAY;
   1774         }
   1775         else if (token == TOK_STRING)
   1776         {
   1777             token = peekToken(1, NULL, &line, NULL, status);
   1778 
   1779             if (U_FAILURE(*status))
   1780             {
   1781                 return NULL;
   1782             }
   1783 
   1784             switch (token)
   1785             {
   1786             case TOK_COMMA:         resType = RT_ARRAY;  break;
   1787             case TOK_OPEN_BRACE:    resType = RT_TABLE;  break;
   1788             case TOK_CLOSE_BRACE:   resType = RT_STRING; break;
   1789             case TOK_COLON:         resType = RT_TABLE;  break;
   1790             default:
   1791                 *status = U_INVALID_FORMAT_ERROR;
   1792                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
   1793                 return NULL;
   1794             }
   1795         }
   1796         else
   1797         {
   1798             *status = U_INVALID_FORMAT_ERROR;
   1799             error(line, "Unexpected token after '{'");
   1800             return NULL;
   1801         }
   1802 
   1803         /* printf("Type guessed as %s\n", resourceNames[resType]); */
   1804     } else if(resType == RT_TABLE_NO_FALLBACK) {
   1805         *status = U_INVALID_FORMAT_ERROR;
   1806         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
   1807         return NULL;
   1808     }
   1809 
   1810     /* We should now know what we need to parse next, so call the appropriate parser
   1811     function and return. */
   1812     parseFunction = gResourceTypes[resType].parseFunction;
   1813     if (parseFunction != NULL) {
   1814         return parseFunction(tag, startline, comment, status);
   1815     }
   1816     else {
   1817         *status = U_INTERNAL_PROGRAM_ERROR;
   1818         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
   1819     }
   1820 
   1821     return NULL;
   1822 }
   1823 
   1824 /* parse the top-level resource */
   1825 struct SRBRoot *
   1826 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
   1827 {
   1828     struct UString    *tokenValue;
   1829     struct UString    comment;
   1830     uint32_t           line;
   1831     enum EResourceType bundleType;
   1832     enum ETokenType    token;
   1833 
   1834     initLookahead(buf, status);
   1835 
   1836     inputdir       = inputDir;
   1837     inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
   1838     outputdir       = outputDir;
   1839     outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
   1840 
   1841     ustr_init(&comment);
   1842     expect(TOK_STRING, &tokenValue, &comment, NULL, status);
   1843 
   1844     bundle = bundle_open(&comment, FALSE, status);
   1845 
   1846     if (bundle == NULL || U_FAILURE(*status))
   1847     {
   1848         return NULL;
   1849     }
   1850 
   1851 
   1852     bundle_setlocale(bundle, tokenValue->fChars, status);
   1853     /* The following code is to make Empty bundle work no matter with :table specifer or not */
   1854     token = getToken(NULL, NULL, &line, status);
   1855     if(token==TOK_COLON) {
   1856         *status=U_ZERO_ERROR;
   1857         bundleType=parseResourceType(status);
   1858 
   1859         if(isTable(bundleType))
   1860         {
   1861             expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
   1862         }
   1863         else
   1864         {
   1865             *status=U_PARSE_ERROR;
   1866             error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
   1867         }
   1868     }
   1869     else
   1870     {
   1871         /* not a colon */
   1872         if(token==TOK_OPEN_BRACE)
   1873         {
   1874             *status=U_ZERO_ERROR;
   1875             bundleType=RT_TABLE;
   1876         }
   1877         else
   1878         {
   1879             /* neither colon nor open brace */
   1880             *status=U_PARSE_ERROR;
   1881             bundleType=RT_UNKNOWN;
   1882             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
   1883         }
   1884     }
   1885 
   1886     if (U_FAILURE(*status))
   1887     {
   1888         bundle_close(bundle, status);
   1889         return NULL;
   1890     }
   1891 
   1892     if(bundleType==RT_TABLE_NO_FALLBACK) {
   1893         /*
   1894          * Parse a top-level table with the table(nofallback) declaration.
   1895          * This is the same as a regular table, but also sets the
   1896          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
   1897          */
   1898         bundle->noFallback=TRUE;
   1899     }
   1900     /* top-level tables need not handle special table names like "collations" */
   1901     realParseTable(bundle->fRoot, NULL, line, status);
   1902 
   1903     if(dependencyArray!=NULL){
   1904         table_add(bundle->fRoot, dependencyArray, 0, status);
   1905         dependencyArray = NULL;
   1906     }
   1907     if (U_FAILURE(*status))
   1908     {
   1909         bundle_close(bundle, status);
   1910         res_close(dependencyArray);
   1911         return NULL;
   1912     }
   1913 
   1914     if (getToken(NULL, NULL, &line, status) != TOK_EOF)
   1915     {
   1916         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
   1917         if(isStrict()){
   1918             *status = U_INVALID_FORMAT_ERROR;
   1919             return NULL;
   1920         }
   1921     }
   1922 
   1923     cleanupLookahead();
   1924     ustr_deinit(&comment);
   1925     return bundle;
   1926 }
   1927 
   1928