Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1998-2015, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File parse.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date          Name          Description
     14 *   05/26/99     stephen       Creation.
     15 *   02/25/00     weiv          Overhaul to write udata
     16 *   5/10/01      Ram           removed ustdio dependency
     17 *   06/10/2001  Dominic Ludlam <dom (at) recoil.org> Rewritten
     18 *******************************************************************************
     19 */
     20 
     21 // Safer use of UnicodeString.
     22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     23 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     24 #endif
     25 
     26 // Less important, but still a good idea.
     27 #ifndef UNISTR_FROM_STRING_EXPLICIT
     28 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     29 #endif
     30 
     31 #include "parse.h"
     32 #include "errmsg.h"
     33 #include "uhash.h"
     34 #include "cmemory.h"
     35 #include "cstring.h"
     36 #include "uinvchar.h"
     37 #include "read.h"
     38 #include "ustr.h"
     39 #include "reslist.h"
     40 #include "rbt_pars.h"
     41 #include "genrb.h"
     42 #include "unicode/ustring.h"
     43 #include "unicode/uscript.h"
     44 #include "unicode/utf16.h"
     45 #include "unicode/putil.h"
     46 #include "collationbuilder.h"
     47 #include "collationdata.h"
     48 #include "collationdatareader.h"
     49 #include "collationdatawriter.h"
     50 #include "collationfastlatinbuilder.h"
     51 #include "collationinfo.h"
     52 #include "collationroot.h"
     53 #include "collationruleparser.h"
     54 #include "collationtailoring.h"
     55 #include <stdio.h>
     56 
     57 /* Number of tokens to read ahead of the current stream position */
     58 #define MAX_LOOKAHEAD   3
     59 
     60 #define CR               0x000D
     61 #define LF               0x000A
     62 #define SPACE            0x0020
     63 #define TAB              0x0009
     64 #define ESCAPE           0x005C
     65 #define HASH             0x0023
     66 #define QUOTE            0x0027
     67 #define ZERO             0x0030
     68 #define STARTCOMMAND     0x005B
     69 #define ENDCOMMAND       0x005D
     70 #define OPENSQBRACKET    0x005B
     71 #define CLOSESQBRACKET   0x005D
     72 
     73 using icu::LocalPointer;
     74 using icu::UnicodeString;
     75 
     76 struct Lookahead
     77 {
     78      enum   ETokenType type;
     79      struct UString    value;
     80      struct UString    comment;
     81      uint32_t          line;
     82 };
     83 
     84 /* keep in sync with token defines in read.h */
     85 const char *tokenNames[TOK_TOKEN_COUNT] =
     86 {
     87      "string",             /* A string token, such as "MonthNames" */
     88      "'{'",                 /* An opening brace character */
     89      "'}'",                 /* A closing brace character */
     90      "','",                 /* A comma */
     91      "':'",                 /* A colon */
     92 
     93      "<end of file>",     /* End of the file has been reached successfully */
     94      "<end of line>"
     95 };
     96 
     97 /* Just to store "TRUE" */
     98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
     99 
    100 typedef struct {
    101     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
    102     uint32_t          lookaheadPosition;
    103     UCHARBUF         *buffer;
    104     struct SRBRoot *bundle;
    105     const char     *inputdir;
    106     uint32_t        inputdirLength;
    107     const char     *outputdir;
    108     uint32_t        outputdirLength;
    109     const char     *filename;
    110     UBool           makeBinaryCollation;
    111     UBool           omitCollationRules;
    112 } ParseState;
    113 
    114 typedef struct SResource *
    115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
    116 
    117 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
    118 
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);   bad - value is now a different string
*/
    132 static void
    133 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
    134 {
    135     static uint32_t initTypeStrings = 0;
    136     uint32_t i;
    137 
    138     if (!initTypeStrings)
    139     {
    140         initTypeStrings = 1;
    141     }
    142 
    143     state->lookaheadPosition   = 0;
    144     state->buffer              = buf;
    145 
    146     resetLineNumber();
    147 
    148     for (i = 0; i < MAX_LOOKAHEAD; i++)
    149     {
    150         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    151         if (U_FAILURE(*status))
    152         {
    153             return;
    154         }
    155     }
    156 
    157     *status = U_ZERO_ERROR;
    158 }
    159 
    160 static void
    161 cleanupLookahead(ParseState* state)
    162 {
    163     uint32_t i;
    164     for (i = 0; i <= MAX_LOOKAHEAD; i++)
    165     {
    166         ustr_deinit(&state->lookahead[i].value);
    167         ustr_deinit(&state->lookahead[i].comment);
    168     }
    169 
    170 }
    171 
    172 static enum ETokenType
    173 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
    174 {
    175     enum ETokenType result;
    176     uint32_t          i;
    177 
    178     result = state->lookahead[state->lookaheadPosition].type;
    179 
    180     if (tokenValue != NULL)
    181     {
    182         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
    183     }
    184 
    185     if (linenumber != NULL)
    186     {
    187         *linenumber = state->lookahead[state->lookaheadPosition].line;
    188     }
    189 
    190     if (comment != NULL)
    191     {
    192         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    193     }
    194 
    195     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
    196     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
    197     ustr_setlen(&state->lookahead[i].comment, 0, status);
    198     ustr_setlen(&state->lookahead[i].value, 0, status);
    199     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
    200 
    201     /* printf("getToken, returning %s\n", tokenNames[result]); */
    202 
    203     return result;
    204 }
    205 
    206 static enum ETokenType
    207 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
    208 {
    209     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
    210 
    211     if (U_FAILURE(*status))
    212     {
    213         return TOK_ERROR;
    214     }
    215 
    216     if (lookaheadCount >= MAX_LOOKAHEAD)
    217     {
    218         *status = U_INTERNAL_PROGRAM_ERROR;
    219         return TOK_ERROR;
    220     }
    221 
    222     if (tokenValue != NULL)
    223     {
    224         *tokenValue = &state->lookahead[i].value;
    225     }
    226 
    227     if (linenumber != NULL)
    228     {
    229         *linenumber = state->lookahead[i].line;
    230     }
    231 
    232     if(comment != NULL){
    233         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
    234     }
    235 
    236     return state->lookahead[i].type;
    237 }
    238 
    239 static void
    240 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
    241 {
    242     uint32_t        line;
    243 
    244     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
    245 
    246     if (linenumber != NULL)
    247     {
    248         *linenumber = line;
    249     }
    250 
    251     if (U_FAILURE(*status))
    252     {
    253         return;
    254     }
    255 
    256     if (token != expectedToken)
    257     {
    258         *status = U_INVALID_FORMAT_ERROR;
    259         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
    260     }
    261     else
    262     {
    263         *status = U_ZERO_ERROR;
    264     }
    265 }
    266 
    267 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
    268 {
    269     struct UString *tokenValue;
    270     char           *result;
    271     uint32_t        count;
    272 
    273     expect(state, TOK_STRING, &tokenValue, comment, line, status);
    274 
    275     if (U_FAILURE(*status))
    276     {
    277         return NULL;
    278     }
    279 
    280     count = u_strlen(tokenValue->fChars);
    281     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
    282         *status = U_INVALID_FORMAT_ERROR;
    283         error(*line, "invariant characters required for table keys, binary data, etc.");
    284         return NULL;
    285     }
    286 
    287     result = static_cast<char *>(uprv_malloc(count+1));
    288 
    289     if (result == NULL)
    290     {
    291         *status = U_MEMORY_ALLOCATION_ERROR;
    292         return NULL;
    293     }
    294 
    295     u_UCharsToChars(tokenValue->fChars, result, count+1);
    296     return result;
    297 }
    298 
    299 static struct SResource *
    300 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    301 {
    302     struct SResource *result = NULL;
    303     struct UString   *tokenValue;
    304     FileStream       *file          = NULL;
    305     char              filename[256] = { '\0' };
    306     char              cs[128]       = { '\0' };
    307     uint32_t          line;
    308     UBool quoted = FALSE;
    309     UCHARBUF *ucbuf=NULL;
    310     UChar32   c     = 0;
    311     const char* cp  = NULL;
    312     UChar *pTarget     = NULL;
    313     UChar *target      = NULL;
    314     UChar *targetLimit = NULL;
    315     int32_t size = 0;
    316 
    317     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    318 
    319     if(isVerbose()){
    320         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    321     }
    322 
    323     if (U_FAILURE(*status))
    324     {
    325         return NULL;
    326     }
    327     /* make the filename including the directory */
    328     if (state->inputdir != NULL)
    329     {
    330         uprv_strcat(filename, state->inputdir);
    331 
    332         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    333         {
    334             uprv_strcat(filename, U_FILE_SEP_STRING);
    335         }
    336     }
    337 
    338     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    339 
    340     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    341 
    342     if (U_FAILURE(*status))
    343     {
    344         return NULL;
    345     }
    346     uprv_strcat(filename, cs);
    347 
    348     if(state->omitCollationRules) {
    349         return res_none();
    350     }
    351 
    352     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    353 
    354     if (U_FAILURE(*status)) {
    355         error(line, "An error occured while opening the input file %s\n", filename);
    356         return NULL;
    357     }
    358 
    359     /* We allocate more space than actually required
    360     * since the actual size needed for storing UChars
    361     * is not known in UTF-8 byte stream
    362     */
    363     size        = ucbuf_size(ucbuf) + 1;
    364     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
    365     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    366     target      = pTarget;
    367     targetLimit = pTarget+size;
    368 
    369     /* read the rules into the buffer */
    370     while (target < targetLimit)
    371     {
    372         c = ucbuf_getc(ucbuf, status);
    373         if(c == QUOTE) {
    374             quoted = (UBool)!quoted;
    375         }
    376         /* weiv (06/26/2002): adding the following:
    377          * - preserving spaces in commands [...]
    378          * - # comments until the end of line
    379          */
    380         if (c == STARTCOMMAND && !quoted)
    381         {
    382             /* preserve commands
    383              * closing bracket will be handled by the
    384              * append at the end of the loop
    385              */
    386             while(c != ENDCOMMAND) {
    387                 U_APPEND_CHAR32_ONLY(c, target);
    388                 c = ucbuf_getc(ucbuf, status);
    389             }
    390         }
    391         else if (c == HASH && !quoted) {
    392             /* skip comments */
    393             while(c != CR && c != LF) {
    394                 c = ucbuf_getc(ucbuf, status);
    395             }
    396             continue;
    397         }
    398         else if (c == ESCAPE)
    399         {
    400             c = unescape(ucbuf, status);
    401 
    402             if (c == (UChar32)U_ERR)
    403             {
    404                 uprv_free(pTarget);
    405                 T_FileStream_close(file);
    406                 return NULL;
    407             }
    408         }
    409         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
    410         {
    411             /* ignore spaces carriage returns
    412             * and line feed unless in the form \uXXXX
    413             */
    414             continue;
    415         }
    416 
    417         /* Append UChar * after dissembling if c > 0xffff*/
    418         if (c != (UChar32)U_EOF)
    419         {
    420             U_APPEND_CHAR32_ONLY(c, target);
    421         }
    422         else
    423         {
    424             break;
    425         }
    426     }
    427 
    428     /* terminate the string */
    429     if(target < targetLimit){
    430         *target = 0x0000;
    431     }
    432 
    433     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
    434 
    435 
    436     ucbuf_close(ucbuf);
    437     uprv_free(pTarget);
    438     T_FileStream_close(file);
    439 
    440     return result;
    441 }
    442 
    443 static struct SResource *
    444 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
    445 {
    446     struct SResource *result = NULL;
    447     struct UString   *tokenValue;
    448     FileStream       *file          = NULL;
    449     char              filename[256] = { '\0' };
    450     char              cs[128]       = { '\0' };
    451     uint32_t          line;
    452     UCHARBUF *ucbuf=NULL;
    453     const char* cp  = NULL;
    454     UChar *pTarget     = NULL;
    455     const UChar *pSource     = NULL;
    456     int32_t size = 0;
    457 
    458     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    459 
    460     if(isVerbose()){
    461         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    462     }
    463 
    464     if (U_FAILURE(*status))
    465     {
    466         return NULL;
    467     }
    468     /* make the filename including the directory */
    469     if (state->inputdir != NULL)
    470     {
    471         uprv_strcat(filename, state->inputdir);
    472 
    473         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
    474         {
    475             uprv_strcat(filename, U_FILE_SEP_STRING);
    476         }
    477     }
    478 
    479     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    480 
    481     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    482 
    483     if (U_FAILURE(*status))
    484     {
    485         return NULL;
    486     }
    487     uprv_strcat(filename, cs);
    488 
    489 
    490     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
    491 
    492     if (U_FAILURE(*status)) {
    493         error(line, "An error occured while opening the input file %s\n", filename);
    494         return NULL;
    495     }
    496 
    497     /* We allocate more space than actually required
    498     * since the actual size needed for storing UChars
    499     * is not known in UTF-8 byte stream
    500     */
    501     pSource = ucbuf_getBuffer(ucbuf, &size, status);
    502     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
    503     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
    504 
    505 #if !UCONFIG_NO_TRANSLITERATION
    506     size = utrans_stripRules(pSource, size, pTarget, status);
    507 #else
    508     size = 0;
    509     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
    510 #endif
    511     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
    512 
    513     ucbuf_close(ucbuf);
    514     uprv_free(pTarget);
    515     T_FileStream_close(file);
    516 
    517     return result;
    518 }
    519 static struct SResource* dependencyArray = NULL;
    520 
    521 static struct SResource *
    522 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    523 {
    524     struct SResource *result = NULL;
    525     struct SResource *elem = NULL;
    526     struct UString   *tokenValue;
    527     uint32_t          line;
    528     char              filename[256] = { '\0' };
    529     char              cs[128]       = { '\0' };
    530 
    531     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
    532 
    533     if(isVerbose()){
    534         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    535     }
    536 
    537     if (U_FAILURE(*status))
    538     {
    539         return NULL;
    540     }
    541     /* make the filename including the directory */
    542     if (state->outputdir != NULL)
    543     {
    544         uprv_strcat(filename, state->outputdir);
    545 
    546         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
    547         {
    548             uprv_strcat(filename, U_FILE_SEP_STRING);
    549         }
    550     }
    551 
    552     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
    553 
    554     if (U_FAILURE(*status))
    555     {
    556         return NULL;
    557     }
    558     uprv_strcat(filename, cs);
    559     if(!T_FileStream_file_exists(filename)){
    560         if(isStrict()){
    561             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    562         }else{
    563             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
    564         }
    565     }
    566     if(dependencyArray==NULL){
    567         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
    568     }
    569     if(tag!=NULL){
    570         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    571     }
    572     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
    573 
    574     array_add(dependencyArray, elem, status);
    575 
    576     if (U_FAILURE(*status))
    577     {
    578         return NULL;
    579     }
    580     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    581     return result;
    582 }
    583 static struct SResource *
    584 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
    585 {
    586     struct UString   *tokenValue;
    587     struct SResource *result = NULL;
    588 
    589 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
    590     {
    591         return parseUCARules(tag, startline, status);
    592     }*/
    593     if(isVerbose()){
    594         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    595     }
    596     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    597 
    598     if (U_SUCCESS(*status))
    599     {
    600         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    601         doesn't survive expect either) */
    602 
    603         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    604         if(U_SUCCESS(*status) && result) {
    605             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    606 
    607             if (U_FAILURE(*status))
    608             {
    609                 res_close(result);
    610                 return NULL;
    611             }
    612         }
    613     }
    614 
    615     return result;
    616 }
    617 
    618 static struct SResource *
    619 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
    620 {
    621     struct UString   *tokenValue;
    622     struct SResource *result  = NULL;
    623 
    624     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
    625 
    626     if(isVerbose()){
    627         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
    628     }
    629 
    630     if (U_SUCCESS(*status))
    631     {
    632         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
    633         doesn't survive expect either) */
    634 
    635         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
    636 
    637         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
    638 
    639         if (U_FAILURE(*status))
    640         {
    641             res_close(result);
    642             return NULL;
    643         }
    644     }
    645 
    646     return result;
    647 }
    648 
    649 #if !UCONFIG_NO_COLLATION
    650 
    651 namespace {
    652 
    653 static struct SResource* resLookup(struct SResource* res, const char* key){
    654     struct SResource *current = NULL;
    655     struct SResTable *list;
    656     if (res == res_none()) {
    657         return NULL;
    658     }
    659 
    660     list = &(res->u.fTable);
    661 
    662     current = list->fFirst;
    663     while (current != NULL) {
    664         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
    665             return current;
    666         }
    667         current = current->fNext;
    668     }
    669     return NULL;
    670 }
    671 
    672 class GenrbImporter : public icu::CollationRuleParser::Importer {
    673 public:
    674     GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
    675     virtual ~GenrbImporter();
    676     virtual void getRules(
    677             const char *localeID, const char *collationType,
    678             UnicodeString &rules,
    679             const char *&errorReason, UErrorCode &errorCode);
    680 
    681 private:
    682     const char *inputDir;
    683     const char *outputDir;
    684 };
    685 
    686 GenrbImporter::~GenrbImporter() {}
    687 
    688 void
    689 GenrbImporter::getRules(
    690         const char *localeID, const char *collationType,
    691         UnicodeString &rules,
    692         const char *& /*errorReason*/, UErrorCode &errorCode) {
    693     struct SRBRoot *data         = NULL;
    694     UCHARBUF       *ucbuf        = NULL;
    695     int localeLength = strlen(localeID);
    696     char* filename = (char*)uprv_malloc(localeLength+5);
    697     char           *inputDirBuf  = NULL;
    698     char           *openFileName = NULL;
    699     const char* cp = "";
    700     int32_t i = 0;
    701     int32_t dirlen  = 0;
    702     int32_t filelen = 0;
    703     struct SResource* root;
    704     struct SResource* collations;
    705     struct SResource* collation;
    706     struct SResource* sequence;
    707 
    708     memcpy(filename, localeID, localeLength);
    709     for(i = 0; i < localeLength; i++){
    710         if(filename[i] == '-'){
    711             filename[i] = '_';
    712         }
    713     }
    714     filename[localeLength]   = '.';
    715     filename[localeLength+1] = 't';
    716     filename[localeLength+2] = 'x';
    717     filename[localeLength+3] = 't';
    718     filename[localeLength+4] = 0;
    719 
    720 
    721     if (U_FAILURE(errorCode)) {
    722         return;
    723     }
    724     if(filename==NULL){
    725         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    726         return;
    727     }else{
    728         filelen = (int32_t)uprv_strlen(filename);
    729     }
    730     if(inputDir == NULL) {
    731         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
    732         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
    733         openFileName[0] = '\0';
    734         if (filenameBegin != NULL) {
    735             /*
    736              * When a filename ../../../data/root.txt is specified,
    737              * we presume that the input directory is ../../../data
    738              * This is very important when the resource file includes
    739              * another file, like UCARules.txt or thaidict.brk.
    740              */
    741             int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
    742             inputDirBuf = (char *)uprv_malloc(filenameSize);
    743 
    744             /* test for NULL */
    745             if(inputDirBuf == NULL) {
    746                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    747                 goto finish;
    748             }
    749 
    750             uprv_strncpy(inputDirBuf, filename, filenameSize);
    751             inputDirBuf[filenameSize - 1] = 0;
    752             inputDir = inputDirBuf;
    753             dirlen  = (int32_t)uprv_strlen(inputDir);
    754         }
    755     }else{
    756         dirlen  = (int32_t)uprv_strlen(inputDir);
    757 
    758         if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
    759             openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
    760 
    761             /* test for NULL */
    762             if(openFileName == NULL) {
    763                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    764                 goto finish;
    765             }
    766 
    767             openFileName[0] = '\0';
    768             /*
    769              * append the input dir to openFileName if the first char in
    770              * filename is not file seperation char and the last char input directory is  not '.'.
    771              * This is to support :
    772              * genrb -s. /home/icu/data
    773              * genrb -s. icu/data
    774              * The user cannot mix notations like
    775              * genrb -s. /icu/data --- the absolute path specified. -s redundant
    776              * user should use
    777              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
    778              */
    779             if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
    780                 uprv_strcpy(openFileName, inputDir);
    781                 openFileName[dirlen]     = U_FILE_SEP_CHAR;
    782             }
    783             openFileName[dirlen + 1] = '\0';
    784         } else {
    785             openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
    786 
    787             /* test for NULL */
    788             if(openFileName == NULL) {
    789                 errorCode = U_MEMORY_ALLOCATION_ERROR;
    790                 goto finish;
    791             }
    792 
    793             uprv_strcpy(openFileName, inputDir);
    794 
    795         }
    796     }
    797     uprv_strcat(openFileName, filename);
    798     /* printf("%s\n", openFileName);  */
    799     errorCode = U_ZERO_ERROR;
    800     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
    801 
    802     if(errorCode == U_FILE_ACCESS_ERROR) {
    803 
    804         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
    805         goto finish;
    806     }
    807     if (ucbuf == NULL || U_FAILURE(errorCode)) {
    808         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
    809         goto finish;
    810     }
    811 
    812     /* Parse the data into an SRBRoot */
    813     data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
    814     if (U_FAILURE(errorCode)) {
    815         goto finish;
    816     }
    817 
    818     root = data->fRoot;
    819     collations = resLookup(root, "collations");
    820     if (collations != NULL) {
    821       collation = resLookup(collations, collationType);
    822       if (collation != NULL) {
    823         sequence = resLookup(collation, "Sequence");
    824         if (sequence != NULL) {
    825           // No string pointer aliasing so that we need not hold onto the resource bundle.
    826           rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
    827         }
    828       }
    829     }
    830 
    831 finish:
    832     if (inputDirBuf != NULL) {
    833         uprv_free(inputDirBuf);
    834     }
    835 
    836     if (openFileName != NULL) {
    837         uprv_free(openFileName);
    838     }
    839 
    840     if(ucbuf) {
    841         ucbuf_close(ucbuf);
    842     }
    843 }
    844 
    845 // Quick-and-dirty escaping function.
    846 // Assumes that we are on an ASCII-based platform.
    847 static void
    848 escape(const UChar *s, char *buffer) {
    849     int32_t length = u_strlen(s);
    850     int32_t i = 0;
    851     for (;;) {
    852         UChar32 c;
    853         U16_NEXT(s, i, length, c);
    854         if (c == 0) {
    855             *buffer = 0;
    856             return;
    857         } else if (0x20 <= c && c <= 0x7e) {
    858             // printable ASCII
    859             *buffer++ = (char)c;  // assumes ASCII-based platform
    860         } else {
    861             buffer += sprintf(buffer, "\\u%04X", (int)c);
    862         }
    863     }
    864 }
    865 
    866 }  // namespace
    867 
    868 #endif  // !UCONFIG_NO_COLLATION
    869 
    870 static struct SResource *
    871 addCollation(ParseState* state, struct SResource  *result, const char *collationType,
    872              uint32_t startline, UErrorCode *status)
    873 {
    874     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
    875     struct SResource  *member = NULL;
    876     struct UString    *tokenValue;
    877     struct UString     comment;
    878     enum   ETokenType  token;
    879     char               subtag[1024];
    880     UnicodeString      rules;
    881     UBool              haveRules = FALSE;
    882     UVersionInfo       version;
    883     uint32_t           line;
    884 
    885     /* '{' . (name resource)* '}' */
    886     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
    887 
    888     for (;;)
    889     {
    890         ustr_init(&comment);
    891         token = getToken(state, &tokenValue, &comment, &line, status);
    892 
    893         if (token == TOK_CLOSE_BRACE)
    894         {
    895             break;
    896         }
    897 
    898         if (token != TOK_STRING)
    899         {
    900             res_close(result);
    901             *status = U_INVALID_FORMAT_ERROR;
    902 
    903             if (token == TOK_EOF)
    904             {
    905                 error(startline, "unterminated table");
    906             }
    907             else
    908             {
    909                 error(line, "Unexpected token %s", tokenNames[token]);
    910             }
    911 
    912             return NULL;
    913         }
    914 
    915         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
    916 
    917         if (U_FAILURE(*status))
    918         {
    919             res_close(result);
    920             return NULL;
    921         }
    922 
    923         member = parseResource(state, subtag, NULL, status);
    924 
    925         if (U_FAILURE(*status))
    926         {
    927             res_close(result);
    928             return NULL;
    929         }
    930         if (result == NULL)
    931         {
    932             // Ignore the parsed resources, continue parsing.
    933         }
    934         else if (uprv_strcmp(subtag, "Version") == 0)
    935         {
    936             char     ver[40];
    937             int32_t length = member->u.fString.fLength;
    938 
    939             if (length >= (int32_t) sizeof(ver))
    940             {
    941                 length = (int32_t) sizeof(ver) - 1;
    942             }
    943 
    944             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
    945             u_versionFromString(version, ver);
    946 
    947             table_add(result, member, line, status);
    948             member = NULL;
    949         }
    950         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
    951         {
    952             /* discard duplicate %%CollationBin if any*/
    953         }
    954         else if (uprv_strcmp(subtag, "Sequence") == 0)
    955         {
    956             rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
    957             haveRules = TRUE;
    958             // Defer building the collator until we have seen
    959             // all sub-elements of the collation table, including the Version.
    960             /* in order to achieve smaller data files, we can direct genrb */
    961             /* to omit collation rules */
    962             if(!state->omitCollationRules) {
    963                 table_add(result, member, line, status);
    964                 member = NULL;
    965             }
    966         }
    967         else  // Just copy non-special items.
    968         {
    969             table_add(result, member, line, status);
    970             member = NULL;
    971         }
    972         res_close(member);  // TODO: use LocalPointer
    973         if (U_FAILURE(*status))
    974         {
    975             res_close(result);
    976             return NULL;
    977         }
    978     }
    979 
    980     if (!haveRules) { return result; }
    981 
    982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
    983     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
    984     (void)collationType;
    985 #else
    986     // CLDR ticket #3949, ICU ticket #8082:
    987     // Do not build collation binary data for for-import-only "private" collation rule strings.
    988     if (uprv_strncmp(collationType, "private-", 8) == 0) {
    989         if(isVerbose()) {
    990             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    991         }
    992         return result;
    993     }
    994 
    995     if(!state->makeBinaryCollation) {
    996         if(isVerbose()) {
    997             printf("Not building %s~%s collation binary\n", state->filename, collationType);
    998         }
    999         return result;
   1000     }
   1001     UErrorCode intStatus = U_ZERO_ERROR;
   1002     UParseError parseError;
   1003     uprv_memset(&parseError, 0, sizeof(parseError));
   1004     GenrbImporter importer(state->inputdir, state->outputdir);
   1005     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
   1006     if(U_FAILURE(intStatus)) {
   1007         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
   1008         res_close(result);
   1009         return NULL;  // TODO: use LocalUResourceBundlePointer for result
   1010     }
   1011     icu::CollationBuilder builder(base, intStatus);
   1012     if(uprv_strncmp(collationType, "search", 6) == 0) {
   1013         builder.disableFastLatin();  // build fast-Latin table unless search collator
   1014     }
   1015     LocalPointer<icu::CollationTailoring> t(
   1016             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
   1017     if(U_FAILURE(intStatus)) {
   1018         const char *reason = builder.getErrorReason();
   1019         if(reason == NULL) { reason = ""; }
   1020         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
   1021                 state->filename, collationType,
   1022                 (long)parseError.offset, u_errorName(intStatus), reason);
   1023         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
   1024             // Print pre- and post-context.
   1025             char preBuffer[100], postBuffer[100];
   1026             escape(parseError.preContext, preBuffer);
   1027             escape(parseError.postContext, postBuffer);
   1028             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
   1029         }
   1030         if(isStrict()) {
   1031             *status = intStatus;
   1032             res_close(result);
   1033             return NULL;
   1034         }
   1035     }
   1036     icu::LocalMemory<uint8_t> buffer;
   1037     int32_t capacity = 100000;
   1038     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
   1039     if(dest == NULL) {
   1040         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
   1041                 (long)capacity);
   1042         *status = U_MEMORY_ALLOCATION_ERROR;
   1043         res_close(result);
   1044         return NULL;
   1045     }
   1046     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
   1047     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
   1048             *t, *t->settings, indexes, dest, capacity, intStatus);
   1049     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
   1050         intStatus = U_ZERO_ERROR;
   1051         capacity = totalSize;
   1052         dest = buffer.allocateInsteadAndCopy(capacity);
   1053         if(dest == NULL) {
   1054             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
   1055                     (long)capacity);
   1056             *status = U_MEMORY_ALLOCATION_ERROR;
   1057             res_close(result);
   1058             return NULL;
   1059         }
   1060         totalSize = icu::CollationDataWriter::writeTailoring(
   1061                 *t, *t->settings, indexes, dest, capacity, intStatus);
   1062     }
   1063     if(U_FAILURE(intStatus)) {
   1064         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
   1065                 u_errorName(intStatus));
   1066         res_close(result);
   1067         return NULL;
   1068     }
   1069     if(isVerbose()) {
   1070         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
   1071         icu::CollationInfo::printSizes(totalSize, indexes);
   1072         if(t->settings->hasReordering()) {
   1073             printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
   1074             icu::CollationInfo::printReorderRanges(
   1075                     *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
   1076         }
   1077     }
   1078     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
   1079     table_add(result, collationBin, line, status);
   1080     if (U_FAILURE(*status)) {
   1081         res_close(result);
   1082         return NULL;
   1083     }
   1084 #endif
   1085     return result;
   1086 }
   1087 
   1088 static UBool
   1089 keepCollationType(const char *type) {  // android-changed
   1090     // BEGIN android-added
   1091     if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
   1092     if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
   1093     // END android-added
   1094     return TRUE;
   1095 }
   1096 
   1097 static struct SResource *
   1098 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
   1099 {
   1100     struct SResource  *result = NULL;
   1101     struct SResource  *member = NULL;
   1102     struct SResource  *collationRes = NULL;
   1103     struct UString    *tokenValue;
   1104     struct UString     comment;
   1105     enum   ETokenType  token;
   1106     char               subtag[1024], typeKeyword[1024];
   1107     uint32_t           line;
   1108 
   1109     result = table_open(state->bundle, tag, NULL, status);
   1110 
   1111     if (result == NULL || U_FAILURE(*status))
   1112     {
   1113         return NULL;
   1114     }
   1115     if(isVerbose()){
   1116         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1117     }
   1118     if(!newCollation) {
   1119         return addCollation(state, result, "(no type)", startline, status);
   1120     }
   1121     else {
   1122         for(;;) {
   1123             ustr_init(&comment);
   1124             token = getToken(state, &tokenValue, &comment, &line, status);
   1125 
   1126             if (token == TOK_CLOSE_BRACE)
   1127             {
   1128                 return result;
   1129             }
   1130 
   1131             if (token != TOK_STRING)
   1132             {
   1133                 res_close(result);
   1134                 *status = U_INVALID_FORMAT_ERROR;
   1135 
   1136                 if (token == TOK_EOF)
   1137                 {
   1138                     error(startline, "unterminated table");
   1139                 }
   1140                 else
   1141                 {
   1142                     error(line, "Unexpected token %s", tokenNames[token]);
   1143                 }
   1144 
   1145                 return NULL;
   1146             }
   1147 
   1148             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1149 
   1150             if (U_FAILURE(*status))
   1151             {
   1152                 res_close(result);
   1153                 return NULL;
   1154             }
   1155 
   1156             if (uprv_strcmp(subtag, "default") == 0)
   1157             {
   1158                 member = parseResource(state, subtag, NULL, status);
   1159 
   1160                 if (U_FAILURE(*status))
   1161                 {
   1162                     res_close(result);
   1163                     return NULL;
   1164                 }
   1165 
   1166                 table_add(result, member, line, status);
   1167             }
   1168             else
   1169             {
   1170                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
   1171                 /* this probably needs to be refactored or recursively use the parser */
   1172                 /* first we assume that our collation table won't have the explicit type */
   1173                 /* then, we cannot handle aliases */
   1174                 if(token == TOK_OPEN_BRACE) {
   1175                     token = getToken(state, &tokenValue, &comment, &line, status);
   1176                     if (keepCollationType(subtag)) {
   1177                         collationRes = table_open(state->bundle, subtag, NULL, status);
   1178                     } else {
   1179                         collationRes = NULL;
   1180                     }
   1181                     // need to parse the collation data regardless
   1182                     collationRes = addCollation(state, collationRes, subtag, startline, status);
   1183                     if (collationRes != NULL) {
   1184                         table_add(result, collationRes, startline, status);
   1185                     }
   1186                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
   1187                     /* we could have a table too */
   1188                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
   1189                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
   1190                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
   1191                         member = parseResource(state, subtag, NULL, status);
   1192                         if (U_FAILURE(*status))
   1193                         {
   1194                             res_close(result);
   1195                             return NULL;
   1196                         }
   1197 
   1198                         table_add(result, member, line, status);
   1199                     } else {
   1200                         res_close(result);
   1201                         *status = U_INVALID_FORMAT_ERROR;
   1202                         return NULL;
   1203                     }
   1204                 } else {
   1205                     res_close(result);
   1206                     *status = U_INVALID_FORMAT_ERROR;
   1207                     return NULL;
   1208                 }
   1209             }
   1210 
   1211             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
   1212 
   1213             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
   1214 
   1215             if (U_FAILURE(*status))
   1216             {
   1217                 res_close(result);
   1218                 return NULL;
   1219             }
   1220         }
   1221     }
   1222 }
   1223 
   1224 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
   1225    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
   1226 static struct SResource *
   1227 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
   1228 {
   1229     struct SResource  *member = NULL;
   1230     struct UString    *tokenValue=NULL;
   1231     struct UString    comment;
   1232     enum   ETokenType token;
   1233     char              subtag[1024];
   1234     uint32_t          line;
   1235     UBool             readToken = FALSE;
   1236 
   1237     /* '{' . (name resource)* '}' */
   1238 
   1239     if(isVerbose()){
   1240         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
   1241     }
   1242     for (;;)
   1243     {
   1244         ustr_init(&comment);
   1245         token = getToken(state, &tokenValue, &comment, &line, status);
   1246 
   1247         if (token == TOK_CLOSE_BRACE)
   1248         {
   1249             if (!readToken) {
   1250                 warning(startline, "Encountered empty table");
   1251             }
   1252             return table;
   1253         }
   1254 
   1255         if (token != TOK_STRING)
   1256         {
   1257             *status = U_INVALID_FORMAT_ERROR;
   1258 
   1259             if (token == TOK_EOF)
   1260             {
   1261                 error(startline, "unterminated table");
   1262             }
   1263             else
   1264             {
   1265                 error(line, "unexpected token %s", tokenNames[token]);
   1266             }
   1267 
   1268             return NULL;
   1269         }
   1270 
   1271         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
   1272             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
   1273         } else {
   1274             *status = U_INVALID_FORMAT_ERROR;
   1275             error(line, "invariant characters required for table keys");
   1276             return NULL;
   1277         }
   1278 
   1279         if (U_FAILURE(*status))
   1280         {
   1281             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
   1282             return NULL;
   1283         }
   1284 
   1285         member = parseResource(state, subtag, &comment, status);
   1286 
   1287         if (member == NULL || U_FAILURE(*status))
   1288         {
   1289             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
   1290             return NULL;
   1291         }
   1292 
   1293         table_add(table, member, line, status);
   1294 
   1295         if (U_FAILURE(*status))
   1296         {
   1297             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
   1298             return NULL;
   1299         }
   1300         readToken = TRUE;
   1301         ustr_deinit(&comment);
   1302    }
   1303 
   1304     /* not reached */
   1305     /* A compiler warning will appear if all paths don't contain a return statement. */
   1306 /*     *status = U_INTERNAL_PROGRAM_ERROR;
   1307      return NULL;*/
   1308 }
   1309 
   1310 static struct SResource *
   1311 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1312 {
   1313     struct SResource *result;
   1314 
   1315     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
   1316     {
   1317         return parseCollationElements(state, tag, startline, FALSE, status);
   1318     }
   1319     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
   1320     {
   1321         return parseCollationElements(state, tag, startline, TRUE, status);
   1322     }
   1323     if(isVerbose()){
   1324         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1325     }
   1326 
   1327     result = table_open(state->bundle, tag, comment, status);
   1328 
   1329     if (result == NULL || U_FAILURE(*status))
   1330     {
   1331         return NULL;
   1332     }
   1333     return realParseTable(state, result, tag, startline,  status);
   1334 }
   1335 
   1336 static struct SResource *
   1337 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1338 {
   1339     struct SResource  *result = NULL;
   1340     struct SResource  *member = NULL;
   1341     struct UString    *tokenValue;
   1342     struct UString    memberComments;
   1343     enum   ETokenType token;
   1344     UBool             readToken = FALSE;
   1345 
   1346     result = array_open(state->bundle, tag, comment, status);
   1347 
   1348     if (result == NULL || U_FAILURE(*status))
   1349     {
   1350         return NULL;
   1351     }
   1352     if(isVerbose()){
   1353         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1354     }
   1355 
   1356     ustr_init(&memberComments);
   1357 
   1358     /* '{' . resource [','] '}' */
   1359     for (;;)
   1360     {
   1361         /* reset length */
   1362         ustr_setlen(&memberComments, 0, status);
   1363 
   1364         /* check for end of array, but don't consume next token unless it really is the end */
   1365         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
   1366 
   1367 
   1368         if (token == TOK_CLOSE_BRACE)
   1369         {
   1370             getToken(state, NULL, NULL, NULL, status);
   1371             if (!readToken) {
   1372                 warning(startline, "Encountered empty array");
   1373             }
   1374             break;
   1375         }
   1376 
   1377         if (token == TOK_EOF)
   1378         {
   1379             res_close(result);
   1380             *status = U_INVALID_FORMAT_ERROR;
   1381             error(startline, "unterminated array");
   1382             return NULL;
   1383         }
   1384 
   1385         /* string arrays are a special case */
   1386         if (token == TOK_STRING)
   1387         {
   1388             getToken(state, &tokenValue, &memberComments, NULL, status);
   1389             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
   1390         }
   1391         else
   1392         {
   1393             member = parseResource(state, NULL, &memberComments, status);
   1394         }
   1395 
   1396         if (member == NULL || U_FAILURE(*status))
   1397         {
   1398             res_close(result);
   1399             return NULL;
   1400         }
   1401 
   1402         array_add(result, member, status);
   1403 
   1404         if (U_FAILURE(*status))
   1405         {
   1406             res_close(result);
   1407             return NULL;
   1408         }
   1409 
   1410         /* eat optional comma if present */
   1411         token = peekToken(state, 0, NULL, NULL, NULL, status);
   1412 
   1413         if (token == TOK_COMMA)
   1414         {
   1415             getToken(state, NULL, NULL, NULL, status);
   1416         }
   1417 
   1418         if (U_FAILURE(*status))
   1419         {
   1420             res_close(result);
   1421             return NULL;
   1422         }
   1423         readToken = TRUE;
   1424     }
   1425 
   1426     ustr_deinit(&memberComments);
   1427     return result;
   1428 }
   1429 
   1430 static struct SResource *
   1431 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1432 {
   1433     struct SResource  *result = NULL;
   1434     enum   ETokenType  token;
   1435     char              *string;
   1436     int32_t            value;
   1437     UBool              readToken = FALSE;
   1438     char              *stopstring;
   1439     uint32_t           len;
   1440     struct UString     memberComments;
   1441 
   1442     result = intvector_open(state->bundle, tag, comment, status);
   1443 
   1444     if (result == NULL || U_FAILURE(*status))
   1445     {
   1446         return NULL;
   1447     }
   1448 
   1449     if(isVerbose()){
   1450         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1451     }
   1452     ustr_init(&memberComments);
   1453     /* '{' . string [','] '}' */
   1454     for (;;)
   1455     {
   1456         ustr_setlen(&memberComments, 0, status);
   1457 
   1458         /* check for end of array, but don't consume next token unless it really is the end */
   1459         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
   1460 
   1461         if (token == TOK_CLOSE_BRACE)
   1462         {
   1463             /* it's the end, consume the close brace */
   1464             getToken(state, NULL, NULL, NULL, status);
   1465             if (!readToken) {
   1466                 warning(startline, "Encountered empty int vector");
   1467             }
   1468             ustr_deinit(&memberComments);
   1469             return result;
   1470         }
   1471 
   1472         string = getInvariantString(state, NULL, NULL, status);
   1473 
   1474         if (U_FAILURE(*status))
   1475         {
   1476             res_close(result);
   1477             return NULL;
   1478         }
   1479 
   1480         /* For handling illegal char in the Intvector */
   1481         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
   1482         len=(uint32_t)(stopstring-string);
   1483 
   1484         if(len==uprv_strlen(string))
   1485         {
   1486             intvector_add(result, value, status);
   1487             uprv_free(string);
   1488             token = peekToken(state, 0, NULL, NULL, NULL, status);
   1489         }
   1490         else
   1491         {
   1492             uprv_free(string);
   1493             *status=U_INVALID_CHAR_FOUND;
   1494         }
   1495 
   1496         if (U_FAILURE(*status))
   1497         {
   1498             res_close(result);
   1499             return NULL;
   1500         }
   1501 
   1502         /* the comma is optional (even though it is required to prevent the reader from concatenating
   1503         consecutive entries) so that a missing comma on the last entry isn't an error */
   1504         if (token == TOK_COMMA)
   1505         {
   1506             getToken(state, NULL, NULL, NULL, status);
   1507         }
   1508         readToken = TRUE;
   1509     }
   1510 
   1511     /* not reached */
   1512     /* A compiler warning will appear if all paths don't contain a return statement. */
   1513 /*    intvector_close(result, status);
   1514     *status = U_INTERNAL_PROGRAM_ERROR;
   1515     return NULL;*/
   1516 }
   1517 
   1518 static struct SResource *
   1519 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1520 {
   1521     struct SResource *result = NULL;
   1522     uint8_t          *value;
   1523     char             *string;
   1524     char              toConv[3] = {'\0', '\0', '\0'};
   1525     uint32_t          count;
   1526     uint32_t          i;
   1527     uint32_t          line;
   1528     char             *stopstring;
   1529     uint32_t          len;
   1530 
   1531     string = getInvariantString(state, &line, NULL, status);
   1532 
   1533     if (string == NULL || U_FAILURE(*status))
   1534     {
   1535         return NULL;
   1536     }
   1537 
   1538     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1539 
   1540     if (U_FAILURE(*status))
   1541     {
   1542         uprv_free(string);
   1543         return NULL;
   1544     }
   1545 
   1546     if(isVerbose()){
   1547         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1548     }
   1549 
   1550     count = (uint32_t)uprv_strlen(string);
   1551     if (count > 0){
   1552         if((count % 2)==0){
   1553             value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
   1554 
   1555             if (value == NULL)
   1556             {
   1557                 uprv_free(string);
   1558                 *status = U_MEMORY_ALLOCATION_ERROR;
   1559                 return NULL;
   1560             }
   1561 
   1562             for (i = 0; i < count; i += 2)
   1563             {
   1564                 toConv[0] = string[i];
   1565                 toConv[1] = string[i + 1];
   1566 
   1567                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
   1568                 len=(uint32_t)(stopstring-toConv);
   1569 
   1570                 if(len!=uprv_strlen(toConv))
   1571                 {
   1572                     uprv_free(string);
   1573                     *status=U_INVALID_CHAR_FOUND;
   1574                     return NULL;
   1575                 }
   1576             }
   1577 
   1578             result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
   1579 
   1580             uprv_free(value);
   1581         }
   1582         else
   1583         {
   1584             *status = U_INVALID_CHAR_FOUND;
   1585             uprv_free(string);
   1586             error(line, "Encountered invalid binary string");
   1587             return NULL;
   1588         }
   1589     }
   1590     else
   1591     {
   1592         result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
   1593         warning(startline, "Encountered empty binary tag");
   1594     }
   1595     uprv_free(string);
   1596 
   1597     return result;
   1598 }
   1599 
   1600 static struct SResource *
   1601 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
   1602 {
   1603     struct SResource *result = NULL;
   1604     int32_t           value;
   1605     char             *string;
   1606     char             *stopstring;
   1607     uint32_t          len;
   1608 
   1609     string = getInvariantString(state, NULL, NULL, status);
   1610 
   1611     if (string == NULL || U_FAILURE(*status))
   1612     {
   1613         return NULL;
   1614     }
   1615 
   1616     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1617 
   1618     if (U_FAILURE(*status))
   1619     {
   1620         uprv_free(string);
   1621         return NULL;
   1622     }
   1623 
   1624     if(isVerbose()){
   1625         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1626     }
   1627 
   1628     if (uprv_strlen(string) <= 0)
   1629     {
   1630         warning(startline, "Encountered empty integer. Default value is 0.");
   1631     }
   1632 
   1633     /* Allow integer support for hexdecimal, octal digit and decimal*/
   1634     /* and handle illegal char in the integer*/
   1635     value = uprv_strtoul(string, &stopstring, 0);
   1636     len=(uint32_t)(stopstring-string);
   1637     if(len==uprv_strlen(string))
   1638     {
   1639         result = int_open(state->bundle, tag, value, comment, status);
   1640     }
   1641     else
   1642     {
   1643         *status=U_INVALID_CHAR_FOUND;
   1644     }
   1645     uprv_free(string);
   1646 
   1647     return result;
   1648 }
   1649 
   1650 static struct SResource *
   1651 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1652 {
   1653     struct SResource *result;
   1654     FileStream       *file;
   1655     int32_t           len;
   1656     uint8_t          *data;
   1657     char             *filename;
   1658     uint32_t          line;
   1659     char     *fullname = NULL;
   1660     filename = getInvariantString(state, &line, NULL, status);
   1661 
   1662     if (U_FAILURE(*status))
   1663     {
   1664         return NULL;
   1665     }
   1666 
   1667     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1668 
   1669     if (U_FAILURE(*status))
   1670     {
   1671         uprv_free(filename);
   1672         return NULL;
   1673     }
   1674 
   1675     if(isVerbose()){
   1676         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1677     }
   1678 
   1679     /* Open the input file for reading */
   1680     if (state->inputdir == NULL)
   1681     {
   1682 #if 1
   1683         /*
   1684          * Always save file file name, even if there's
   1685          * no input directory specified. MIGHT BREAK SOMETHING
   1686          */
   1687         int32_t filenameLength = uprv_strlen(filename);
   1688 
   1689         fullname = (char *) uprv_malloc(filenameLength + 1);
   1690         uprv_strcpy(fullname, filename);
   1691 #endif
   1692 
   1693         file = T_FileStream_open(filename, "rb");
   1694     }
   1695     else
   1696     {
   1697 
   1698         int32_t  count     = (int32_t)uprv_strlen(filename);
   1699 
   1700         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   1701         {
   1702             fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
   1703 
   1704             /* test for NULL */
   1705             if(fullname == NULL)
   1706             {
   1707                 *status = U_MEMORY_ALLOCATION_ERROR;
   1708                 return NULL;
   1709             }
   1710 
   1711             uprv_strcpy(fullname, state->inputdir);
   1712 
   1713             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
   1714             fullname[state->inputdirLength + 1] = '\0';
   1715 
   1716             uprv_strcat(fullname, filename);
   1717         }
   1718         else
   1719         {
   1720             fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
   1721 
   1722             /* test for NULL */
   1723             if(fullname == NULL)
   1724             {
   1725                 *status = U_MEMORY_ALLOCATION_ERROR;
   1726                 return NULL;
   1727             }
   1728 
   1729             uprv_strcpy(fullname, state->inputdir);
   1730             uprv_strcat(fullname, filename);
   1731         }
   1732 
   1733         file = T_FileStream_open(fullname, "rb");
   1734 
   1735     }
   1736 
   1737     if (file == NULL)
   1738     {
   1739         error(line, "couldn't open input file %s", filename);
   1740         *status = U_FILE_ACCESS_ERROR;
   1741         return NULL;
   1742     }
   1743 
   1744     len  = T_FileStream_size(file);
   1745     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
   1746     /* test for NULL */
   1747     if(data == NULL)
   1748     {
   1749         *status = U_MEMORY_ALLOCATION_ERROR;
   1750         T_FileStream_close (file);
   1751         return NULL;
   1752     }
   1753 
   1754     /* int32_t numRead = */ T_FileStream_read  (file, data, len);
   1755     T_FileStream_close (file);
   1756 
   1757     result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
   1758 
   1759     uprv_free(data);
   1760     uprv_free(filename);
   1761     uprv_free(fullname);
   1762 
   1763     return result;
   1764 }
   1765 
   1766 static struct SResource *
   1767 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
   1768 {
   1769     struct SResource *result;
   1770     int32_t           len=0;
   1771     char             *filename;
   1772     uint32_t          line;
   1773     UChar *pTarget     = NULL;
   1774 
   1775     UCHARBUF *ucbuf;
   1776     char     *fullname = NULL;
   1777     int32_t  count     = 0;
   1778     const char* cp = NULL;
   1779     const UChar* uBuffer = NULL;
   1780 
   1781     filename = getInvariantString(state, &line, NULL, status);
   1782     count     = (int32_t)uprv_strlen(filename);
   1783 
   1784     if (U_FAILURE(*status))
   1785     {
   1786         return NULL;
   1787     }
   1788 
   1789     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
   1790 
   1791     if (U_FAILURE(*status))
   1792     {
   1793         uprv_free(filename);
   1794         return NULL;
   1795     }
   1796 
   1797     if(isVerbose()){
   1798         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1799     }
   1800 
   1801     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
   1802     /* test for NULL */
   1803     if(fullname == NULL)
   1804     {
   1805         *status = U_MEMORY_ALLOCATION_ERROR;
   1806         uprv_free(filename);
   1807         return NULL;
   1808     }
   1809 
   1810     if(state->inputdir!=NULL){
   1811         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
   1812         {
   1813 
   1814             uprv_strcpy(fullname, state->inputdir);
   1815 
   1816             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
   1817             fullname[state->inputdirLength + 1] = '\0';
   1818 
   1819             uprv_strcat(fullname, filename);
   1820         }
   1821         else
   1822         {
   1823             uprv_strcpy(fullname, state->inputdir);
   1824             uprv_strcat(fullname, filename);
   1825         }
   1826     }else{
   1827         uprv_strcpy(fullname,filename);
   1828     }
   1829 
   1830     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
   1831 
   1832     if (U_FAILURE(*status)) {
   1833         error(line, "couldn't open input file %s\n", filename);
   1834         return NULL;
   1835     }
   1836 
   1837     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
   1838     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
   1839 
   1840     ucbuf_close(ucbuf);
   1841 
   1842     uprv_free(pTarget);
   1843 
   1844     uprv_free(filename);
   1845     uprv_free(fullname);
   1846 
   1847     return result;
   1848 }
   1849 
   1850 
   1851 
   1852 
   1853 
   1854 U_STRING_DECL(k_type_string,    "string",    6);
   1855 U_STRING_DECL(k_type_binary,    "binary",    6);
   1856 U_STRING_DECL(k_type_bin,       "bin",       3);
   1857 U_STRING_DECL(k_type_table,     "table",     5);
   1858 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
   1859 U_STRING_DECL(k_type_int,       "int",       3);
   1860 U_STRING_DECL(k_type_integer,   "integer",   7);
   1861 U_STRING_DECL(k_type_array,     "array",     5);
   1862 U_STRING_DECL(k_type_alias,     "alias",     5);
   1863 U_STRING_DECL(k_type_intvector, "intvector", 9);
   1864 U_STRING_DECL(k_type_import,    "import",    6);
   1865 U_STRING_DECL(k_type_include,   "include",   7);
   1866 
   1867 /* Various non-standard processing plugins that create one or more special resources. */
   1868 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1869 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
   1870 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
   1871 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
   1872 
   1873 typedef enum EResourceType
   1874 {
   1875     RESTYPE_UNKNOWN,
   1876     RESTYPE_STRING,
   1877     RESTYPE_BINARY,
   1878     RESTYPE_TABLE,
   1879     RESTYPE_TABLE_NO_FALLBACK,
   1880     RESTYPE_INTEGER,
   1881     RESTYPE_ARRAY,
   1882     RESTYPE_ALIAS,
   1883     RESTYPE_INTVECTOR,
   1884     RESTYPE_IMPORT,
   1885     RESTYPE_INCLUDE,
   1886     RESTYPE_PROCESS_UCA_RULES,
   1887     RESTYPE_PROCESS_COLLATION,
   1888     RESTYPE_PROCESS_TRANSLITERATOR,
   1889     RESTYPE_PROCESS_DEPENDENCY,
   1890     RESTYPE_RESERVED
   1891 } EResourceType;
   1892 
   1893 static struct {
   1894     const char *nameChars;   /* only used for debugging */
   1895     const UChar *nameUChars;
   1896     ParseResourceFunction *parseFunction;
   1897 } gResourceTypes[] = {
   1898     {"Unknown", NULL, NULL},
   1899     {"string", k_type_string, parseString},
   1900     {"binary", k_type_binary, parseBinary},
   1901     {"table", k_type_table, parseTable},
   1902     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
   1903     {"integer", k_type_integer, parseInteger},
   1904     {"array", k_type_array, parseArray},
   1905     {"alias", k_type_alias, parseAlias},
   1906     {"intvector", k_type_intvector, parseIntVector},
   1907     {"import", k_type_import, parseImport},
   1908     {"include", k_type_include, parseInclude},
   1909     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
   1910     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
   1911     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
   1912     {"process(dependency)", k_type_plugin_dependency, parseDependency},
   1913     {"reserved", NULL, NULL}
   1914 };
   1915 
   1916 void initParser()
   1917 {
   1918     U_STRING_INIT(k_type_string,    "string",    6);
   1919     U_STRING_INIT(k_type_binary,    "binary",    6);
   1920     U_STRING_INIT(k_type_bin,       "bin",       3);
   1921     U_STRING_INIT(k_type_table,     "table",     5);
   1922     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
   1923     U_STRING_INIT(k_type_int,       "int",       3);
   1924     U_STRING_INIT(k_type_integer,   "integer",   7);
   1925     U_STRING_INIT(k_type_array,     "array",     5);
   1926     U_STRING_INIT(k_type_alias,     "alias",     5);
   1927     U_STRING_INIT(k_type_intvector, "intvector", 9);
   1928     U_STRING_INIT(k_type_import,    "import",    6);
   1929     U_STRING_INIT(k_type_include,   "include",   7);
   1930 
   1931     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
   1932     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
   1933     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
   1934     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
   1935 }
   1936 
   1937 static inline UBool isTable(enum EResourceType type) {
   1938     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
   1939 }
   1940 
   1941 static enum EResourceType
   1942 parseResourceType(ParseState* state, UErrorCode *status)
   1943 {
   1944     struct UString        *tokenValue;
   1945     struct UString        comment;
   1946     enum   EResourceType  result = RESTYPE_UNKNOWN;
   1947     uint32_t              line=0;
   1948     ustr_init(&comment);
   1949     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
   1950 
   1951     if (U_FAILURE(*status))
   1952     {
   1953         return RESTYPE_UNKNOWN;
   1954     }
   1955 
   1956     *status = U_ZERO_ERROR;
   1957 
   1958     /* Search for normal types */
   1959     result=RESTYPE_UNKNOWN;
   1960     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
   1961         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
   1962             break;
   1963         }
   1964     }
   1965     /* Now search for the aliases */
   1966     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
   1967         result = RESTYPE_INTEGER;
   1968     }
   1969     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
   1970         result = RESTYPE_BINARY;
   1971     }
   1972     else if (result == RESTYPE_RESERVED) {
   1973         char tokenBuffer[1024];
   1974         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
   1975         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
   1976         *status = U_INVALID_FORMAT_ERROR;
   1977         error(line, "unknown resource type '%s'", tokenBuffer);
   1978     }
   1979 
   1980     return result;
   1981 }
   1982 
   1983 /* parse a non-top-level resource */
   1984 static struct SResource *
   1985 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
   1986 {
   1987     enum   ETokenType      token;
   1988     enum   EResourceType  resType = RESTYPE_UNKNOWN;
   1989     ParseResourceFunction *parseFunction = NULL;
   1990     struct UString        *tokenValue;
   1991     uint32_t                 startline;
   1992     uint32_t                 line;
   1993 
   1994 
   1995     token = getToken(state, &tokenValue, NULL, &startline, status);
   1996 
   1997     if(isVerbose()){
   1998         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
   1999     }
   2000 
   2001     /* name . [ ':' type ] '{' resource '}' */
   2002     /* This function parses from the colon onwards.  If the colon is present, parse the
   2003     type then try to parse a resource of that type.  If there is no explicit type,
   2004     work it out using the lookahead tokens. */
   2005     switch (token)
   2006     {
   2007     case TOK_EOF:
   2008         *status = U_INVALID_FORMAT_ERROR;
   2009         error(startline, "Unexpected EOF encountered");
   2010         return NULL;
   2011 
   2012     case TOK_ERROR:
   2013         *status = U_INVALID_FORMAT_ERROR;
   2014         return NULL;
   2015 
   2016     case TOK_COLON:
   2017         resType = parseResourceType(state, status);
   2018         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
   2019 
   2020         if (U_FAILURE(*status))
   2021         {
   2022             return NULL;
   2023         }
   2024 
   2025         break;
   2026 
   2027     case TOK_OPEN_BRACE:
   2028         break;
   2029 
   2030     default:
   2031         *status = U_INVALID_FORMAT_ERROR;
   2032         error(startline, "syntax error while reading a resource, expected '{' or ':'");
   2033         return NULL;
   2034     }
   2035 
   2036 
   2037     if (resType == RESTYPE_UNKNOWN)
   2038     {
   2039         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
   2040         We could have any of the following:
   2041         { {         => array (nested)
   2042         { :/}       => array
   2043         { string ,  => string array
   2044 
   2045         { string {  => table
   2046 
   2047         { string :/{    => table
   2048         { string }      => string
   2049         */
   2050 
   2051         token = peekToken(state, 0, NULL, &line, NULL,status);
   2052 
   2053         if (U_FAILURE(*status))
   2054         {
   2055             return NULL;
   2056         }
   2057 
   2058         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
   2059         {
   2060             resType = RESTYPE_ARRAY;
   2061         }
   2062         else if (token == TOK_STRING)
   2063         {
   2064             token = peekToken(state, 1, NULL, &line, NULL, status);
   2065 
   2066             if (U_FAILURE(*status))
   2067             {
   2068                 return NULL;
   2069             }
   2070 
   2071             switch (token)
   2072             {
   2073             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
   2074             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
   2075             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
   2076             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
   2077             default:
   2078                 *status = U_INVALID_FORMAT_ERROR;
   2079                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
   2080                 return NULL;
   2081             }
   2082         }
   2083         else
   2084         {
   2085             *status = U_INVALID_FORMAT_ERROR;
   2086             error(line, "Unexpected token after '{'");
   2087             return NULL;
   2088         }
   2089 
   2090         /* printf("Type guessed as %s\n", resourceNames[resType]); */
   2091     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
   2092         *status = U_INVALID_FORMAT_ERROR;
   2093         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
   2094         return NULL;
   2095     }
   2096 
   2097 
   2098     /* We should now know what we need to parse next, so call the appropriate parser
   2099     function and return. */
   2100     parseFunction = gResourceTypes[resType].parseFunction;
   2101     if (parseFunction != NULL) {
   2102         return parseFunction(state, tag, startline, comment, status);
   2103     }
   2104     else {
   2105         *status = U_INTERNAL_PROGRAM_ERROR;
   2106         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
   2107     }
   2108 
   2109     return NULL;
   2110 }
   2111 
   2112 /* parse the top-level resource */
   2113 struct SRBRoot *
   2114 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
   2115       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
   2116 {
   2117     struct UString    *tokenValue;
   2118     struct UString    comment;
   2119     uint32_t           line;
   2120     enum EResourceType bundleType;
   2121     enum ETokenType    token;
   2122     ParseState state;
   2123     uint32_t i;
   2124 
   2125 
   2126     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
   2127     {
   2128         ustr_init(&state.lookahead[i].value);
   2129         ustr_init(&state.lookahead[i].comment);
   2130     }
   2131 
   2132     initLookahead(&state, buf, status);
   2133 
   2134     state.inputdir       = inputDir;
   2135     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
   2136     state.outputdir       = outputDir;
   2137     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
   2138     state.filename = filename;
   2139     state.makeBinaryCollation = makeBinaryCollation;
   2140     state.omitCollationRules = omitCollationRules;
   2141 
   2142     ustr_init(&comment);
   2143     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
   2144 
   2145     state.bundle = bundle_open(&comment, FALSE, status);
   2146 
   2147     if (state.bundle == NULL || U_FAILURE(*status))
   2148     {
   2149         return NULL;
   2150     }
   2151 
   2152 
   2153     bundle_setlocale(state.bundle, tokenValue->fChars, status);
   2154 
   2155     /* The following code is to make Empty bundle work no matter with :table specifer or not */
   2156     token = getToken(&state, NULL, NULL, &line, status);
   2157     if(token==TOK_COLON) {
   2158         *status=U_ZERO_ERROR;
   2159         bundleType=parseResourceType(&state, status);
   2160 
   2161         if(isTable(bundleType))
   2162         {
   2163             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
   2164         }
   2165         else
   2166         {
   2167             *status=U_PARSE_ERROR;
   2168              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
   2169         }
   2170     }
   2171     else
   2172     {
   2173         /* not a colon */
   2174         if(token==TOK_OPEN_BRACE)
   2175         {
   2176             *status=U_ZERO_ERROR;
   2177             bundleType=RESTYPE_TABLE;
   2178         }
   2179         else
   2180         {
   2181             /* neither colon nor open brace */
   2182             *status=U_PARSE_ERROR;
   2183             bundleType=RESTYPE_UNKNOWN;
   2184             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
   2185         }
   2186     }
   2187 
   2188     if (U_FAILURE(*status))
   2189     {
   2190         bundle_close(state.bundle, status);
   2191         return NULL;
   2192     }
   2193 
   2194     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
   2195         /*
   2196          * Parse a top-level table with the table(nofallback) declaration.
   2197          * This is the same as a regular table, but also sets the
   2198          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
   2199          */
   2200         state.bundle->noFallback=TRUE;
   2201     }
   2202     /* top-level tables need not handle special table names like "collations" */
   2203     realParseTable(&state, state.bundle->fRoot, NULL, line, status);
   2204     if(dependencyArray!=NULL){
   2205         table_add(state.bundle->fRoot, dependencyArray, 0, status);
   2206         dependencyArray = NULL;
   2207     }
   2208    if (U_FAILURE(*status))
   2209     {
   2210         bundle_close(state.bundle, status);
   2211         res_close(dependencyArray);
   2212         return NULL;
   2213     }
   2214 
   2215     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
   2216     {
   2217         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
   2218         if(isStrict()){
   2219             *status = U_INVALID_FORMAT_ERROR;
   2220             return NULL;
   2221         }
   2222     }
   2223 
   2224     cleanupLookahead(&state);
   2225     ustr_deinit(&comment);
   2226     return state.bundle;
   2227 }
   2228